Implement FC layer with helper #4726
@@ -0,0 +1,160 @@
from paddle.v2.framework.framework import Variable, OpProtoHolder, g_program
import paddle.v2.framework.core as core
import copy
import itertools


def unique_name(prefix):
    uid = core.unique_integer()  # unique during whole process.
    return "_".join([prefix, str(uid)])


class LayerHelper(object):
    def __init__(self, layer_type, **kwargs):
        self.kwargs = kwargs
        self.layer_type = layer_type
        name = self.kwargs.get('name', None)
        if name is None:
            self.kwargs['name'] = unique_name(self.layer_type)

    @property
    def name(self):
        return self.kwargs['name']

    @property
    def program(self):
        prog = self.kwargs.get('program', None)
        if prog is None:
            return g_program
        else:
            return prog

    def append_op(self, *args, **kwargs):
        return self.program.current_block().append_op(*args, **kwargs)

    # Normalize the layer's input kwarg into a list of Variables.
    def multiple_input(self, input_param_name='input'):
        inputs = self.kwargs.get(input_param_name, [])
        type_error = TypeError(
            "Input of {0} layer should be Variable or sequence of Variable".
            format(self.layer_type))
        if isinstance(inputs, Variable):
            inputs = [inputs]
        elif not isinstance(inputs, list) and not isinstance(inputs, tuple):
            raise type_error
        else:
            for each in inputs:
                if not isinstance(each, Variable):
                    raise type_error
        return inputs

    def input(self, input_param_name='input'):
        inputs = self.multiple_input(input_param_name)
        if len(inputs) != 1:
            raise ValueError("{0} layer only takes one input".format(
                self.layer_type))
        return inputs[0]

    @property
    def param_attr(self):
        default = {
            'name': None,
            'init_attr': {
                'type': 'uniform_random',
                'min': -1.0,
                'max': 1.0
            }
        }
        actual = self.kwargs.get('param_attr', None)
        return actual if actual is not None else default

    def bias_attr(self, size, dtype):
        bias_attr = self.kwargs.get('bias_attr', False)
        if bias_attr is None or bias_attr:
            bias_attr = {
                'name': None,
                'init_attr': {
                    'type': 'fill_constant',
                    'value': 0.0,
                    'shape': [size],
                    'dataType': dtype
                }
            }
        return bias_attr

    # Expand a single param_attr dict so every input gets its own copy.
    def multiple_param_attr(self, length):
        param_attr = self.param_attr
        if isinstance(param_attr, dict):
            param_attr = [param_attr]

        if len(param_attr) != 1 and len(param_attr) != length:
            raise ValueError("parameter number mismatch")
        elif len(param_attr) == 1 and length != 1:
            tmp = [None] * length
            for i in xrange(length):
                tmp[i] = copy.deepcopy(param_attr[0])
            param_attr = tmp
        return param_attr

    def iter_inputs_and_params(self, input_param_name='input'):
        inputs = self.multiple_input(input_param_name)
        param_attrs = self.multiple_param_attr(len(inputs))
        for ipt, param_attr in itertools.izip(inputs, param_attrs):
            yield ipt, param_attr

    def input_dtype(self, input_param_name='input'):
        inputs = self.multiple_input(input_param_name)
        dtype = None
        for each in inputs:
            if dtype is None:
                dtype = each.data_type
            elif dtype != each.data_type:
                raise ValueError("Data Type mismatch")
        return dtype

    def create_parameter(self, attr, shape, dtype, suffix='w'):
        if attr['name'] is None:
            attr['name'] = unique_name(".".join([self.name, suffix]))
        return self.program.global_block().create_parameter(
            name=attr['name'],
            dtype=dtype,
            shape=shape,
            initialize_attr=attr['init_attr'])

    def create_tmp_variable(self, dtype):
        return self.program.current_block().create_var(
            name=unique_name(".".join([self.name, 'tmp'])), dtype=dtype)

    def create_global_variable(self, *args, **kwargs):
        return self.program.global_block().create_var(*args, **kwargs)

    # Create a bias parameter of shape [size] and add it to input_var.
    def append_bias_op(self, input_var):
        bias_attr = self.bias_attr(
            self.kwargs['size'], dtype=input_var.data_type)
        if not bias_attr:
            return input_var
        b = self.create_parameter(
            attr=bias_attr,
            shape=[self.kwargs['size']],
            dtype=input_var.data_type,
            suffix='b')
        tmp = self.create_tmp_variable(dtype=input_var.data_type)
        self.append_op(
            type='elementwise_add',
            inputs={'X': [input_var],
                    'Y': [b]},
            outputs={'Out': [tmp]})
        return tmp

    def append_activation(self, input_var):
        act = self.kwargs.get('act', None)
        if act is None:
            return input_var
        if isinstance(act, basestring):
            act = {'type': act}
        tmp = self.create_tmp_variable(dtype=input_var.data_type)
        act_type = act.pop('type')
        self.append_op(
            type=act_type,
            inputs={"X": [input_var]},
            outputs={"Y": [tmp]},
            attrs=act)
        return tmp
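
Every layer function is expected to drive this helper in the same way: wrap its kwargs, create output variables, and append ops to the current block. A minimal sketch of that calling pattern with a hypothetical single-input layer; the 'scale' op name and its attribute are assumptions for illustration, not part of this diff:

from paddle.v2.framework.layer_helper import LayerHelper


def scale_layer(input, scale=1.0, program=None, name=None):
    # Hypothetical layer: every layer follows the same three-step recipe.
    helper = LayerHelper('scale', **locals())                # 1. wrap the kwargs
    out = helper.create_tmp_variable(dtype=input.data_type)  # 2. create the output var
    helper.append_op(                                        # 3. append the op
        type='scale',
        inputs={'X': [input]},
        outputs={'Out': [out]},
        attrs={'scale': scale})
    return out
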
@@ -0,0 +1,143 @@
from paddle.v2.framework.layer_helper import LayerHelper
import paddle.v2.framework.core as core
from paddle.v2.framework.framework import OpProtoHolder, Variable
import re

__all__ = ['fc_layer', 'data_layer', 'cross_entropy']

Reviewer comment: I think we should separate different layers into different files.

Reviewer comment: Should we follow the current name convention in the v2 API, which is fc() for the fc layer?

def fc_layer(input,
             size,
             param_attr=None,
             bias_attr=True,
             name=None,
             act=None,
             num_flatten_dims=1,
             program=None):
    # create helper
    helper = LayerHelper('fc', **locals())
    dtype = helper.input_dtype()

    # mul
    mul_results = []
    for input_var, param_attr in helper.iter_inputs_and_params():
        input_shape = input_var.shape
        param_shape = list(input_shape[num_flatten_dims:]) + [size]
        w = helper.create_parameter(
            attr=param_attr, shape=param_shape, dtype=dtype)
        tmp = helper.create_tmp_variable(dtype)
        helper.append_op(
            type="mul",
            inputs={
                "X": input_var,
                "Y": w,
            },
            outputs={"Out": tmp},
            attrs={'x_num_col_dims': num_flatten_dims})
        mul_results.append(tmp)

    # sum
    if len(mul_results) == 1:
        pre_bias = mul_results[0]
    else:
        pre_bias = helper.create_tmp_variable(dtype)
        helper.append_op(
            type="sum", inputs={"X": mul_results}, outputs={"Out": pre_bias})
    # add bias
    pre_activation = helper.append_bias_op(pre_bias)
    # add activation
    return helper.append_activation(pre_activation)
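
To make the shape bookkeeping concrete: in the fit-a-line test further below, the input comes from data_layer(name='x', shape=[13], ...), so its shape is [-1, 13]. The numbers here are simply worked out from the code above:

# input_shape = [-1, 13], num_flatten_dims = 1, size = 1
# param_shape = list(input_shape[1:]) + [1]  ->  [13, 1]
# mul(X, W) with x_num_col_dims=1 then yields the [-1, 1] prediction,
# to which append_bias_op adds a bias parameter of shape [1].
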

def data_layer(name,
               shape,
               data_type='float32',
               type=core.VarDesc.VarType.LOD_TENSOR,
               program=None):
    helper = LayerHelper('data', **locals())
    shape = [-1] + shape  # prepend the batch size as -1
    return helper.create_global_variable(
        name=name, shape=shape, dtype=data_type, type=type)

def _convert_(name):
    # Convert a CamelCase op proto name into the snake_case form used for kwargs.
    s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
    return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower()
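
For reference, a few hand-computed examples of what _convert_ produces; the last name is made up only to show the behaviour on a longer CamelCase string:

_convert_('X')            # -> 'x'
_convert_('Label')        # -> 'label'
_convert_('XNumColDims')  # -> 'x_num_col_dims'
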

def _create_op_func_(op_type):
    op_proto = OpProtoHolder.instance().get_op_proto(op_type)
    if len(op_proto.outputs) != 1:
        raise ValueError(
            "Only single-output operators can be automatically generated")

    if op_proto.outputs[0].duplicable:
        raise ValueError(
            "Only non-duplicable operators can be automatically generated")

    o_name = op_proto.outputs[0].name

    def func(**kwargs):
        helper = LayerHelper(op_type, **kwargs)
        inputs = dict()
        dtype = None
        for ipt in op_proto.inputs:
            name = _convert_(ipt.name)
            val = kwargs.pop(name, [])
            if not isinstance(val, list) and not isinstance(val, tuple):
                val = [val]
            for each in val:
                if not isinstance(each, Variable):
                    raise ValueError("input of {0} must be a Variable".format(
                        op_type))

                if dtype is None:
                    dtype = each.data_type
                elif dtype != each.data_type:
                    raise ValueError(
                        "all inputs of operator {0} must have the same dtype".
                        format(op_type))
            inputs[ipt.name] = val

        out = helper.create_tmp_variable(dtype=dtype)
        helper.append_op(
            type=op_type, inputs=inputs, outputs={o_name: [out]}, attrs=kwargs)
        return out

    func.__name__ = op_type
    globals()[op_type] = func
    global __all__
    __all__.append(op_type)


_create_op_func_('mean')


def cross_entropy(input, label, **kwargs):
    helper = LayerHelper('cross_entropy', **kwargs)
    out = helper.create_tmp_variable(dtype=input.data_type)
    helper.append_op(
        type='cross_entropy',
        inputs={'X': [input],
                'Label': [label]},
        outputs={'Y': [out]},
        attrs=kwargs)
    return out

Reviewer comment: Can we automatically add the Python interface for operators? Unlike fc or square_error_cost, the Python part of cross_entropy does not add any extra functionality.
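
A sketch of what that could look like with the generator above, assuming the registered cross_entropy operator exposes exactly one non-duplicable output; if it does not, _create_op_func_ raises ValueError and the hand-written wrapper has to stay:

_create_op_func_('cross_entropy')

# The generated function takes keyword arguments named after the op proto's
# inputs (converted by _convert_), so the call in the test below would become:
#   cost = cross_entropy(x=predict, label=label, program=program)
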

def square_error_cost(input, label, **kwargs):
    helper = LayerHelper('square_error_cost', **kwargs)
    minus_out = helper.create_tmp_variable(dtype=input.data_type)
    helper.append_op(
        type='elementwise_sub',
        inputs={'X': [input],
                'Y': [label]},
        outputs={'Out': [minus_out]})

Reviewer comment: We need to stop the gradient for label.

    square_out = helper.create_tmp_variable(dtype=input.data_type)
    helper.append_op(
        type='pow',
        inputs={'X': [minus_out]},
        outputs={'Y': [square_out]},
        attrs={'factor': 2.0})
    return square_out
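
Regarding the comment above about stopping the gradient for label: a minimal sketch of the idea, assuming a per-variable stop_gradient flag. No such flag exists in this diff, so the attribute name is purely illustrative:

# Hypothetical: mark the label Variable so that no gradient is propagated to it.
label.stop_gradient = True
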
@@ -0,0 +1,43 @@
from paddle.v2.framework.layers import fc_layer, data_layer, cross_entropy, mean, square_error_cost
from paddle.v2.framework.framework import Program, g_program
import paddle.v2.framework.core as core
import unittest


class TestBook(unittest.TestCase):
    def test_fit_a_line(self):
        pd = core.ProgramDesc.__create_program_desc__()
        program = Program(desc=pd)
        x = data_layer(
            name='x', shape=[13], data_type='float32', program=program)
        y_predict = fc_layer(input=x, size=1, act=None, program=program)

        y = data_layer(
            name='y', shape=[1], data_type='float32', program=program)
        cost = square_error_cost(input=y_predict, label=y, program=program)

        avg_cost = mean(x=cost, program=program)
        self.assertIsNotNone(avg_cost)
        print str(program)

    def test_recognize_digits_mlp(self):
        pd = core.ProgramDesc.__create_program_desc__()
        program = Program(desc=pd)

        # Change g_program, so the rest of the layers use `g_program`
        images = data_layer(
            name='pixel', shape=[784], data_type='float32', program=program)
        label = data_layer(
            name='label', shape=[1], data_type='int32', program=program)
        hidden1 = fc_layer(input=images, size=128, act='relu', program=program)
        hidden2 = fc_layer(input=hidden1, size=64, act='relu', program=program)
        predict = fc_layer(
            input=hidden2, size=10, act='softmax', program=program)
        cost = cross_entropy(input=predict, label=label, program=program)
        avg_cost = mean(x=cost, program=program)
        self.assertIsNotNone(avg_cost)

Reviewer comment: # backward(avg_cost)

        print str(program)


if __name__ == '__main__':
    unittest.main()

Reviewer comment: Since there is more than one input, there is also supposed to be more than one parameter. And we need some method to distinguish them.
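
On that last point: fc_layer already accepts a sequence of inputs, and multiple_param_attr either broadcasts a single param_attr dict to every input or expects one entry per input. A sketch of how separate parameters per input could be requested, reusing the hidden1/hidden2/program variables from the MLP test above; the init_attr values are only illustrative:

# Two inputs -> two weight matrices; each gets its own param_attr entry.
# Passing a single dict instead of a list would make multiple_param_attr
# deep-copy it for every input.
out = fc_layer(
    input=[hidden1, hidden2],
    size=10,
    param_attr=[
        {'name': None,
         'init_attr': {'type': 'uniform_random', 'min': -0.1, 'max': 0.1}},
        {'name': None,
         'init_attr': {'type': 'uniform_random', 'min': -0.01, 'max': 0.01}},
    ],
    act='softmax',
    program=program)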