14 changes: 7 additions & 7 deletions python/paddle/v2/framework/framework.py
@@ -153,7 +153,8 @@ def __init__(self):
             self.op_proto_map[proto.type] = proto

     def get_op_proto(self, type):
-        assert type in self.op_proto_map, "Operator \"%s\" has not been registered." % type
+        if type not in self.op_proto_map:
+            raise ValueError("Operator \"%s\" has not been registered." % type)
         return self.op_proto_map[type]


@@ -374,10 +375,10 @@ def instance(cls):
             cls._instance = cls()
         return cls._instance

-    def __init__(self):
-        assert not hasattr(self.__class__,
-                           '_instance'), 'Do not call constructor directly!'
-        self.desc = core.ProgramDesc.instance()
+    def __init__(self, desc=None):
+        if desc is None:
+            desc = core.ProgramDesc.instance()
+        self.desc = desc
         self.blocks = [Block(self, 0)]
         self.current_block_idx = 0

@@ -428,7 +429,6 @@ def __init__(self, block, shape, dtype, **kwargs):
             if each < 0:
                 raise ValueError("Parameter shape should not be related with "
                                  "batch-size")
-
         Variable.__init__(self, block, shape=shape, dtype=dtype, **kwargs)
         self.trainable = kwargs.get('trainable', True)
         self.init_attr = kwargs.get('initialize_attr', {
@@ -441,7 +441,7 @@ def __init__(self, block, shape, dtype, **kwargs):
         self._append_initialize_ops_()

     def _append_initialize_ops_(self):
-        attr = copy.deepcopy(self.init_attr)
+        attr = self.init_attr
         op_type = attr.pop('type', None)
         block = self.block
         assert isinstance(block, Block)
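For context (an editorial sketch, not part of the patch): the new optional `desc` argument lets a caller wrap an explicitly created `ProgramDesc` instead of the process-wide singleton, which is exactly what the new tests below rely on.

    import paddle.v2.framework.core as core
    from paddle.v2.framework.framework import Program

    pd = core.ProgramDesc.__create_program_desc__()
    program = Program(desc=pd)      # backed by a fresh desc, independent of g_program
    singleton = Program.instance()  # still returns the singleton Program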
160 changes: 160 additions & 0 deletions python/paddle/v2/framework/layer_helper.py
@@ -0,0 +1,160 @@
from paddle.v2.framework.framework import Variable, OpProtoHolder, g_program
import paddle.v2.framework.core as core
import copy
import itertools


def unique_name(prefix):
uid = core.unique_integer() # unique during whole process.
return "_".join([prefix, str(uid)])


class LayerHelper(object):
def __init__(self, layer_type, **kwargs):
self.kwargs = kwargs
self.layer_type = layer_type
name = self.kwargs.get('name', None)
if name is None:
self.kwargs['name'] = unique_name(self.layer_type)

@property
def name(self):
return self.kwargs['name']

@property
def program(self):
prog = self.kwargs.get('program', None)
if prog is None:
return g_program
else:
return prog

def append_op(self, *args, **kwargs):
return self.program.current_block().append_op(*args, **kwargs)

def multiple_input(self, input_param_name='input'):
inputs = self.kwargs.get(input_param_name, [])
type_error = TypeError(
"Input of {0} layer should be Variable or sequence of Variable".
format(self.layer_type))
if isinstance(inputs, Variable):
inputs = [inputs]
elif not isinstance(inputs, list) and not isinstance(inputs, tuple):
raise type_error
else:
for each in inputs:
if not isinstance(each, Variable):
raise type_error
return inputs

def input(self, input_param_name='input'):
inputs = self.multiple_input(input_param_name)
if len(inputs) != 1:
raise "{0} layer only takes one input".format(self.layer_type)
return inputs[0]

@property
def param_attr(self):
Review comment (Collaborator): Since there is more than one input, there is also supposed to be more than one parameter, and we need some method to distinguish them.
default = {
'name': None,
'init_attr': {
'type': 'uniform_random',
'min': -1.0,
'max': 1.0
}
}
actual = self.kwargs.get('param_attr', None)
return actual if actual is not None else default

def bias_attr(self, size, dtype):
bias_attr = self.kwargs.get('bias_attr', False)
if bias_attr is None or bias_attr:
bias_attr = {
'name': None,
'init_attr': {
'type': 'fill_constant',
'value': 0.0,
'shape': [size],
'dataType': dtype
}
}
return bias_attr

def multiple_param_attr(self, length):
param_attr = self.param_attr
if isinstance(param_attr, dict):
param_attr = [param_attr]

if len(param_attr) != 1 and len(param_attr) != length:
raise ValueError("parameter number mismatch")
elif len(param_attr) == 1 and length != 1:
tmp = [None] * length
for i in xrange(length):
tmp[i] = copy.deepcopy(param_attr[0])
param_attr = tmp
return param_attr
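    # Editorial sketch (not part of the patch): with N inputs and a single
    # param_attr, the one attr dict is deep-copied per input, so each created
    # parameter can later get its own unique name, e.g.
    #   attrs = helper.multiple_param_attr(2)
    #   assert attrs[0] is not attrs[1]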

def iter_inputs_and_params(self, input_param_name='input'):
inputs = self.multiple_input(input_param_name)
param_attrs = self.multiple_param_attr(len(inputs))
for ipt, param_attr in itertools.izip(inputs, param_attrs):
yield ipt, param_attr

def input_dtype(self, input_param_name='input'):
inputs = self.multiple_input(input_param_name)
dtype = None
for each in inputs:
if dtype is None:
dtype = each.data_type
elif dtype != each.data_type:
raise ValueError("Data Type mismatch")
return dtype

def create_parameter(self, attr, shape, dtype, suffix='w'):
if attr['name'] is None:
attr['name'] = unique_name(".".join([self.name, suffix]))
return self.program.global_block().create_parameter(
name=attr['name'],
dtype=dtype,
shape=shape,
initialize_attr=attr['init_attr'])

def create_tmp_variable(self, dtype):
return self.program.current_block().create_var(
name=unique_name(".".join([self.name, 'tmp'])), dtype=dtype)

def create_global_variable(self, *args, **kwargs):
return self.program.global_block().create_var(*args, **kwargs)

def append_bias_op(self, input_var):
bias_attr = self.bias_attr(
self.kwargs['size'], dtype=input_var.data_type)
if not bias_attr:
return input_var
b = self.create_parameter(
attr=bias_attr,
shape=[self.kwargs['size']],
dtype=input_var.data_type,
suffix='b')
tmp = self.create_tmp_variable(dtype=input_var.data_type)
self.append_op(
type='elementwise_add',
inputs={'X': [input_var],
'Y': [b]},
outputs={'Out': [tmp]})
return tmp
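    # Editorial sketch (not part of the patch): for kwargs['size'] == 10 and a
    # float32 input, this creates a bias parameter "<name>.b" of shape [10]
    # (filled with 0.0 via fill_constant) and appends
    #   elementwise_add(X=[input_var], Y=[b]) -> Out=[tmp]
    # returning the temporary output variable.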

def append_activation(self, input_var):
act = self.kwargs.get('act', None)
if act is None:
return input_var
if isinstance(act, basestring):
act = {'type': act}
tmp = self.create_tmp_variable(dtype=input_var.data_type)
act_type = act.pop('type')
self.append_op(
type=act_type,
inputs={"X": [input_var]},
outputs={"Y": [tmp]},
attrs=act)
return tmp
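    # Editorial sketch (not part of the patch): with act='relu' this appends
    #   relu(X=[input_var]) -> Y=[tmp]
    # and returns tmp; with act=None the input variable is returned unchanged.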
143 changes: 143 additions & 0 deletions python/paddle/v2/framework/layers.py
@@ -0,0 +1,143 @@
from paddle.v2.framework.layer_helper import LayerHelper
import paddle.v2.framework.core as core
from paddle.v2.framework.framework import OpProtoHolder, Variable
import re

__all__ = ['fc_layer', 'data_layer', 'cross_entropy']
Review comment (Collaborator): I think we should separate different layers into different files.



def fc_layer(input,
Review comment (Collaborator): Should we follow the current naming convention in the v2 API, which is fc() for the fc layer?

size,
param_attr=None,
bias_attr=True,
name=None,
act=None,
num_flatten_dims=1,
program=None):
# create helper
helper = LayerHelper('fc', **locals())

dtype = helper.input_dtype()

# mul
mul_results = []
for input_var, param_attr in helper.iter_inputs_and_params():
input_shape = input_var.shape
param_shape = list(input_shape[num_flatten_dims:]) + [size]
w = helper.create_parameter(
attr=param_attr, shape=param_shape, dtype=dtype)
tmp = helper.create_tmp_variable(dtype)
helper.append_op(
type="mul",
inputs={
"X": input_var,
"Y": w,
},
outputs={"Out": tmp},
attrs={'x_num_col_dims': num_flatten_dims})
mul_results.append(tmp)

# sum
if len(mul_results) == 1:
pre_bias = mul_results[0]
else:
pre_bias = helper.create_tmp_variable(dtype)
helper.append_op(
type="sum", inputs={"X": mul_results}, outputs={"Out": pre_bias})
# add bias
pre_activation = helper.append_bias_op(pre_bias)
# add activation
return helper.append_activation(pre_activation)
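# Editorial walk-through (illustrative, not part of the patch): for a single
# 2-D input of shape [batch, 13] and size=1, fc_layer emits roughly
#   mul(X=input, Y=w)           # w has shape [13, 1]
#   elementwise_add(X=.., Y=b)  # b has shape [1], from bias_attr=True
# the sum op is skipped for a single input, and no activation op is appended
# when act=None.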


def data_layer(name,
shape,
data_type='float32',
type=core.VarDesc.VarType.LOD_TENSOR,
program=None):
helper = LayerHelper('data', **locals())
    shape = [-1] + shape  # prepend the batch-size dimension as -1
return helper.create_global_variable(
name=name, shape=shape, dtype=data_type, type=type)


def _convert_(name):
s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower()
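# Editorial examples (illustrative): _convert_ maps the proto's CamelCase
# names to the snake_case keyword names used by the generated functions, e.g.
#   _convert_("X") -> "x", _convert_("FC") -> "fc",
#   _convert_("SubSeqs") -> "sub_seqs"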


def _create_op_func_(op_type):
op_proto = OpProtoHolder.instance().get_op_proto(op_type)
if len(op_proto.outputs) != 1:
        raise ValueError(
            "Only single-output operators can be automatically generated")

if op_proto.outputs[0].duplicable:
        raise ValueError(
            "Only non-duplicable operators can be automatically generated")

o_name = op_proto.outputs[0].name

def func(**kwargs):
helper = LayerHelper(op_type, **kwargs)
inputs = dict()
dtype = None
for ipt in op_proto.inputs:
name = _convert_(ipt.name)
val = kwargs.pop(name, [])
if not isinstance(val, list) and not isinstance(val, tuple):
val = [val]
for each in val:
if not isinstance(each, Variable):
raise ValueError("input of {0} must be variable".format(
op_type))

if dtype is None:
dtype = each.data_type
elif dtype != each.data_type:
                    raise ValueError(
                        "inputs of operator {0} must have the same data type".
                        format(op_type))
inputs[ipt.name] = val

out = helper.create_tmp_variable(dtype=dtype)
helper.append_op(
type=op_type, inputs=inputs, outputs={o_name: [out]}, attrs=kwargs)
return out

func.__name__ = op_type
globals()[op_type] = func
global __all__
__all__.append(op_type)


_create_op_func_('mean')
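# Editorial note (not part of the patch): the generated function lands in
# globals() and __all__, so `mean` can be imported and called like a
# hand-written layer, as the tests below do:
#   avg_cost = mean(x=cost, program=program)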


def cross_entropy(input, label, **kwargs):
helper = LayerHelper('cross_entropy', **kwargs)
out = helper.create_tmp_variable(dtype=input.data_type)
helper.append_op(
type='cross_entropy',
inputs={'X': [input],
'Label': [label]},
outputs={'Y': [out]},
attrs=kwargs)
return out
Review comment (Collaborator): Can we automatically add a Python interface for operators? Unlike fc or square_error_cost, the Python part of cross_entropy does not provide any extra functionality.



def square_error_cost(input, label, **kwargs):
helper = LayerHelper('square_error_cost', **kwargs)
minus_out = helper.create_tmp_variable(dtype=input.data_type)
helper.append_op(
type='elementwise_sub',
inputs={'X': [input],
'Y': [label]},
outputs={'Out': [minus_out]})
Review comment (Collaborator): We need to stop the gradient for label here.


square_out = helper.create_tmp_variable(dtype=input.data_type)
helper.append_op(
type='pow',
inputs={'X': [minus_out]},
outputs={'Y': [square_out]},
attrs={'factor': 2.0})
return square_out
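# Editorial sketch (not part of the patch): square_error_cost(input, label)
# computes (input - label) ** 2 element-wise via
#   elementwise_sub(X=[input], Y=[label]) -> minus_out
#   pow(X=[minus_out], factor=2.0)        -> square_out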
43 changes: 43 additions & 0 deletions python/paddle/v2/framework/tests/test_layers.py
@@ -0,0 +1,43 @@
from paddle.v2.framework.layers import fc_layer, data_layer, cross_entropy, mean, square_error_cost
from paddle.v2.framework.framework import Program, g_program
import paddle.v2.framework.core as core
import unittest


class TestBook(unittest.TestCase):
def test_fit_a_line(self):
pd = core.ProgramDesc.__create_program_desc__()
program = Program(desc=pd)
x = data_layer(
name='x', shape=[13], data_type='float32', program=program)
y_predict = fc_layer(input=x, size=1, act=None, program=program)

y = data_layer(
name='y', shape=[1], data_type='float32', program=program)
cost = square_error_cost(input=y_predict, label=y, program=program)

avg_cost = mean(x=cost, program=program)
self.assertIsNotNone(avg_cost)
print str(program)

def test_recognize_digits_mlp(self):
pd = core.ProgramDesc.__create_program_desc__()
program = Program(desc=pd)

        # Pass `program` explicitly, so these layers do not fall back to g_program
images = data_layer(
name='pixel', shape=[784], data_type='float32', program=program)
label = data_layer(
name='label', shape=[1], data_type='int32', program=program)
hidden1 = fc_layer(input=images, size=128, act='relu', program=program)
hidden2 = fc_layer(input=hidden1, size=64, act='relu', program=program)
predict = fc_layer(
input=hidden2, size=10, act='softmax', program=program)
cost = cross_entropy(input=predict, label=label, program=program)
avg_cost = mean(x=cost, program=program)
self.assertIsNotNone(avg_cost)
Review comment (Collaborator, PR author): # backward(avg_cost)

print str(program)


if __name__ == '__main__':
unittest.main()
2 changes: 1 addition & 1 deletion python/paddle/v2/framework/tests/test_operator_desc.py
@@ -16,7 +16,7 @@ def test_error_type(self):
         try:
             block.append_op(type="no_such_op")
             self.assertFail()
-        except AssertionError as a_err:
+        except ValueError as a_err:
             self.assertEqual(a_err.message,
                              "Operator \"no_such_op\" has not been registered.")
