Use force cpu in fill constant op #8254


Merged
45 commits
9115017
init polynomial_decay
jacquesqiao Jan 31, 2018
b591ac7
test polynomial_decay
jacquesqiao Jan 31, 2018
7d09fe6
complete polynomial_decay
jacquesqiao Jan 31, 2018
8652da8
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
jacquesqiao Jan 31, 2018
e804f06
fix conditional block op
jacquesqiao Feb 1, 2018
9ee8f77
init scalar-switch-case-op
jacquesqiao Feb 1, 2018
9ae65c4
switch op can compile
jacquesqiao Feb 2, 2018
3d5b807
complete forward switch_op
jacquesqiao Feb 2, 2018
0b4e4c9
add GetMatchCaseIndex
jacquesqiao Feb 2, 2018
7af4dda
add switch_grad_op
jacquesqiao Feb 2, 2018
5a659e8
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
jacquesqiao Feb 5, 2018
bdfb835
init switch Python API
jacquesqiao Feb 5, 2018
33fcaed
add test_switch
jacquesqiao Feb 5, 2018
83e1bc9
support set block list in python
jacquesqiao Feb 5, 2018
5fe5936
fix scope problem
jacquesqiao Feb 5, 2018
942bdcb
complete test
jacquesqiao Feb 5, 2018
9d1385b
optimize test
jacquesqiao Feb 5, 2018
410db57
optimize test
jacquesqiao Feb 5, 2018
511cb49
rm backward part
jacquesqiao Feb 5, 2018
2af2a18
clear grad op
jacquesqiao Feb 5, 2018
d0f2928
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
jacquesqiao Feb 5, 2018
e91c85d
Merge branch 'impl-scalar-switch-case-op' of ssh://github.com/jacques…
jacquesqiao Feb 5, 2018
1e6f229
polynomial_decay use switch op
jacquesqiao Feb 5, 2018
33079d9
revert conditional_block_op and reshape_op
jacquesqiao Feb 5, 2018
04e8a23
add piecewise_decay and test
jacquesqiao Feb 6, 2018
c29a1cc
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
jacquesqiao Feb 6, 2018
7d86a0c
fix piecewise_decay
jacquesqiao Feb 6, 2018
061f0b1
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
jacquesqiao Feb 6, 2018
835dc2f
try to use condition op for switch
jacquesqiao Feb 6, 2018
d3e148f
can work
jacquesqiao Feb 6, 2018
60a45f8
clean old code
jacquesqiao Feb 6, 2018
7b69b0b
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
jacquesqiao Feb 6, 2018
0217065
revert
jacquesqiao Feb 6, 2018
8f02fdf
rm switch_op.cc
jacquesqiao Feb 6, 2018
06d87f9
optimize code
jacquesqiao Feb 6, 2018
edacca8
add attr is_scalar_condition for condition_block_op
jacquesqiao Feb 6, 2018
c2d3207
fix comment
jacquesqiao Feb 6, 2018
59a814a
Merge branch 'impl-scalar-switch-case-op-with-condition-op' of ssh://…
jacquesqiao Feb 7, 2018
7fd322a
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
jacquesqiao Feb 7, 2018
6cbddd7
init use-force-cpu-in-fill_constant_op
jacquesqiao Feb 7, 2018
d8e733f
add init_on_cpu
jacquesqiao Feb 7, 2018
f862c5c
add mix device test in test_label_semantic_roles
jacquesqiao Feb 8, 2018
a92cca0
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
jacquesqiao Feb 8, 2018
1832e8d
optimize code
jacquesqiao Feb 8, 2018
63b00e8
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
jacquesqiao Feb 9, 2018
37 changes: 31 additions & 6 deletions python/paddle/v2/fluid/initializer.py
@@ -14,14 +14,37 @@

import framework
import numpy as np
import contextlib

__all__ = [
-    'Constant',
-    'Uniform',
-    'Normal',
-    'Xavier',
+    'Constant', 'Uniform', 'Normal', 'Xavier', 'force_init_on_cpu',
+    'init_on_cpu'
]

_force_init_on_cpu_ = False


def force_init_on_cpu():
return _force_init_on_cpu_


@contextlib.contextmanager
def init_on_cpu():
"""
Switch program with `with` statement

Examples:
>>> with init_on_cpu():
>>> step = layers.create_global_var()

"""
global _force_init_on_cpu_

pre_state = force_init_on_cpu()
_force_init_on_cpu_ = True
yield
_force_init_on_cpu_ = pre_state


class Initializer(object):
"""Base class for variable initializers
@@ -80,7 +103,7 @@ class ConstantInitializer(Initializer):
"""Implements the constant initializer
"""

-    def __init__(self, value=0.0):
+    def __init__(self, value=0.0, force_cpu=False):
"""Constructor for ConstantInitializer

Args:
@@ -89,6 +112,7 @@ def __init__(self, value=0.0):
assert value is not None
super(ConstantInitializer, self).__init__()
self._value = value
self._force_cpu = force_cpu

def __call__(self, var, block):
"""Add constant initialization ops for a variable
@@ -110,7 +134,8 @@ def __call__(self, var, block):
attrs={
"shape": var.shape,
"dtype": int(var.dtype),
"value": self._value
"value": float(self._value),
'force_cpu': self._force_cpu or force_init_on_cpu()
})
var.op = op
return op
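Taken together, the initializer changes add a process-wide flag plus a context manager around it. The sketch below shows how the pair is meant to be used; it is illustrative only and assumes a paddle.v2.fluid build that already contains this patch, with the module paths exactly as in the diff.

    # Minimal usage sketch (assumes a fluid build that includes this PR).
    import paddle.v2.fluid as fluid
    from paddle.v2.fluid.initializer import force_init_on_cpu, init_on_cpu

    assert force_init_on_cpu() is False      # default: no forced CPU placement

    with init_on_cpu():
        # Every fill_constant emitted for initialization inside this block
        # carries force_cpu=True, so the initial value is materialized on CPU
        # even when the rest of the program runs on a GPU place.
        step = fluid.layers.create_global_var(
            shape=[1], value=0.0, dtype='float32', persistable=True)
        assert force_init_on_cpu() is True

    assert force_init_on_cpu() is False      # previous state restored on exit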
10 changes: 7 additions & 3 deletions python/paddle/v2/fluid/layers/math_op_patch.py
@@ -14,6 +14,7 @@

from ..framework import Variable, unique_name
from layer_function_generator import OpProtoHolder
from ..initializer import force_init_on_cpu

__all__ = ['monkey_patch_variable']

@@ -36,9 +37,12 @@ def create_tensor(block, value, dtype, shape):
block.append_op(
type="fill_constant",
outputs={'Out': [var]},
-            attrs={'dtype': var.dtype,
-                   'shape': shape,
-                   'value': value})
+            attrs={
+                'dtype': var.dtype,
+                'shape': shape,
+                'value': value,
+                'force_cpu': force_init_on_cpu()
+            })
return var

def create_scalar(block, value, dtype):
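The math_op_patch change matters because arithmetic between a Variable and a Python scalar silently creates a fill_constant op for the scalar. A small sketch of the effect, assuming a fluid build with this PR and the monkey-patched Variable operators (monkey_patch_variable) active, as the stock package applies them:

    # Sketch only; names below follow the diff, the rest is illustrative.
    import paddle.v2.fluid as fluid
    from paddle.v2.fluid.initializer import init_on_cpu

    lr = fluid.layers.create_global_var(
        shape=[1], value=0.01, dtype='float32', persistable=True)

    with init_on_cpu():
        # The patched operator builds a hidden fill_constant for 0.1; with
        # this change that constant also carries
        # force_cpu=force_init_on_cpu() == True.
        scaled_lr = lr * 0.1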
27 changes: 23 additions & 4 deletions python/paddle/v2/fluid/layers/tensor.py
@@ -16,7 +16,7 @@
from ..param_attr import ParamAttr
from ..framework import convert_np_dtype_to_dtype_
from ..framework import Variable
-from ..initializer import Constant
+from ..initializer import Constant, force_init_on_cpu
from ..core import DataType
import numpy

@@ -69,12 +69,30 @@ def create_parameter(shape,
default_initializer)


-def create_global_var(shape, value, dtype, persistable=False, name=None):
+def create_global_var(shape,
+                      value,
+                      dtype,
+                      persistable=False,
+                      force_cpu=False,
+                      name=None):
"""
    Create a global variable, such as global_step.
Args:
shape(list[int]): shape of the variable
value(float): the value of the variable
dtype(string): element type of the parameter
persistable(bool): if this variable is persistable
force_cpu(bool): force this variable to be on CPU

Returns:
Variable: the created Variable
"""
helper = LayerHelper("global_var", **locals())
var = helper.create_global_variable(
dtype=dtype, shape=shape, persistable=persistable, name=name)
helper.set_variable_initializer(
-        var, initializer=Constant(value=float(value)))
+        var, initializer=Constant(
+            value=float(value), force_cpu=force_cpu))
return var


@@ -221,6 +239,7 @@ def fill_constant(shape, dtype, value, force_cpu=False, out=None):
dtype(np.dtype|core.DataType|str): Data type of the output tensor.
value(float): The constant value used to initialize the output tensor.
out(Variable): The output tensor.
force_cpu(True|False): data should be on CPU if set true.

Returns:
Variable: The tensor variable storing the output.
@@ -242,7 +261,7 @@ def fill_constant(shape, dtype, value, force_cpu=False, out=None):
'shape': shape,
'dtype': out.dtype,
'value': float(value),
-            'force_cpu': force_cpu
+            'force_cpu': force_cpu or force_init_on_cpu()
})
out.stop_gradient = True
return out
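For callers, the visible API change in tensor.py is the new force_cpu keyword. A short usage sketch, assuming a fluid build with this PR (the parameter names come from the diff; the scenario is illustrative):

    # Sketch only; assumes a paddle.v2.fluid build that includes this PR.
    import paddle.v2.fluid as fluid

    # A step counter that control-flow ops (Switch/less_than) read on CPU,
    # even when the main program is placed on GPU.
    global_step = fluid.layers.create_global_var(
        shape=[1], value=0, dtype='float32', persistable=True, force_cpu=True)

    # fill_constant already accepted force_cpu; it now also honours an
    # enclosing init_on_cpu() block via force_init_on_cpu().
    zero = fluid.layers.fill_constant(
        shape=[1], dtype='float32', value=0.0, force_cpu=True)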
114 changes: 66 additions & 48 deletions python/paddle/v2/fluid/learning_rate_decay.py
@@ -14,6 +14,7 @@

import layers
from framework import Variable
from initializer import init_on_cpu

__all__ = [
'exponential_decay', 'natural_exp_decay', 'inverse_time_decay',
@@ -54,11 +55,14 @@ def exponential_decay(learning_rate,
if not isinstance(global_step, Variable):
raise ValueError("global_step is required for exponential_decay.")

-    # update learning_rate
-    div_res = global_step / decay_steps
-    if staircase:
-        div_res = layers.floor(x=div_res)
-    return learning_rate * (decay_rate**div_res)
+    with init_on_cpu():
+        # update learning_rate
+        div_res = global_step / decay_steps
+        if staircase:
+            div_res = layers.floor(x=div_res)
+        decayed_lr = learning_rate * (decay_rate**div_res)
+
+    return decayed_lr


def natural_exp_decay(learning_rate,
@@ -88,10 +92,13 @@ def natural_exp_decay(learning_rate,
if not isinstance(global_step, Variable):
raise ValueError("global_step is required for natural_exp_decay.")

-    div_res = global_step / decay_steps
-    if staircase:
-        div_res = layers.floor(x=div_res)
-    return learning_rate * layers.exp(x=(-1 * decay_rate * div_res))
+    with init_on_cpu():
+        div_res = global_step / decay_steps
+        if staircase:
+            div_res = layers.floor(x=div_res)
+        decayed_lr = learning_rate * layers.exp(x=(-1 * decay_rate * div_res))
+
+    return decayed_lr


def inverse_time_decay(learning_rate,
@@ -121,11 +128,14 @@ def inverse_time_decay(learning_rate,
if not isinstance(global_step, Variable):
raise ValueError("global_step is required for inverse_time_decay.")

-    div_res = global_step / decay_steps
-    if staircase:
-        div_res = layers.floor(x=div_res)
+    with init_on_cpu():
+        div_res = global_step / decay_steps
+        if staircase:
+            div_res = layers.floor(x=div_res)
+
+        decayed_lr = learning_rate / (1 + decay_rate * div_res)

-    return learning_rate / (1 + decay_rate * div_res)
+    return decayed_lr


def polynomial_decay(learning_rate,
@@ -160,22 +170,27 @@ def polynomial_decay(learning_rate,
if not isinstance(global_step, Variable):
raise ValueError("global_step is required for inverse_time_decay.")

-    if cycle:
-        div_res = layers.ceil(x=(global_step / decay_steps))
-        zero_var = layers.fill_constant(shape=[1], dtype='float32', value=0.0)
-        one_var = layers.fill_constant(shape=[1], dtype='float32', value=1.0)
-
-        with layers.Switch() as switch:
-            with switch.case(layers.equal(x=global_step, y=zero_var)):
-                layers.assign(input=one_var, output=div_res)
-        decay_steps = decay_steps * div_res
-    else:
-        decay_steps_var = layers.fill_constant(
-            shape=[1], dtype='float32', value=float(decay_steps))
-        global_step = layers.elementwise_min(x=global_step, y=decay_steps_var)
-
-    return (learning_rate - end_learning_rate) * \
-        ((1 - global_step / decay_steps) ** power) + end_learning_rate
+    with init_on_cpu():
+        if cycle:
+            div_res = layers.ceil(x=(global_step / decay_steps))
+            zero_var = layers.fill_constant(
+                shape=[1], dtype='float32', value=0.0)
+            one_var = layers.fill_constant(
+                shape=[1], dtype='float32', value=1.0)
+
+            with layers.Switch() as switch:
+                with switch.case(layers.equal(x=global_step, y=zero_var)):
+                    layers.assign(input=one_var, output=div_res)
+            decay_steps = decay_steps * div_res
+        else:
+            decay_steps_var = layers.fill_constant(
+                shape=[1], dtype='float32', value=float(decay_steps))
+            global_step = layers.elementwise_min(
+                x=global_step, y=decay_steps_var)
+
+        decayed_lr = (learning_rate - end_learning_rate) * \
+            ((1 - global_step / decay_steps) ** power) + end_learning_rate
+    return decayed_lr


def piecewise_decay(global_step, boundaries, values):
@@ -200,24 +215,27 @@ def piecewise_decay(global_step, boundaries, values):
if not isinstance(global_step, Variable):
raise ValueError("global_step is required for piecewise_decay.")

-    lr = layers.create_global_var(
-        shape=[1],
-        value=0.0,
-        dtype='float32',
-        persistable=True,
-        name="learning_rate")
-
-    with layers.Switch() as switch:
-        for i in range(len(boundaries)):
-            boundary_val = layers.fill_constant(
-                shape=[1], dtype='float32', value=float(boundaries[i]))
-            value_var = layers.fill_constant(
-                shape=[1], dtype='float32', value=float(values[i]))
-            with switch.case(layers.less_than(global_step, boundary_val)):
-                layers.assign(value_var, lr)
-        last_value_var = layers.fill_constant(
-            shape=[1], dtype='float32', value=float(values[len(values) - 1]))
-        with switch.default():
-            layers.assign(last_value_var, lr)
+    with init_on_cpu():
+        lr = layers.create_global_var(
+            shape=[1],
+            value=0.0,
+            dtype='float32',
+            persistable=True,
+            name="learning_rate")
+
+        with layers.Switch() as switch:
+            for i in range(len(boundaries)):
+                boundary_val = layers.fill_constant(
+                    shape=[1], dtype='float32', value=float(boundaries[i]))
+                value_var = layers.fill_constant(
+                    shape=[1], dtype='float32', value=float(values[i]))
+                with switch.case(layers.less_than(global_step, boundary_val)):
+                    layers.assign(value_var, lr)
+            last_value_var = layers.fill_constant(
+                shape=[1],
+                dtype='float32',
+                value=float(values[len(values) - 1]))
+            with switch.default():
+                layers.assign(last_value_var, lr)

return lr
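As a sanity check on what these decay graphs compute, here is a plain NumPy rendering of two of the schedules. It is not part of the PR; the function names and example numbers are illustrative only.

    # NumPy reference for exponential and piecewise decay (illustrative).
    import numpy as np

    def exponential_decay(lr, step, decay_steps, decay_rate, staircase=False):
        p = step / float(decay_steps)
        if staircase:
            p = np.floor(p)
        return lr * decay_rate ** p

    def piecewise_decay(step, boundaries, values):
        # values has one more entry than boundaries; the last value applies
        # once the step passes the final boundary.
        for b, v in zip(boundaries, values):
            if step < b:
                return v
        return values[-1]

    print(exponential_decay(0.1, 20000, 10000, 0.5, staircase=True))   # 0.025
    print(piecewise_decay(15000, [10000, 20000], [0.1, 0.01, 0.001]))  # 0.01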
12 changes: 11 additions & 1 deletion python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py
@@ -18,6 +18,7 @@
import paddle.v2 as paddle
import paddle.v2.dataset.conll05 as conll05
import paddle.v2.fluid as fluid
from paddle.v2.fluid.initializer import init_on_cpu
import contextlib
import time
import unittest
@@ -167,7 +168,16 @@ def train(use_cuda, save_dirname=None):

# TODO(qiao)
# check other optimizers and check why out will be NAN
-    sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.0001)
+    global_step = fluid.layers.create_global_var(
+        shape=[1], value=0, dtype='float32', force_cpu=True, persistable=True)
+    sgd_optimizer = fluid.optimizer.SGD(
+        learning_rate=fluid.learning_rate_decay.exponential_decay(
+            learning_rate=0.0001,
+            global_step=global_step,
+            decay_steps=100000,
+            decay_rate=0.5,
+            staircase=True),
+        global_step=global_step)
sgd_optimizer.minimize(avg_cost)

# TODO(qiao)