add new API paddle.nn.initializer.Orthogonal and calculate_gain #37163

Merged: 3 commits, Nov 19, 2021
47 changes: 47 additions & 0 deletions python/paddle/fluid/initializer.py
@@ -14,6 +14,7 @@

from __future__ import print_function

import math
from . import framework
from . import core
from .framework import in_dygraph_mode, default_main_program
@@ -1033,6 +1034,52 @@ def _global_bias_initializer():
    return _global_bias_initializer_


def calculate_gain(nonlinearity, param=None):
    """
    Get the recommended gain value for some nonlinearity functions.

    Args:
        nonlinearity(str): name of the nonlinearity function.
        param(bool|int|float, optional): optional parameter for some nonlinearity
            functions. Currently it only applies to 'leaky_relu'. Default: None
            (in which case 0.01 is used in the formula).

    Returns:
        The recommended gain value for the given nonlinearity function.

    Examples:
        .. code-block:: python

            import paddle
            gain = paddle.nn.initializer.calculate_gain('tanh')  # 5.0 / 3
            gain = paddle.nn.initializer.calculate_gain('leaky_relu', param=1.0)  # 1.0 = math.sqrt(2.0 / (1 + param**2))

    """
    if param is None:
        param = 0.01
    else:
        assert isinstance(param, (bool, int, float))
        param = float(param)
    recommended_gain = {

Contributor: shall we support calculating the gain of selu?
Contributor Author: done

        'sigmoid': 1,
        'linear': 1,
        'conv1d': 1,
        'conv2d': 1,
        'conv3d': 1,
        'conv_transpose1d': 1,
        'conv_transpose2d': 1,
        'conv_transpose3d': 1,
        'tanh': 5.0 / 3,
        'relu': math.sqrt(2.0),
        'leaky_relu': math.sqrt(2.0 / (1 + param**2)),
        'selu': 3.0 / 4
    }
    if nonlinearity in recommended_gain.keys():
        return recommended_gain[nonlinearity]
    else:
        raise ValueError("nonlinearity function {} is not supported now.".
                         format(nonlinearity))


# We shorten the class name, since users will use the initializer with the package
# name. The sample code:
#
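
A minimal usage sketch (not part of the diff) that combines the two new APIs: feed the gain recommended for tanh into the new Orthogonal initializer. The layer sizes are arbitrary illustration values:

    import paddle

    # Recommended gain for tanh, per the table above: 5.0 / 3.
    gain = paddle.nn.initializer.calculate_gain('tanh')

    # Scale an orthogonal weight initialization by that gain.
    weight_attr = paddle.ParamAttr(
        initializer=paddle.nn.initializer.Orthogonal(gain=gain))
    linear = paddle.nn.Linear(10, 15, weight_attr=weight_attr)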
199 changes: 199 additions & 0 deletions python/paddle/fluid/tests/unittests/test_initializer.py
@@ -15,6 +15,7 @@
from __future__ import print_function

import numpy as np
import math
import unittest

import paddle
@@ -41,6 +42,17 @@ def output_hist(out):


class TestConstantInitializer(unittest.TestCase):
    def test_calculate_gain(self):

Contributor: shall we add test cases for relu and selu?
Contributor Author: done, thx

        self.assertEqual(paddle.nn.initializer.calculate_gain('sigmoid'), 1)
        self.assertEqual(paddle.nn.initializer.calculate_gain('linear'), 1)
        self.assertEqual(paddle.nn.initializer.calculate_gain('conv2d'), 1)
        self.assertEqual(paddle.nn.initializer.calculate_gain('tanh'), 5.0 / 3)
        self.assertEqual(
            paddle.nn.initializer.calculate_gain('relu'), math.sqrt(2.0))
        self.assertEqual(
            paddle.nn.initializer.calculate_gain('leaky_relu', 1), 1)
        self.assertEqual(paddle.nn.initializer.calculate_gain('selu'), 3.0 / 4)

    def test_constant_initializer_default_value(self, dtype="float32"):
        """Test the constant initializer with default value
        """
@@ -716,5 +728,192 @@ def run_static_graph():
        self.assertTrue(np.array_equal(dynamic_res[1], static_res[1]))


# 2-D Parameter with shape: [10, 15]
class TestOrthogonalInitializer1(unittest.TestCase):
"""
case 1
"""

def config(self):
self.weight_attr = paddle.ParamAttr(
initializer=paddle.nn.initializer.Orthogonal(gain=3.0))
self.dtype = "float64"
self.in_features = 10
self.out_features = 15
self.num_ops = 9

def check_result(self, a, b):
self.assertTrue(np.array_equal(a, b))
self.assertTrue(np.allclose(np.matmul(a, a.T), 9 * np.eye(10)))

def test_orthogonal(self):
self.config()
paddle.set_default_dtype(self.dtype)

paddle.disable_static()
paddle.seed(2021)
linear = paddle.nn.Linear(
self.in_features, self.out_features, weight_attr=self.weight_attr)
res_dygraph = linear.weight.numpy()

paddle.enable_static()
paddle.seed(2021)
start_prog = paddle.static.Program()
main_prog = paddle.static.Program()
with paddle.static.program_guard(main_prog, start_prog):
linear = paddle.nn.Linear(
self.in_features,
self.out_features,
weight_attr=self.weight_attr)

block = start_prog.global_block()
self.assertEqual(len(block.ops), self.num_ops)
self.assertEqual(block.ops[0].type, 'gaussian_random')
self.assertEqual(block.ops[1].type, 'qr')
self.assertEqual(block.ops[2].type, 'diag_v2')
self.assertEqual(block.ops[3].type, 'sign')
self.assertEqual(block.ops[4].type, 'elementwise_mul')
self.assertEqual(block.ops[-3].type, 'reshape2')
self.assertEqual(block.ops[-2].type, 'scale')

exe = paddle.static.Executor()
res_static = exe.run(start_prog, fetch_list=[linear.weight])[0]

self.check_result(res_dygraph, res_static)


# 2-D Parameter with shape: [15, 10]
class TestOrthogonalInitializer2(TestOrthogonalInitializer1):
"""
case 2
"""

def config(self):
self.weight_attr = paddle.ParamAttr(
initializer=paddle.nn.initializer.Orthogonal(gain=2.0))
self.dtype = "float64"
self.in_features = 15
self.out_features = 10
self.num_ops = 8

def check_result(self, a, b):
self.assertTrue(np.array_equal(a, b))
self.assertTrue(np.allclose(np.matmul(a.T, a), 4 * np.eye(10)))


# 2-D Parameter with shape: [10, 10]
class TestOrthogonalInitializer3(TestOrthogonalInitializer1):
"""
case 3
"""

def config(self):
self.weight_attr = paddle.ParamAttr(
initializer=paddle.nn.initializer.Orthogonal())
self.dtype = "float32"
self.in_features = 10
self.out_features = 10
self.num_ops = 8

def check_result(self, a, b):
self.assertTrue(np.array_equal(a, b))
self.assertTrue(np.allclose(np.matmul(a.T, a), np.eye(10), atol=1.e-6))
self.assertTrue(np.allclose(np.matmul(a, a.T), np.eye(10), atol=1.e-6))

def test_error(self):
self.config()
with self.assertRaises(AssertionError):
paddle.nn.Linear(10, 10, bias_attr=self.weight_attr)


# 4-D Parameter with shape: [6, 4, 3, 3]
class TestOrthogonalInitializer4(unittest.TestCase):
"""
case 4
"""

def config(self):
self.weight_attr = paddle.ParamAttr(
initializer=paddle.nn.initializer.Orthogonal(gain=3.0))
self.dtype = "float64"
self.in_features = 4
self.out_features = 6
self.kernel_size = (3, 3)

def check_result(self, a, b):
self.assertTrue(np.array_equal(a, b))
a = a.reshape(6, -1)
self.assertTrue(np.allclose(np.matmul(a, a.T), 9 * np.eye(6)))

def test_orthogonal(self):
self.config()
paddle.set_default_dtype(self.dtype)

paddle.disable_static()
paddle.seed(2021)
conv2d = paddle.nn.Conv2D(
self.in_features,
self.out_features,
self.kernel_size,
weight_attr=self.weight_attr)
res_dygraph = conv2d.weight.numpy()

paddle.enable_static()
paddle.seed(2021)
start_prog = paddle.static.Program()
main_prog = paddle.static.Program()
with paddle.static.program_guard(main_prog, start_prog):
conv2d = paddle.nn.Conv2D(
self.in_features,
self.out_features,
self.kernel_size,
weight_attr=self.weight_attr)
exe = paddle.static.Executor()
res_static = exe.run(paddle.static.default_startup_program(),
fetch_list=[conv2d.weight])[0]
self.check_result(res_dygraph, res_static)


# 4-D Parameter with shape: [50, 4, 3, 3]
class TestOrthogonalInitializer5(TestOrthogonalInitializer4):
"""
case 5
"""

def config(self):
self.weight_attr = paddle.ParamAttr(
initializer=paddle.nn.initializer.Orthogonal(gain=2.0))
self.dtype = "float64"
self.in_features = 4
self.out_features = 50
self.kernel_size = (3, 3)

def check_result(self, a, b):
self.assertTrue(np.array_equal(a, b))
a = a.reshape(50, -1)
self.assertTrue(np.allclose(np.matmul(a.T, a), 4 * np.eye(36)))


# 4-D Parameter with shape: [36, 4, 3, 3]
class TestOrthogonalInitializer6(TestOrthogonalInitializer4):
"""
case 6
"""

def config(self):
self.weight_attr = paddle.ParamAttr(
initializer=paddle.nn.initializer.Orthogonal())
self.dtype = "float32"
self.in_features = 4
self.out_features = 36
self.kernel_size = (3, 3)

def check_result(self, a, b):
self.assertTrue(np.array_equal(a, b))
a = a.reshape(36, -1)
self.assertTrue(np.allclose(np.matmul(a.T, a), np.eye(36), atol=1.e-6))
self.assertTrue(np.allclose(np.matmul(a, a.T), np.eye(36), atol=1.e-6))


if __name__ == '__main__':
    unittest.main()
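
A quick sketch (not part of the PR) of the property the cases above assert: the rows of an Orthogonal-initialized weight are mutually orthogonal with squared norm gain**2. The shape [10, 15] and gain=3.0 mirror case 1:

    import numpy as np
    import paddle

    paddle.set_default_dtype("float64")
    paddle.seed(2021)
    weight_attr = paddle.ParamAttr(
        initializer=paddle.nn.initializer.Orthogonal(gain=3.0))
    linear = paddle.nn.Linear(10, 15, weight_attr=weight_attr)

    w = linear.weight.numpy()  # shape [10, 15]
    # For the short dimension, W @ W.T should equal gain**2 * I.
    assert np.allclose(w @ w.T, 9.0 * np.eye(10))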
7 changes: 6 additions & 1 deletion python/paddle/nn/initializer/__init__.py
@@ -15,6 +15,7 @@
# TODO: define the initializers to create a Parameter in neural network
from ...fluid.initializer import Bilinear # noqa: F401
from ...fluid.initializer import set_global_initializer # noqa: F401
from ...fluid.initializer import calculate_gain # noqa: F401

from .constant import Constant # noqa: F401

@@ -31,6 +32,8 @@

from .uniform import Uniform # noqa: F401

from .orthogonal import Orthogonal # noqa: F401

__all__ = [ #noqa
    'Bilinear',
    'Constant',
@@ -42,5 +45,7 @@
    'Normal',
    'TruncatedNormal',
    'Uniform',
    'Orthogonal',
    'set_global_initializer',
    'calculate_gain'
]
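
With these re-exports, both new symbols are importable from the public namespace. A minimal check (illustrative, not part of the diff):

    from paddle.nn.initializer import Orthogonal, calculate_gain

    print(calculate_gain('leaky_relu'))  # sqrt(2 / (1 + 0.01**2)), about 1.4141
    init = Orthogonal(gain=1.0)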