Commit 1f93582

levi131 and JiabinYang authored
Add functional autograd API: hessian (#36108)
* init functional jacobian api
* finish test with dtype float32
* add float64 test case
* polish code
* use atol=1e-5 with dtype float64
* fix for ci
* set timeout for test_jacobian
* init hessian API
* save status
* polish API docstring
* modify docstring
* add utils.py
* save status
* fix dygraph double grad dtype error when calling for high differential scenario
* reinvoke ci
* test_hessian.py is ok
* polish hessian API
* init vhp
* Revert "init vhp" (this reverts commit cbd4d3b)
* add test for partial_engine.cc
* modify numerical_delta with dtype float32
* merge fix for dtype float64
* spell fix
* polish code
* rm _stop_gradient_pre_process

Co-authored-by: JiabinYang <360788950@qq.com>
1 parent a9ea41c commit 1f93582

File tree

7 files changed: +426 additions, -85 deletions


python/paddle/autograd/__init__.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -18,6 +18,6 @@
 from .py_layer import PyLayer, PyLayerContext  # noqa: F401
 from ..framework import set_grad_enabled  # noqa: F401
 from ..fluid.dygraph.base import no_grad_ as no_grad  # noqa: F401
-from .functional import jacobian  # noqa: F401
+from .functional import jacobian, hessian  # noqa: F401

 __all__ = ['backward', 'PyLayer', 'PyLayerContext']
```
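
With this re-export in place, `hessian` becomes reachable from `paddle.autograd` just like `jacobian`. A minimal import check (not part of the diff):

```python
import paddle

# Both names are re-exported by python/paddle/autograd/__init__.py as of this commit.
from paddle.autograd import jacobian, hessian

print(callable(jacobian), callable(hessian))  # True True
```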

python/paddle/autograd/functional.py

Lines changed: 127 additions & 25 deletions
```diff
@@ -13,34 +13,10 @@
 # limitations under the License.

 from paddle.fluid import framework
+from .utils import _check_tensors, _stack_tensor_or_return_none, _replace_none_with_zero_tensor
 import paddle


-def _check_tensors(in_out_list, name):
-    assert in_out_list is not None, "{} should not be None".format(name)
-
-    if isinstance(in_out_list, (list, tuple)):
-        assert len(in_out_list) > 0, "{} connot be empyt".format(name)
-        for each_var in in_out_list:
-            assert isinstance(
-                each_var,
-                paddle.Tensor), "Elements of {} must be paddle.Tensor".format(
-                    name)
-        return in_out_list
-    else:
-        assert isinstance(
-            in_out_list,
-            paddle.Tensor), "{} must be Tensor or list of Tensor".format(name)
-        return [in_out_list]
-
-
-def _stack_tensor_or_return_none(origin_list):
-    assert len(origin_list) > 0, "Can't not stack an empty list"
-    return paddle.stack(
-        origin_list, axis=0) if isinstance(origin_list[0],
-                                           paddle.Tensor) else None
-
-
 @framework.dygraph_only
 def jacobian(func, inputs, create_graph=False, allow_unused=False):
     '''
@@ -183,3 +159,129 @@ def func(x, y):
         return jacobian[0]
     else:
         return jacobian
+
+
+@framework.dygraph_only
+def hessian(func, inputs, create_graph=False, allow_unused=False):
+    '''
+    .. note::
+        **This API is ONLY available in imperative mode.**
+
+    This API computes the Hessian matrix of `func` with respect to `inputs`.
+
+    Parameters:
+        func (function): a Python function that takes a Tensor or a Tensor
+            list/tuple as inputs and returns a Tensor with a single element.
+        inputs (Tensor|list(Tensor)|tuple(Tensor)): the input Tensor or
+            Tensor list/tuple of the function ``func``.
+        create_graph (bool, optional): whether to create the gradient graphs
+            of the computing process. When it is True, higher-order derivatives
+            can be computed; when it is False, the gradient graphs of the
+            computing process are discarded. Defaults to ``False``.
+        allow_unused (bool, optional): whether to raise an error or return None
+            if some Tensors of `inputs` are unreachable in the graph. An error
+            is raised if allow_unused=False, and None is returned as their
+            gradients if allow_unused=True. Defaults to ``False``.
+    Returns:
+        Hessian (Tensor or a tuple of tuple of Tensors): if function ``func``
+        takes a Tensor as ``inputs``, Hessian will be a single Tensor containing
+        the Hessian matrix for the linearized ``inputs`` Tensor. If function
+        ``func`` takes a Tensor list/tuple as ``inputs``, then the Hessian will
+        be a tuple of tuples of Tensors where ``Hessian[i][j]`` contains the
+        Hessian matrix of the ``i``-th input and ``j``-th input with size
+        ``m * n``. Here ``m`` and ``n`` denote the number of elements of the
+        ``i``-th input and the ``j``-th input respectively.
+
+    Example 1:
+        .. code-block:: python
+
+            import paddle
+
+            def func(x):
+                return paddle.sum(paddle.matmul(x, x))
+
+            x = paddle.ones(shape=[2, 2], dtype='float32')
+            x.stop_gradient = False
+            hessian = paddle.autograd.hessian(func, x)
+            print(hessian)
+            # Tensor(shape=[4, 4], dtype=float32, place=CUDAPlace(0), stop_gradient=True,
+            #        [[2., 1., 1., 0.],
+            #         [1., 0., 2., 1.],
+            #         [1., 2., 0., 1.],
+            #         [0., 1., 1., 2.]])
+
+    Example 2:
+        .. code-block:: python
+
+            import paddle
+
+            def func(x, y):
+                return paddle.sum(paddle.matmul(x, y))
+
+            x = paddle.ones(shape=[2, 2], dtype='float32')
+            y = paddle.ones(shape=[2, 2], dtype='float32')
+            x.stop_gradient = False
+            y.stop_gradient = False
+            hessian = paddle.autograd.hessian(func, [x, y])
+            print(hessian)
+            # ((Tensor(shape=[4, 4], dtype=float32, place=CUDAPlace(0), stop_gradient=True,
+            #          [[0., 0., 0., 0.],
+            #           [0., 0., 0., 0.],
+            #           [0., 0., 0., 0.],
+            #           [0., 0., 0., 0.]]),
+            #   Tensor(shape=[4, 4], dtype=float32, place=CUDAPlace(0), stop_gradient=True,
+            #          [[1., 1., 0., 0.],
+            #           [0., 0., 1., 1.],
+            #           [1., 1., 0., 0.],
+            #           [0., 0., 1., 1.]])),
+            #  (Tensor(shape=[4, 4], dtype=float32, place=CUDAPlace(0), stop_gradient=True,
+            #          [[1., 0., 1., 0.],
+            #           [1., 0., 1., 0.],
+            #           [0., 1., 0., 1.],
+            #           [0., 1., 0., 1.]]),
+            #   Tensor(shape=[4, 4], dtype=float32, place=CUDAPlace(0), stop_gradient=True,
+            #          [[0., 0., 0., 0.],
+            #           [0., 0., 0., 0.],
+            #           [0., 0., 0., 0.],
+            #           [0., 0., 0., 0.]])))
+
+    Example 3:
+        .. code-block:: python
+
+            import paddle
+
+            def func(x, y):
+                return paddle.sum(paddle.matmul(x, x))
+
+            x = paddle.ones(shape=[2, 2], dtype='float32')
+            y = paddle.ones(shape=[2, 2], dtype='float32')
+            x.stop_gradient = False
+            y.stop_gradient = False
+            hessian = paddle.autograd.hessian(func, [x, y], allow_unused=True)
+            print(hessian)
+            # ((Tensor(shape=[4, 4], dtype=float32, place=CUDAPlace(0), stop_gradient=True,
+            #          [[2., 1., 1., 0.],
+            #           [1., 0., 2., 1.],
+            #           [1., 2., 0., 1.],
+            #           [0., 1., 1., 2.]]), None), (None, None))
+
+    '''
+    inputs = _check_tensors(inputs, "inputs")
+    outputs = func(*inputs)
+    assert isinstance(outputs, paddle.Tensor) and outputs.shape == [
+        1
+    ], "The function to compute the Hessian matrix should return a Tensor with a single element"
+
+    def jac_func(*ins):
+        grad_inputs = paddle.grad(
+            outputs,
+            ins,
+            create_graph=True,
+            retain_graph=True,
+            allow_unused=allow_unused)
+        return tuple(
+            _replace_none_with_zero_tensor(grad_inputs[i], inputs[i])
+            for i in range(len(inputs)))

+    return jacobian(
+        jac_func, inputs, create_graph=create_graph, allow_unused=allow_unused)
```
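
For readers tracing the implementation: the Hessian is computed as the Jacobian of a gradient function — `jac_func` returns `paddle.grad` of the scalar output with the graph kept alive, and `jacobian` then differentiates each entry of that gradient. A standalone sketch of the same composition written directly against `paddle.grad` (a sketch only, assuming a dygraph session; the numbers should match Example 1 in the docstring above):

```python
import numpy as np
import paddle

def func(x):
    return paddle.sum(paddle.matmul(x, x))

x = paddle.ones(shape=[2, 2], dtype='float32')
x.stop_gradient = False

# First-order gradient, kept differentiable so it can be differentiated again.
grad_x, = paddle.grad(func(x), [x], create_graph=True)
flat_grad = paddle.flatten(grad_x)

# Differentiating each entry of the gradient w.r.t. x yields one Hessian row.
rows = []
for k in range(int(np.prod(grad_x.shape))):
    row, = paddle.grad(flat_grad[k], [x], retain_graph=True)
    rows.append(paddle.flatten(row))
print(paddle.stack(rows, axis=0))  # expected: the 4x4 matrix from Example 1
```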

python/paddle/autograd/utils.py

Lines changed: 49 additions & 0 deletions (new file)

```python
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import paddle


def _check_tensors(in_out_list, name):
    assert in_out_list is not None, "{} should not be None".format(name)

    if isinstance(in_out_list, (list, tuple)):
        assert len(in_out_list) > 0, "{} cannot be empty".format(name)
        for each_var in in_out_list:
            assert isinstance(
                each_var,
                paddle.Tensor), "Elements of {} must be paddle.Tensor".format(
                    name)
        return list(in_out_list)
    else:
        assert isinstance(
            in_out_list,
            paddle.Tensor), "{} must be Tensor or list of Tensor".format(name)
        return [in_out_list]


def _stack_tensor_or_return_none(origin_list):
    assert len(origin_list) > 0, "Cannot stack an empty list"
    return paddle.stack(
        origin_list, axis=0) if isinstance(origin_list[0],
                                           paddle.Tensor) else None


def _replace_none_with_zero_tensor(t, spec_t):
    if t is None:
        zero_t = paddle.zeros(shape=spec_t.shape, dtype=spec_t.dtype)
        zero_t.stop_gradient = spec_t.stop_gradient
        return zero_t
    else:
        return t
```
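
These helpers are now shared by `jacobian` and `hessian`; `_replace_none_with_zero_tensor` in particular is what lets `jac_func` hand `jacobian` a dense tuple when `allow_unused=True` leaves some gradients as `None`. A quick illustration (a sketch; assumes the private module is importable as `paddle.autograd.utils`):

```python
import paddle
from paddle.autograd.utils import _replace_none_with_zero_tensor

spec = paddle.ones(shape=[2, 2], dtype='float32')
spec.stop_gradient = False

# A None gradient (e.g. from an unused input) becomes a zero tensor with
# spec's shape, dtype, and stop_gradient flag.
z = _replace_none_with_zero_tensor(None, spec)
print(z.shape, z.stop_gradient)  # [2, 2] False

# A real tensor passes through unchanged.
t = paddle.rand(shape=[2, 2])
assert _replace_none_with_zero_tensor(t, spec) is t
```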

python/paddle/fluid/tests/unittests/autograd/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
```diff
@@ -7,3 +7,4 @@ foreach(TEST_OP ${TEST_OPS})
 endforeach(TEST_OP)

 set_tests_properties(test_jacobian PROPERTIES TIMEOUT 20)
+set_tests_properties(test_hessian PROPERTIES TIMEOUT 20)
```
python/paddle/fluid/tests/unittests/autograd/test_hessian.py

Lines changed: 140 additions & 0 deletions (new file)
```python
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest
import numpy as np
import paddle
import paddle.compat as cpt
from utils import _compute_numerical_hessian


class TestHessian(unittest.TestCase):
    @classmethod
    def setUpClass(self):
        self.shape = (2, 2)
        self.dtype = 'float32'
        self.np_dtype = np.float32
        self.numerical_delta = 1e-2
        self.rtol = 1e-2
        self.atol = 1e-2
        self.x = paddle.rand(shape=self.shape, dtype=self.dtype)
        self.y = paddle.rand(shape=self.shape, dtype=self.dtype)

    def test_single_input(self):
        def func(x):
            return paddle.sum(paddle.matmul(x, x))

        numerical_hessian = _compute_numerical_hessian(
            func, self.x, self.numerical_delta, self.np_dtype)

        self.x.stop_gradient = False
        hessian = paddle.autograd.hessian(func, self.x)
        assert np.allclose(hessian.numpy(), numerical_hessian[0][0], self.rtol,
                           self.atol)

    def test_multi_input(self):
        def func(x, y):
            return paddle.sum(paddle.matmul(x, y))

        numerical_hessian = _compute_numerical_hessian(
            func, [self.x, self.y], self.numerical_delta, self.np_dtype)

        self.x.stop_gradient = False
        self.y.stop_gradient = False
        hessian = paddle.autograd.hessian(func, [self.x, self.y])
        for i in range(len(hessian)):
            for j in range(len(hessian[0])):
                assert np.allclose(hessian[i][j].numpy(),
                                   numerical_hessian[i][j], self.rtol,
                                   self.atol)

    def test_allow_unused_false(self):
        def func(x, y):
            return paddle.sum(paddle.matmul(x, x))

        try:
            self.x.stop_gradient = False
            self.y.stop_gradient = False
            hessian = paddle.autograd.hessian(func, [self.x, self.y])
        except ValueError as e:
            error_msg = cpt.get_exception_message(e)
            assert error_msg.find("allow_unused") > 0

    def test_allow_unused_true(self):
        def func(x, y):
            return paddle.sum(paddle.matmul(x, x))

        numerical_hessian = _compute_numerical_hessian(
            func, [self.x, self.y], self.numerical_delta, self.np_dtype)
        self.x.stop_gradient = False
        self.y.stop_gradient = False
        hessian = paddle.autograd.hessian(
            func, [self.x, self.y], allow_unused=True)
        for i in range(len(hessian)):
            for j in range(len(hessian[0])):
                if i == j == 0:
                    assert np.allclose(hessian[i][j].numpy(),
                                       numerical_hessian[i][j], self.rtol,
                                       self.atol)
                else:
                    assert hessian[i][j] is None

    def test_create_graph_false(self):
        def func(x):
            return paddle.sum(paddle.matmul(x, x))

        numerical_hessian = _compute_numerical_hessian(
            func, self.x, self.numerical_delta, self.np_dtype)
        self.x.stop_gradient = False
        hessian = paddle.autograd.hessian(func, self.x)
        assert hessian.stop_gradient == True
        assert np.allclose(hessian.numpy(), numerical_hessian[0][0], self.rtol,
                           self.atol)
        try:
            paddle.grad(hessian, self.x)
        except RuntimeError as e:
            error_msg = cpt.get_exception_message(e)
            assert error_msg.find("has no gradient") > 0

    # TODO(levi): enable this test case when matmul_grad_grad_grad is ok
    def _test_create_graph_true(self):
        def func(x):
            return paddle.sum(paddle.matmul(x, x))

        numerical_hessian = _compute_numerical_hessian(
            func, self.x, self.numerical_delta, self.np_dtype)
        self.x.stop_gradient = False
        hessian = paddle.autograd.hessian(func, self.x, create_graph=True)
        assert hessian.stop_gradient == False
        assert np.allclose(hessian.numpy(), numerical_hessian[0][0], self.rtol,
                           self.atol)
        triple_grad = paddle.grad(hessian, self.x)
        assert triple_grad is not None


class TestHessianFloat64(TestHessian):
    @classmethod
    def setUpClass(self):
        self.shape = (2, 2)
        self.dtype = 'float64'
        self.np_dtype = np.float64
        self.numerical_delta = 1e-5
        self.rtol = 1e-5
        self.atol = 1e-5
        self.x = paddle.rand(shape=self.shape, dtype=self.dtype)
        self.y = paddle.rand(shape=self.shape, dtype=self.dtype)


if __name__ == "__main__":
    unittest.main()
```
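
The numerical reference `_compute_numerical_hessian` is imported from the test directory's `utils` module, which is not part of this diff. A plausible central-difference sketch of such a helper, for orientation only (hypothetical: the name `_numerical_hessian_sketch`, the flattening scheme, and the return layout `hessian[i][j]` as an `m x n` numpy array are assumptions consistent with how the tests index the result; the real helper may differ):

```python
import numpy as np
import paddle

def _numerical_hessian_sketch(func, xs, delta, np_dtype):
    # Flatten all inputs into one numpy vector so single entries can be perturbed.
    xs = list(xs) if isinstance(xs, (list, tuple)) else [xs]
    sizes = [int(np.prod(x.shape)) for x in xs]
    offsets = np.cumsum([0] + sizes)
    flat0 = np.concatenate([x.numpy().reshape(-1) for x in xs]).astype(np_dtype)

    def eval_at(flat):
        # Rebuild the input tensors from a perturbed flat vector and call func.
        ts = [paddle.to_tensor(
                  flat[offsets[i]:offsets[i + 1]].reshape(xs[i].shape),
                  dtype=xs[i].dtype) for i in range(len(xs))]
        return float(func(*ts))

    def second_diff(i, p, j, q):
        # Central second-order difference for d^2 f / (dx_i[p] dx_j[q]).
        def f(dp, dq):
            v = flat0.copy()
            v[offsets[i] + p] += dp
            v[offsets[j] + q] += dq
            return eval_at(v)
        return (f(delta, delta) - f(delta, -delta) - f(-delta, delta)
                + f(-delta, -delta)) / (4.0 * delta * delta)

    return [[np.array([[second_diff(i, p, j, q) for q in range(sizes[j])]
                       for p in range(sizes[i])], dtype=np_dtype)
             for j in range(len(xs))] for i in range(len(xs))]
```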
