
[Bug fixes] Add default arg to enhance varbase ClearGradient func #36837


Merged
merged 6 commits on Nov 11, 2021

Changes from all commits
35 changes: 30 additions & 5 deletions paddle/fluid/imperative/layer.cc
@@ -28,7 +28,6 @@
#endif

DECLARE_bool(use_mkldnn);

namespace paddle {
namespace imperative {

@@ -186,7 +185,7 @@ size_t VarBase::GradOpNum() const {
return grad_node_ ? grad_node_->size() : 0;
}

-void VarBase::ClearGradient() {
+void VarBase::ClearGradient(bool set_to_zero) {
VLOG(4) << "ClearGradient " << Name();
if (grad_var_) {
if (grad_var_->Var().IsType<framework::SelectedRows>()) {
@@ -204,9 +203,13 @@ void VarBase::ClearGradient() {
auto* grad_t =
grad_var_->MutableVar()->GetMutable<framework::LoDTensor>();
if (grad_t->IsInitialized()) {
-        auto* dev_ctx =
-            platform::DeviceContextPool::Instance().Get(grad_t->place());
-        operators::math::set_constant(*dev_ctx, grad_t, 0.0);
+        if (set_to_zero) {
+          auto* dev_ctx =
+              platform::DeviceContextPool::Instance().Get(grad_t->place());
+          operators::math::set_constant(*dev_ctx, grad_t, 0.0);
+        } else {
+          grad_t->clear();
+        }
#ifdef PADDLE_WITH_MKLDNN
if (FLAGS_use_mkldnn) ClearMKLDNNCache(grad_t->place());
#endif
@@ -219,6 +222,28 @@
}
}
}

void VarBase::_GradientSetEmpty(bool is_empty) {
VLOG(4) << "Set gradient " << Name() << " is_empty:" << is_empty;
if (grad_var_) {
auto share_var = grad_var_->SharedVar();
if (share_var) {
share_var->SetIsEmpty(is_empty);
}
}
}

bool VarBase::_IsGradientSetEmpty() {
bool res = true;
if (grad_var_) {
auto share_var = grad_var_->SharedVar();
if (share_var) {
res = share_var->is_empty_;
VLOG(4) << "Check gradient " << Name() << " is empty:" << res;
}
}
return res;
}

std::shared_ptr<VarBase> VarBase::NewVarBase(const platform::Place& dst_place,
const bool blocking) const {
PADDLE_ENFORCE_EQ(
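Taken together, the layer.cc change makes the clearing behavior selectable: with set_to_zero = true (the default, preserving the old behavior) the gradient tensor keeps its allocation and is filled with zeros, while false releases the underlying storage via grad_t->clear(). A minimal Python-side sketch of the observable difference, assuming a Paddle build that includes this PR:

import paddle

linear = paddle.nn.Linear(2, 3)
out = linear(paddle.uniform([10, 2]))
out.backward()

# Default path: the gradient stays allocated and is zero-filled.
linear.weight.clear_gradient()  # same as clear_gradient(True)
print(linear.weight.grad)       # a [2, 3] tensor of zeros

# New path: the gradient's storage is released outright.
linear.weight.clear_gradient(False)
print(linear.weight.grad)       # no gradient data remains, so None is expected
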
5 changes: 4 additions & 1 deletion paddle/fluid/imperative/layer.h
@@ -221,7 +221,10 @@ class VarBase {

const platform::Place Place() const { return var_->Place(); }

-  void ClearGradient();
+  void ClearGradient(bool set_to_zero = true);

void _GradientSetEmpty(bool is_empty = true);
bool _IsGradientSetEmpty();

std::shared_ptr<VarBase> NewVarBase(const platform::Place& dst_place,
const bool blocking) const;
6 changes: 5 additions & 1 deletion paddle/fluid/pybind/imperative.cc
@@ -1443,7 +1443,8 @@ void BindImperative(py::module *m_ptr) {
# one of the variables needed for gradient computation has been modified by an inplace operation.

)DOC")
.def("clear_gradient", &imperative::VarBase::ClearGradient, R"DOC(
.def("clear_gradient", &imperative::VarBase::ClearGradient,
py::arg("set_to_zero") = true, R"DOC(

Only for Tensor that has gradient, normally we use this for Parameters since other temporary Tensor doesn't have gradient.

@@ -1463,6 +1464,9 @@
linear.weight.clear_gradient()
print("After clear_gradient, linear.weight.grad: {}".format(linear.weight.grad))
)DOC")
.def("_gradient_set_empty", &imperative::VarBase::_GradientSetEmpty,
py::arg("set_is_empty") = true)
.def("_is_gradient_set_empty", &imperative::VarBase::_IsGradientSetEmpty)
.def("clone",
[](std::shared_ptr<imperative::VarBase> &self) {
const auto &tensor = self->Var().Get<framework::LoDTensor>();
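The two private bindings expose the is_empty flag on the shared gradient variable (SharedVar), which tracks whether gradient data has been accumulated. A short sketch of how they pair with clear_gradient(False), mirroring the unit test added below (names and behavior as defined in this PR):

import paddle

linear = paddle.nn.Linear(2, 3)
linear(paddle.uniform([10, 2])).backward()

# A real clear marks the shared grad var empty again.
linear.weight.clear_gradient(False)
assert linear.weight._is_gradient_set_empty()

# The flag can be flipped back by hand, e.g. to inspect the cleared
# gradient in tests.
linear.weight._gradient_set_empty(False)
assert not linear.weight._is_gradient_set_empty()
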
82 changes: 82 additions & 0 deletions
@@ -0,0 +1,82 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import paddle.fluid as fluid
import paddle
from paddle.fluid.wrapped_decorator import wrap_decorator
import unittest
from unittest import TestCase
import numpy as np


def _dygraph_guard_(func):
def __impl__(*args, **kwargs):
if fluid.in_dygraph_mode():
return func(*args, **kwargs)
else:
with fluid.dygraph.guard():
return func(*args, **kwargs)

return __impl__


dygraph_guard = wrap_decorator(_dygraph_guard_)


class TestDygraphClearGradient(TestCase):
def setUp(self):
self.input_shape = [10, 2]

@dygraph_guard
def test_tensor_method_clear_gradient_case1(self):
input = paddle.uniform(self.input_shape)
linear = paddle.nn.Linear(2, 3)
out = linear(input)
out.backward()
linear.weight.clear_gradient()

# actual result
gradient_actual = linear.weight.grad
# expected result
gradient_expected = np.zeros([2, 3]).astype('float64')
self.assertTrue(np.allclose(gradient_actual.numpy(), gradient_expected))

@dygraph_guard
def test_tensor_method_clear_gradient_case2(self):
input = paddle.uniform(self.input_shape)
linear = paddle.nn.Linear(2, 3)
out = linear(input)
out.backward()
        # the default for set_to_zero is True, so passing False
        # performs a real clear that releases the gradient storage
        linear.weight.clear_gradient(False)

        # before calling ._gradient_set_empty(False),
        # ._is_gradient_set_empty() should return True
self.assertTrue(linear.weight._is_gradient_set_empty())

        # reset the flag: ClearGradient calls SetIsEmpty(True), which is not what we want to test here
linear.weight._gradient_set_empty(False)
        # after calling ._gradient_set_empty(False),
        # ._is_gradient_set_empty() should return False
self.assertFalse(linear.weight._is_gradient_set_empty())

        # actual result
        gradient_actual = linear.weight.grad
        # expected result: after a real clear the grad tensor holds no data,
        # so no gradient is reported
        self.assertIsNone(gradient_actual)


if __name__ == '__main__':
unittest.main()
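
One motivation for the new flag, sketched as a hypothetical training loop: when gradients are cleared after every optimizer step anyway, clear_gradient(False) skips the zero-fill kernel and frees the gradient memory, at the cost of reallocating it on the next backward pass.

import paddle

model = paddle.nn.Linear(2, 3)
opt = paddle.optimizer.SGD(learning_rate=0.1, parameters=model.parameters())

for _ in range(3):
    loss = model(paddle.uniform([10, 2])).sum()
    loss.backward()
    opt.step()
    # Release gradient storage instead of zero-filling it; it is
    # reallocated automatically by the next backward pass.
    for p in model.parameters():
        p.clear_gradient(False)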