Commit afadb8c

[DoubleGrad PR #5] Enabled gradient computations for grad_tensors passed to paddle.grad() (PaddlePaddle#41198)
* [Refactor] refactored eager_gen.py PR #2
* [DoubleGrad PR #1] Decoupled code generation logics for Dygraph ForwardFunctions and GradNodes
* Fixed minor issue
* Adjusted logics of GenerateNodeCreationCodes and GenerateForwardDefinition
* Fixed issues
* Supported higher-order grad node generation
* [DoubleGrad PR #4] Supported higher-order GradNode generation
* [DoubleGrad #4] Bug Fixes to Double Grad Node Generation
* Fixed yaml typo
* Fixed yaml typo
* fixed minor issues
* [DoubleGrad PR #5] Enabled gradient computations for grad_tensors passed to paddle.grad()
* Fixed minor issue
* Fixed CI-Inference issue
* Fixed CI-inference issues
1 parent 56f108f commit afadb8c

File tree: 14 files changed, +124 −70 lines

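In short: grad_tensors passed to paddle.grad() now participate in autograd themselves, so gradients can be taken with respect to them. A minimal sketch of the newly enabled behavior (illustrative only; assumes an eager-mode paddle build containing this PR series):

import paddle

x = paddle.randn([3, 3])
x.stop_gradient = False
y = x * x  # dy/dx = 2x

# A grad tensor that itself requires grad
v = paddle.ones_like(y)
v.stop_gradient = False

# dx = 2 * x * v; with this change the multiplication by v is recorded,
# so gradients can flow back into v as well
(dx,) = paddle.grad([y], [x], grad_outputs=[v], create_graph=True)

# Differentiate dx w.r.t. the grad tensor v: d(sum(dx))/dv == 2 * x
(dv,) = paddle.grad([dx.sum()], [v])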

paddle/fluid/eager/CMakeLists.txt

Lines changed: 7 additions & 3 deletions

@@ -13,12 +13,16 @@ add_subdirectory(accumulation)
 add_subdirectory(custom_operator)
 if(NOT ((NOT WITH_PYTHON) AND ON_INFER))
   add_subdirectory(pylayer)
+  cc_library(grad_tensor_holder SRCS grad_tensor_holder.cc DEPS grad_node_info gradient_accumulator)
+  add_dependencies(grad_tensor_holder eager_final_state_codegen)
+  cc_library(backward SRCS backward.cc DEPS grad_tensor_holder utils autograd_meta grad_node_info)
 endif()
+
 cc_library(grad_node_info SRCS grad_node_info.cc DEPS phi_api phi_tensor)
-cc_library(grad_tensor_holder SRCS grad_tensor_holder.cc DEPS grad_node_info gradient_accumulator)
 
 cc_library(autograd_meta SRCS autograd_meta.cc DEPS phi_api phi_tensor)
 cc_library(utils SRCS utils.cc DEPS phi_api phi_tensor global_utils layer proto_desc operator op_registry variable_helper memcpy scale_op autograd_meta hook_utils)
-cc_library(backward SRCS backward.cc DEPS grad_tensor_holder utils autograd_meta grad_node_info)
 
-add_subdirectory(tests)
+if(NOT ((NOT WITH_PYTHON) AND ON_INFER))
+  add_subdirectory(tests)
+endif()

paddle/fluid/eager/api/utils/hook_utils.cc

Lines changed: 1 addition & 0 deletions

@@ -76,6 +76,7 @@ void RetainGradForTensor(const paddle::experimental::Tensor& tensor) {
       VLOG(7) << "Set impl for RetainGrad Hook for tensor: " << t.name();
       // Simply Copy impl() to grad_tensor
       grad_tensor->set_impl(t.impl());
+      grad_tensor->set_autograd_meta(t.mutable_autograd_meta());
       return *grad_tensor.get();
     } else {
       VLOG(7) << "Retain NULL paddle::experimental::Tensor in Grad Hook";

paddle/fluid/eager/backward.cc

Lines changed: 6 additions & 11 deletions

@@ -466,6 +466,7 @@ std::vector<paddle::experimental::Tensor> RunBackward(
       continue;
     }
 
+    // TODO(zhanlve): Copy and Modify GradNode if is_general_grad
     GradNodeBase* grad_node = shared_grad_node.get();
 
     // Prepare GradTensorHolder
@@ -486,16 +487,9 @@ std::vector<paddle::experimental::Tensor> RunBackward(
         // Feed given tensor if it's provided
         VLOG(6) << "Fill grad input tensor " << i << "with give grad tensor";
 
-        if (grad_tensors[i].is_initialized()) {
-          // Deep copy
-          paddle::experimental::Tensor tmp_tensor;
-          tmp_tensor.copy_(grad_tensors[i], grad_tensors[i].inner_place(), false);
-          node_input_buffers_dict[grad_node]->add(input_info.first,
-                                                  input_info.second, tmp_tensor);
-        } else {
-          node_input_buffers_dict[grad_node]->add(
-              input_info.first, input_info.second, grad_tensors[i]);
-        }
+        // Deep copy
+        node_input_buffers_dict[grad_node]->CopyValueFromTensor(
+            input_info.first, input_info.second, grad_tensors[i]);
 
       } else {
         VLOG(6) << "Fill grad input tensor " << i << " with 1.0";
@@ -504,7 +498,7 @@ std::vector<paddle::experimental::Tensor> RunBackward(
         // dims
         // GradTensorHolder will initialize another tensor with same tensortype,
         // datatype and dims but filled with 1.0
-        node_input_buffers_dict[grad_node]->add(
+        node_input_buffers_dict[grad_node]->CopyValueFromTensor(
            input_info.first, input_info.second, tensor, true /*fill_one=true*/);
       }
 
@@ -686,6 +680,7 @@ std::vector<paddle::experimental::Tensor> RunBackward(
       }
     }
   }
+
   if (!is_general_grad) return {};
   return GeneralGrad::Instance().GetResults(inputs, allow_unused, create_graph);
 }
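With the deep copy of user-provided grad_tensors moved from RunBackward into GradTensorHolder::CopyValueFromTensor, the backward buffers never alias the caller's tensor. A small sketch of the observable contract (illustrative only; assumes an eager-mode paddle build from this series):

import paddle

x = paddle.randn([2, 2])
x.stop_gradient = False
y = 3.0 * x

g = paddle.full([2, 2], 2.0)
(dx,) = paddle.grad([y], [x], grad_outputs=[g])

# Backward accumulates into a deep copy of g, never into g itself,
# so the user-provided grad tensor is left untouched.
print(g)   # still all 2.0
print(dx)  # 3.0 * g == all 6.0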

paddle/fluid/eager/grad_tensor_holder.cc

Lines changed: 79 additions & 39 deletions

@@ -15,6 +15,7 @@
 #include "paddle/fluid/eager/grad_tensor_holder.h"
 #include "paddle/fluid/imperative/gradient_accumulator.h"
 
+#include "paddle/fluid/eager/api/generated/eager_generated/forwards/dygraph_functions.h"
 #include "paddle/fluid/framework/convert_utils.h"
 #include "paddle/fluid/framework/var_type.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
@@ -26,9 +27,9 @@ void GradTensorHolder::SetBufferSlotRankZeros(size_t slot_id, size_t rank) {
       paddle::experimental::zeros_like(buffer_[slot_id][rank]);
 }
 
-void GradTensorHolder::add(size_t slot_id, size_t rank,
-                           const paddle::experimental::Tensor& t,
-                           bool fill_one) {
+void GradTensorHolder::CopyValueFromTensor(
+    size_t slot_id, size_t rank, const paddle::experimental::Tensor& t,
+    bool fill_one) {
   // TODO(jiabin): We need to deal with empty input_buffer with slot size not
   // empty;
   PADDLE_ENFORCE(slot_id < buffer_.size(),
@@ -50,44 +51,15 @@ void GradTensorHolder::add(size_t slot_id, size_t rank,
                      slot_id, buffer_[slot_id].size(), rank));
   if (!fill_one) {
     paddle::experimental::Tensor& buffer_tensor = buffer_[slot_id][rank];
-    // TODO(jiabin): Code bellow is ugly to divide which inner var we used,
-    // remove framework::Variable
-    // related code later.
-    // This if statement is trying to test neither phi::Tensor nor
-    // framework::Variable is initialized.
     if ((!buffer_tensor.defined() || !buffer_tensor.initialized())) {
-      // Simply copy tensor->impl
-      buffer_tensor = t;
+      // Perform deep copy here
+      buffer_tensor.copy_(t, t.inner_place(), false);
+      buffer_tensor.set_autograd_meta(t.mutable_autograd_meta());
+
     } else {
-      // Accumulation
-      PADDLE_ENFORCE_EQ(t.initialized(), true,
-                        paddle::platform::errors::Fatal(
-                            "We can only accumulate initialized tensor, but we "
-                            "got tensor: %s is empty please check you network "
-                            "and make sure it creates grads.",
-                            t.name()));
-      if (t.is_dense_tensor()) {
-        if (buffer_tensor.is_dense_tensor()) {
-          paddle::imperative::TensorAdd<paddle::experimental::Tensor>(
-              t, &buffer_tensor);
-        } else {
-          // TODO(jiabin): Support Other TensorBase later
-          paddle::experimental::Tensor new_buffer(
-              std::make_shared<phi::DenseTensor>(), "tmp_accumulator");
-          paddle::imperative::SelectedRowsAddTensor(buffer_tensor, t,
-                                                    &new_buffer);
-          buffer_tensor.set_impl(new_buffer.impl());
-        }
-      } else {
-        // TODO(jiabin): Support Other TensorBase later
-        if (buffer_tensor.is_dense_tensor()) {
-          paddle::imperative::SelectedRowsAddToTensor(t, &buffer_tensor);
-        } else {
-          buffer_tensor =
-              std::move(*paddle::imperative::SelectedRowsMerge<
-                        paddle::experimental::Tensor>(t, buffer_tensor));
-        }
-      }
+      PADDLE_THROW(paddle::platform::errors::Fatal(
+          "Cannot copy grad_tensors' value to grad tensor holders,"
+          "input buffer has already been initialized."));
     }
   } else {
     // Create new tensor->impl and fill it with 1.0
@@ -98,4 +70,72 @@ void GradTensorHolder::add(size_t slot_id, size_t rank,
   }
 }
 
+void GradTensorHolder::add(size_t slot_id, size_t rank,
+                           const paddle::experimental::Tensor& t) {
+  // TODO(jiabin): We need to deal with empty input_buffer with slot size not
+  // empty;
+  PADDLE_ENFORCE(slot_id < buffer_.size(),
+                 paddle::platform::errors::Fatal(
+                     "Invalid slot_id for GradTensorHolder::add() "
+                     "which exceeds size of buffer"));
+  VLOG(6) << "Add Tensor for buffer_ slot: " << slot_id
+          << ", size: " << buffer_[slot_id].size();
+  if (buffer_[slot_id].empty()) {
+    VLOG(6) << "Pass add Tensor for buffer_ slot: " << slot_id
+            << " since its buffer_ is empty ";
+    return;
+  }
+  PADDLE_ENFORCE(
+      rank < buffer_[slot_id].size(),
+      paddle::platform::errors::Fatal(
+          "Invalid rank for GradTensorHolder::add() which exceeds size "
+          "of buffer slot %d, got slot size is: %d rank is: %d",
+          slot_id, buffer_[slot_id].size(), rank));
+
+  paddle::experimental::Tensor& buffer_tensor = buffer_[slot_id][rank];
+  // TODO(jiabin): Code bellow is ugly to divide which inner var we used,
+  // remove framework::Variable
+  // related code later.
+  // This if statement is trying to test neither phi::Tensor nor
+  // framework::Variable is initialized.
+  if ((!buffer_tensor.defined() || !buffer_tensor.initialized())) {
+    // Simply copy tensor->impl
+    buffer_tensor = t;
+  } else {
+    // Accumulation
+    PADDLE_ENFORCE_EQ(t.initialized(), true,
+                      paddle::platform::errors::Fatal(
+                          "We can only accumulate initialized tensor, but we "
+                          "got tensor: %s is empty please check you network "
+                          "and make sure it creates grads.",
+                          t.name()));
+    if (t.is_dense_tensor()) {
+      if (buffer_tensor.is_dense_tensor()) {
+        buffer_tensor = add_final_state_dygraph_function(t, buffer_tensor);
+
+      } else {
+        // TODO(jiabin): Support Other TensorBase later
+        // TODO(zhanlve): Replace SelectedRowsAddTensor with
+        // add_dygraph_function once it's supported
+        paddle::experimental::Tensor new_buffer(
+            std::make_shared<phi::DenseTensor>(), "tmp_accumulator");
+        paddle::imperative::SelectedRowsAddTensor(buffer_tensor, t,
+                                                  &new_buffer);
+        buffer_tensor.set_impl(new_buffer.impl());
+      }
+    } else {
+      // TODO(jiabin): Support Other TensorBase later
+      // TODO(zhanlve): Replace SelectedRowsAddTensor with add_dygraph_function
+      // once it's supported
+      if (buffer_tensor.is_dense_tensor()) {
+        paddle::imperative::SelectedRowsAddToTensor(t, &buffer_tensor);
      } else {
+        buffer_tensor =
+            std::move(*paddle::imperative::SelectedRowsMerge<
+                      paddle::experimental::Tensor>(t, buffer_tensor));
+      }
+    }
+  }
+}
+
 } // namespace egr
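Because dense-on-dense accumulation now routes through add_final_state_dygraph_function (a generated, differentiable add) instead of the in-place paddle::imperative::TensorAdd, accumulated gradients stay on the autograd graph. A minimal sketch of what this enables (illustrative only; assumes an eager-mode build with the higher-order GradNodes from the earlier PRs in this series):

import paddle

x = paddle.randn([4])
x.stop_gradient = False

# Two paths into x force GradTensorHolder::add() to accumulate two dense grads.
y = paddle.sin(x) + paddle.cos(x)

(dx,) = paddle.grad([y.sum()], [x], create_graph=True)  # cos(x) - sin(x)

# The accumulation itself is differentiable now, so dx carries a grad
# graph and supports a second backward pass:
(ddx,) = paddle.grad([dx.sum()], [x])  # -sin(x) - cos(x)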

paddle/fluid/eager/grad_tensor_holder.h

Lines changed: 4 additions & 2 deletions

@@ -45,8 +45,10 @@ class GradTensorHolder {
   GradTensorHolder& operator=(const GradTensorHolder& other) = default;
 
   // Create new tensor and copy tensor->impl
-  void add(size_t slot_id, size_t rank, const paddle::experimental::Tensor& t,
-           bool fill_one = false);
+  void add(size_t slot_id, size_t rank, const paddle::experimental::Tensor& t);
+  void CopyValueFromTensor(size_t slot_id, size_t rank,
+                           const paddle::experimental::Tensor& t,
+                           bool fill_one = false);
 
   const std::vector<paddle::experimental::Tensor>& operator[](
       const size_t& pos) {
paddle/fluid/eager/tests/CMakeLists.txt

Lines changed: 1 addition & 4 deletions

@@ -1,6 +1,3 @@
 add_subdirectory(data_structure_tests)
 add_subdirectory(task_tests)
-
-if(NOT ((NOT WITH_PYTHON) AND ON_INFER))
-  add_subdirectory(performance_tests)
-endif()
+add_subdirectory(performance_tests)
paddle/fluid/eager/tests/data_structure_tests/CMakeLists.txt

Lines changed: 4 additions & 1 deletion

@@ -1,6 +1,9 @@
 cc_test(test_egr_ds_eager_tensor SRCS eager_tensor_test.cc DEPS ${eager_deps})
 cc_test(test_egr_ds_auotgrad_meta SRCS autograd_meta_test.cc DEPS ${eager_deps})
 cc_test(test_egr_ds_grad_node_info SRCS grad_node_info_test.cc DEPS ${eager_deps})
-cc_test(test_egr_ds_grad_tensor_holder SRCS grad_tensor_holder_test.cc DEPS ${eager_deps})
 cc_test(test_egr_ds_accumulation_node SRCS accumulation_node_test.cc DEPS ${eager_deps})
 cc_test(test_egr_ds_tensor_wrapper SRCS tensor_wrapper_test.cc DEPS ${eager_deps})
+
+if(NOT ((NOT WITH_PYTHON) AND ON_INFER))
+  cc_test(test_egr_ds_grad_tensor_holder SRCS grad_tensor_holder_test.cc DEPS ${eager_deps} ${generated_deps})
+endif()

paddle/fluid/eager/tests/data_structure_tests/grad_tensor_holder_test.cc

Lines changed: 6 additions & 5 deletions

@@ -25,6 +25,7 @@
 #include "paddle/phi/core/kernel_registry.h"
 
 PD_DECLARE_KERNEL(full_like, CPU, ALL_LAYOUT);
+PD_DECLARE_KERNEL(add, CPU, ALL_LAYOUT);
 
 // TODO(jiabin): remove nolint here!!!
 using namespace egr;  // NOLINT
@@ -77,11 +78,11 @@ TEST(GradTensorHolder, Interfaces) {
 
   // add():
   // fill one
-  grad_tensor_holder.add(0, 0, et0, true);
+  grad_tensor_holder.CopyValueFromTensor(0, 0, et0, true);
 
   // accumulation
-  grad_tensor_holder.add(1, 0, et0, false);
-  grad_tensor_holder.add(1, 0, et1, false);
+  grad_tensor_holder.add(1, 0, et0);
+  grad_tensor_holder.add(1, 0, et1);
 
   // Buffers()
   const auto& buffers = grad_tensor_holder.Buffers();
@@ -141,8 +142,8 @@ TEST(GradTensorHolder, SelectedRowsMergeAdd) {
       GradTensorHolder({slot_meta, slot_meta});
 
   // accumulation
-  grad_tensor_holder.add(0, 0, t1, false);
-  grad_tensor_holder.add(0, 0, t2, false);
+  grad_tensor_holder.add(0, 0, t1);
+  grad_tensor_holder.add(0, 0, t2);
 
   // Buffers()
   const auto& buffers = grad_tensor_holder.Buffers();
paddle/fluid/eager/tests/task_tests/CMakeLists.txt

Lines changed: 5 additions & 5 deletions

@@ -1,13 +1,13 @@
 cc_test(test_egr_task_tensor_utils SRCS tensor_utils_test.cc DEPS ${eager_deps})
 cc_test(test_egr_task_eager_utils SRCS eager_utils_test.cc DEPS ${eager_deps})
 cc_test(test_egr_task_forward_autograd SRCS forward_autograd_test.cc DEPS ${eager_deps} ${fluid_deps} eager_scale scale_node)
-cc_test(test_egr_task_backward SRCS backward_test.cc DEPS ${eager_deps} ${fluid_deps} eager_scale scale_node)
-cc_test(test_egr_task_hook SRCS hook_test.cc DEPS ${eager_deps} ${fluid_deps} eager_scale scale_node)
-cc_test(test_egr_task_cross_batch SRCS cross_batch_accumulation_test.cc DEPS ${eager_deps} ${fluid_deps} eager_scale scale_node)
-cc_test(test_egr_task_fwd_bwd_joint SRCS fwd_bwd_joint_test.cc DEPS ${eager_deps} ${fluid_deps} eager_scale scale_node)
-cc_test(test_egr_task_grad SRCS grad_test.cc DEPS ${eager_deps} ${fluid_deps} eager_scale scale_node)
 
 if(NOT ((NOT WITH_PYTHON) AND ON_INFER))
+  cc_test(test_egr_task_hook SRCS hook_test.cc DEPS ${eager_deps} ${fluid_deps} ${generated_deps} eager_scale scale_node)
+  cc_test(test_egr_task_backward SRCS backward_test.cc DEPS ${eager_deps} ${fluid_deps} ${generated_deps} eager_scale scale_node)
+  cc_test(test_egr_task_grad SRCS grad_test.cc DEPS ${eager_deps} ${fluid_deps} ${generated_deps} eager_scale scale_node)
+  cc_test(test_egr_task_fwd_bwd_joint SRCS fwd_bwd_joint_test.cc DEPS ${eager_deps} ${fluid_deps} ${generated_deps} eager_scale scale_node)
+  cc_test(test_egr_task_cross_batch SRCS cross_batch_accumulation_test.cc DEPS ${eager_deps} ${fluid_deps} ${generated_deps} eager_scale scale_node)
   cc_test(test_egr_task_hook_intermidiate SRCS hook_test_intermidiate.cc DEPS ${eager_deps} ${fluid_deps} ${generated_deps} dygraph_node)
   cc_test(test_egr_task_autocodegen SRCS generated_test.cc DEPS ${eager_deps} ${fluid_deps} ${generated_deps})
 endif()

paddle/fluid/eager/tests/task_tests/backward_test.cc

Lines changed: 1 addition & 0 deletions

@@ -34,6 +34,7 @@
 
 PD_DECLARE_KERNEL(full, CPU, ALL_LAYOUT);
 PD_DECLARE_KERNEL(copy, CPU, ALL_LAYOUT);
+PD_DECLARE_KERNEL(add, CPU, ALL_LAYOUT);
 
 namespace egr {
 