Merged
Changes from all commits
43 commits
b4055b2
add view strategy on squeeze,unsqueeze,reshape,flatten
pangyoki Dec 25, 2020
f40ce0e
add squeeze unittest
pangyoki Dec 28, 2020
e158f48
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
pangyoki Dec 28, 2020
34c4c0e
add unittests
pangyoki Dec 28, 2020
15ad08d
use View strategy as name rather than Reuse Allacation
pangyoki Dec 28, 2020
da3eb53
fix view api doc
pangyoki Dec 28, 2020
8f22386
fix format
pangyoki Dec 28, 2020
d009278
use core.ops when input of reshape2 is Tensor
pangyoki Jan 4, 2021
7e39478
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
pangyoki Jan 4, 2021
043e038
fix test_cross_entropy_loss error because of reshape2
pangyoki Jan 4, 2021
cce7caa
fix test_cross_entropy_loss error because of reshape2
pangyoki Jan 4, 2021
a03d8a7
add inplace strategy
pangyoki Jan 5, 2021
2ad1bfe
add elementwise_add sub
pangyoki Jan 5, 2021
ec89083
let backward op not use inplace
pangyoki Jan 6, 2021
90dd38d
grad op do not use inplace
pangyoki Jan 6, 2021
9285c68
fix memory increase error and add leaf error message
pangyoki Jan 7, 2021
b249b0f
solve conflict
pangyoki Jan 7, 2021
343baf9
delete selected_rows
pangyoki Jan 7, 2021
38f24d4
change op_function
pangyoki Jan 8, 2021
1d99e46
little change
pangyoki Jan 8, 2021
2bbef1a
solve HandleViewBetweenInputAndOutput
pangyoki Jan 8, 2021
4edfb0c
solve conflict
pangyoki Jan 8, 2021
1230d5c
add unittest and leaf error message
pangyoki Jan 8, 2021
281f680
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
pangyoki Jan 8, 2021
5e0573a
solve conflict
pangyoki Jan 8, 2021
6d2629c
merge view error
pangyoki Jan 8, 2021
6ef5c57
solve conflict
pangyoki Jan 10, 2021
2cd1066
optimize op_function_generator format and support sum inplace op
pangyoki Jan 10, 2021
393083a
fix format of basic_engine
pangyoki Jan 10, 2021
e30937a
fix format for framework
pangyoki Jan 10, 2021
ae92664
little change of variable wrapper
pangyoki Jan 10, 2021
c65efb1
add reshape, squeeze, unsqueeze, scatter api
pangyoki Jan 10, 2021
b4f6305
add relu elu tanh softmax inplace api
pangyoki Jan 10, 2021
1545e3b
fix test_squeeze_op unittest
pangyoki Jan 11, 2021
af3ad44
fix test_relu_op unittest
pangyoki Jan 11, 2021
55146f0
fix comment problems
pangyoki Jan 11, 2021
4b07958
delete sample code of inplace api
pangyoki Jan 11, 2021
8fb5fa7
add reference of grad_pending_nodes in basic_engine
pangyoki Jan 13, 2021
32752d4
fix unittest name
pangyoki Jan 13, 2021
41ae335
add inplace apis into wlist
pangyoki Jan 14, 2021
503b626
fix error message
pangyoki Jan 14, 2021
509dc07
add PADDLE_ENFORCE for set grad op twice
pangyoki Jan 14, 2021
0fa088f
fix head file error
pangyoki Jan 14, 2021
6 changes: 4 additions & 2 deletions paddle/fluid/framework/details/op_registry.h
@@ -14,6 +14,7 @@ limitations under the License. */

#pragma once

#include <map>
#include <memory>
#include <string>
#include <tuple>
@@ -247,8 +248,9 @@ struct OpInfoFiller<T, kGradOpBaseMaker> {
const std::string& type,
const imperative::NameVarBaseMap& var_base_map_in,
const imperative::NameVarBaseMap& var_base_map_out,
const framework::AttributeMap& attrs) {
T maker(type, var_base_map_in, var_base_map_out, attrs);
const framework::AttributeMap& attrs,
const std::map<std::string, std::string>& inplace_map) {
T maker(type, var_base_map_in, var_base_map_out, attrs, inplace_map);
return maker();
};
}
4 changes: 4 additions & 0 deletions paddle/fluid/framework/grad_op_desc_maker.h
@@ -221,6 +221,10 @@ class SingleGradOpMaker<imperative::OpBase>

std::shared_ptr<imperative::GradOpNode> operator()() const final {
auto node = this->NewGradNode();
auto& inplace_map = this->GetInplaceMap();
if (!inplace_map.empty()) {
node->SetInplaceGradNameMap(inplace_map);
}
{
imperative::TracedGradOp traced_grad_op(node);
try {
3 changes: 2 additions & 1 deletion paddle/fluid/framework/type_defs.h
@@ -60,7 +60,8 @@ using DygraphGradOpMakerFN =
const std::string& /*op_type*/,
const imperative::NameVarBaseMap& /*var_base_map_in*/,
const imperative::NameVarBaseMap& /*var_base_map_out*/,
const framework::AttributeMap& /*attributes*/)>;
const framework::AttributeMap& /*attributes*/,
const std::map<std::string, std::string>& /*inplace_map*/)>;

using InferVarTypeFN =
std::function<void(framework::InferVarTypeContext* /*context*/)>;
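The three hunks above thread one new argument, an inplace alias map, from op registration through to the dygraph grad-op maker, and SingleGradOpMaker only records it on the grad node when it is non-empty. Below is a minimal, Paddle-free sketch of the extended callback shape; the "reshape2" name, the "Out"/"X" alias pair, and its direction are illustrative assumptions rather than values taken from Paddle's registry.

#include <functional>
#include <iostream>
#include <map>
#include <string>

// Stand-in for DygraphGradOpMakerFN after this change: the trailing
// std::map<std::string, std::string> parameter is the new inplace map.
using InplaceMap = std::map<std::string, std::string>;
using GradMakerFN =
    std::function<void(const std::string& /*op_type*/,
                       const InplaceMap& /*inplace_map*/)>;

int main() {
  GradMakerFN maker = [](const std::string& op_type,
                         const InplaceMap& inplace_map) {
    if (inplace_map.empty()) {
      std::cout << op_type << ": traced without inplace aliasing\n";
      return;
    }
    for (const auto& kv : inplace_map) {
      std::cout << op_type << ": parameter '" << kv.first
                << "' may share storage with '" << kv.second << "'\n";
    }
  };
  maker("reshape2", {{"Out", "X"}});  // hypothetical alias pair
  maker("matmul_v2", {});             // non-inplace calls pass an empty map
  return 0;
}

The empty-map case matters because, per the grad_op_desc_maker.h hunk, SetInplaceGradNameMap is called only when the map is non-empty, so ordinary ops pay no extra cost.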
191 changes: 158 additions & 33 deletions paddle/fluid/imperative/basic_engine.cc
@@ -114,7 +114,9 @@ void BasicEngine::CheckBackwardInputs(const OpBase& op) {
}
}

void BasicEngine::PrepareGradAccumulators(const OpBase& op) {
void BasicEngine::PrepareGradAccumulators(
const OpBase& op,
const std::vector<std::shared_ptr<GradOpNode>>& grad_pending_nodes) {
for (const auto& pair : op.GetOutsMap()) {
if (!pair.second.IsGrad()) {
continue;
@@ -123,29 +125,94 @@ void BasicEngine::PrepareGradAccumulators(const OpBase& op) {
for (const auto& var : pair.second) {
if (!var) continue;

auto& accumulator = accumulators_[var.get()];
if (!accumulator) {
if (FLAGS_sort_sum_gradient) {
accumulator.reset(new SortedGradientAccumulator(var.get()));
} else {
accumulator.reset(new EagerGradientAccumulator(var.get()));
if (!var->HasGradNode()) {
auto& accumulator = accumulators_[var.get()];
if (!accumulator) {
if (FLAGS_sort_sum_gradient) {
accumulator.reset(new SortedGradientAccumulator(var.get()));
} else {
accumulator.reset(new EagerGradientAccumulator(var.get()));
}
}
}

accumulator->IncreaseRefCnt();
accumulator->IncreaseRefCnt();

VLOG(3) << "Prepare to acccumulate variable grad " << var->Name() << "("
<< var.get() << ") with reference count "
<< accumulator->RefCnt();
VLOG(3) << "Prepare to acccumulate variable grad " << var->Name() << "("
<< var.get()
<< ") that don't have grad node with reference count "
<< accumulator->RefCnt();

if (var->HasLeafHooks()) {
VLOG(3) << "Grad variable wrapper (" << var->Name()
<< ") has leaf grad hooks.";
PADDLE_ENFORCE_NE(
var->HasGradNode(), true,
platform::errors::PermissionDenied(
"Only leaf Tensor's gradient can append hook to "
"Gradientaccumulator."));
accumulator->SetPostHooks(var->GetLeafHooks());
}
} else {
// Because Inplace op overwrites the grad_node of the input grad_var. So
// only the information of grad_pending_node can be used to find the
// grad_node of grad_var.
bool find_grad_node_of_var = false;
for (auto& grad_pending_node : grad_pending_nodes) {
PADDLE_ENFORCE_NOT_NULL(
grad_pending_node,
platform::errors::NotFound("Grad pending node is nullptr."));
for (auto& grad_pending_op : *grad_pending_node) {
VLOG(6) << "Determine whether var (" << var->Name()
<< ") is the input var of grad_pending_op ("
<< grad_pending_op.Type() << ").";
grad_pending_op.EnforceHasInOut();
for (const auto& grad_pending_op_ins_pair :
grad_pending_op.GetInsMap()) {
if (!grad_pending_op_ins_pair.second.IsGrad()) {
continue;
}
for (const auto& pending_in_var :
grad_pending_op_ins_pair.second) {
if (var == pending_in_var) {
VLOG(6) << "Var (" << var->Name()
<< ") is the input var of grad_pending_op ("
<< grad_pending_op.Type() << ").";
find_grad_node_of_var = true;
break;
}
}
if (find_grad_node_of_var) {
break;
}
}
}

if (var->HasLeafHooks()) {
VLOG(3) << "Grad variable wrapper (" << var->Name()
<< ") has leaf grad hooks.";
PADDLE_ENFORCE_NE(var->HasGradNode(), true,
platform::errors::PermissionDenied(
"Only leaf Tensor's gradient can append hook to "
"Gradientaccumulator."));
accumulator->SetPostHooks(var->GetLeafHooks());
if (find_grad_node_of_var) {
auto& accumulator =
accumulators_with_grad_node_[grad_pending_node][var.get()];

if (!accumulator) {
if (FLAGS_sort_sum_gradient) {
accumulator.reset(new SortedGradientAccumulator(var.get()));
} else {
accumulator.reset(new EagerGradientAccumulator(var.get()));
}
}

accumulator->IncreaseRefCnt();

VLOG(3) << "Prepare to acccumulate variable grad " << var->Name()
<< "(" << var.get()
<< ") that has grad node with reference count "
<< accumulator->RefCnt();
break;
}
}
PADDLE_ENFORCE_EQ(
find_grad_node_of_var, true,
platform::errors::NotFound(
"No grad node corresponding to grad Tensor (%s) was found.",
var->Name()));
}
}
}
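The branch added in this hunk has to rediscover which grad pending node consumes a variable whose own grad_node was overwritten by an inplace op, and it registers the accumulator under that node (raising NotFound if no pending node matches). A standalone sketch of that scan, with stand-in types in place of VariableWrapper, OpBase, and GradOpNode:

#include <cassert>
#include <memory>
#include <string>
#include <vector>

// Stand-ins: Var ~ VariableWrapper, Op ~ OpBase (only its grad inputs),
// Node ~ GradOpNode (a list of ops).
struct Var { std::string name; };
struct Op { std::vector<std::shared_ptr<Var>> grad_inputs; };
using Node = std::vector<Op>;

// Mirrors the scan above: walk every op of every pending node and report
// the first node that reads `var` as one of its grad inputs.
std::shared_ptr<Node> FindOwningPendingNode(
    const std::shared_ptr<Var>& var,
    const std::vector<std::shared_ptr<Node>>& pending_nodes) {
  for (const auto& node : pending_nodes) {
    for (const auto& op : *node) {
      for (const auto& in : op.grad_inputs) {
        if (in == var) return node;  // accumulator gets keyed by this node
      }
    }
  }
  return nullptr;  // the real code enforces NotFound in this case
}

int main() {
  auto x_grad = std::make_shared<Var>(Var{"x@GRAD"});
  auto y_grad = std::make_shared<Var>(Var{"y@GRAD"});
  auto consumer = std::make_shared<Node>(Node{Op{{x_grad}}});
  auto unrelated = std::make_shared<Node>(Node{Op{{y_grad}}});
  assert(FindOwningPendingNode(x_grad, {unrelated, consumer}) == consumer);
  assert(FindOwningPendingNode(y_grad, {consumer}) == nullptr);
  return 0;
}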
@@ -154,10 +221,13 @@ void BasicEngine::PrepareGradAccumulators(const OpBase& op) {
void BasicEngine::PrepareDeps() {
PADDLE_ENFORCE_EQ(
node_deps_.empty(), true,
platform::errors::AlreadyExists("Op deps must be initialized here"));
platform::errors::AlreadyExists("Op deps must be initialized."));
PADDLE_ENFORCE_EQ(
accumulators_.empty(), true,
platform::errors::AlreadyExists("Accumulators must be initialized here"));
platform::errors::AlreadyExists("Accumulators must be initialized."));
PADDLE_ENFORCE_EQ(
accumulators_with_grad_node_.empty(), true,
platform::errors::AlreadyExists("Accumulators must be initialized."));

std::queue<GradOpNode*> q;
std::unordered_set<GradOpNode*> visited;
@@ -169,16 +239,17 @@
auto* cur_node = q.front();
q.pop();

const auto& grad_pending_nodes = cur_node->GradPendingNodes();

for (auto& cur_op : *cur_node) {
cur_op.EnforceHasInOut();
PrepareGradAccumulators(cur_op);
PrepareGradAccumulators(cur_op, grad_pending_nodes);
}

const auto& grad_pending_nodes = cur_node->GradPendingNodes();
for (auto& grad_pending_node : grad_pending_nodes) {
PADDLE_ENFORCE_NOT_NULL(
grad_pending_node,
platform::errors::NotFound("Grad pending node should not be null"));
platform::errors::NotFound("Grad pending node is nullptr."));
++node_deps_[grad_pending_node.get()];
if (visited.count(grad_pending_node.get()) == 0) {
visited.insert(grad_pending_node.get());
Expand All @@ -204,6 +275,8 @@ void BasicEngine::Execute() {
auto shared_cur_node = std::move(q.front());
q.pop();

auto& inplace_grad_name_map = shared_cur_node->InplaceGradNameMap();

for (auto& cur_op : *shared_cur_node) {
++op_num;

@@ -228,11 +301,38 @@
continue;
}

auto iter = accumulators_.find(var.get());
PADDLE_ENFORCE_EQ(
iter != accumulators_.end(), true,
platform::errors::NotFound("Cannot find gradient of variable %s",
var->Name()));
std::unordered_map<VariableWrapper*,
std::unique_ptr<GradientAccumulator>>::iterator
iter;
if (!var->HasGradNode()) {
VLOG(10) << "Find gradient of var (" << var->Name()
<< ") with no grad_node.";
iter = accumulators_.find(var.get());
PADDLE_ENFORCE_EQ(
iter != accumulators_.end(), true,
platform::errors::NotFound(
"Cannot find gradient of variable %s", var->Name()));
} else {
bool flag_find_grad = false;
VLOG(10) << "Find gradient of var (" << var->Name()
<< ") with grad_node.";
for (auto& grad_pending_node :
shared_cur_node->GradPendingNodes()) {
const auto& iter_grad_node =
accumulators_with_grad_node_.find(grad_pending_node);
if (iter_grad_node != accumulators_with_grad_node_.end()) {
iter = iter_grad_node->second.find(var.get());
if (iter != iter_grad_node->second.end()) {
flag_find_grad = true;
break;
}
}
}
PADDLE_ENFORCE_EQ(
flag_find_grad, true,
platform::errors::NotFound(
"Cannot find gradient of variable %s", var->Name()));
}

// leaf_accumulators_ : hooks and accumulate-grad for leaf tensor
if (var->IsLeafGrad()) {
@@ -251,6 +351,25 @@
need_accu_var_list_.emplace_back(iter->second.get(), var);
VLOG(10) << "create temporary var of " << var->Name()
<< " for sum gradient within this graph!";
} else if (!inplace_grad_name_map.empty() &&
inplace_grad_name_map.count(pair.first)) {
// When calculate Inplace grad op, create a new output var.
// If a tmp var has been created, there is no need to create it
// again.
for (auto& in_var :
bwd_ins.at(inplace_grad_name_map.at(pair.first))) {
if (in_var == var) {
auto tmp_var = std::make_shared<VariableWrapper>(var->Name());
tmp_var->SetType(var->Type());
tmp_var->SetForwardDataType(var->ForwardDataType());
inplace_output_grad_var_list_.emplace_back(var, tmp_var);
var = tmp_var;
VLOG(10) << "Inplace grad op does not use the Inplace "
"strategy, a temporary output var ("
<< var->Name() << ") will be created.";
break;
}
}
}
}
}
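This is the path where a grad op deliberately does not run inplace: when one of its output slots aliases an input, the output is redirected into a fresh temporary wrapper, and after the op runs each temporary is moved back into the original (the `*pair.first = std::move(*pair.second)` loop in the next hunk). A self-contained sketch of that redirect-then-move-back pattern, using a plain struct as a stand-in for VariableWrapper:

#include <cassert>
#include <memory>
#include <string>
#include <utility>
#include <vector>

struct Wrapper {       // stand-in for VariableWrapper
  std::string name;
  double value;
};

int main() {
  auto grad_out = std::make_shared<Wrapper>(Wrapper{"x@GRAD", 1.0});

  // Redirect: the grad op will write into a temporary instead of writing
  // over its own aliased input.
  std::vector<std::pair<std::shared_ptr<Wrapper>, std::shared_ptr<Wrapper>>>
      inplace_output_grad_var_list;
  auto tmp = std::make_shared<Wrapper>(Wrapper{grad_out->name, 0.0});
  inplace_output_grad_var_list.emplace_back(grad_out, tmp);

  // "Run" the grad op: it only ever sees the temporary output.
  tmp->value = 42.0;

  // Move-back: publish the temporary's contents into the original wrapper,
  // exactly one move per recorded pair.
  for (auto& pair : inplace_output_grad_var_list) {
    *pair.first = std::move(*pair.second);
  }
  assert(grad_out->value == 42.0);
  return 0;
}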
@@ -286,6 +405,10 @@ void BasicEngine::Execute() {
cur_op.place());
}

for (auto& pair : inplace_output_grad_var_list_) {
*pair.first = std::move(*pair.second);
}

// Step 2: Sum Gradient of This graph
for (auto& pair : need_accu_var_list_) {
pair.first->SumGrad(std::move(pair.second), cur_op.id());
@@ -308,6 +431,7 @@
}

need_accu_var_list_.clear();
inplace_output_grad_var_list_.clear();
leaf_accumulators_.clear();

if (!retain_graph_) {
@@ -318,9 +442,9 @@

// Step 3: Collect ready ops
for (auto& grad_pending_node : shared_cur_node->GradPendingNodes()) {
PADDLE_ENFORCE_NOT_NULL(grad_pending_node,
platform::errors::NotFound(
"Grad pending node should not be nullptr"));
PADDLE_ENFORCE_NOT_NULL(
grad_pending_node,
platform::errors::NotFound("Grad pending node is nullptr."));
auto iter = node_deps_.find(grad_pending_node.get());
if (iter == node_deps_.end()) {
continue;
@@ -340,6 +464,7 @@ void BasicEngine::Clear() {
init_node_.reset();
node_deps_.clear();
accumulators_.clear();
accumulators_with_grad_node_.clear();
need_accu_var_list_.clear();
leaf_accumulators_.clear();
}
20 changes: 19 additions & 1 deletion paddle/fluid/imperative/basic_engine.h
@@ -39,15 +39,33 @@ class BasicEngine : public Engine {

void CheckBackwardInputs(const OpBase& op);

void PrepareGradAccumulators(const OpBase& op);
void PrepareGradAccumulators(
const OpBase& op,
const std::vector<std::shared_ptr<GradOpNode>>& grad_pending_nodes);

void Clear();

private:
std::shared_ptr<GradOpNode> init_node_;
std::unordered_map<GradOpNode*, size_t> node_deps_;
// The input and output of Inplace op are the same. If only `var` is used
// as the key, then the input and output of inplace op must be gradient
// accumulated. Therefore, add the `grad_node` as the key to prevent the
// problem of gradient accumulation in inplace op.
std::unordered_map<std::shared_ptr<GradOpNode>,
std::unordered_map<VariableWrapper*,
std::unique_ptr<GradientAccumulator>>>
accumulators_with_grad_node_;
// Leaf var doesn't have grad_node, and leaf var with `stop_gradient=False`
// can't use Inplace strategy. If a var doesn't have grad_node, only use
// `var` as the key.
std::unordered_map<VariableWrapper*, std::unique_ptr<GradientAccumulator>>
accumulators_;
// The output grad var of Inplace grad op. Because Inplace grad op does not
// use the Inplace strategy, a new output grad var needs to be created.
std::vector<std::pair<std::shared_ptr<VariableWrapper>,
std::shared_ptr<VariableWrapper>>>
inplace_output_grad_var_list_;
std::vector<std::pair<GradientAccumulator*, std::shared_ptr<VariableWrapper>>>
need_accu_var_list_;
// leaf_accumulators_ is only for leaf tensor(hooks/accumulate grad)
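The comment on accumulators_with_grad_node_ above is the crux of the data-structure change: after an inplace op the "input" and "output" are literally the same VariableWrapper, so a map keyed by the wrapper pointer alone can only hold one accumulator for both graph positions, and their gradients would be summed together. A tiny standalone demo of that collision, and of how the extra grad-node key keeps the entries apart (all types here are stand-ins, not Paddle's):

#include <cassert>
#include <memory>
#include <unordered_map>

struct Wrapper {};      // stand-in for VariableWrapper
struct GradNode {};     // stand-in for GradOpNode
struct Accumulator {};  // stand-in for GradientAccumulator

int main() {
  Wrapper shared;  // after an inplace op, input and output share this object
  auto node_a = std::make_shared<GradNode>();  // one grad pending node
  auto node_b = std::make_shared<GradNode>();  // another grad pending node

  // Keyed by wrapper only: both positions collapse onto a single entry,
  // so their gradients would land in the same accumulator.
  std::unordered_map<Wrapper*, Accumulator> flat;
  flat[&shared] = Accumulator{};
  flat[&shared] = Accumulator{};
  assert(flat.size() == 1);

  // Keyed by (grad node, wrapper), as accumulators_with_grad_node_ is:
  // each position keeps its own accumulator for the shared wrapper.
  std::unordered_map<std::shared_ptr<GradNode>,
                     std::unordered_map<Wrapper*, Accumulator>>
      nested;
  nested[node_a][&shared] = Accumulator{};
  nested[node_b][&shared] = Accumulator{};
  assert(nested.size() == 2);
  return 0;
}

Leaf variables never have a grad node, which is why the flat accumulators_ map is still kept alongside the nested one.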