[Dy2stat] Fix Memory Optimization in run_program_op and Add SimNet as Unit Test (PaddlePaddle#25383)

Add Similarity Net (SimNet) as a unit test. During the unit test, we found three problems:

1. run_program_op has a memory optimization error when running a dy2stat net multiple times.
2. The support for SelectedRows can cause problems in dy2stat.
3. The handling of the return grammar has problems.

This PR fixes problem 1 but only adjusts the code around problems 2 and 3 to keep the PR small. I will fix those two problems in the next PR(s).
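To make problem 1 concrete: a grad-op input variable that no grad op produces must be fed from outside the backward section (by a forward op or by the caller), so it has to be excluded from eager deletion, or a second backward run will read already-freed memory. Below is a minimal standalone C++ sketch of that set-difference selection; GradOpArgs and CollectSkipVars are hypothetical stand-ins, not Paddle APIs. The real implementation is AppendSafeEagerDeletionSkipVars in run_program_op.h below.

#include <iostream>
#include <string>
#include <unordered_set>
#include <vector>

// Hypothetical stand-in for a grad op's argument name lists.
struct GradOpArgs {
  std::vector<std::string> inputs;
  std::vector<std::string> outputs;
};

// A grad-op input that no grad op produces must be fed from outside the
// backward section, so it must survive eager deletion between runs.
std::vector<std::string> CollectSkipVars(
    const std::vector<GradOpArgs> &grad_ops) {
  std::unordered_set<std::string> grad_in;
  std::unordered_set<std::string> grad_out;
  for (const GradOpArgs &op : grad_ops) {
    grad_in.insert(op.inputs.begin(), op.inputs.end());
    grad_out.insert(op.outputs.begin(), op.outputs.end());
  }
  std::vector<std::string> skip_vars;
  for (const std::string &name : grad_in) {
    if (grad_out.find(name) == grad_out.end()) {
      skip_vars.push_back(name);
    }
  }
  return skip_vars;
}

int main() {
  // One fc-like grad op: it consumes the forward output fc_out and the
  // incoming gradient out@GRAD, and produces only parameter gradients.
  std::vector<GradOpArgs> grad_ops = {
      {{"fc_out", "out@GRAD"}, {"fc_w@GRAD", "fc_b@GRAD"}}};
  for (const std::string &name : CollectSkipVars(grad_ops)) {
    std::cout << name << "\n";  // prints fc_out and out@GRAD (any order)
  }
  return 0;
}

Neither fc_out nor out@GRAD is produced inside the backward section, so both are marked to skip deletion; everything the backward section produces itself can still be freed eagerly.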
zhhsplendid authored Jul 13, 2020
1 parent c42d662 commit f9ac5fb
Showing 4 changed files with 733 additions and 8 deletions.
2 changes: 1 addition & 1 deletion paddle/fluid/operators/CMakeLists.txt
@@ -91,7 +91,7 @@ set(COMMON_OP_DEPS ${COMMON_OP_DEPS} selected_rows_functor selected_rows lod_ten
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} dynload_warpctc)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} sequence_padding sequence_scale cos_sim_functor memory jit_kernel_helper concat_and_split cross_entropy softmax vol2col im2col sampler sample_prob tree2col)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} sequence2batch lstm_compute matrix_bit_code gru_compute activation_functions beam_search fc matrix_inverse)
-set(COMMON_OP_DEPS ${COMMON_OP_DEPS} box_wrapper)
+set(COMMON_OP_DEPS ${COMMON_OP_DEPS} box_wrapper boost)
if (WITH_GPU)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} depthwise_conv prelu bert_encoder_functor)
endif()
49 changes: 42 additions & 7 deletions paddle/fluid/operators/run_program_op.h
@@ -17,10 +17,12 @@ limitations under the License. */
#include <algorithm>
#include <iterator>
#include <string>
+#include <unordered_set>
#include <utility>
#include <vector>

#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/op_desc.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/program_desc.h"
@@ -149,14 +151,46 @@ static void ShareVarsFromScope(const std::vector<Variable *> &vars,
}
}

-static void AppendSkipDeletionVars(
-    std::vector<std::string> *all_vars,
-    const std::vector<std::string> &append_vars) {
+static void AppendSkipDeletionVars(const std::vector<std::string> &append_vars,
+                                   std::vector<std::string> *all_vars) {
  for (auto &var : append_vars) {
    all_vars->emplace_back(var);
  }
}

+static void AppendSafeEagerDeletionSkipVars(
+    const framework::ProgramDesc &program,
+    std::vector<std::string> *skip_vars) {
+  const framework::BlockDesc &block = program.Block(0);
+  const std::vector<framework::OpDesc *> &all_ops = block.AllOps();
+
+  std::unordered_set<std::string> grad_op_output;
+  std::unordered_set<std::string> grad_op_input;
+  for (const framework::OpDesc *op : all_ops) {
+    int op_role = BOOST_GET_CONST(
+        int, op->GetAttr(framework::OpProtoAndCheckerMaker::OpRoleAttrName()));
+    if ((op_role & static_cast<int>(framework::OpRole::kBackward)) == 0) {
+      continue;
+    }
+
+    for (const std::string &in_arg_name : op->InputArgumentNames()) {
+      grad_op_input.emplace(in_arg_name);
+    }
+    for (const std::string &out_arg_name : op->OutputArgumentNames()) {
+      grad_op_output.emplace(out_arg_name);
+    }
+  }
+
+  // For each grad-op input variable: if it is not an output of any grad op,
+  // it may be an output of a forward op, so mark it as a skip var to
+  // prevent it from being deleted when the grad op runs multiple times.
+  for (const std::string &var_name : grad_op_input) {
+    if (grad_op_output.find(var_name) == grad_op_output.end()) {
+      skip_vars->emplace_back(var_name);
+    }
+  }
+}

} // namespace details

template <typename DeviceContext, typename T>
@@ -192,7 +226,7 @@ class RunProgramOpKernel : public framework::OpKernel<T> {

// skip delete vars
std::vector<std::string> skip_vars;
details::AppendSkipDeletionVars(&skip_vars, output_var_names);
details::AppendSkipDeletionVars(output_var_names, &skip_vars);
VLOG(2) << "Prepare to skip " << skip_vars.size()
<< " var(s): " << string::join_strings(skip_vars, ' ');

@@ -261,20 +295,21 @@ class RunProgramGradOpKernel : public framework::OpKernel<T> {
out_scope_vec->size(), 1,
platform::errors::InvalidArgument(
"The OutScope of RunProgramGradOp should only hold one scope."));
auto &scope = *(out_scope_vec->front());

// Step 2. prepare executor and scope
framework::Executor exe(ctx.GetPlace());

// skip delete vars
std::vector<std::string> skip_vars;
details::AppendSkipDeletionVars(&skip_vars, input_grad_var_names);
details::AppendSkipDeletionVars(&skip_vars, param_grad_names);
details::AppendSkipDeletionVars(input_grad_var_names, &skip_vars);
details::AppendSkipDeletionVars(param_grad_names, &skip_vars);
details::AppendSafeEagerDeletionSkipVars(*program, &skip_vars);
VLOG(2) << "Prepare to skip " << skip_vars.size()
<< " var(s): " << string::join_strings(skip_vars, ' ');

auto exe_ctx = exe.Prepare(*program, 0, skip_vars);

auto &scope = *(out_scope_vec->front());
details::ShareVarsIntoScope(output_grad_vars, output_grad_var_names,
&scope);

