[Dy2stat] Fix Memory Optimization in run_program_op and Add SimNet as Unit Test (PaddlePaddle#25383)

Add Similarity Net (SimNet) as a unit test. During the unit test, we found three problems:

1. run_program_op has a memory optimization error when running a dy2stat net multiple times.
2. The support for SelectedRows can cause problems in dy2stat.
3. The handling of the return grammar has problems.

This PR fixes problem 1 but only adjusts the code around problems 2 and 3 to keep the PR small. I will fix those two problems in the next PR(s).
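To make problem 1 concrete: a grad-op input variable that no grad op produces must be fed from outside the backward section (by a forward op or by the caller), so it has to be excluded from eager deletion, or a second backward run will read already-freed memory. Below is a minimal standalone C++ sketch of that set-difference selection; GradOpArgs and CollectSkipVars are hypothetical stand-ins, not Paddle APIs. The real implementation is AppendSafeEagerDeletionSkipVars in run_program_op.h below.

#include <iostream>
#include <string>
#include <unordered_set>
#include <vector>

// Hypothetical stand-in for a grad op's argument name lists.
struct GradOpArgs {
  std::vector<std::string> inputs;
  std::vector<std::string> outputs;
};

// A grad-op input that no grad op produces must be fed from outside the
// backward section, so it must survive eager deletion between runs.
std::vector<std::string> CollectSkipVars(
    const std::vector<GradOpArgs> &grad_ops) {
  std::unordered_set<std::string> grad_in;
  std::unordered_set<std::string> grad_out;
  for (const GradOpArgs &op : grad_ops) {
    grad_in.insert(op.inputs.begin(), op.inputs.end());
    grad_out.insert(op.outputs.begin(), op.outputs.end());
  }
  std::vector<std::string> skip_vars;
  for (const std::string &name : grad_in) {
    if (grad_out.find(name) == grad_out.end()) {
      skip_vars.push_back(name);
    }
  }
  return skip_vars;
}

int main() {
  // One fc-like grad op: it consumes the forward output fc_out and the
  // incoming gradient out@GRAD, and produces only parameter gradients.
  std::vector<GradOpArgs> grad_ops = {
      {{"fc_out", "out@GRAD"}, {"fc_w@GRAD", "fc_b@GRAD"}}};
  for (const std::string &name : CollectSkipVars(grad_ops)) {
    std::cout << name << "\n";  // prints fc_out and out@GRAD (any order)
  }
  return 0;
}

Neither fc_out nor out@GRAD is produced inside the backward section, so both are marked to skip deletion; everything the backward section produces itself can still be freed eagerly.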
zhhsplendid authored Jul 13, 2020
1 parent c42d662 commit f9ac5fb
Showing 4 changed files with 733 additions and 8 deletions.
2 changes: 1 addition & 1 deletion paddle/fluid/operators/CMakeLists.txt
@@ -91,7 +91,7 @@ set(COMMON_OP_DEPS ${COMMON_OP_DEPS} selected_rows_functor selected_rows lod_ten
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} dynload_warpctc)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} sequence_padding sequence_scale cos_sim_functor memory jit_kernel_helper concat_and_split cross_entropy softmax vol2col im2col sampler sample_prob tree2col)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} sequence2batch lstm_compute matrix_bit_code gru_compute activation_functions beam_search fc matrix_inverse)
-set(COMMON_OP_DEPS ${COMMON_OP_DEPS} box_wrapper)
+set(COMMON_OP_DEPS ${COMMON_OP_DEPS} box_wrapper boost)
if (WITH_GPU)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} depthwise_conv prelu bert_encoder_functor)
endif()
49 changes: 42 additions & 7 deletions paddle/fluid/operators/run_program_op.h
@@ -17,10 +17,12 @@ limitations under the License. */
#include <algorithm>
#include <iterator>
#include <string>
+#include <unordered_set>
#include <utility>
#include <vector>

#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/op_desc.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/program_desc.h"
@@ -149,14 +151,46 @@ static void ShareVarsFromScope(const std::vector<Variable *> &vars,
}
}

-static void AppendSkipDeletionVars(
-    std::vector<std::string> *all_vars,
-    const std::vector<std::string> &append_vars) {
+static void AppendSkipDeletionVars(const std::vector<std::string> &append_vars,
+                                   std::vector<std::string> *all_vars) {
  for (auto &var : append_vars) {
    all_vars->emplace_back(var);
  }
}

+static void AppendSafeEagerDeletionSkipVars(
+    const framework::ProgramDesc &program,
+    std::vector<std::string> *skip_vars) {
+  const framework::BlockDesc &block = program.Block(0);
+  const std::vector<framework::OpDesc *> &all_ops = block.AllOps();
+
+  std::unordered_set<std::string> grad_op_output;
+  std::unordered_set<std::string> grad_op_input;
+  for (const framework::OpDesc *op : all_ops) {
+    int op_role = BOOST_GET_CONST(
+        int, op->GetAttr(framework::OpProtoAndCheckerMaker::OpRoleAttrName()));
+    if ((op_role & static_cast<int>(framework::OpRole::kBackward)) == 0) {
+      continue;
+    }
+
+    for (const std::string &in_arg_name : op->InputArgumentNames()) {
+      grad_op_input.emplace(in_arg_name);
+    }
+    for (const std::string &out_arg_name : op->OutputArgumentNames()) {
+      grad_op_output.emplace(out_arg_name);
+    }
+  }
+
+  // For each grad-op input variable: if it is not an output of any grad op,
+  // it may be an output of a forward op, so mark it as a skip var to
+  // prevent it from being deleted when the grad op runs multiple times.
+  for (const std::string &var_name : grad_op_input) {
+    if (grad_op_output.find(var_name) == grad_op_output.end()) {
+      skip_vars->emplace_back(var_name);
+    }
+  }
+}

} // namespace details

template <typename DeviceContext, typename T>
@@ -192,7 +226,7 @@ class RunProgramOpKernel : public framework::OpKernel<T> {

// skip delete vars
std::vector<std::string> skip_vars;
details::AppendSkipDeletionVars(&skip_vars, output_var_names);
details::AppendSkipDeletionVars(output_var_names, &skip_vars);
VLOG(2) << "Prepare to skip " << skip_vars.size()
<< " var(s): " << string::join_strings(skip_vars, ' ');

@@ -261,20 +295,21 @@ class RunProgramGradOpKernel : public framework::OpKernel<T> {
out_scope_vec->size(), 1,
platform::errors::InvalidArgument(
"The OutScope of RunProgramGradOp should only hold one scope."));
auto &scope = *(out_scope_vec->front());

// Step 2. prepare executor and scope
framework::Executor exe(ctx.GetPlace());

// skip delete vars
std::vector<std::string> skip_vars;
details::AppendSkipDeletionVars(&skip_vars, input_grad_var_names);
details::AppendSkipDeletionVars(&skip_vars, param_grad_names);
details::AppendSkipDeletionVars(input_grad_var_names, &skip_vars);
details::AppendSkipDeletionVars(param_grad_names, &skip_vars);
details::AppendSafeEagerDeletionSkipVars(*program, &skip_vars);
VLOG(2) << "Prepare to skip " << skip_vars.size()
<< " var(s): " << string::join_strings(skip_vars, ' ');

auto exe_ctx = exe.Prepare(*program, 0, skip_vars);

auto &scope = *(out_scope_vec->front());
details::ShareVarsIntoScope(output_grad_vars, output_grad_var_names,
&scope);

