47 changes: 47 additions & 0 deletions paddle/fluid/framework/tensor.h
@@ -43,6 +43,49 @@ namespace framework {

class LoDTensor;

/*
NOTE(liym27): [ What is TensorInplaceVersion used for? ]

TensorInplaceVersion is a version counter; every Tensor has one. It is used
to check whether an inplace operation will result in an incorrect gradient
calculation. The version is incremented whenever the data of the Variable
is modified in place.

- Question: In what scenarios will version counters be shared?
- Answer: When two Variables/VarBases share the same C++ Tensor (whose
Allocation may change), they share the same version counter. For example:
1. `z = paddle.assign(input=x, output=y)`: `z` shares the version counter
of `y` because `z` and `y` are the same VarBase;
2. `y = x.detach()`: `y` shares the version counter of `x`.

- Question: In what scenarios will version counters NOT be shared?
- Answer: When a `Variable`'s data is replaced by calling
`Tensor::ShareDataWith(...)` or `Tensor::ShareBufferWith(...)`, because the
two Tensors then share the same Allocation but are different
framework::Tensor objects.

- Question: Why put inplace_version_counter_ in framework::Tensor instead
of Allocation or Variable?
- Answer:
1. A Tensor can call ResetHolder() to replace its Allocation. If the
counter lived in Allocation, it would change whenever the holder is reset,
giving confusing information about the inplace version.
2. If inplace_version_counter_ lived in Variable, different
VariableWrappers would have to share the same Variable. However, a
VariableWrapper holds a Variable object, not a pointer to one.
*/

class TensorInplaceVersion {
public:
explicit TensorInplaceVersion(uint32_t inplace_version = 0)
: inplace_version_(inplace_version) {}
bool IsUnique() const { return inplace_version_ == 0; }
void Bump() { ++inplace_version_; }
uint32_t CurrentVersion() const { return inplace_version_; }

private:
uint32_t inplace_version_;
};
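
To make the sharing rules in the NOTE above concrete, here is a minimal sketch of the Python-visible behavior, assuming the `inplace_version` property and the `__setitem__` version bump added later in this change:

```python
import paddle

x = paddle.ones(shape=[2, 3], dtype="float32")
y = x.detach()            # y shares x's C++ Tensor, hence its version counter
print(x.inplace_version)  # 0

x[0] = 2.0                # __setitem__ is inplace and bumps the shared counter
print(x.inplace_version)  # 1
print(y.inplace_version)  # 1, same counter as x
```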

class Tensor {
#ifdef PADDLE_WITH_MKLDNN

@@ -189,6 +232,9 @@ class Tensor {

void ResetHolderWithType(std::shared_ptr<memory::Allocation> holder,
const proto::VarType::Type type);
TensorInplaceVersion& InplaceVersionCounter() {
return inplace_version_counter_;
}

private:
/*! holds the memory block if allocated. */
@@ -225,6 +271,7 @@ class Tensor {
* PlaceHolder::ptr_ and where the tensor data really begins.
*/
size_t offset_;
TensorInplaceVersion inplace_version_counter_;
};

} // namespace framework
53 changes: 51 additions & 2 deletions paddle/fluid/framework/variable.h
@@ -18,8 +18,8 @@
#include <typeindex>
#include <typeinfo>

#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/framework/var_type_traits.h"

namespace paddle {
namespace framework {

@@ -69,6 +69,15 @@ class Variable {
return holder_->Type();
}

private:
// This method hides type T, so it doesn't appear as a template parameter of
// Variable.
framework::TensorInplaceVersion* InplaceVersionCounter();

public:
uint32_t CurrentInplaceVersion();
void BumpInplaceVersion();

private:
struct Placeholder {
virtual ~Placeholder() PADDLE_MAY_THROW {}
@@ -101,8 +110,48 @@
};

// Actually a pointer to a PlaceholderImpl object.
- std::unique_ptr<Placeholder> holder_;
+ std::shared_ptr<Placeholder> holder_;
};

inline framework::TensorInplaceVersion* Variable::InplaceVersionCounter() {
framework::TensorInplaceVersion* version_counter_ptr(nullptr);
if (IsType<framework::LoDTensor>()) {
version_counter_ptr =
&GetMutable<framework::LoDTensor>()->InplaceVersionCounter();
} else if (IsType<framework::Tensor>()) {
version_counter_ptr =
&GetMutable<framework::Tensor>()->InplaceVersionCounter();
} else if (IsType<framework::SelectedRows>()) {
version_counter_ptr = &GetMutable<framework::SelectedRows>()
->mutable_value()
->InplaceVersionCounter();
} else {
VLOG(4) << "Only Tensor, LoDTensor and SelectedRows support "
"TensorInplaceVersion, but received type "
<< platform::demangle(framework::ToTypeName(Type()));
}
return version_counter_ptr;
}

inline uint32_t Variable::CurrentInplaceVersion() {
auto version_counter_ptr = InplaceVersionCounter();
if (version_counter_ptr) {
return version_counter_ptr->CurrentVersion();
} else {
return 0;
}
}

inline void Variable::BumpInplaceVersion() {
auto version_counter_ptr = InplaceVersionCounter();
if (version_counter_ptr) {
version_counter_ptr->Bump();
} else {
VLOG(4) << "Only Tensor, LoDTensor and SelectedRows support "
"TensorInplaceVersion, but received type "
<< platform::demangle(framework::ToTypeName(Type()));
}
}
} // namespace framework
} // namespace paddle
25 changes: 25 additions & 0 deletions paddle/fluid/imperative/basic_engine.cc
@@ -225,6 +225,31 @@ void BasicEngine::Execute() {
}
}

VLOG(4) << "Check whether there is any inplace operation affecting "
"gradient calculation.";
for (auto& pair : bwd_ins) {
for (auto& var_wrapper : pair.second) {
auto wrapper_version_snapshot = var_wrapper->InplaceVersionSnapshot();
auto tensor_version =
var_wrapper->MutableVar()->CurrentInplaceVersion();
PADDLE_ENFORCE_EQ(
tensor_version, wrapper_version_snapshot,
platform::errors::PermissionDenied(
"Tensor '%s' used in gradient computation in grad op '%s' "
"has been modified by an inplace operation. "
"Its version is %s but the expected version is %s. "
"Please fix your code to avoid calling an inplace operator "
"after using the Tensor which will be used in gradient "
"computation.",
var_wrapper->Name(), cur_op.Type(), tensor_version,
wrapper_version_snapshot));

VLOG(6) << " The version of Tensor '" << var_wrapper->Name()
<< "' is [ " << wrapper_version_snapshot << " ]";
}
}

{
VLOG(3) << "Start to execute grad op " << cur_op.Type();
OpBase::Run(cur_op.InnerOp(), bwd_ins, tmp_outs, cur_op.Attrs(),
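
At the Python level, the scenario this check guards against looks roughly like the following (a hypothetical repro; the exact op and error text may differ):

```python
import paddle

x = paddle.ones(shape=[2, 2], dtype="float32")
x.stop_gradient = False
y = (x ** 2).sum()  # dy/dx = 2 * x, so backward needs x's forward value
x[0] = 5.0          # inplace write bumps x's version after it was recorded
y.backward()        # raises: x was modified by an inplace operation
```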
25 changes: 23 additions & 2 deletions paddle/fluid/imperative/dygraph_grad_maker.h
@@ -147,7 +147,6 @@ class GradOpBaseMakerBase {
bool is_input) const {
const auto& data_map = is_input ? var_base_map_in_ : var_base_map_out_;
auto iterator = data_map.find(name);

TracedVarList<VarBase, kRole> vec_temp;
if (iterator != data_map.end()) {
vec_temp.reserve(iterator->second.size());
@@ -226,6 +225,7 @@ class TracedGradOp {
}

auto var_wrappers = ToVarWrapperList<kRole>(vars);

if (!var_wrappers.empty()) {
op_->SetInput(name, std::move(var_wrappers),
kRole == TracedVarRole::kBackward);
@@ -293,7 +293,8 @@
var->OverridedStopGradient()))) {
result.emplace_back();
} else {
- result.emplace_back(var->SharedVar());
+ auto var_wrapper = SnapshotVarWrapper(var->SharedVar());
+ result.emplace_back(var_wrapper);
has_valid = true;
}
}
@@ -304,6 +305,26 @@
return result;
}

// Get a snapshot of a VariableWrapper at a certain inplace version.
// The inplace version number of the VariableWrapper is used for inplace
// detection in gradient computation.
static const std::shared_ptr<VariableWrapper> SnapshotVarWrapper(
const std::shared_ptr<VariableWrapper>& var_wrapper) {
// NOTE(liym27):
// Use the original var_wrapper if its inplace_version has not
// changed; otherwise, the accuracy of the model results and
// double grad would be affected.
if (!var_wrapper->MutableVar()->IsInitialized() ||
var_wrapper->InplaceVersionSnapshot() ==
var_wrapper->MutableVar()->CurrentInplaceVersion()) {
return var_wrapper;
} else {
VariableWrapper new_var_wrapper = *var_wrapper.get();
new_var_wrapper.ResetInplaceVersion();
return std::make_shared<VariableWrapper>(new_var_wrapper);
}
}

private:
const std::shared_ptr<GradOpNode>& node_;
OpBase* op_;
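
Conversely, because the snapshot is taken when the grad op is traced, inplace edits made before a variable is used by any grad op remain legal; a sketch under the same assumptions:

```python
import paddle

x = paddle.ones(shape=[2, 2], dtype="float32")
x.stop_gradient = False
x[0] = 2.0          # inplace edit happens BEFORE x is used by a grad op
y = (x ** 2).sum()  # the traced grad op snapshots x at its current version
y.backward()        # fine: x's version still matches the snapshot
```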
9 changes: 9 additions & 0 deletions paddle/fluid/imperative/layer.cc
@@ -278,6 +278,15 @@ std::shared_ptr<VarBase> VarBase::NewVarBase(const platform::Place& dst_place,
}
}

void VarBase::BumpInplaceVersion() {
PADDLE_ENFORCE_EQ(
Var().IsInitialized(), true,
platform::errors::InvalidArgument(
"Tensor %s has not been initialized, please check if it has no data.",
Name()));
MutableVar()->BumpInplaceVersion();
}

void OpBase::SetType(const std::string& type) {
op_ = framework::OpRegistry::CreateOp(type, {}, {}, {}, false);
}
2 changes: 2 additions & 0 deletions paddle/fluid/imperative/layer.h
@@ -202,6 +202,8 @@ class VarBase {
std::shared_ptr<VarBase> NewVarBase(const platform::Place& dst_place,
const bool blocking) const;

void BumpInplaceVersion();

private:
/**
* NOTE(zengjinle): never remove the const qualifier of `var_` if you are
15 changes: 15 additions & 0 deletions paddle/fluid/imperative/variable_wrapper.h
@@ -174,6 +174,17 @@ class VariableWrapper {

std::shared_ptr<LeafVarHookPipeline>& GetLeafHooks() { return leaf_hooks_; }

uint32_t InplaceVersionSnapshot() const { return inplace_version_snapshot_; }

void ResetInplaceVersion() {
auto new_version = var_.CurrentInplaceVersion();

VLOG(6) << "The wrapper version of VariableWrapper '" << name_
<< "' will be updated from " << inplace_version_snapshot_ << " to "
<< new_version;
inplace_version_snapshot_ = new_version;
}

private:
void SetGradVar(const std::shared_ptr<VariableWrapper>& var) {
auto shared_var = grad_var_.lock();
@@ -244,6 +255,10 @@ class VariableWrapper {
int overrided_stop_gradient_{-1};
bool persistable_{false};

// Used for checking whether there is any inplace operation affecting gradient
// calculation.
uint32_t inplace_version_snapshot_{0};

framework::proto::VarType::Type type_{framework::proto::VarType::LOD_TENSOR};
framework::proto::VarType::Type data_type_{framework::proto::VarType::FP32};

26 changes: 26 additions & 0 deletions paddle/fluid/pybind/imperative.cc
@@ -589,6 +589,10 @@ void BindImperative(py::module *m_ptr) {
SetTensorFromPyArray(self_tensor, self_numpy,
self_tensor->place(), true);
}
// NOTE(liym27):
// Bump the inplace version of VarBase self because __setitem__
// is an inplace operation on self.
self->BumpInplaceVersion();
})
.def("__getitem__",
[](std::shared_ptr<imperative::VarBase> &self, py::handle _index) {
@@ -628,6 +632,28 @@
return out;
}
})
.def("_inplace_version",
[](imperative::VarBase &self) -> uint32_t {
const auto &var = self.MutableVar();
PADDLE_ENFORCE_EQ(
var->IsInitialized(), true,
platform::errors::InvalidArgument(
"Tensor of %s is Empty, please check if it has no data.",
self.Name()));
return var->CurrentInplaceVersion();
})
.def("_bump_inplace_version",
[](std::shared_ptr<imperative::VarBase> &self) {
// NOTE(liym27): _bump_inplace_version is only used for inplace
// operations.
self->BumpInplaceVersion();
},
R"DOC(
**Notes**:
**This API is ONLY available in Dygraph mode.**
**This is a very low-level API. Users should not use it directly.**
Bump the version whenever the Tensor is modified through an inplace operation.
)DOC")
.def("numpy",
[](imperative::VarBase &self) -> py::array {
const auto &tensor =
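
The two new bindings can be exercised directly from Python; a quick sketch using the private `_inplace_version` and `_bump_inplace_version` methods defined above:

```python
import paddle

t = paddle.ones(shape=[3], dtype="float32")
print(t._inplace_version())  # 0
t._bump_inplace_version()    # manually mark t as modified in place
print(t._inplace_version())  # 1
```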
26 changes: 24 additions & 2 deletions python/paddle/fluid/dygraph/varbase_patch_methods.py
@@ -226,6 +226,27 @@ def grad(self):

return self.gradient()

@property
def inplace_version(self):
"""
The inplace version of the current Tensor.
The version number is incremented whenever the current Tensor is modified through an inplace operation.

**Notes: This is a read-only property**

Examples:
.. code-block:: python

import paddle
var = paddle.ones(shape=[4, 2, 3], dtype="float32")
print(var.inplace_version) # 0

var[1] = 2.2
print(var.inplace_version) # 1

"""
return self._inplace_version()

def __str__(self):
"""
Convert a VarBase object to a readable string.
@@ -264,8 +285,9 @@ def __bool__(self):
("__bool__", __bool__), ("__nonzero__", __nonzero__),
("_to_static_var", _to_static_var), ("set_value", set_value),
("block", block), ("backward", backward), ("grad", grad),
("gradient", gradient), ("__str__", __str__), ("__repr__", __str__),
("__module__", "paddle"), ("__name__", "Tensor")):
("inplace_version", inplace_version), ("gradient", gradient),
("__str__", __str__), ("__repr__", __str__), ("__module__", "paddle"),
("__name__", "Tensor")):
setattr(core.VarBase, method_name, method)

# patch math methods for varbase