
Commit fd61cc9

pbelevich authored and facebook-github-bot committed
Moved at::assert_no_internal_overlap to TensorIterator
Summary: Pull Request resolved: pytorch#22917
Differential Revision: D16521429
Pulled By: pbelevich
fbshipit-source-id: 80ae583c6486d6948431b79e1452902bdf2cfbc3
1 parent 4b78ce1 commit fd61cc9
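
For context, "internal overlap" here means a writable tensor in which several indices map to the same memory location, most commonly a stride-0 view produced by expand(). A minimal sketch of such a tensor (illustrative only, not part of this diff):

    #include <ATen/ATen.h>

    int main() {
      at::Tensor base = at::zeros({1});
      at::Tensor t = base.expand({4});  // sizes [4], strides [0]: every element aliases base[0]
      // at::has_internal_overlap(t) reports MemOverlap::YES for views like this, so using t
      // as the output of an out= / in-place op is rejected by assert_no_internal_overlap.
      return 0;
    }

This commit moves that rejection out of the individual kernels and into TensorIterator, so out= kernels opt in with a single flag instead of calling the assert themselves.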

File tree (8 files changed: +55, -32 lines)

aten/src/ATen/MemoryOverlap.cpp
aten/src/ATen/MemoryOverlap.h
aten/src/ATen/native/BinaryOps.cpp
aten/src/ATen/native/TensorIterator.cpp
aten/src/ATen/native/TensorIterator.h
aten/src/ATen/native/UnaryOps.cpp
aten/src/THC/generic/THCTensorMathPointwise.cu
test/test_torch.py

aten/src/ATen/MemoryOverlap.cpp

Lines changed: 7 additions & 9 deletions
@@ -23,17 +23,15 @@ MemOverlap has_internal_overlap(TensorImpl* t) {
   return MemOverlap::TOO_HARD;
 }
 
-void assert_no_internal_overlap(const Tensor& t, const std::string& op) {
-  assert_no_internal_overlap(t.unsafeGetTensorImpl(), op);
+void assert_no_internal_overlap(const Tensor& t) {
+  assert_no_internal_overlap(t.unsafeGetTensorImpl());
 }
 
-void assert_no_internal_overlap(TensorImpl* t, const std::string& op) {
-  if (has_internal_overlap(t) == MemOverlap::YES) {
-    AT_ERROR(
-        op, ": unsupported operation: more than one element of the written-to "
-        "tensor refers to a single memory location. Please clone() the tensor "
-        "before calling ", op);
-  }
+void assert_no_internal_overlap(TensorImpl* t) {
+  TORCH_CHECK(has_internal_overlap(t) != MemOverlap::YES,
+    "unsupported operation: more than one element of the written-to tensor "
+    "refers to a single memory location. Please clone() the tensor before "
+    "performing the operation.");
 }
 
 }
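
A hedged sketch of how the reworked helper behaves after this change; the op-name argument is gone and the thrown message is the op-agnostic one introduced above (the tensor construction is just an illustration):

    #include <ATen/ATen.h>
    #include <ATen/MemoryOverlap.h>

    int main() {
      at::Tensor t = at::zeros({1}).expand({3});  // internally overlapping view
      // On overlap, TORCH_CHECK raises a c10::Error:
      // "unsupported operation: more than one element of the written-to tensor refers
      //  to a single memory location. Please clone() the tensor before performing the operation."
      at::assert_no_internal_overlap(t);
      return 0;
    }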

aten/src/ATen/MemoryOverlap.h

Lines changed: 2 additions & 2 deletions
@@ -16,7 +16,7 @@ enum class MemOverlap { NO, YES, TOO_HARD };
 CAFFE2_API MemOverlap has_internal_overlap(const Tensor& t);
 CAFFE2_API MemOverlap has_internal_overlap(TensorImpl* t);
 
-CAFFE2_API void assert_no_internal_overlap(const Tensor& t, const std::string& op);
-CAFFE2_API void assert_no_internal_overlap(TensorImpl* t, const std::string& op);
+CAFFE2_API void assert_no_internal_overlap(const Tensor& t);
+CAFFE2_API void assert_no_internal_overlap(TensorImpl* t);
 
 }

aten/src/ATen/native/BinaryOps.cpp

Lines changed: 8 additions & 8 deletions
@@ -25,8 +25,8 @@ Tensor& add_out(Tensor& result, const Tensor& self, const Tensor& other, Scalar
   } else if (self.is_sparse()) {
     AT_ERROR("add(sparse, dense) is not supported. Use add(dense, sparse) instead.");
   }
-  at::assert_no_internal_overlap(result, "add");
-  auto iter = TensorIterator::binary_op(result, self, other);
+  auto iter = TensorIterator::binary_op(result, self, other,
+    /*check_internal_overlap=*/true);
   add_stub(iter.device_type(), iter, alpha);
   return result;
 }
@@ -54,8 +54,8 @@ Tensor& div_out(Tensor& result, const Tensor& self, const Tensor& other) {
     }
     return at::_sparse_div_zerodim_out(result, self, other);
   }
-  at::assert_no_internal_overlap(result, "div");
-  auto iter = TensorIterator::binary_op(result, self, other);
+  auto iter = TensorIterator::binary_op(result, self, other,
+    /*check_internal_overlap=*/true);
   div_stub(iter.device_type(), iter);
   return result;
 }
@@ -79,8 +79,8 @@ Tensor& mul_out(Tensor& result, const Tensor& self, const Tensor& other) {
   if (self.is_sparse() || other.is_sparse()) {
     return at::_sparse_mul_out(result, self, other);
   }
-  at::assert_no_internal_overlap(result, "mul");
-  auto iter = TensorIterator::binary_op(result, self, other);
+  auto iter = TensorIterator::binary_op(result, self, other,
+    /*check_internal_overlap=*/true);
   mul_stub(iter.device_type(), iter);
   return result;
 }
@@ -125,8 +125,8 @@ Tensor& sub_out(Tensor& result, const Tensor& self, const Tensor& other, Scalar
   } else if (self.is_sparse()) {
     AT_ERROR("sub(sparse, dense) is not supported. Use sub(dense, sparse) instead.");
   }
-  at::assert_no_internal_overlap(result, "sub");
-  auto iter = TensorIterator::binary_op(result, self, other);
+  auto iter = TensorIterator::binary_op(result, self, other,
+    /*check_internal_overlap=*/true);
   sub_stub(iter.device_type(), iter, alpha);
   return result;
 }
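
The user-visible effect of routing the check through TensorIterator::binary_op is that these out= entry points still fail fast on overlapping outputs, only now via the iterator rather than an explicit assert in each kernel. A small sketch, assuming the standard at::add_out overload:

    #include <ATen/ATen.h>

    int main() {
      at::Tensor out = at::zeros({1}).expand({3});  // overlapping output buffer
      at::Tensor a = at::ones({3});
      at::Tensor b = at::ones({3});
      // binary_op(..., /*check_internal_overlap=*/true) calls check_and_add_output,
      // so this throws the "single memory location" error, the same behavior the
      // removed at::assert_no_internal_overlap(result, "add") call provided.
      at::add_out(out, a, b);
      return 0;
    }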

aten/src/ATen/native/TensorIterator.cpp

Lines changed: 14 additions & 4 deletions
@@ -534,19 +534,29 @@ void TensorIterator::select_all_keeping_dim(int start_dim, IntArrayRef indices)
   }
 }
 
-TensorIterator TensorIterator::binary_op(Tensor& out, const Tensor& a, const Tensor& b) {
+TensorIterator TensorIterator::binary_op(Tensor& out, const Tensor& a,
+    const Tensor& b, bool check_internal_overlap) {
   auto iter = TensorIterator();
-  iter.add_output(out);
+  if (check_internal_overlap) {
+    iter.check_and_add_output(out);
+  } else {
+    iter.add_output(out);
+  }
   iter.add_input(a);
   iter.add_input(b);
   iter.allow_cpu_scalars_ = true;
   iter.build();
   return iter;
 }
 
-TensorIterator TensorIterator::unary_op(Tensor& out, const Tensor& a) {
+TensorIterator TensorIterator::unary_op(Tensor& out, const Tensor& a,
+    bool check_internal_overlap) {
   auto iter = TensorIterator();
-  iter.add_output(out);
+  if (check_internal_overlap) {
+    iter.check_and_add_output(out);
+  } else {
+    iter.add_output(out);
+  }
   iter.add_input(a);
   iter.num_outputs_ = 1;
   iter.build();

aten/src/ATen/native/TensorIterator.h

Lines changed: 10 additions & 3 deletions
@@ -6,6 +6,7 @@
 #include <ATen/detail/ScalarTypeConversions.h>
 #include <bitset>
 #include <c10/util/Optional.h>
+#include <ATen/MemoryOverlap.h>
 #ifdef BUILD_NAMEDTENSOR
 #include <ATen/NamedTensorUtils.h>
 #endif
@@ -142,8 +143,10 @@ struct CAFFE2_API TensorIterator {
 
   void foreach_reduced_elt(const loop_subiter_t& loop, bool parallelize=true);
 
-  static TensorIterator binary_op(Tensor& out, const Tensor& a, const Tensor& b);
-  static TensorIterator unary_op(Tensor& out, const Tensor& a);
+  static TensorIterator binary_op(Tensor& out, const Tensor& a, const Tensor& b,
+    bool check_internal_overlap = false);
+  static TensorIterator unary_op(Tensor& out, const Tensor& a,
+    bool check_internal_overlap = false);
   static TensorIterator nullary_op(Tensor& out);
   static TensorIterator reduce_op(Tensor& out, const Tensor& a);
   static TensorIterator reduce_op(Tensor& out1, Tensor& out2, const Tensor& a);
@@ -261,6 +264,11 @@ struct CAFFE2_API TensorIterator {
     num_outputs_++;
   }
 
+  void check_and_add_output(const Tensor& output) {
+    assert_no_internal_overlap(output);
+    add_output(output);
+  }
+
   void add_output(const Tensor& input, Device device, ScalarType dtype) {
     operands_.emplace_back(input, device, dtype);
     num_outputs_++;
@@ -312,7 +320,6 @@ struct CAFFE2_API TensorIterator {
   bool promote_gpu_output_dtypes_ = false;
   bool final_output_ = true;
 };
-
 /// A container-like struct that acts as if it contains splits of a
 /// TensorIterator that can use 32-bit indexing. Taken together the splits cover
 /// the original TensorIterator.
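
For kernel authors, the migration pattern this header change enables is to drop the explicit assert and pass the flag to the factory instead. A minimal sketch of a hypothetical out-kernel (my_op_out is a placeholder name, not part of this diff, and the dispatch stub is elided):

    #include <ATen/ATen.h>
    #include <ATen/native/TensorIterator.h>

    namespace at { namespace native {

    Tensor& my_op_out(Tensor& result, const Tensor& self) {
      // Before this change: at::assert_no_internal_overlap(result, "my_op");
      auto iter = TensorIterator::unary_op(result, self,
          /*check_internal_overlap=*/true);
      // ... run the computation over `iter`, e.g. via a DispatchStub ...
      return result;
    }

    }} // namespace at::native

The call-site diffs below (UnaryOps.cpp, BinaryOps.cpp) follow exactly this pattern.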

aten/src/ATen/native/UnaryOps.cpp

Lines changed: 4 additions & 4 deletions
@@ -47,8 +47,8 @@ Tensor& bitwise_not_(Tensor& self) {
 
 Tensor& bitwise_not_out(Tensor& result, const Tensor& self) {
   checkBackend("bitwise_not", result, self.type().backend());
-  assert_no_internal_overlap(result, "bitwise_not");
-  auto iter = TensorIterator::unary_op(result, self);
+  auto iter = TensorIterator::unary_op(result, self,
+    /*check_internal_overlap=*/true);
   bitwise_not_stub(iter.device_type(), iter);
 #ifdef BUILD_NAMEDTENSOR
   at::namedinference::propagate_names(result, self);
@@ -161,8 +161,8 @@ static void propagate_names_if_namedtensor_enabled(Tensor& result, const Tensor&
   } \
   Tensor& _##op##_out_cpu(Tensor& result, const Tensor& self) { \
     checkBackend(#op, result, Backend::CPU); \
-    assert_no_internal_overlap(result, #op); \
-    auto iter = TensorIterator::unary_op(result, self); \
+    auto iter = TensorIterator::unary_op(result, self, \
+      /*check_internal_overlap=*/true); \
     op##_stub(iter.device_type(), iter); \
     return result; \
   }

aten/src/THC/generic/THCTensorMathPointwise.cu

Lines changed: 2 additions & 2 deletions
@@ -196,8 +196,8 @@ static void propagate_names_if_named_tensor_enabled(THCTensor* result, THCTensor
   }; \
   \
   void THCTensor_(NAME)(THCState* state, THCTensor* self_, THCTensor* src) { \
-    THCAssertSameGPU(THCTensor_(checkGPU)(state, 2, self_, src)); \
-    at::assert_no_internal_overlap(self_, #NAME); \
+    THCAssertSameGPU(THCTensor_(checkGPU)(state, 2, self_, src)); \
+    at::assert_no_internal_overlap(self_); \
     if (self_ == src) { \
       if (!THC_pointwiseApply1<scalar_t>(state, self_, Tensor_##NAME##_##REAL##_Op())) { \
         THArgCheck(false, 2, CUTORCH_DIM_WARNING); \

test/test_torch.py

Lines changed: 8 additions & 0 deletions
@@ -12197,6 +12197,14 @@ def test_sinh_unary_mem_overlap(self):
     def test_cosh_unary_mem_overlap(self):
         self.unary_check_mem_overlap(lambda t: t.cosh_())
 
+    @unittest.expectedFailure
+    def test_lerp_mem_overlap(self):
+        start = torch.randn(1, device=device).expand(3, 3)
+        end = torch.randn(3, 3, device=device)
+        weight = torch.randn(3, 3, device=device)
+        with self.assertRaisesRegex(RuntimeError, 'single memory location'):
+            start.lerp_(end, weight)
+
     @unittest.skipIf(torch.cuda.device_count() < 2, 'only one GPU detected')
     def test_reverse_binary_ops_multiple_device(self):
         self.assertEqual(2 + torch.tensor(3), 2 + torch.tensor(3).to("cuda:1"))  # __radd__
