Back out "Add efficient zero tensors" (pytorch#69327)
Summary:
Pull Request resolved: pytorch#69327

Original commit changeset: d44096d88265

Original Phabricator Diff: D32144240 (pytorch@668574a)

Test Plan:
CI

The original diff failed 175 builds in CI.

Reviewed By: airboyang, anjali411

Differential Revision: D32809407

fbshipit-source-id: c7c8e69bcee0274992e2d5da901f035332e60071
Mark Richardson authored and facebook-github-bot committed Dec 3, 2021
1 parent c572a60 commit 834bd31
Showing 22 changed files with 21 additions and 337 deletions.
1 change: 0 additions & 1 deletion BUILD.bazel
@@ -136,7 +136,6 @@ genrule(
"aten/src/ATen/RegisterQuantizedCPU.cpp",
"aten/src/ATen/RegisterSparseCPU.cpp",
"aten/src/ATen/RegisterSparseCsrCPU.cpp",
"aten/src/ATen/RegisterZeroTensor.cpp",
"aten/src/ATen/RegisterCompositeImplicitAutograd.cpp",
"aten/src/ATen/RegisterCompositeExplicitAutograd.cpp",
"aten/src/ATen/RegisterMeta.cpp",
104 changes: 0 additions & 104 deletions aten/src/ATen/ZeroTensorFallback.cpp

This file was deleted.
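
A rough idea of what the deleted fallback file did: it registered a boxed kernel on the ZeroTensor dispatch key that materialized zero-tensor arguments and then re-ran the operator. The sketch below is an illustration only, not the deleted code; the function name is invented, and it relies on the pre-backout `_is_zerotensor()` accessor and ZeroTensor key that this commit also removes.

    #include <ATen/ATen.h>
    #include <ATen/core/dispatch/Dispatcher.h>
    #include <torch/library.h>

    // Hypothetical sketch of a ZeroTensor boxed fallback: replace every
    // ZeroTensor argument with a real, zero-filled tensor, then re-run the
    // operator, which now dispatches past the ZeroTensor key.
    static void zeroTensorFallbackSketch(
        const c10::OperatorHandle& op,
        torch::jit::Stack* stack) {
      const auto num_args = op.schema().arguments().size();
      for (auto idx = stack->size() - num_args; idx < stack->size(); ++idx) {
        auto& ivalue = (*stack)[idx];
        if (ivalue.isTensor() && ivalue.toTensor()._is_zerotensor()) {
          at::Tensor t = ivalue.toTensor();
          ivalue = at::zeros(t.sizes(), t.options());
        }
      }
      op.callBoxed(stack);
    }

    TORCH_LIBRARY_IMPL(_, ZeroTensor, m) {
      m.fallback(torch::CppFunction::makeFromBoxedFunction<&zeroTensorFallbackSketch>());
    }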

8 changes: 0 additions & 8 deletions aten/src/ATen/core/TensorBase.h
@@ -304,14 +304,6 @@ class TORCH_API TensorBase {
return impl_->storage().is_alias_of(other.storage());
}

inline bool _is_zerotensor() const {
return impl_->_is_zerotensor();
}

inline void _set_zero(bool zero) const {
impl_->_set_zero(zero);
}

inline bool is_conj() const {
return impl_->is_conj();
}
42 changes: 1 addition & 41 deletions aten/src/ATen/native/BinaryOps.cpp
@@ -7,8 +7,7 @@
#include <ATen/MemoryOverlap.h>
#include <ATen/NativeFunctions.h>
#include <ATen/native/TensorIterator.h>
#include <ATen/ExpandUtils.h>
#include <ATen/RedispatchFunctions.h>

#include <torch/library.h>

namespace at {
@@ -626,45 +625,6 @@ Tensor& mul_(Tensor& self, const Scalar& other) {
return at::mul_out(self, wrapped_scalar_tensor(other), self); // redispatch!
}

Device correct_out_device(const Tensor& self, const Tensor& other) {
if (self.device() == at::kCPU){
return other.device();
} else {
return self.device();
}
}

Tensor mul_zerotensor(const Tensor& self, const Tensor& other) {
auto out_device = correct_out_device(self, other);
// hack to use the TensorIterator to get the correct broadcasting and type promotion logic
auto device_ = Device(DeviceType::Meta);
auto meta_out = at::redispatch::mul(c10::DispatchKeySet(at::DispatchKey::Meta), self.to(device_), other.to(device_));
return at::_efficientzerotensor(meta_out.sizes(), meta_out.options().device(out_device));
}

Tensor add_zerotensor(const Tensor& self, const Tensor& other, const Scalar& alpha) {
auto out_device = correct_out_device(self, other);
// hack to use the TensorIterator to get the correct broadcasting and type promotion logic
auto device_ = Device(DeviceType::Meta);
auto meta_out = at::redispatch::add(c10::DispatchKeySet(at::DispatchKey::Meta), self.to(device_), other.to(device_));

auto get_out_like = [&] (const Tensor& tensor)
{
auto sizes = meta_out.sizes();
return at::_to_copy(tensor.expand(sizes), meta_out.options().device(out_device));
};

if (self._is_zerotensor()) {
if (other._is_zerotensor()) {
return at::_efficientzerotensor(meta_out.sizes(), meta_out.options().device(out_device));
}
auto res = get_out_like(other);
return alpha.equal(1) ? res : res.mul(alpha);
} else {
return get_out_like(self);
}
}

// multiply, alias for mul
Tensor& multiply_out(const Tensor& self, const Tensor& other, Tensor& result) {
return at::mul_out(result, self, other);
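
The removed `mul_zerotensor`/`add_zerotensor` kernels lean on a redispatch to the Meta backend purely to compute the broadcast shape and promoted dtype without touching data. A minimal standalone sketch of that trick (the helper name is invented for illustration, and unlike the removed kernels it returns a dense zeros tensor rather than a ZeroTensor):

    #include <ATen/ATen.h>

    // Run the op on Meta tensors: this computes output metadata (sizes, dtype)
    // without allocating or reading any element data.
    at::Tensor mul_by_zero_result(const at::Tensor& a, const at::Tensor& b) {
      auto meta = c10::Device(c10::DeviceType::Meta);
      at::Tensor meta_out = at::mul(a.to(meta), b.to(meta));
      // For a * 0 every element is known to be zero, so only metadata matters.
      return at::zeros(meta_out.sizes(),
                       meta_out.options().device(a.device()));
    }
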
6 changes: 0 additions & 6 deletions aten/src/ATen/native/Copy.cpp
@@ -244,12 +244,6 @@ Tensor& copy_(Tensor& self, const Tensor& src, bool non_blocking) {
auto maybe_outnames = namedinference::compute_broadcast_outnames(self, src);
{
NoNamesGuard guard;
if (self._is_zerotensor()) {
TORCH_CHECK(false, "ZeroTensors are immutable. Please materialize the tensor using `.clone()`, if you want a mutable zero tensor.");
}
if (src._is_zerotensor()) {
return self.zero_();
}
copy_impl(self, src, non_blocking);
}
namedinference::propagate_names_if_nonempty(self, maybe_outnames);
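
For reference, the two removed branches encoded an asymmetry: writing into a ZeroTensor was rejected (they are immutable), while copying a ZeroTensor into an ordinary tensor reduced to zeroing the destination. A hedged sketch of that pre-backout behavior (using the `_is_zerotensor()` accessor this commit removes):

    #include <ATen/ATen.h>

    // Illustrative only: mirrors the deleted guard at the top of copy_().
    void copy_with_zerotensor_rules(at::Tensor& dst, const at::Tensor& src) {
      TORCH_CHECK(!dst._is_zerotensor(),
                  "ZeroTensors are immutable; call .clone() to get a mutable copy");
      if (src._is_zerotensor()) {
        dst.zero_();  // nothing to read from src; just clear dst
        return;
      }
      dst.copy_(src);
    }
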
3 changes: 2 additions & 1 deletion aten/src/ATen/native/Resize.cpp
@@ -27,7 +27,8 @@ bool resize_output_check(const Tensor& output, IntArrayRef shape) {

static auto kFunctorchWrappedTensors = DispatchKeySet({
DispatchKey::FuncTorchGradWrapper,
DispatchKey::FuncTorchBatched});
DispatchKey::FuncTorchBatched,
DispatchKey::FuncTorchPython});

static bool is_functorch_wrapped_tensor(const Tensor& tensor) {
auto key_set = tensor.unsafeGetTensorImpl()->key_set();
23 changes: 2 additions & 21 deletions aten/src/ATen/native/TensorFactories.cpp
@@ -349,10 +349,9 @@ Tensor empty_like(
namedinference::propagate_names(result, self.names());
}

// never propagate Conjugate, Negative, and ZeroTensor dispatch key
// never propagate Conjugate and Negative dispatch key
result._set_conj(false);
result._set_neg(false);
result._set_zero(false);
return result;
}

@@ -1058,20 +1057,6 @@ Tensor zeros(IntArrayRef size,
return result.zero_();
}

Tensor _efficientzerotensor(IntArrayRef size,
c10::optional<ScalarType> dtype,
c10::optional<Layout> layout,
c10::optional<Device> device,
c10::optional<bool> pin_memory) {
caffe2::TypeMeta dtype_ = scalarTypeToTypeMeta(dtype_or_default(dtype));
Tensor tensor = detail::make_tensor<TensorImpl>(c10::DispatchKeySet({at::DispatchKey::ZeroTensor}), dtype_, device);
// Default TensorImpl has size [0]
if (size.size() != 1 || size[0] != 0) {
tensor.unsafeGetTensorImpl()->set_sizes_contiguous(size);
}
return tensor;
}

Tensor& zeros_out(IntArrayRef size, Tensor& result) {
if (result.is_sparse()) {
result.sparse_resize_and_clear_(size, size.size(), 0.);
@@ -1442,11 +1427,7 @@ Tensor clone(const Tensor& src, c10::optional<c10::MemoryFormat> optional_memory
self = at::empty_like(src, src.options(), memory_format);
}

if (src._is_zerotensor()) {
self.zero_();
} else {
self.copy_(src);
}
self.copy_(src);
return self;
}

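The removed `_efficientzerotensor` factory never allocated storage: it built a bare TensorImpl whose key set contains only the ZeroTensor dispatch key, and ops such as the `clone` branch above materialized it on demand. A hedged usage sketch of that pre-backout API (removed by this commit), assuming the generated TensorOptions overload of the factory:

    #include <ATen/ATen.h>

    void efficient_zero_example() {
      // Metadata only: shape and dtype are set, but no memory is allocated.
      at::Tensor z = at::_efficientzerotensor({3, 4}, at::dtype(at::kFloat));
      TORCH_CHECK(z._is_zerotensor());
      TORCH_CHECK(!z.has_storage());

      // clone() materializes it: allocate like empty_like() and zero-fill,
      // which is what the removed branch in clone() above did.
      at::Tensor m = z.clone();
      TORCH_CHECK(m.has_storage() && !m._is_zerotensor());
    }
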
8 changes: 0 additions & 8 deletions aten/src/ATen/native/TensorShape.cpp
@@ -767,9 +767,6 @@ Tensor make_qtensor(const Tensor& self, IntArrayRef size, IntArrayRef stride, Qu
}

Tensor as_strided_tensorimpl(const Tensor& self, IntArrayRef size, IntArrayRef stride, optional<int64_t> storage_offset_) {
if (self._is_zerotensor()) {
return at::_efficientzerotensor(size, self.options());
}
auto storage_offset = storage_offset_.value_or(self.storage_offset());
auto result = detail::make_tensor<TensorImpl>(
c10::TensorImpl::VIEW, Storage(self.storage()), self.key_set(), self.dtype());
@@ -1027,11 +1024,6 @@ Tensor alias_with_sizes_and_strides(
const Tensor& self,
const Vec& sizes,
const Vec& strides) {
// it's okay to return a new tensor here since we disallow in-place operation on ZeroTensors
if (self._is_zerotensor()) {
return at::_efficientzerotensor(sizes, self.options());
}

Tensor self_;
if (self.is_quantized()) {
self_ = detail::make_tensor<QTensorImpl>(
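
The removed branches in `as_strided_tensorimpl` and `alias_with_sizes_and_strides` work the same way: since ZeroTensors are immutable, a view-like op could simply return a fresh ZeroTensor of the target geometry instead of a real view. A hedged sketch of that pre-backout pattern (both functions used here are removed by this commit):

    #include <ATen/ATen.h>

    // Illustrative only: "viewing" a ZeroTensor just makes another ZeroTensor,
    // which is safe because neither can ever be written to.
    at::Tensor reshape_maybe_zero(const at::Tensor& t, at::IntArrayRef sizes) {
      if (t._is_zerotensor()) {
        return at::_efficientzerotensor(sizes, t.options());
      }
      return t.view(sizes);
    }
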
4 changes: 0 additions & 4 deletions aten/src/ATen/native/TypeProperties.cpp
@@ -30,10 +30,6 @@ bool is_signed(const Tensor &self) {
return self.is_signed();
}

bool _is_zerotensor(const Tensor& self) {
return self._is_zerotensor();
}

bool is_conj(const Tensor& self) {
return self.is_conj();
}
17 changes: 3 additions & 14 deletions aten/src/ATen/native/native_functions.yaml
@@ -428,7 +428,6 @@
SparseCPU, SparseCUDA: add_sparse
SparseCsrCPU, SparseCsrCUDA: add_sparse_csr
MkldnnCPU: mkldnn_add
ZeroTensor: add_zerotensor

- func: add_.Tensor(Tensor(a!) self, Tensor other, *, Scalar alpha=1) -> Tensor(a!)
device_check: NoCheck # TensorIterator
@@ -695,7 +694,7 @@
- func: as_strided(Tensor(a) self, int[] size, int[] stride, int? storage_offset=None) -> Tensor(a)
variants: function, method
dispatch:
ZeroTensor, CPU, CUDA, Meta: as_strided_tensorimpl
CPU, CUDA, Meta: as_strided_tensorimpl
QuantizedCPU, QuantizedCUDA: as_strided_qtensorimpl
device_check: NoCheck
device_guard: False
@@ -2428,11 +2427,6 @@
device_guard: False
manual_cpp_binding: True

- func: _is_zerotensor(Tensor self) -> bool
variants: function, method
device_guard: False
manual_cpp_binding: True

- func: is_neg(Tensor self) -> bool
variants: function, method
device_guard: False
@@ -3166,7 +3160,6 @@
dispatch:
SparseCPU, SparseCUDA: mul_sparse
MkldnnCPU: mkldnn_mul
ZeroTensor: mul_zerotensor

- func: mul_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
device_check: NoCheck # TensorIterator
@@ -3629,7 +3622,7 @@
device_check: NoCheck
device_guard: False
dispatch:
CPU, CUDA, Meta, QuantizedCPU, QuantizedCUDA, ZeroTensor: _reshape_alias
CPU, CUDA, Meta, QuantizedCPU, QuantizedCUDA: _reshape_alias
# We don't need to support mkldnn since this is handled explicitly by the reshape operator.

- func: _mkldnn_reshape(Tensor self, int[] shape) -> Tensor
@@ -4771,10 +4764,6 @@
device_check: NoCheck
device_guard: False

- func: _efficientzerotensor(int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
dispatch:
CompositeExplicitAutograd: _efficientzerotensor

- func: zeros(int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

- func: zeros.out(int[] size, *, Tensor(a!) out) -> Tensor(a!)
@@ -5834,7 +5823,7 @@
device_check: NoCheck
device_guard: False
dispatch:
ZeroTensor, CPU, CUDA, Meta, QuantizedCPU, QuantizedCUDA: view
CPU, CUDA, Meta, QuantizedCPU, QuantizedCUDA: view
MkldnnCPU: mkldnn_view

# Warning: If you want to change the name or overload name of this
4 changes: 0 additions & 4 deletions aten/src/ATen/templates/Functions.h
@@ -221,10 +221,6 @@ inline bool is_inference(const Tensor& tensor) {
return tensor.is_inference();
}

inline bool _is_zerotensor(const Tensor& tensor) {
return tensor._is_zerotensor();
}

inline bool is_conj(const Tensor& tensor) {
return tensor.is_conj();
}
7 changes: 3 additions & 4 deletions c10/core/DispatchKey.cpp
@@ -111,9 +111,6 @@ const char* toString(DispatchKey t) {
return "AutogradPrivateUse3";
case DispatchKey::AutogradOther:
return "AutogradOther";

case DispatchKey::ZeroTensor:
return "ZeroTensor";
case DispatchKey::BackendSelect:
return "BackendSelect";
case DispatchKey::Named:
@@ -152,6 +149,8 @@ const char* toString(DispatchKey t) {
// https://github.com/zou3519/functorch
// We plan on eventually upstreaming the prototype into core, at which
// point it will have a different design that should use fewer keys.
case DispatchKey::FuncTorchPython:
return "FuncTorchPython";
case DispatchKey::FuncTorchDynamicLayerBackMode:
return "FuncTorchDynamicLayerBackMode";
case DispatchKey::FuncTorchDynamicLayerFrontMode:
@@ -243,10 +242,10 @@ c10::DispatchKey parseDispatchKey(const std::string& k) {
{"PrivateUse3", c10::DispatchKey::PrivateUse3},
{"BackendSelect", c10::DispatchKey::BackendSelect},
{"Python", c10::DispatchKey::Python},
{"FuncTorchPython", c10::DispatchKey::FuncTorchPython},
{"Named", c10::DispatchKey::Named},
{"Conjugate", c10::DispatchKey::Conjugate},
{"Negative", c10::DispatchKey::Negative},
{"ZeroTensor", c10::DispatchKey::ZeroTensor},
{"FuncTorchDynamicLayerBackMode",
c10::DispatchKey::FuncTorchDynamicLayerBackMode},
{"ADInplaceOrView", c10::DispatchKey::ADInplaceOrView},
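
toString and parseDispatchKey are kept as mirror images, which is why the backout touches both: any string toString produces should parse back to the same key. A small illustrative round-trip check (not part of the commit):

    #include <c10/core/DispatchKey.h>
    #include <cassert>

    int main() {
      // FuncTorchPython is restored by this backout, so its name round-trips.
      auto key = c10::DispatchKey::FuncTorchPython;
      assert(c10::parseDispatchKey(c10::toString(key)) == key);
      return 0;
    }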