Fix symint ops and blacklist lift_fresh_copy (llvm#1373)
* Add symint to native functions yaml

* Re-enable LTC

* Fix new_empty_strided and narrow_copy
antoniojkim authored Sep 20, 2022
1 parent 4f3cd23 commit 8967463
Showing 5 changed files with 49 additions and 13 deletions.
4 changes: 4 additions & 0 deletions build_tools/autogen_ltc_backend.py
@@ -264,6 +264,9 @@ def get_opnames(ops):
         # Additional ops to support that are not supported by Torch-MLIR explicitly
         supported |= set(config.get("additional_ops", []))
 
+        # List of ops that will take in symints for its size
+        symint = set(config.get("symint", []))
+
         self.ops = sorted(ops)
 
         with self.source_yaml.open("w") as f:
@@ -272,6 +275,7 @@ def get_opnames(ops):
                 "cpp_namespace": "torch::lazy",
                 "full_codegen": self.ops,
                 "supported": sorted(supported),
+                "symint": sorted(symint),
                 "non_native": non_native,
             }
             yaml.dump(source_yaml, f, default_flow_style=False)
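For readers unfamiliar with the generator's flow, here is a minimal, runnable sketch of the round-trip the hunks above implement: the `symint` list is read from the backend config and forwarded into the source YAML that drives the LTC codegen. This is only an illustration; the inline `config` dict stands in for autogen_ltc_backend.yaml, and the empty placeholders stand in for `self.ops`, `supported`, and `non_native`.

```python
import yaml

# Stand-in for autogen_ltc_backend.yaml (see the YAML diff below).
config = {
    "symint": [
        "empty.memory_format", "new_empty_strided", "expand",
        "expand_copy", "narrow_copy", "view", "view_copy",
    ],
}

# Ops whose size arguments the codegen should treat as SymInts.
symint = set(config.get("symint", []))

# Mirrors the source_yaml construction above; placeholder values
# replace the generator's real state.
source_yaml = {
    "cpp_namespace": "torch::lazy",
    "full_codegen": [],
    "supported": [],
    "symint": sorted(symint),
    "non_native": [],
}
print(yaml.dump(source_yaml, default_flow_style=False))
```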
12 changes: 12 additions & 0 deletions build_tools/autogen_ltc_backend.yaml
@@ -16,6 +16,7 @@ blacklist:
 - copy_
 
 # Disabled for consistency with TS backend
+- lift_fresh_copy
 - new_empty
 - rsub
 - slice.Tensor # Disabled in favour of slice_copy.Tensor
@@ -60,6 +61,7 @@ supported:
 # but their implementations call view operators (which we need to functionalize away).
 - block_diag
 - new_empty_strided
+- narrow_copy
 - pixel_shuffle
 - pixel_unshuffle
 - select_backward
@@ -69,6 +71,16 @@ supported:
 - linalg_pinv.atol_rtol_tensor
 - logsumexp.out
 
+# List of ops that will take in symints for the size instead of ints
+symint:
+- empty.memory_format
+- new_empty_strided
+- expand
+- expand_copy
+- narrow_copy
+- view
+- view_copy
+
 
 additional_ops:
 # Additional ops to support that are not supported by Torch-MLIR explicitly
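The common thread in the `symint` list is that every entry takes size-like arguments (shapes, strides, or start/length offsets), which is exactly what becomes symbolic under dynamic shapes. A quick eager-mode illustration of those arguments (plain PyTorch; no lazy backend involved):

```python
import torch

t = torch.randn(2, 3)

t.view(3, 2)                          # target sizes
t.expand(2, 2, 3)                     # broadcast sizes
t.new_empty_strided((4, 4), (4, 1))   # sizes and strides
torch.narrow_copy(t, 0, 0, 1)         # dim, start, length
torch.empty((2, 2), memory_format=torch.contiguous_format)  # sizes
```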
4 changes: 2 additions & 2 deletions build_tools/python_deploy/build_linux_packages.sh
@@ -177,7 +177,7 @@ function build_in_tree() {
       -DLLVM_EXTERNAL_TORCH_MLIR_DIALECTS_SOURCE_DIR="/main_checkout/torch-mlir/externals/llvm-external-projects/torch-mlir-dialects" \
       -DLLVM_TARGETS_TO_BUILD=host \
       -DMLIR_ENABLE_BINDINGS_PYTHON=ON \
-      -DTORCH_MLIR_ENABLE_LTC=OFF \
+      -DTORCH_MLIR_ENABLE_LTC=ON \
       -DTORCH_MLIR_USE_INSTALLED_PYTORCH="$torch_from_src" \
       -DPython3_EXECUTABLE="$(which python3)" \
       /main_checkout/torch-mlir/externals/llvm-project/llvm
@@ -289,7 +289,7 @@ function build_out_of_tree() {
       -DLLVM_DIR="/main_checkout/torch-mlir/llvm-build/lib/cmake/llvm/" \
       -DMLIR_DIR="/main_checkout/torch-mlir/llvm-build/lib/cmake/mlir/" \
       -DMLIR_ENABLE_BINDINGS_PYTHON=OFF \
-      -DTORCH_MLIR_ENABLE_LTC=OFF \
+      -DTORCH_MLIR_ENABLE_LTC=ON \
       -DTORCH_MLIR_USE_INSTALLED_PYTORCH="$torch_from_src" \
       -DPython3_EXECUTABLE="$(which python3)" \
       /main_checkout/torch-mlir
34 changes: 24 additions & 10 deletions python/torch_mlir/csrc/base_lazy_backend/mlir_native_functions.cpp
@@ -301,9 +301,11 @@ at::Tensor LazyNativeFunctions::_to_copy(
   }
 };
 
-at::Tensor LazyNativeFunctions::empty(
-    at::SymIntArrayRef sym_size, c10::optional<at::ScalarType> dtype,
-    c10::optional<at::Layout> layout, c10::optional<at::Device> device,
+at::Tensor LazyNativeFunctions::empty_symint(
+    at::SymIntArrayRef sym_size,
+    c10::optional<at::ScalarType> dtype,
+    c10::optional<at::Layout> layout,
+    c10::optional<at::Device> device,
     c10::optional<bool> pin_memory,
     c10::optional<at::MemoryFormat> memory_format) {
   // TODO: support this directly
@@ -333,8 +335,8 @@ at::Tensor LazyNativeFunctions::empty_strided(
     c10::optional<at::ScalarType> dtype, c10::optional<at::Layout> layout,
     c10::optional<at::Device> device, c10::optional<bool> pin_memory) {
   TORCH_LAZY_FN_COUNTER("lazy::");
-  at::Tensor t = empty(
-      c10::SymIntArrayRef::fromIntArrayRef(size),
+  at::Tensor t = empty_symint(
+      c10::fromIntArrayRef(size),
       dtype, layout, device, pin_memory, c10::nullopt);
   return t.as_strided(size, stride, /*storage_offset=*/0);
 }
@@ -354,7 +356,7 @@ LazyNativeFunctions::fill_(at::Tensor& self, const at::Scalar& value) {
 at::Tensor LazyNativeFunctions::_unsafe_view(
     const at::Tensor& self, at::IntArrayRef size) {
   TORCH_LAZY_FN_COUNTER("lazy::");
-  return LazyNativeFunctions::view_copy(self, c10::SymIntArrayRef::fromIntArrayRef(size));
+  return LazyNativeFunctions::view_copy_symint(self, c10::fromIntArrayRef(size));
 }
 
 // This is needed by the torch.tensor constructor.
@@ -380,15 +382,27 @@ at::Tensor LazyNativeFunctions::block_diag(at::TensorList tensors) {
   return at::functionalization::functionalize_aten_op<ATEN_OP(
       block_diag)>::call(tensors);
 }
-at::Tensor LazyNativeFunctions::new_empty_strided(
-    const at::Tensor& self, at::IntArrayRef size, at::IntArrayRef stride,
-    c10::optional<at::ScalarType> dtype, c10::optional<at::Layout> layout,
-    c10::optional<at::Device> device, c10::optional<bool> pin_memory) {
+at::Tensor LazyNativeFunctions::new_empty_strided_symint(
+    const at::Tensor& self,
+    c10::SymIntArrayRef size,
+    c10::SymIntArrayRef stride,
+    c10::optional<at::ScalarType> dtype,
+    c10::optional<at::Layout> layout,
+    c10::optional<at::Device> device,
+    c10::optional<bool> pin_memory) {
   return at::functionalization::
       functionalize_aten_op<ATEN_OP(new_empty_strided)>::call(
           self, size, stride, dtype, layout, device, pin_memory);
 }
 
+at::Tensor LazyNativeFunctions::narrow_copy_symint(
+    const at::Tensor& self,
+    int64_t dim,
+    c10::SymInt start,
+    c10::SymInt length) {
+  return at::functionalization::functionalize_aten_op<ATEN_OP(
+      narrow_copy)>::call(self, dim, start, length);
+}
 at::Tensor LazyNativeFunctions::pixel_shuffle(
     const at::Tensor& self, int64_t upscale_factor) {
   return at::functionalization::functionalize_aten_op<ATEN_OP(
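Both new `_symint` entry points above delegate to `functionalize_aten_op`, because (per the YAML comment earlier) the eager implementations of these ops call view operators that the lazy backend must functionalize away. As a plain-PyTorch sketch of the aliasing distinction functionalization has to preserve, compare `narrow` (a view) with `narrow_copy` (a fresh tensor); eager mode, no LTC involved:

```python
import torch

base = torch.arange(6.0)
view = base.narrow(0, 0, 3)        # aliases base's storage
copy = base.narrow_copy(0, 0, 3)   # independent storage

base[0] = 100.0
print(view[0])   # tensor(100.): the view observes the mutation
print(copy[0])   # tensor(0.):   the copy does not
```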
8 changes: 7 additions & 1 deletion setup.py
@@ -45,6 +45,9 @@
 
 PACKAGE_VERSION = os.environ.get("TORCH_MLIR_PYTHON_PACKAGE_VERSION") or "0.0.1"
 
+# If true, enable LTC build by default
+TORCH_MLIR_ENABLE_LTC_DEFAULT = True
+
 # Build phase discovery is unreliable. Just tell it what phases to run.
 class CustomBuild(_build):
 
@@ -68,6 +71,9 @@ def run(self):
         src_dir = os.path.abspath(os.path.dirname(__file__))
         llvm_dir = os.path.join(
             src_dir, "externals", "llvm-project", "llvm")
+
+        enable_ltc = int(os.environ.get('TORCH_MLIR_ENABLE_LTC', TORCH_MLIR_ENABLE_LTC_DEFAULT))
+
         cmake_args = [
             f"-DCMAKE_BUILD_TYPE=Release",
             f"-DPython3_EXECUTABLE={sys.executable}",
@@ -82,7 +88,7 @@ def run(self):
             f"-DCMAKE_VISIBILITY_INLINES_HIDDEN=ON",
             f"-DCMAKE_C_VISIBILITY_PRESET=hidden",
             f"-DCMAKE_CXX_VISIBILITY_PRESET=hidden",
-            f"-DTORCH_MLIR_ENABLE_LTC={'OFF' if int(os.environ.get('TORCH_MLIR_ENABLE_LTC', 1)) else 'OFF'}",
+            f"-DTORCH_MLIR_ENABLE_LTC={'ON' if enable_ltc else 'OFF'}",
         ]
 
         os.makedirs(cmake_build_dir, exist_ok=True)
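The refactored flag wiring is easier to see in isolation. A minimal sketch of the same logic, runnable standalone; note that the value is parsed with `int()`, so `TORCH_MLIR_ENABLE_LTC=0` disables the build, while a non-numeric value such as `ON` would raise a `ValueError`:

```python
import os

TORCH_MLIR_ENABLE_LTC_DEFAULT = True  # int(True) == 1, so LTC defaults on

enable_ltc = int(os.environ.get('TORCH_MLIR_ENABLE_LTC',
                                TORCH_MLIR_ENABLE_LTC_DEFAULT))
print(f"-DTORCH_MLIR_ENABLE_LTC={'ON' if enable_ltc else 'OFF'}")

# Example (hypothetical invocation): TORCH_MLIR_ENABLE_LTC=0 python setup.py bdist_wheel
# configures CMake with -DTORCH_MLIR_ENABLE_LTC=OFF.
```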
