
Commit b2ae471

Fold slice+copy_ into index_put_
1 parent 62250da commit b2ae471

6 files changed, +106 -1 lines changed

e2e_testing/xfail_sets.py

Lines changed: 1 addition & 0 deletions
@@ -837,6 +837,7 @@
     "DropoutTrainModule_basic",
     "StdCorrectionKeepDimModule_basic",
     "StdCorrectionNoneModule_basic",
+    "SliceCopy_Module_basic",
     "VarBiasedModule_basic",
     "VarCorrectionAllDimReduceModule_basic",
     "VarCorrectionEmptyDimModule_basic",

include/torch-mlir/Dialect/Torch/IR/GeneratedTorchOps.td

Lines changed: 1 addition & 0 deletions
@@ -6380,6 +6380,7 @@ def Torch_AtenCopy_Op : Torch_Op<"aten.copy_", [
       printDefaultTorchOp(printer, *this, 3, 1);
     }
   }];
+  let hasCanonicalizer = 1;
 }
 
 def Torch_Aten_ToCopyOp : Torch_Op<"aten._to_copy", [

lib/Dialect/Torch/IR/TorchOps.cpp

Lines changed: 55 additions & 0 deletions
@@ -8,6 +8,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "torch-mlir/Dialect/Torch/IR/TorchOps.h"
+#include "mlir/IR/BuiltinTypes.h"
+#include "torch-mlir/Dialect/Torch/IR/TorchTypes.h"
 #include "torch-mlir/Dialect/Torch/Utils/Utils.h"
 
 #include "mlir/Dialect/Func/IR/FuncOps.h"
@@ -2134,6 +2136,59 @@ OpFoldResult AtenCatOp::fold(FoldAdaptor adaptor) {
   return list.getElements()[0];
 }
 
+//===----------------------------------------------------------------------===//
+// AtenCopy_Op
+//===----------------------------------------------------------------------===//
+
+void AtenCopy_Op::getCanonicalizationPatterns(RewritePatternSet &patterns,
+                                              MLIRContext *context) {
+  patterns.add(+[](AtenCopy_Op op, PatternRewriter &rewriter) {
+    if (!op.getSelf().getDefiningOp() ||
+        !isa<AtenSliceTensorOp>(op.getSelf().getDefiningOp()))
+      return failure();
+    auto sliceOp = cast<AtenSliceTensorOp>(op.getSelf().getDefiningOp());
+
+    // Get indices
+    int64_t dim;
+    if (!matchPattern(sliceOp.getDim(), m_TorchConstantInt(&dim)))
+      return failure();
+    int64_t end;
+    if (!matchPattern(sliceOp.getEnd(), m_TorchConstantInt(&end)) || end < 0)
+      return failure();
+    int64_t step;
+    if (!matchPattern(sliceOp.getStep(), m_TorchConstantInt(&step)) ||
+        step != 1)
+      return failure();
+
+    Value noneVal = rewriter.create<ConstantNoneOp>(op.getLoc());
+    Value falseVal = rewriter.create<ConstantBoolOp>(op.getLoc(), false);
+
+    // Create IndexPut_Op
+    BaseTensorType tensorType = op->getResultTypes()[0].cast<BaseTensorType>();
+    Value range = rewriter.create<AtenArangeStartStepOp>(
+        op.getLoc(), tensorType, sliceOp.getStart(), sliceOp.getEnd(),
+        sliceOp.getStep(),
+        /*dtype=*/noneVal, /*layout=*/noneVal, /*device=*/noneVal,
+        /*pin_memory=*/noneVal);
+
+    SmallVector<Value> indicesVector;
+    for (auto i = 0; i < dim - 1; i++)
+      indicesVector.push_back(noneVal);
+    indicesVector.push_back(range);
+    Value indices = rewriter.create<PrimListConstructOp>(
+        op.getLoc(),
+        Torch::ListType::get(op->getContext(),
+                             Torch::OptionalType::get(tensorType)),
+        indicesVector);
+
+    rewriter.replaceOpWithNewOp<Aten_IndexPutImpl_Op>(
+        op, op->getResultTypes(), sliceOp.getSelf(), indices, op.getSrc(),
+        /*accumulate=*/falseVal, /*unsafe=*/falseVal);
+
+    return success();
+  });
+}
+
 //===----------------------------------------------------------------------===//
 // AtenSliceTensorOp
 //===----------------------------------------------------------------------===//
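The fold relies on the fact that writing through a step-1 slice view is equivalent to an index put along the sliced dimension with arange(start, end, step) indices. A minimal eager-mode PyTorch sketch of that equivalence, for the dim-0 case the new tests exercise; the public index_put_ stands in here for the underlying aten::_index_put_impl_ that the pattern actually emits:

import torch

x = torch.rand(10, 4, 4)
y = torch.rand(4, 4, 4)

# What the matched IR does: write y into x through a slice view.
a = x.clone()
torch.ops.aten.slice(a, 0, 2, 6, 1).copy_(y)

# What the fold produces: an index put with arange indices along the
# sliced dimension, with accumulate=False.
b = x.clone()
b.index_put_((torch.arange(2, 6, 1),), y, accumulate=False)

assert torch.equal(a, b)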

python/torch_mlir/dialects/torch/importer/jit_ir/build_tools/torch_ods_gen.py

Lines changed: 8 additions & 1 deletion
@@ -234,6 +234,12 @@ def emit_with_mutating_variants(key, **kwargs):
                 emitter_td,
                 traits=["IsTrailingUnderscoreInplaceVariant"] if not is_functional_op else [])
 
+    def emit_as_mutating_variant(key, **kwargs):
+        emit_op(registry[key],
+                emitter_td,
+                traits=["IsTrailingUnderscoreInplaceVariant"],
+                **kwargs)
+
     # ==========================================================================
     # `aten::` namespace.
     # ==========================================================================
@@ -461,7 +467,8 @@ def emit_with_mutating_variants(key, **kwargs):
     emit("aten::clone : (Tensor, int?) -> (Tensor)")
     emit("aten::lift_fresh_copy : (Tensor) -> (Tensor)")
     emit("aten::contiguous : (Tensor, int) -> (Tensor)")
-    emit_with_mutating_variants("aten::copy : (Tensor, Tensor, bool) -> (Tensor)")
+    emit("aten::copy : (Tensor, Tensor, bool) -> (Tensor)")
+    emit_as_mutating_variant("aten::copy_ : (Tensor, Tensor, bool) -> (Tensor)", has_canonicalizer=True)
     emit("aten::_to_copy : (Tensor, int?, int?, Device?, bool?, bool, int?) -> (Tensor)")
     emit("aten::detach : (Tensor) -> (Tensor)")
     emit("aten::embedding : (Tensor, Tensor, int, bool, bool) -> (Tensor)")

python/torch_mlir_e2e_test/test_suite/slice_like.py

Lines changed: 22 additions & 0 deletions
@@ -481,3 +481,25 @@ def forward(self, x):
 @register_test_case(module_factory=lambda: NarrowVerticalTest2())
 def NarrowVerticalTest2_basic(module, tu: TestUtils):
     module.forward(tu.rand(6,4))
+
+# ==============================================================================
+
+class SliceCopy_Module(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+
+    @export
+    @annotate_args([
+        None,
+        ([10, 4, 4], torch.float32, True),
+        ([4, 4, 4], torch.float32, True),
+    ])
+    def forward(self, x, y):
+        xslice = torch.ops.aten.slice(x, 0, 2, 6, 1)
+        xslice.copy_(y)
+        return x
+
+
+@register_test_case(module_factory=lambda: SliceCopy_Module())
+def SliceCopy_Module_basic(module, tu: TestUtils):
+    module.forward(tu.rand(10, 4, 4), tu.rand(4, 4, 4))
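The test returns x rather than the slice on purpose: copy_ writes through the view, so the canonicalized index-put form must reproduce the mutation in the original tensor. A quick plain-PyTorch sanity check of that behavior, outside the e2e harness:

import torch

x = torch.rand(10, 4, 4)
y = torch.rand(4, 4, 4)
xslice = torch.ops.aten.slice(x, 0, 2, 6, 1)  # a view of rows 2..5 of x
xslice.copy_(y)                               # writes through the view
assert torch.equal(x[2:6], y)                 # mutation is visible in x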

test/Dialect/Torch/canonicalize.mlir

Lines changed: 19 additions & 0 deletions
@@ -1838,3 +1838,22 @@ func.func @torch.aten.slice.tensor$fold_full_domain_slice(%arg0: !torch.vtensor<
   %0 = torch.aten.slice.Tensor %arg0, %int0, %int0, %int-1, %int1 : !torch.vtensor<[4], f32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[4], f32>
   return %0 : !torch.vtensor<[4],f32>
 }
+
+// CHECK-LABEL: func.func @torch.aten.slice.tensor$slice_plus_copy
+// CHECK-SAME:    %[[ARG0:.+]]: !torch.vtensor<[10,4,4],f32>
+// CHECK-SAME:    %[[ARG1:.+]]: !torch.vtensor<[4,4,4],f32>
+// CHECK: %[[SLICE:.*]] = torch.aten.slice.Tensor %[[ARG0]], %[[INT0]], %[[INT2]], %[[INT6]], %[[INT1]] : !torch.vtensor<[10,4,4],f32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[4,4,4],f32>
+// CHECK: %[[ARANGE:.*]] = torch.aten.arange.start_step %[[INT2]], %[[INT6]], %[[INT1]], %[[NONE]], %[[NONE]], %[[NONE]], %[[NONE]] : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.none, !torch.none -> !torch.vtensor<[4,4,4],f32>
+// CHECK: %[[LIST:.*]] = torch.prim.ListConstruct %[[ARANGE]] : (!torch.vtensor<[4,4,4],f32>) -> !torch.list<optional<vtensor<[4,4,4],f32>>>
+// CHECK: %[[INDEXPUT:.*]] = torch.aten._index_put_impl_ %[[ARG0]], %[[LIST]], %[[ARG1]], %[[FALSE]], %[[FALSE]] : !torch.vtensor<[10,4,4],f32>, !torch.list<optional<vtensor<[4,4,4],f32>>>, !torch.vtensor<[4,4,4],f32>, !torch.bool, !torch.bool -> !torch.vtensor<[4,4,4],f32>
+// CHECK: return %[[ARG0]] : !torch.vtensor<[10,4,4],f32>
+func.func @torch.aten.slice.tensor$slice_plus_copy(%arg0: !torch.vtensor<[10,4,4],f32>, %arg1: !torch.vtensor<[4,4,4],f32>) -> !torch.vtensor<[10,4,4],f32> {
+  %false = torch.constant.bool false
+  %int0 = torch.constant.int 0
+  %int2 = torch.constant.int 2
+  %int6 = torch.constant.int 6
+  %int1 = torch.constant.int 1
+  %1 = torch.aten.slice.Tensor %arg0, %int0, %int2, %int6, %int1 : !torch.vtensor<[10,4,4],f32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[4,4,4],f32>
+  %2 = torch.aten.copy_ %1, %arg1, %false : !torch.vtensor<[4,4,4],f32>, !torch.vtensor<[4,4,4],f32>, !torch.bool -> !torch.vtensor<[4,4,4],f32>
+  return %arg0 : !torch.vtensor<[10,4,4],f32>
+}
