Skip to content

Commit bc44e83

Browse files
[LOCAL IREE REVERT FOR UNSOLVED ISSUE] Revert "[mlir][vector] Prevent incorrect vector.transfer_{read|write} hoisting (llvm#66930)"
This reverts commit 94c0477.
1 parent ef8c26b commit bc44e83

File tree

3 files changed

+16
-76
lines changed

3 files changed

+16
-76
lines changed

mlir/include/mlir/Dialect/Linalg/Transforms/Hoisting.h

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -28,19 +28,11 @@ namespace linalg {
2828
/// 3. No uses of the memref either dominate the transfer_read or are
2929
/// dominated by the transfer_write (i.e. no aliasing between the write and
3030
/// the read across the loop)
31-
/// 4. The source operands for vector.transfer_{read|write} do not originate
32-
/// from Ops implementing ViewLikeOpInterface (to reduce the risk of
33-
/// aliasing).
3431
/// To improve hoisting opportunities, call the `moveLoopInvariantCode` helper
3532
/// function on the candidate loop above which to hoist. Hoisting the transfers
3633
/// results in scf::ForOp yielding the value that originally transited through
3734
/// memory.
3835
///
39-
/// TODO: To further improve hoisting opportunities, fold aliasing memref
40-
/// operations into respective vector.transfer{read|write} operations and
41-
/// avoid using ops implementing ViewLikeOpInterface as the source for transfer
42-
/// Ops.
43-
///
4436
/// WARNING: This hoisting does not model parallelism and is generally incorrect
4537
/// when used on distributed loops with memref semantics!
4638
void hoistRedundantVectorTransfers(func::FuncOp func);

mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp

Lines changed: 12 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -55,12 +55,11 @@ void mlir::linalg::hoistRedundantVectorTransfersOnTensor(func::FuncOp func) {
5555
static bool noAliasingUseInLoop(vector::TransferReadOp transferRead,
5656
LoopLikeOpInterface loop) {
5757
Value source = transferRead.getSource();
58-
59-
// Skip view-like Ops and retrieve the actual source Operation
60-
while (auto srcOp =
61-
dyn_cast_or_null<ViewLikeOpInterface>(source.getDefiningOp()))
62-
source = srcOp.getViewSource();
63-
58+
// Skip subview and collapse_shape Ops
59+
while (auto subView = source.getDefiningOp<memref::SubViewOp>())
60+
source = subView.getSource();
61+
while (auto collapsed = source.getDefiningOp<memref::CollapseShapeOp>())
62+
source = collapsed->getOperand(0);
6463
llvm::SmallVector<Operation *, 32> users(source.getUsers().begin(),
6564
source.getUsers().end());
6665
llvm::SmallDenseSet<Operation *, 32> processed;
@@ -69,8 +68,12 @@ static bool noAliasingUseInLoop(vector::TransferReadOp transferRead,
6968
// If the user has already been processed skip.
7069
if (!processed.insert(user).second)
7170
continue;
72-
if (auto viewLike = dyn_cast<ViewLikeOpInterface>(user)) {
73-
users.append(viewLike->getUsers().begin(), viewLike->getUsers().end());
71+
if (auto subView = dyn_cast<memref::SubViewOp>(user)) {
72+
users.append(subView->getUsers().begin(), subView->getUsers().end());
73+
continue;
74+
}
75+
if (auto collapsed = dyn_cast<memref::CollapseShapeOp>(user)) {
76+
users.append(collapsed->getUsers().begin(), collapsed->getUsers().end());
7477
continue;
7578
}
7679
if (isMemoryEffectFree(user) || isa<vector::TransferReadOp>(user))
@@ -141,24 +144,14 @@ void mlir::linalg::hoistRedundantVectorTransfers(func::FuncOp func) {
141144
// Approximate aliasing by checking that:
142145
// 1. indices, vector type and permutation map are the same (i.e., the
143146
// transfer_read/transfer_write ops are matching),
144-
// 2. source operands for transfer_{read|write} do not originate from
145-
// Ops implementing ViewLikeOpInterface.
146-
// 3. no other operations in the loop access the same memref except
147+
// 2. no other operations in the loop access the same memref except
147148
// for transfer_read/transfer_write accessing statically disjoint
148149
// slices.
149150
if (transferRead.getIndices() != transferWrite.getIndices() ||
150151
transferRead.getVectorType() != transferWrite.getVectorType() ||
151152
transferRead.getPermutationMap() != transferWrite.getPermutationMap())
152153
return WalkResult::advance();
153154

154-
auto *source = transferRead.getSource().getDefiningOp();
155-
if (source && isa_and_nonnull<ViewLikeOpInterface>(source))
156-
return WalkResult::advance();
157-
158-
source = transferWrite.getSource().getDefiningOp();
159-
if (source && isa_and_nonnull<ViewLikeOpInterface>(source))
160-
return WalkResult::advance();
161-
162155
// TODO: may want to memoize this information for performance but it
163156
// likely gets invalidated often.
164157
DominanceInfo dom(loop);

mlir/test/Dialect/Linalg/hoisting.mlir

Lines changed: 4 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -765,10 +765,10 @@ transform.sequence failures(propagate) {
765765

766766
// CHECK-LABEL: func.func @no_hoisting_collapse_shape
767767
// CHECK: scf.for {{.*}} {
768-
// CHECK: vector.transfer_write {{.*}} : vector<4xi32>, memref<4xi32>
769-
// CHECK-NEXT: vector.transfer_read {{.*}} : memref<1x4x1xi32>, vector<1x4x1xi32>
770-
// CHECK-NEXT: vector.transfer_write {{.*}} : vector<1x4x1xi32>, memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>
771-
// CHECK-NEXT: }
768+
// CHECK: vector.transfer_write
769+
// CHECK: vector.transfer_read
770+
// CHECK: vector.transfer_write
771+
// CHECK: }
772772

773773
func.func @no_hoisting_collapse_shape(%in_0: memref<1x20x1xi32>, %1: memref<9x1xi32>, %vec: vector<4xi32>) {
774774
%c0_i32 = arith.constant 0 : i32
@@ -827,48 +827,3 @@ transform.sequence failures(propagate) {
827827
transform.structured.hoist_redundant_vector_transfers %0
828828
: (!transform.any_op) -> !transform.any_op
829829
}
830-
831-
// -----
832-
833-
// Regression test - hoisting the following `vector.transfer_{read|write}` pair
834-
// would not be safe:
835-
// %lhs = vector.transfer_read %collapsed_1[%c0]
836-
// vector.transfer_write %op, %collapsed_1[%c0]
837-
// That's because the following `vector.transfer_read` reads from the same
838-
// memory (i.e. `%collapsed_1` and `%collapsed_2` alias):
839-
// %acc = vector.transfer_read %collapsed_2[%c0]
840-
841-
// CHECK-LABEL: func.func @no_hoisting_write_to_memref
842-
// CHECK: scf.for {{.*}} {
843-
// CHECK: vector.transfer_read {{.*}} : memref<2xi32>, vector<1xi32>
844-
// CHECK-NEXT: vector.transfer_read {{.*}} : memref<2xi32>, vector<1xi32>
845-
// CHECK-NEXT: vector.outerproduct {{.*}} : vector<1xi32>, i32
846-
// CHECK-NEXT: vector.transfer_write {{.*}} : vector<1xi32>, memref<2xi32>
847-
// CHECK-NEXT: }
848-
849-
func.func @no_hoisting_write_to_memref(%rhs: i32, %arg1: vector<1xi32>) {
850-
%c0_i32 = arith.constant 0 : i32
851-
%c0 = arith.constant 0 : index
852-
%c1 = arith.constant 1 : index
853-
%c4 = arith.constant 4 : index
854-
%c20 = arith.constant 20 : index
855-
%alloca = memref.alloca() {alignment = 64 : i64} : memref<1x1x2xi32>
856-
%cast = memref.cast %alloca : memref<1x1x2xi32> to memref<1x1x2xi32>
857-
%collapsed_1 = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x1x2xi32> into memref<2xi32>
858-
scf.for %_ = %c0 to %c20 step %c4 {
859-
%collapsed_2 = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x1x2xi32> into memref<2xi32>
860-
%lhs = vector.transfer_read %collapsed_1[%c0], %c0_i32 {in_bounds = [true]} : memref<2xi32>, vector<1xi32>
861-
%acc = vector.transfer_read %collapsed_2[%c0], %c0_i32 {in_bounds = [true]} : memref<2xi32>, vector<1xi32>
862-
%op = vector.outerproduct %lhs, %rhs, %acc {kind = #vector.kind<add>} : vector<1xi32>, i32
863-
vector.transfer_write %op, %collapsed_1[%c0] {in_bounds = [true]} : vector<1xi32>, memref<2xi32>
864-
}
865-
return
866-
}
867-
868-
transform.sequence failures(propagate) {
869-
^bb1(%arg1: !transform.any_op):
870-
%0 = transform.structured.match ops{["func.func"]} in %arg1
871-
: (!transform.any_op) -> !transform.any_op
872-
transform.structured.hoist_redundant_vector_transfers %0
873-
: (!transform.any_op) -> !transform.any_op
874-
}

0 commit comments

Comments
 (0)