From 4d16a3d9b3193888ab7857d73bf3ddb5dfc7f64b Mon Sep 17 00:00:00 2001 From: Ruizhe Zhao Date: Tue, 19 Oct 2021 18:27:19 +0100 Subject: [PATCH] [ExtractScopStmt] add more tests and fixed bugs (#103) * [Pass][ExtractScopStmt] internal index_cast * Added test cases * Temporarily added aes.mlir as a dummy test case * [ExtractScopStmt] fixed issues with write op discovery * [ExtractScopStmt] make sure dom store use same mem * Removed unwanted tests --- lib/Transforms/ExtractScopStmt.cc | 40 +++++++++----- .../ExtractScopStmt/no-loop-blockarg.mlir | 13 +++++ .../ExtractScopStmt/no-loop-non-affine.mlir | 14 +++++ test/polymer-opt/ExtractScopStmt/no-loop.mlir | 14 +++++ .../scratchpad-dom-store-diff-mem.mlir | 14 +++++ .../scratchpad-dom-store-same-mem.mlir | 23 ++++++++ .../ExtractScopStmt/scratchpad-dom-store.mlir | 51 ++++++++++++++++++ test/polymer-opt/Reg2Mem/aes.mlir | 52 +++++++++++++++++++ 8 files changed, 209 insertions(+), 12 deletions(-) create mode 100644 test/polymer-opt/ExtractScopStmt/no-loop-blockarg.mlir create mode 100644 test/polymer-opt/ExtractScopStmt/no-loop-non-affine.mlir create mode 100644 test/polymer-opt/ExtractScopStmt/no-loop.mlir create mode 100644 test/polymer-opt/ExtractScopStmt/scratchpad-dom-store-diff-mem.mlir create mode 100644 test/polymer-opt/ExtractScopStmt/scratchpad-dom-store-same-mem.mlir create mode 100644 test/polymer-opt/ExtractScopStmt/scratchpad-dom-store.mlir create mode 100644 test/polymer-opt/Reg2Mem/aes.mlir diff --git a/lib/Transforms/ExtractScopStmt.cc b/lib/Transforms/ExtractScopStmt.cc index 3625b245..3f0687db 100644 --- a/lib/Transforms/ExtractScopStmt.cc +++ b/lib/Transforms/ExtractScopStmt.cc @@ -44,16 +44,10 @@ using CalleeToCallersMap = /// TODO: support CallOp. static void discoverMemWriteOps(mlir::FuncOp f, SmallVectorImpl &ops) { - bool hasAffineScope = false; f.getOperation()->walk([&](Operation *op) { - if (isa(op)) - hasAffineScope = true; - if (isa(op)) + if (isa(op)) ops.push_back(op); }); - - if (!hasAffineScope) - ops.clear(); } /// Returns the newly created scratchpad. @@ -126,6 +120,15 @@ insertScratchpadForInterprocUses(mlir::Operation *defOp, return memref; } +static Value getMemRef(Operation *op) { + if (isa(op)) + return op->getOperand(0); + if (isa(op)) + return op->getOperand(1); + + return nullptr; +} + /// Check is there any load in the use-def chains of op loads from a memref that /// is later updated by a store op that dominates the current op. We should use /// a proper RAW checker for this purpose. @@ -145,14 +148,21 @@ static bool isUpdatedByDominatingStore(Operation *op, Operation *domOp, while (!worklist.empty()) { Operation *currOp = worklist.pop_back_val(); - if (mlir::AffineLoadOp loadOp = dyn_cast(currOp)) { - Value memref = loadOp.memref(); + if (Value memref = getMemRef(currOp)) for (Operation *userOp : memref.getUsers()) - if (mlir::AffineStoreOp storeOp = dyn_cast(userOp)) - if (dom.dominates(storeOp, domOp)) + // Both affine.store and memref.store should be counted. + if (isa(userOp)) + if (memref == getMemRef(userOp) && userOp != domOp && + dom.dominates(userOp, domOp)) { + LLVM_DEBUG(dbgs() + << "The load op:\n\t" << (*currOp) + << "\nThe store op:\n\t" << (*userOp) + << "\naccess to the same memref:\n\t" << memref + << "\nand the store is dominating the final write:\n\t" + << (*domOp)); return true; - } + } for (mlir::Value operand : currOp->getOperands()) if (Operation *defOp = operand.getDefiningOp()) { @@ -385,6 +395,12 @@ static unsigned extractScopStmt(mlir::FuncOp f, unsigned numCallees, SmallVector writeOps; discoverMemWriteOps(f, writeOps); + LLVM_DEBUG({ + dbgs() << "Discovered memref write ops:\n"; + for (Operation *op : writeOps) + op->dump(); + }); + llvm::SetVector opsToRemove; // Map from an op in the original funcOp to which callee it would belong to. OpToCalleeMap opToCallee; diff --git a/test/polymer-opt/ExtractScopStmt/no-loop-blockarg.mlir b/test/polymer-opt/ExtractScopStmt/no-loop-blockarg.mlir new file mode 100644 index 00000000..67346a06 --- /dev/null +++ b/test/polymer-opt/ExtractScopStmt/no-loop-blockarg.mlir @@ -0,0 +1,13 @@ +// RUN: polymer-opt %s -extract-scop-stmt | FileCheck %s + +func @no_loop_blockarg(%A: memref<1xf32>, %a: f32) { + affine.store %a, %A[0] : memref<1xf32> + return +} + +// CHECK: func private @S0(%[[a:.*]]: f32, %[[A:.*]]: memref<1xf32>) attributes {scop.stmt} +// CHECK-NEXT: affine.store %[[a]], %[[A]][0] + +// CHECK: func @no_loop_blockarg(%[[A:.*]]: memref<1xf32>, %[[a:.*]]: f32) +// CHECK-NEXT: call @S0(%[[a]], %[[A]]) : (f32, memref<1xf32>) -> () + diff --git a/test/polymer-opt/ExtractScopStmt/no-loop-non-affine.mlir b/test/polymer-opt/ExtractScopStmt/no-loop-non-affine.mlir new file mode 100644 index 00000000..7e728092 --- /dev/null +++ b/test/polymer-opt/ExtractScopStmt/no-loop-non-affine.mlir @@ -0,0 +1,14 @@ +// RUN: polymer-opt %s -extract-scop-stmt | FileCheck %s + +func @foo(%A: memref<1xf32>) { + %0 = arith.constant 1.23 : f32 + %c0 = arith.constant 0 : index + memref.store %0, %A[%c0] : memref<1xf32> + return +} + +// CHECK-LABEL: func @foo +// CHECK-NEXT: %{{.*}} = arith.constant +// CHECK-NEXT: %{{.*}} = arith.constant +// CHECK-NEXT: memref.store +// CHECK-NEXT: return diff --git a/test/polymer-opt/ExtractScopStmt/no-loop.mlir b/test/polymer-opt/ExtractScopStmt/no-loop.mlir new file mode 100644 index 00000000..c39d8118 --- /dev/null +++ b/test/polymer-opt/ExtractScopStmt/no-loop.mlir @@ -0,0 +1,14 @@ +// RUN: polymer-opt %s -extract-scop-stmt | FileCheck %s + +func @no_loop(%A: memref<1xf32>) { + %0 = arith.constant 1.23 : f32 + affine.store %0, %A[0] : memref<1xf32> + return +} + +// CHECK: func private @S0(%[[A:.*]]: memref<1xf32>) attributes {scop.stmt} +// CHECK-NEXT: %[[CST:.*]] = arith.constant 1.23 +// CHECK-NEXT: affine.store %[[CST]], %[[A]][0] + +// CHECK: func @no_loop(%[[A:.*]]: memref<1xf32>) +// CHECK-NEXT: call @S0(%[[A]]) : (memref<1xf32>) -> () diff --git a/test/polymer-opt/ExtractScopStmt/scratchpad-dom-store-diff-mem.mlir b/test/polymer-opt/ExtractScopStmt/scratchpad-dom-store-diff-mem.mlir new file mode 100644 index 00000000..2da95601 --- /dev/null +++ b/test/polymer-opt/ExtractScopStmt/scratchpad-dom-store-diff-mem.mlir @@ -0,0 +1,14 @@ +// RUN: polymer-opt %s -extract-scop-stmt | FileCheck %s + +// There should be no scratchpad inserted. +func @foo(%A: memref<1xf32>, %B: memref<1xf32>) { + %c0 = arith.constant 0 : index + %0 = affine.load %A[0] : memref<1xf32> + affine.store %0, %B[0] : memref<1xf32> + affine.store %0, %A[0] : memref<1xf32> + return +} + +// CHECK: func @foo(%[[A:.*]]: memref<1xf32>, %[[B:.*]]: memref<1xf32>) +// CHECK-NEXT: call @S0(%[[B]], %[[A]]) +// CHECK-NEXT: call @S1(%[[A]]) diff --git a/test/polymer-opt/ExtractScopStmt/scratchpad-dom-store-same-mem.mlir b/test/polymer-opt/ExtractScopStmt/scratchpad-dom-store-same-mem.mlir new file mode 100644 index 00000000..e2d16196 --- /dev/null +++ b/test/polymer-opt/ExtractScopStmt/scratchpad-dom-store-same-mem.mlir @@ -0,0 +1,23 @@ +// RUN: polymer-opt %s -extract-scop-stmt | FileCheck %s + +func @foo(%A: memref<1xf32>) { + %c0 = arith.constant 0 : index + %0 = affine.load %A[0] : memref<1xf32> + affine.store %0, %A[0] : memref<1xf32> + affine.store %0, %A[0] : memref<1xf32> + return +} + +// CHECK: func private @S0(%[[A:.*]]: memref<1xf32>, %[[SCRATCHPAD:.*]]: memref<1xf32>) +// CHECK-NEXT: %[[VAL0:.*]] = affine.load %[[A]][0] +// CHECK-NEXT: affine.store %[[VAL0]], %[[SCRATCHPAD]][0] +// CHECK-NEXT: affine.store %[[VAL0]], %[[A]][0] + +// CHECK: func private @S1(%[[A:.*]]: memref<1xf32>, %[[SCRATCHPAD:.*]]: memref<1xf32>) +// CHECK-NEXT: %[[VAL0:.*]] = affine.load %[[SCRATCHPAD]][0] +// CHECK-NEXT: affine.store %[[VAL0]], %[[A]][0] + +// CHECK: func @foo(%[[A:.*]]: memref<1xf32>) +// CHECK-NEXT: %[[SCRATCHPAD:.*]] = memref.alloca() : memref<1xf32> +// CHECK-NEXT: call @S0(%[[A]], %[[SCRATCHPAD]]) +// CHECK-NEXT: call @S1(%[[A]], %[[SCRATCHPAD]]) diff --git a/test/polymer-opt/ExtractScopStmt/scratchpad-dom-store.mlir b/test/polymer-opt/ExtractScopStmt/scratchpad-dom-store.mlir new file mode 100644 index 00000000..f1a2384b --- /dev/null +++ b/test/polymer-opt/ExtractScopStmt/scratchpad-dom-store.mlir @@ -0,0 +1,51 @@ +// RUN: polymer-opt %s -extract-scop-stmt | FileCheck %s + + +/// The data-flow of the following program. Due to the existence of the dominating-store edge, +/// we should replace the load edge on the left by a scratchpad. +/// + +/// +---load A[i] ---+ +/// | | | +/// | | | +/// | v dominating +/// | mulf | +/// replace ---> | | | +/// | | v +/// | +----->store A[i] +/// | | + +/// | | dominating +/// | v | +/// +-->addf <-------+ +/// | +/// +----->store A[i] + +func @foo(%A: memref<10xf32>) { + affine.for %i = 0 to 10 { + %0 = affine.load %A[%i] : memref<10xf32> + %1 = arith.mulf %0, %0 : f32 + affine.store %1, %A[%i] : memref<10xf32> + // Should replace %0 by a load from a scratchpad. + %2 = arith.addf %1, %0 : f32 + affine.store %2, %A[%i] : memref<10xf32> + } + return +} + +// CHECK: func private @S0(%[[ARG0:.*]]: memref<10xf32>, %[[ARG1:.*]]: index, %[[ARG2:.*]]: memref<1xf32>) attributes {scop.stmt} +// CHECK: %[[VAL0:.*]] = affine.load %[[ARG0]][symbol(%[[ARG1]])] +// CHECK: affine.store %[[VAL0]], %[[ARG2]][0] +// CHECK: %[[VAL1:.*]] = arith.mulf %[[VAL0]], %[[VAL0]] +// CHECK: affine.store %[[VAL1]], %[[ARG0]][symbol(%[[ARG1]])] + +// CHECK: func private @S1(%[[ARG0:.*]]: memref<10xf32>, %[[ARG1:.*]]: index, %[[ARG2:.*]]: memref<1xf32>) attributes {scop.stmt} +// CHECK: %[[VAL0:.*]] = affine.load %[[ARG0]][symbol(%[[ARG1]])] +// CHECK: %[[VAL1:.*]] = affine.load %[[ARG2]][0] +// CHECK: %[[VAL2:.*]] = arith.addf %[[VAL0]], %[[VAL1]] +// CHECK: affine.store %[[VAL2]], %[[ARG0]][symbol(%[[ARG1]])] + +// CHECK: func @foo(%[[ARG0:.*]]: memref<10xf32>) +// CHECK: %[[VAL0:.*]] = memref.alloca() +// CHECK: affine.for %[[ARG1:.*]] = 0 to 10 +// CHECK: call @S0(%[[ARG0]], %[[ARG1]], %[[VAL0]]) +// CHECK: call @S1(%[[ARG0]], %[[ARG1]], %[[VAL0]]) diff --git a/test/polymer-opt/Reg2Mem/aes.mlir b/test/polymer-opt/Reg2Mem/aes.mlir new file mode 100644 index 00000000..95bfa3e0 --- /dev/null +++ b/test/polymer-opt/Reg2Mem/aes.mlir @@ -0,0 +1,52 @@ +// RUN: exit 0 + +func @encrypt(%arg0: memref, %arg1: memref) attributes {llvm.linkage = #llvm.linkage} { + %c1_i32 = constant 1 : i32 + %c4_i32 = constant 4 : i32 + %c15_i32 = constant 15 : i32 + %c8_i32 = constant 8 : i32 + %c283_i32 = constant 283 : i32 + %0 = memref.alloca() : memref<1024xi32> + affine.for %arg2 = 1 to 5 { + affine.for %arg3 = 0 to 16 { + %1 = affine.load %arg1[%arg3 * 4] : memref + %2 = shift_right_signed %1, %c4_i32 : i32 + %3 = index_cast %2 : i32 to index + %4 = and %1, %c15_i32 : i32 + %5 = index_cast %4 : i32 to index + %6 = memref.load %arg0[%3, %5] : memref + affine.store %6, %arg1[%arg3 * 4] : memref + } + affine.for %arg3 = 0 to 1023 { + %1 = affine.load %arg1[%arg3] : memref + %2 = shift_left %1, %c1_i32 : i32 + affine.store %2, %0[%arg3] : memref<1024xi32> + %3 = shift_right_signed %2, %c8_i32 : i32 + %4 = cmpi eq, %3, %c1_i32 : i32 + scf.if %4 { + %10 = xor %2, %c283_i32 : i32 + affine.store %10, %0[%arg3] : memref<1024xi32> + } + %5 = affine.load %arg1[%arg3 + 1] : memref + %6 = shift_left %5, %c1_i32 : i32 + %7 = xor %5, %6 : i32 + %8 = shift_right_signed %7, %c8_i32 : i32 + %9 = cmpi eq, %8, %c1_i32 : i32 + scf.if %9 { + %10 = xor %7, %c283_i32 : i32 + %11 = affine.load %0[%arg3] : memref<1024xi32> + %12 = xor %11, %10 : i32 + affine.store %12, %0[%arg3] : memref<1024xi32> + } else { + %10 = affine.load %0[%arg3] : memref<1024xi32> + %11 = xor %10, %7 : i32 + affine.store %11, %0[%arg3] : memref<1024xi32> + } + } + affine.for %arg3 = 0 to 1024 { + %1 = affine.load %0[%arg3] : memref<1024xi32> + affine.store %1, %arg1[%arg3] : memref + } + } + return +}