diff --git a/compiler/plugins/target/AMD-AIE/aie/AMDAIEDmaToNpu.cpp b/compiler/plugins/target/AMD-AIE/aie/AMDAIEDmaToNpu.cpp index df1a88383..c520ce2f6 100644 --- a/compiler/plugins/target/AMD-AIE/aie/AMDAIEDmaToNpu.cpp +++ b/compiler/plugins/target/AMD-AIE/aie/AMDAIEDmaToNpu.cpp @@ -511,6 +511,23 @@ struct AMDAIEDmaToNpuPass : mlir::OperationPass { instructions.end()); device->setAttr("npu_instructions", DenseI32ArrayAttr::get(&getContext(), signedInstructions)); + // The LX instructions for the entry point function are already generated by + // the pass hence we can safely delete the function as it is of no use to + // us. A reason to do this is that otherwise it is unnecessarily lowered to + // llvm where it can have a chance to crash in case the argument list is not + // lowerable for reasons such as memrefs with dynamic offsets. + auto symName = dyn_cast_or_null(device->getAttr("sym_name")); + SmallVector funcOps; + device->walk([&](func::FuncOp funcOp) { + // if the deviceOp has a symbol name attached to it we look for the funcOp + // that partially matches that symbol, if not we collect all funcOps + if (!symName || + symName.str().find(funcOp.getSymName().str()) != std::string::npos) + funcOps.push_back(funcOp); + }); + // If exactly one entry point function is found we can delete it. For any + // other result we do not make any change. 
+ if (funcOps.size() == 1) funcOps[0].erase(); } }; diff --git a/compiler/plugins/target/AMD-AIE/aie/test/aiert_insts.mlir b/compiler/plugins/target/AMD-AIE/aie/test/aiert_insts.mlir index c82c71f99..c39050170 100644 --- a/compiler/plugins/target/AMD-AIE/aie/test/aiert_insts.mlir +++ b/compiler/plugins/target/AMD-AIE/aie/test/aiert_insts.mlir @@ -3,16 +3,6 @@ // CHECK-LABEL: aie.device(npu1_4col) { // CHECK: memref.global "public" @of_toMem : memref<32xi32> // CHECK: memref.global "public" @of_fromMem : memref<32xi32> -// CHECK: func.func @sequence(%[[ARG0:.*]]: memref<4x2x8xi32>, %[[ARG1:.*]]: memref<32xi32>, %[[ARG2:.*]]: memref<64xi32>) { -// CHECK: %[[C0_I64:.*]] = arith.constant 0 : i64 -// CHECK: %[[C1_I64:.*]] = arith.constant 1 : i64 -// CHECK: %[[C2_I64:.*]] = arith.constant 2 : i64 -// CHECK: %[[C4_I64:.*]] = arith.constant 4 : i64 -// CHECK: %[[C8_I64:.*]] = arith.constant 8 : i64 -// CHECK: %[[C16_I64:.*]] = arith.constant 16 : i64 -// CHECK: %[[C32_I64:.*]] = arith.constant 32 : i64 -// CHECK: return -// CHECK: } // CHECK: aie.shim_dma_allocation @of_fromMem(MM2S, 0, 0) // CHECK: aie.shim_dma_allocation @of_toMem(S2MM, 0, 0) // CHECK: } {npu_instructions = array} diff --git a/compiler/plugins/target/AMD-AIE/aie/test/dma_to_npu.mlir b/compiler/plugins/target/AMD-AIE/aie/test/dma_to_npu.mlir index bf5bfc49d..c08c460fd 100644 --- a/compiler/plugins/target/AMD-AIE/aie/test/dma_to_npu.mlir +++ b/compiler/plugins/target/AMD-AIE/aie/test/dma_to_npu.mlir @@ -4,9 +4,6 @@ // CHECK-LABEL: aie.device(npu1_4col) { // CHECK: memref.global "public" @toMem : memref<16xi32> // CHECK: memref.global "public" @fromMem : memref<16xi32> -// CHECK: func.func @dma_memcpy_nd_0(%[[ARG0:.*]]: memref<16xi32>, %[[ARG1:.*]]: memref<16xi32>) { -// CHECK: return -// CHECK: } // CHECK: aie.shim_dma_allocation @fromMem(MM2S, 0, 0) // CHECK: aie.shim_dma_allocation @toMem(S2MM, 0, 0) // CHECK: } {npu_instructions = array} @@ -29,9 +26,6 @@ module { // CHECK-LABEL: aie.device(npu1_4col) 
{ // CHECK: memref.global "public" @toMem : memref<16xi32> -// CHECK: func.func @dma_wait_s2mm(%[[ARG0:.*]]: memref<16xi32>, %[[ARG1:.*]]: memref<16xi32>) { -// CHECK: return -// CHECK: } // CHECK: aie.shim_dma_allocation @toMem(S2MM, 0, 0) // CHECK: } {npu_instructions = array} @@ -51,9 +45,6 @@ module { // CHECK-LABEL: aie.device(npu1_4col) { // CHECK: memref.global "public" @toMem : memref<16xi32> -// CHECK: func.func @dma_wait_mm2s(%[[ARG0:.*]]: memref<16xi32>, %[[ARG1:.*]]: memref<16xi32>) { -// CHECK: return -// CHECK: } // CHECK: aie.shim_dma_allocation @toMem(MM2S, 1, 1) // CHECK: } {npu_instructions = array} @@ -68,3 +59,54 @@ module { aie.shim_dma_allocation @toMem (MM2S, 1, 1) } } + +// ----- + +// CHECK-LABEL: aie.device(npu1_4col) { +// CHECK: memref.global "public" @toMem : memref<16xi32> +// CHECK: func.func @pretend_microkernel +// CHECK-NOT: func.func @explicit_sym_name +// CHECK: aie.shim_dma_allocation @toMem(MM2S, 1, 1) +// CHECK: } {npu_instructions = array, sym_name = "explicit_sym_name_0"} + +module { + aie.device(npu1_4col) { + memref.global "public" @toMem : memref<16xi32> + func.func @pretend_microkernel(%arg0: memref<16xi32>, %arg1: memref<16xi32>) { + return + } + + func.func @explicit_sym_name(%arg0: memref<16xi32>, %arg1: memref<16xi32>) { + aiex.npu.dma_memcpy_nd(0, 0, %arg0[0, 0, 0, 0][1, 1, 16, 16][0, 0, 64, 1]) { metadata = @toMem, id = 1 : i64 } : memref<16xi32> + aiex.npu.dma_wait {symbol = @toMem} + return + } + aie.shim_dma_allocation @toMem (MM2S, 1, 1) + } {sym_name = "explicit_sym_name_0"} +} + +// ----- + +// CHECK-LABEL: aie.device(npu1_4col) { +// CHECK: memref.global "public" @toMem : memref<16xi32> +// CHECK: func.func @pretend_microkernel +// CHECK: func.func @explicit_sym_name +// CHECK: aie.shim_dma_allocation @toMem(MM2S, 1, 1) +// CHECK: } {npu_instructions = array, sym_name = "wrong_sym_name"} + +module { + aie.device(npu1_4col) { + memref.global "public" @toMem : memref<16xi32> + func.func 
@pretend_microkernel(%arg0: memref<16xi32>, %arg1: memref<16xi32>) { + return + } + + func.func @explicit_sym_name(%arg0: memref<16xi32>, %arg1: memref<16xi32>) { + aiex.npu.dma_memcpy_nd(0, 0, %arg0[0, 0, 0, 0][1, 1, 16, 16][0, 0, 64, 1]) { metadata = @toMem, id = 1 : i64 } : memref<16xi32> + aiex.npu.dma_wait {symbol = @toMem} + return + } + aie.shim_dma_allocation @toMem (MM2S, 1, 1) + } {sym_name = "wrong_sym_name"} +} + diff --git a/compiler/plugins/target/AMD-AIE/aie/test/dma_to_npu_issue_token.mlir b/compiler/plugins/target/AMD-AIE/aie/test/dma_to_npu_issue_token.mlir index 1bce01866..27ca250a7 100644 --- a/compiler/plugins/target/AMD-AIE/aie/test/dma_to_npu_issue_token.mlir +++ b/compiler/plugins/target/AMD-AIE/aie/test/dma_to_npu_issue_token.mlir @@ -4,9 +4,6 @@ // CHECK-LABEL: aie.device(npu1_4col) { // CHECK: memref.global "public" @toMem : memref<16xi32> // CHECK: memref.global "public" @fromMem : memref<16xi32> -// CHECK: func.func @test1(%[[ARG0:.*]]: memref<16xi32>, %[[ARG1:.*]]: memref<16xi32>) { -// CHECK: return -// CHECK: } // CHECK: aie.shim_dma_allocation @fromMem(MM2S, 0, 0) // CHECK: aie.shim_dma_allocation @toMem(S2MM, 0, 0) // CHECK: } {npu_instructions = array} diff --git a/compiler/plugins/target/AMD-AIE/aie/test/push_to_queue.mlir b/compiler/plugins/target/AMD-AIE/aie/test/push_to_queue.mlir index 3c20553ff..42a35f734 100644 --- a/compiler/plugins/target/AMD-AIE/aie/test/push_to_queue.mlir +++ b/compiler/plugins/target/AMD-AIE/aie/test/push_to_queue.mlir @@ -2,9 +2,6 @@ // RUN: iree-opt --amdaie-dma-to-npu %s | FileCheck %s // CHECK-LABEL: aie.device(npu1_4col) { -// CHECK: func.func @sequence() { -// CHECK: return -// CHECK: } // CHECK: } {npu_instructions = array} module {