Skip to content

Commit

Permalink
Delete the microcontroller function after generating the npu instruct…
Browse files Browse the repository at this point in the history
…ions (#536)

This is progress towards
#518
  • Loading branch information
nirvedhmeshram authored Jul 11, 2024
1 parent 9191fa2 commit 333012d
Show file tree
Hide file tree
Showing 5 changed files with 68 additions and 25 deletions.
17 changes: 17 additions & 0 deletions compiler/plugins/target/AMD-AIE/aie/AMDAIEDmaToNpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -511,6 +511,23 @@ struct AMDAIEDmaToNpuPass : mlir::OperationPass<DeviceOp> {
instructions.end());
device->setAttr("npu_instructions",
DenseI32ArrayAttr::get(&getContext(), signedInstructions));
// The LX instructions for the entry point function are already generated by
// the pass, hence we can safely delete the function as it is of no use to
// us. A reason to do this is that otherwise it is unnecessarily lowered to
// llvm where it can have a chance to crash in case the argument list is not
// lowerable for reasons such as memrefs with dynamic offsets.
auto symName = dyn_cast_or_null<StringAttr>(device->getAttr("sym_name"));
SmallVector<func::FuncOp> funcOps;
device->walk([&](func::FuncOp funcOp) {
// If the deviceOp has a symbol name attached to it, we look for the funcOp
// that partially matches that symbol; if not, we collect all funcOps.
if (!symName ||
symName.str().find(funcOp.getSymName().str()) != std::string::npos)
funcOps.push_back(funcOp);
});
// If exactly one entry point function is found we can delete it. For any
// other result we do not make any change.
if (funcOps.size() == 1) funcOps[0].erase();
}
};

Expand Down
10 changes: 0 additions & 10 deletions compiler/plugins/target/AMD-AIE/aie/test/aiert_insts.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,6 @@
// CHECK-LABEL: aie.device(npu1_4col) {
// CHECK: memref.global "public" @of_toMem : memref<32xi32>
// CHECK: memref.global "public" @of_fromMem : memref<32xi32>
// CHECK: func.func @sequence(%[[ARG0:.*]]: memref<4x2x8xi32>, %[[ARG1:.*]]: memref<32xi32>, %[[ARG2:.*]]: memref<64xi32>) {
// CHECK: %[[C0_I64:.*]] = arith.constant 0 : i64
// CHECK: %[[C1_I64:.*]] = arith.constant 1 : i64
// CHECK: %[[C2_I64:.*]] = arith.constant 2 : i64
// CHECK: %[[C4_I64:.*]] = arith.constant 4 : i64
// CHECK: %[[C8_I64:.*]] = arith.constant 8 : i64
// CHECK: %[[C16_I64:.*]] = arith.constant 16 : i64
// CHECK: %[[C32_I64:.*]] = arith.constant 32 : i64
// CHECK: return
// CHECK: }
// CHECK: aie.shim_dma_allocation @of_fromMem(MM2S, 0, 0)
// CHECK: aie.shim_dma_allocation @of_toMem(S2MM, 0, 0)
// CHECK: } {npu_instructions = array<i32: 100860160, 261, 6, 256, 1, 0, 118816, 48, 32, 0, 0, 0, -2147483648, 0, 0, 33554432, 129, 48, 0, 0, 0, 0, 118820, 0, 2, 0, 0, 0, 0, 0, 119300, 0, -2147483647, 24, 1, 0, 118784, 48, 32, 128, 0, 8388608, -2145386489, 15, 0, 33554432, 129, 48, 0, 0, 0, 0, 118788, 0, 0, 0, 128, 0, 0, 0, 119316, 0, 0, 24>}
Expand Down
60 changes: 51 additions & 9 deletions compiler/plugins/target/AMD-AIE/aie/test/dma_to_npu.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,6 @@
// CHECK-LABEL: aie.device(npu1_4col) {
// CHECK: memref.global "public" @toMem : memref<16xi32>
// CHECK: memref.global "public" @fromMem : memref<16xi32>
// CHECK: func.func @dma_memcpy_nd_0(%[[ARG0:.*]]: memref<16xi32>, %[[ARG1:.*]]: memref<16xi32>) {
// CHECK: return
// CHECK: }
// CHECK: aie.shim_dma_allocation @fromMem(MM2S, 0, 0)
// CHECK: aie.shim_dma_allocation @toMem(S2MM, 0, 0)
// CHECK: } {npu_instructions = array<i32: 100860160, 261, 6, 256, 1, 0, 118816, 48, 256, 0, 0, 16777216, -2147483585, 0, 0, 33554432, 129, 48, 0, 0, 0, 0, 118820, 0, 0, 0, 0, 0, 0, 0, 119300, 0, -2147483647, 24, 1, 0, 118784, 48, 256, 64, 0, 16777216, -2147483585, 0, 0, 33554432, 129, 48, 0, 0, 0, 0, 118788, 0, 1, 0, 64, 0, 0, 0, 119316, 0, 0, 24>}
Expand All @@ -29,9 +26,6 @@ module {

// CHECK-LABEL: aie.device(npu1_4col) {
// CHECK: memref.global "public" @toMem : memref<16xi32>
// CHECK: func.func @dma_wait_s2mm(%[[ARG0:.*]]: memref<16xi32>, %[[ARG1:.*]]: memref<16xi32>) {
// CHECK: return
// CHECK: }
// CHECK: aie.shim_dma_allocation @toMem(S2MM, 0, 0)
// CHECK: } {npu_instructions = array<i32: 100860160, 261, 4, 152, 1, 0, 118816, 48, 256, 0, 0, 16777216, -2147483585, 0, 0, 33554432, 129, 48, 0, 0, 0, 0, 118820, 0, 0, 0, 0, 0, 0, 0, 119300, 0, -2147483647, 24, 128, 16, 0, 65792>}

Expand All @@ -51,9 +45,6 @@ module {

// CHECK-LABEL: aie.device(npu1_4col) {
// CHECK: memref.global "public" @toMem : memref<16xi32>
// CHECK: func.func @dma_wait_mm2s(%[[ARG0:.*]]: memref<16xi32>, %[[ARG1:.*]]: memref<16xi32>) {
// CHECK: return
// CHECK: }
// CHECK: aie.shim_dma_allocation @toMem(MM2S, 1, 1)
// CHECK: } {npu_instructions = array<i32: 100860160, 261, 4, 152, 1, 0, 33673248, 48, 256, 0, 0, 16777216, -2147483585, 0, 0, 33554432, 129, 48, 0, 0, 0, 0, 33673252, 0, 0, 0, 0, 0, 0, 0, 33673756, 0, 1, 24, 128, 16, 65537, 16843008>}

Expand All @@ -68,3 +59,54 @@ module {
aie.shim_dma_allocation @toMem (MM2S, 1, 1)
}
}

// -----

// CHECK-LABEL: aie.device(npu1_4col) {
// CHECK: memref.global "public" @toMem : memref<16xi32>
// CHECK: func.func @pretend_microkernel
// CHECK-NOT: func.func @explicit_sym_name
// CHECK: aie.shim_dma_allocation @toMem(MM2S, 1, 1)
// CHECK: } {npu_instructions = array<i32: 100860160, 261, 4, 152, 1, 0, 33673248, 48, 256, 0, 0, 16777216, -2147483585, 0, 0, 33554432, 129, 48, 0, 0, 0, 0, 33673252, 0, 0, 0, 0, 0, 0, 0, 33673756, 0, 1, 24, 128, 16, 65537, 16843008>, sym_name = "explicit_sym_name_0"}

// Test input: the device below carries sym_name = "explicit_sym_name_0".
// The name of @explicit_sym_name is a substring of that attribute, while the
// name of @pretend_microkernel is not. The FileCheck directives preceding this
// module expect @pretend_microkernel to remain in the output and
// @explicit_sym_name to be absent from it.
module {
aie.device(npu1_4col) {
memref.global "public" @toMem : memref<16xi32>
// Function with an empty body whose name does not occur in the device's
// sym_name attribute.
func.func @pretend_microkernel(%arg0: memref<16xi32>, %arg1: memref<16xi32>) {
return
}

// Function containing the NPU DMA ops; its name occurs inside the device's
// sym_name attribute.
func.func @explicit_sym_name(%arg0: memref<16xi32>, %arg1: memref<16xi32>) {
aiex.npu.dma_memcpy_nd(0, 0, %arg0[0, 0, 0, 0][1, 1, 16, 16][0, 0, 64, 1]) { metadata = @toMem, id = 1 : i64 } : memref<16xi32>
aiex.npu.dma_wait {symbol = @toMem}
return
}
aie.shim_dma_allocation @toMem (MM2S, 1, 1)
} {sym_name = "explicit_sym_name_0"}
}

// -----

// CHECK-LABEL: aie.device(npu1_4col) {
// CHECK: memref.global "public" @toMem : memref<16xi32>
// CHECK: func.func @pretend_microkernel
// CHECK: func.func @explicit_sym_name
// CHECK: aie.shim_dma_allocation @toMem(MM2S, 1, 1)
// CHECK: } {npu_instructions = array<i32: 100860160, 261, 4, 152, 1, 0, 33673248, 48, 256, 0, 0, 16777216, -2147483585, 0, 0, 33554432, 129, 48, 0, 0, 0, 0, 33673252, 0, 0, 0, 0, 0, 0, 0, 33673756, 0, 1, 24, 128, 16, 65537, 16843008>, sym_name = "wrong_sym_name"}

// Test input: the device below carries sym_name = "wrong_sym_name", which
// contains neither @pretend_microkernel nor @explicit_sym_name as a substring.
// The FileCheck directives preceding this module expect both functions to
// remain in the output.
module {
aie.device(npu1_4col) {
memref.global "public" @toMem : memref<16xi32>
// Function with an empty body; its name does not occur in the device's
// sym_name attribute.
func.func @pretend_microkernel(%arg0: memref<16xi32>, %arg1: memref<16xi32>) {
return
}

// Function containing the NPU DMA ops; its name does not occur in the device's
// sym_name attribute either.
func.func @explicit_sym_name(%arg0: memref<16xi32>, %arg1: memref<16xi32>) {
aiex.npu.dma_memcpy_nd(0, 0, %arg0[0, 0, 0, 0][1, 1, 16, 16][0, 0, 64, 1]) { metadata = @toMem, id = 1 : i64 } : memref<16xi32>
aiex.npu.dma_wait {symbol = @toMem}
return
}
aie.shim_dma_allocation @toMem (MM2S, 1, 1)
} {sym_name = "wrong_sym_name"}
}

Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,6 @@
// CHECK-LABEL: aie.device(npu1_4col) {
// CHECK: memref.global "public" @toMem : memref<16xi32>
// CHECK: memref.global "public" @fromMem : memref<16xi32>
// CHECK: func.func @test1(%[[ARG0:.*]]: memref<16xi32>, %[[ARG1:.*]]: memref<16xi32>) {
// CHECK: return
// CHECK: }
// CHECK: aie.shim_dma_allocation @fromMem(MM2S, 0, 0)
// CHECK: aie.shim_dma_allocation @toMem(S2MM, 0, 0)
// CHECK: } {npu_instructions = array<i32: 100860160, 261, 6, 256, 1, 0, 118816, 48, 256, 0, 0, 16777216, -2147483585, 0, 0, 33554432, 129, 48, 0, 0, 0, 0, 118820, 0, 0, 0, 0, 0, 0, 0, 119300, 0, -2147483647, 24, 1, 0, 118784, 48, 256, 64, 0, 16777216, -2147483585, 0, 0, 33554432, 129, 48, 0, 0, 0, 0, 118788, 0, 1, 0, 64, 0, 0, 0, 119316, 0, 0, 24>}
Expand Down
3 changes: 0 additions & 3 deletions compiler/plugins/target/AMD-AIE/aie/test/push_to_queue.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,6 @@
// RUN: iree-opt --amdaie-dma-to-npu %s | FileCheck %s

// CHECK-LABEL: aie.device(npu1_4col) {
// CHECK: func.func @sequence() {
// CHECK: return
// CHECK: }
// CHECK: } {npu_instructions = array<i32: 100860160, 261, 2, 64, 0, 0, 119308, 0, -2147483645, 24, 0, 0, 67228180, 0, 196610, 24>}

module {
Expand Down

0 comments on commit 333012d

Please sign in to comment.