From 764d8e65917472fed9a260caa1b9422994a40370 Mon Sep 17 00:00:00 2001 From: James Newling Date: Wed, 25 Sep 2024 13:05:47 -0700 Subject: [PATCH] update --- .../Transforms/AMDAIEConvertToDma.cpp | 2 +- .../Transforms/test/convert_to_dma.mlir | 365 +++++++++--------- 2 files changed, 190 insertions(+), 177 deletions(-) diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEConvertToDma.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEConvertToDma.cpp index 8a688c11d..4a93ee31e 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEConvertToDma.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEConvertToDma.cpp @@ -382,7 +382,7 @@ LogicalResult updateFromExpandShape(memref::ExpandShapeOp expandShapeOp, // Offsets. For now we don't do any arithmetic to split the offset across // dimensions, in theory we need to split the offset amongst the reassociation // indices, but for now I'm just putting the offset on the inner most - // dimension. + // dimension. 
SmallVector newOffsets(resultShape.size()); for (int i = 0; i < resultShape.size(); i++) { newOffsets[i] = getAsIndexOpFoldResult(ctx, 0); diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/convert_to_dma.mlir b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/convert_to_dma.mlir index 3f4e2dea4..487c5714a 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/convert_to_dma.mlir +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/convert_to_dma.mlir @@ -16,95 +16,6 @@ func.func @basic_unitdim_pack() { return } -// ----- - -// CHECK-LABEL: collapsed_and_expanded_pack_0 -// CHECK: amdaie.dma_cpy_nd -// CHECK-SAME: [0, 0] [10, 10] [10, 1] -// CHECK-SAME: [0, 0] [10, 10] [10, 1] -func.func @collapsed_and_expanded_pack_0() { - %alloc0 = memref.alloc() : memref<10x10xf32> - %src = memref.collapse_shape %alloc0 [[0, 1]] : memref<10x10xf32> into memref<100xf32> - %alloc1 = memref.alloc() : memref<100xf32> - %dst = memref.expand_shape %alloc1 [[0, 1]] output_shape [10, 10] : memref<100xf32> into memref<10x10xf32> - iree_linalg_ext.pack %src inner_dims_pos = [0] inner_tiles = [10] into %dst : (memref<100xf32> memref<10x10xf32>) - return -} - -// ----- - - -// This test is essentially the same as the test above, except there is an expand operation on the %src. 
-// CHECK-LABEL: @basic_unitdim_pack_expand -// CHECK-DAG: %[[SRCMEMREF:.*]] = amdaie.logicalobjectfifo.from_memref{{.*}}memref<8x16xi32, 1> -// CHECK-DAG: %[[DSTMEMREF:.*]] = amdaie.logicalobjectfifo.from_memref{{.*}}memref<8x16xi32, 2> -// CHECK: amdaie.dma_cpy_nd -// CHECK-SAME: %[[DSTMEMREF]][0, 0, 0, 0] [1, 1, 8, 16] [128, 128, 16, 1] -// CHECK-SAME: %[[SRCMEMREF]][0, 0, 0, 0] [1, 1, 8, 16] [128, 16, 16, 1] -func.func @basic_unitdim_pack_expand() { - %src = memref.alloc() : memref<8x16xi32, 1> - %dst = memref.alloc() : memref<8x16xi32, 2> - %dst_e = memref.expand_shape %dst [[0, 1, 2], [3]] output_shape [1, 1, 8, 16] - : memref<8x16xi32, 2> into memref<1x1x8x16xi32, 2> - iree_linalg_ext.pack %src inner_dims_pos = [0, 1] inner_tiles = [8, 16] into %dst_e: - (memref<8x16xi32, 1> memref<1x1x8x16xi32, 2>) - return -} - -// ----- - -// CHECK-LABEL: @basic_unitdim_unpack_expand -// CHECK-DAG: %[[SRCMEMREF:.*]] = amdaie.logicalobjectfifo.from_memref{{.*}}memref<8x16xi32, 1> -// CHECK-DAG: %[[DSTMEMREF:.*]] = amdaie.logicalobjectfifo.from_memref{{.*}}memref<8x16xi32, 2> -// CHECK: amdaie.dma_cpy_nd -// CHECK-SAME: %[[DSTMEMREF]][0, 0] [8, 16] [16, 1] -// CHECK-SAME: %[[SRCMEMREF]][0, 0, 0, 0] [1, 8, 1, 16] [128, 16, 128, 1] -func.func @basic_unitdim_unpack_expand() { - %src = memref.alloc() : memref<8x16xi32, 1> - %dst = memref.alloc() : memref<8x16xi32, 2> - %src_e = memref.expand_shape %src [[0, 1, 2], [3]] output_shape [1, 1, 8, 16] - : memref<8x16xi32, 1> into memref<1x1x8x16xi32, 1> - iree_linalg_ext.unpack %src_e inner_dims_pos = [0, 1] inner_tiles = [8, 16] into %dst: - (memref<1x1x8x16xi32, 1> memref<8x16xi32, 2>) - return -} - -// ----- - -// CHECK-LABEL: multidim_with_expand -// CHECK: amdaie.dma_cpy_nd -// dst of dma cpy: -// CHECK-SAME: [0, 0, 0, 0] [20, 5, 10, 10] [500, 100, 10, 1] -// src of dma cpy: -// CHECK-SAME: [0, 0, 0, 0] [20, 5, 10, 10] [500, 10, 50, 1] -func.func @multidim_with_expand() { - %src = memref.alloc() : memref<200x50xi32, 1> - %dst = 
memref.alloc() : memref<100x100xi32, 2> - %dst_e = memref.expand_shape %dst [[0, 1], [2, 3]] output_shape [20, 5, 10, 10] - : memref<100x100xi32, 2> into memref<20x5x10x10xi32, 2> - iree_linalg_ext.pack %src inner_dims_pos = [0, 1] inner_tiles = [10, 10] - into %dst_e: (memref<200x50xi32, 1> memref<20x5x10x10xi32, 2>) - return -} - -// ----- - -// CHECK-LABEL: multidim_without_expand -// CHECK: amdaie.dma_cpy_nd -// dst of dma cpy: -// CHECK-SAME: [0, 0, 0, 0] [20, 5, 10, 10] [500, 100, 10, 1] -// src of dma cpy: -// CHECK-SAME: [0, 0, 0, 0] [20, 5, 10, 10] [500, 10, 50, 1] -func.func @multidim_without_expand() { - %src = memref.alloc() : memref<200x50xi32, 1> - %dst = memref.alloc() : memref<20x5x10x10xi32, 2> - iree_linalg_ext.pack %src inner_dims_pos = [0, 1] inner_tiles = [10, 10] - into %dst: (memref<200x50xi32, 1> memref<20x5x10x10xi32, 2>) - return -} - - - // ----- // CHECK-LABEL: @multidim_pack @@ -123,7 +34,6 @@ func.func @multidim_pack() { return } - // ----- // CHECK-LABEL: @permute_pack @@ -172,67 +82,6 @@ func.func @subview_pack() { // ----- -// CHECK-LABEL: @subview_then_collapse(%arg0: index) -// CHECK: %[[ALLOC0:.*]] = memref.alloc() : memref<20x10xf32> -// CHECK: %[[C10:.*]] = arith.constant 10 : index -// CHECK: %[[MULI:.*]] = arith.muli %arg0, %[[C10]] : index -// CHECK: amdaie.dma_cpy_nd -// CHECK-SAME: [0, 0] [5, 20] [20, 1] -// CHECK-SAME: [0, %[[MULI]]] [5, 20] [20, 1] -func.func @subview_then_collapse(%arg0 : index) { - %src = memref.alloc() : memref<20x10xf32> - %subview = memref.subview %src[%arg0, 0] [10, 10] [1, 1] : - memref<20x10xf32> to memref<10x10xf32, strided<[10, 1], offset: ?>> - %collapsed = memref.collapse_shape %subview [[0, 1]] : memref<10x10xf32, strided<[10, 1], offset: ?>> - into memref<100xf32, strided<[1], offset: ?>> - %dst = memref.alloc() : memref<5x20xf32> - iree_linalg_ext.pack %collapsed inner_dims_pos = [0] inner_tiles = [20] into %dst - : (memref<100xf32, strided<[1], offset: ?>> memref<5x20xf32>) - return -} - 
-// ----- - -// CHECK-LABEL: @expand_shape_after_subview -// CHECK: amdaie.dma_cpy_nd -// CHECK-SAME: [0, 0, %arg0, 0, 0] [2, 3, 6, 6, 1] [300, 100, 10, 1, 1] -// CHECK-SAME: [0, 0, 0, 0, 0] [2, 3, 6, 6, 1] [108, 6, 1, 18, 6] -module { - func.func @expand_shape_after_subview(%arg0: index) { - %alloc = memref.alloc() : memref<10x10x10xf32> - %subview = memref.subview %alloc[0, %arg0, 0] [6, 6, 6] [1, 1, 1] : - memref<10x10x10xf32> to memref<6x6x6xf32, strided<[100, 10, 1], offset: ?>> - %expand_shape = memref.expand_shape %subview [[0, 1], [2], [3, 4]] - output_shape [2, 3, 6, 6, 1] : memref<6x6x6xf32, strided<[100, 10, 1], offset: ?>> - into memref<2x3x6x6x1xf32, strided<[300, 100, 10, 1, 1], offset: ?>> - %alloc_0 = memref.alloc() : memref<12x3x6xf32> - iree_linalg_ext.pack %alloc_0 inner_dims_pos = [0, 1] inner_tiles = [6, 1] - into %expand_shape : (memref<12x3x6xf32> memref<2x3x6x6x1xf32, strided<[300, 100, 10, 1, 1], offset: ?>>) - return - } -} - -// ----- - -// CHECK-LABEL: @subview_followed_by_subview(%arg0: index, %arg1: index) -// CHECK: %[[SUM:.*]] = arith.addi %arg0, %arg1 : index -// CHECK: amdaie.dma_cpy_nd -// CHECK-SAME: [0] [100] [1], -// CHECK-SAME: [%[[SUM]], 5] [10, 10] [20, 1] -func.func @subview_followed_by_subview(%arg0 : index, %arg1 : index){ - %src = memref.alloc() : memref<20x20xf32> - %subview0 = memref.subview %src[%arg0, 2] [15, 15] [1, 1] : - memref<20x20xf32> to memref<15x15xf32, strided<[20, 1], offset: ?>> - %subview1 = memref.subview %subview0[%arg1, 3] [10, 10] [1, 1] : - memref<15x15xf32, strided<[20, 1], offset: ?>> to memref<10x10xf32, strided<[20, 1], offset: ?>> - %dst = memref.alloc() : memref<100xf32> - iree_linalg_ext.unpack %subview1 inner_dims_pos = [0] inner_tiles = [10] into %dst - : (memref<10x10xf32, strided<[20, 1], offset: ?>> memref<100xf32>) - return -} - -// ----- - // CHECK-LABEL: @collapsing_subview_pack // CHECK: %[[SRC_LOFI:.*]] = amdaie.logicalobjectfifo.from_memref {{.*}} !amdaie.logicalobjectfifo> // 
CHECK: %[[DST_LOFI:.*]] = amdaie.logicalobjectfifo.from_memref {{.*}} !amdaie.logicalobjectfifo> @@ -240,11 +89,19 @@ func.func @subview_followed_by_subview(%arg0 : index, %arg1 : index){ // CHECK-SAME: %[[DST_LOFI]][0, 0, 0, 0] [2, 2, 3, 3] [18, 9, 3, 1] // CHECK-SAME: %[[SRC_LOFI]][0, 0, 0, 0] [2, 2, 3, 3] [14400, 480, 8, 4800] +// Note on the source-side strides [14400, 480, 8, 4800]: how are they +// calculated? The source (%sbv) is rank-3 with strides [4800, 480, 8]. +// The pack is essentially 2 operations: +// 1) a reshape 6x2x3 -> 2x3x2x3 +// 2) a permute 2x3x2x3 -> 2x2x3x3 (index 1 migrates to end). +// The reshape makes the strides go from [4800, 480, 8] to [4800*3, 4800, 480, 8] +// The permute makes the strides go from [4800*3, 4800, 480, 8] to [4800*3, 480, 8, 4800] + func.func @collapsing_subview_pack() { %src = memref.alloc() : memref<12x5x2x10x6x8xf32> - %sbv = memref.subview %src[0, 0, 0, 0, 0, 0] - [6, 1, 2, 1, 3, 1] - [1, 1, 1, 1, 1, 1] : + %sbv = memref.subview %src[0, 0, 0, 0, 0, 0] // offset + [6, 1, 2, 1, 3, 1] // size + [1, 1, 1, 1, 1, 1] : // stride memref<12x5x2x10x6x8xf32> to memref<6x2x3xf32, strided<[4800,480,8]>> %dst= memref.alloc() : memref<2x2x3x3xf32, 1> iree_linalg_ext.pack %sbv inner_dims_pos = [0] @@ -255,28 +112,6 @@ func.func @collapsing_subview_pack() { // ----- -// CHECK-LABEL: @subview_followed_by_expand(%arg0: index) -// CHECK: amdaie.dma_cpy_nd -// CHECK-SAME: [0, 0] [25, 4] [4, 1] -// We might want to change the offsets to be -// [%arg0 / 2, 0, %arg0 %2, 0] -// in the future, but as the offsets ultimately get collapsed into a single -// global cumulative offset, this would just be undone. 
-// CHECK-SAME: [0, 0, %arg0, 2] [5, 5, 2, 2] [40, 2, 20, 1] -func.func @subview_followed_by_expand(%arg0 : index){ - %src = memref.alloc() : memref<20x20xf32> - %subview = memref.subview %src[%arg0, 2] [10, 10] [1, 1] : - memref<20x20xf32> to memref<10x10xf32, strided<[20, 1], offset: ?>> - %expanded = memref.expand_shape %subview [[0, 1], [2,3]] output_shape [5, 2, 5, 2] : - memref<10x10xf32, strided<[20, 1], offset: ?>> into memref<5x2x5x2xf32, strided<[40, 20, 2, 1], offset: ?>> - %dst = memref.alloc() : memref<25x4xf32> - iree_linalg_ext.unpack %expanded inner_dims_pos = [0, 1] inner_tiles = [5, 2] into %dst - : (memref<5x2x5x2xf32, strided<[40, 20, 2, 1], offset: ?>> memref<25x4xf32>) - return -} - -// ----- - // CHECK-LABEL: @unitdim_unpack // CHECK: %[[ALLOC0:.*]] = memref.alloc() : memref<1x1x8x16xi32, 1> // CHECK: %[[FROMMEMREF0:.*]] = amdaie.logicalobjectfifo.from_memref %[[ALLOC0]], {} : memref<1x1x8x16xi32, 1> -> !amdaie.logicalobjectfifo> @@ -460,5 +295,183 @@ func.func @permute_unpack_tricyle_permute(){ return } +// ----- + +// The pack operation in the following test does not permutate any dimensions, +// so we expect a contiguous copy on the source and destination sides. 
+ +// CHECK-LABEL: collapsed_and_expanded_pack +// CHECK: amdaie.dma_cpy_nd +// destination of pack: +// CHECK-SAME: [0, 0] [10, 10] [10, 1] +// source of pack: +// CHECK-SAME: [0, 0] [10, 10] [10, 1] +func.func @collapsed_and_expanded_pack() { + %alloc0 = memref.alloc() : memref<10x10xf32> + %src = memref.collapse_shape %alloc0 [[0, 1]] : memref<10x10xf32> into memref<100xf32> + %alloc1 = memref.alloc() : memref<100xf32> + %dst = memref.expand_shape %alloc1 [[0, 1]] output_shape [10, 10] : memref<100xf32> into memref<10x10xf32> + iree_linalg_ext.pack %src inner_dims_pos = [0] inner_tiles = [10] into %dst : (memref<100xf32> memref<10x10xf32>) + return +} + +// ----- + +// The pack operation in the following test does not permutate any dimensions, +// so we expect a contiguous copy on the source and destination sides. + +// CHECK-LABEL: @unitdim_pack_expand +// CHECK-DAG: %[[SRCMEMREF:.*]] = amdaie.logicalobjectfifo.from_memref{{.*}}memref<8x16xi32, 1> +// CHECK-DAG: %[[DSTMEMREF:.*]] = amdaie.logicalobjectfifo.from_memref{{.*}}memref<8x16xi32, 2> +// CHECK: amdaie.dma_cpy_nd +// CHECK-SAME: %[[DSTMEMREF]][0, 0, 0, 0] [1, 1, 8, 16] [128, 128, 16, 1] +// CHECK-SAME: %[[SRCMEMREF]][0, 0, 0, 0] [1, 1, 8, 16] [128, 16, 16, 1] +func.func @unitdim_pack_expand() { + %src = memref.alloc() : memref<8x16xi32, 1> + %dst = memref.alloc() : memref<8x16xi32, 2> + %dst_e = memref.expand_shape %dst [[0, 1, 2], [3]] output_shape [1, 1, 8, 16] + : memref<8x16xi32, 2> into memref<1x1x8x16xi32, 2> + iree_linalg_ext.pack %src inner_dims_pos = [0, 1] inner_tiles = [8, 16] into %dst_e: + (memref<8x16xi32, 1> memref<1x1x8x16xi32, 2>) + return +} + +// ----- + +// The unpack operation in the following test does not permutate any dimensions, +// so we expect a contiguous copy on the source and destination sides. 
+ +// CHECK-LABEL: @unitdim_unpack_expand +// CHECK-DAG: %[[SRCMEMREF:.*]] = amdaie.logicalobjectfifo.from_memref{{.*}}memref<8x16xi32, 1> +// CHECK-DAG: %[[DSTMEMREF:.*]] = amdaie.logicalobjectfifo.from_memref{{.*}}memref<8x16xi32, 2> +// CHECK: amdaie.dma_cpy_nd +// CHECK-SAME: %[[DSTMEMREF]][0, 0] [8, 16] [16, 1] +// CHECK-SAME: %[[SRCMEMREF]][0, 0, 0, 0] [1, 8, 1, 16] [128, 16, 128, 1] +func.func @unitdim_unpack_expand() { + %src = memref.alloc() : memref<8x16xi32, 1> + %dst = memref.alloc() : memref<8x16xi32, 2> + %src_e = memref.expand_shape %src [[0, 1, 2], [3]] output_shape [1, 1, 8, 16] + : memref<8x16xi32, 1> into memref<1x1x8x16xi32, 1> + iree_linalg_ext.unpack %src_e inner_dims_pos = [0, 1] inner_tiles = [8, 16] into %dst: + (memref<1x1x8x16xi32, 1> memref<8x16xi32, 2>) + return +} + +// ----- + +// CHECK-LABEL: multidim_with_expand +// CHECK: amdaie.dma_cpy_nd +// dst of dma cpy: +// CHECK-SAME: [0, 0, 0, 0] [20, 5, 10, 10] [500, 100, 10, 1] +// src of dma cpy: +// CHECK-SAME: [0, 0, 0, 0] [20, 5, 10, 10] [500, 10, 50, 1] +func.func @multidim_with_expand() { + %src = memref.alloc() : memref<200x50xi32, 1> + %dst = memref.alloc() : memref<100x100xi32, 2> + %dst_e = memref.expand_shape %dst [[0, 1], [2, 3]] output_shape [20, 5, 10, 10] + : memref<100x100xi32, 2> into memref<20x5x10x10xi32, 2> + iree_linalg_ext.pack %src inner_dims_pos = [0, 1] inner_tiles = [10, 10] + into %dst_e: (memref<200x50xi32, 1> memref<20x5x10x10xi32, 2>) + return +} + +// ----- + +// This test is included to illustrate that the dma copy is the same without the +// expand operation (compare to multidim_with_expand above). 
+// CHECK-LABEL: multidim_without_expand +// CHECK: amdaie.dma_cpy_nd +// dst of dma cpy: +// CHECK-SAME: [0, 0, 0, 0] [20, 5, 10, 10] [500, 100, 10, 1] +// src of dma cpy: +// CHECK-SAME: [0, 0, 0, 0] [20, 5, 10, 10] [500, 10, 50, 1] +func.func @multidim_without_expand() { + %src = memref.alloc() : memref<200x50xi32, 1> + %dst = memref.alloc() : memref<20x5x10x10xi32, 2> + iree_linalg_ext.pack %src inner_dims_pos = [0, 1] inner_tiles = [10, 10] + into %dst: (memref<200x50xi32, 1> memref<20x5x10x10xi32, 2>) + return +} + +// ----- + +// CHECK-LABEL: @subview_then_collapse(%arg0: index) +// CHECK: %[[ALLOC0:.*]] = memref.alloc() : memref<20x10xf32> +// CHECK: %[[C10:.*]] = arith.constant 10 : index +// CHECK: %[[MULI:.*]] = arith.muli %arg0, %[[C10]] : index +// CHECK: amdaie.dma_cpy_nd +// CHECK-SAME: [0, 0] [5, 20] [20, 1] +// CHECK-SAME: [0, %[[MULI]]] [5, 20] [20, 1] +func.func @subview_then_collapse(%arg0 : index) { + %src = memref.alloc() : memref<20x10xf32> + %subview = memref.subview %src[%arg0, 0] [10, 10] [1, 1] : + memref<20x10xf32> to memref<10x10xf32, strided<[10, 1], offset: ?>> + %collapsed = memref.collapse_shape %subview [[0, 1]] : memref<10x10xf32, strided<[10, 1], offset: ?>> + into memref<100xf32, strided<[1], offset: ?>> + %dst = memref.alloc() : memref<5x20xf32> + iree_linalg_ext.pack %collapsed inner_dims_pos = [0] inner_tiles = [20] into %dst + : (memref<100xf32, strided<[1], offset: ?>> memref<5x20xf32>) + return +} +// ----- +// CHECK-LABEL: @subview_then_expand +// CHECK: amdaie.dma_cpy_nd +// CHECK-SAME: [0, 0, %arg0, 0, 0] [2, 3, 6, 6, 1] [300, 100, 10, 1, 1] +// CHECK-SAME: [0, 0, 0, 0, 0] [2, 3, 6, 6, 1] [108, 6, 1, 18, 6] +module { + func.func @subview_then_expand(%arg0: index) { + %alloc = memref.alloc() : memref<10x10x10xf32> + %subview = memref.subview %alloc[0, %arg0, 0] [6, 6, 6] [1, 1, 1] : + memref<10x10x10xf32> to memref<6x6x6xf32, strided<[100, 10, 1], offset: ?>> + %expand_shape = memref.expand_shape %subview [[0, 1], [2], 
[3, 4]] + output_shape [2, 3, 6, 6, 1] : memref<6x6x6xf32, strided<[100, 10, 1], offset: ?>> + into memref<2x3x6x6x1xf32, strided<[300, 100, 10, 1, 1], offset: ?>> + %alloc_0 = memref.alloc() : memref<12x3x6xf32> + iree_linalg_ext.pack %alloc_0 inner_dims_pos = [0, 1] inner_tiles = [6, 1] + into %expand_shape : (memref<12x3x6xf32> memref<2x3x6x6x1xf32, strided<[300, 100, 10, 1, 1], offset: ?>>) + return + } +} + +// ----- + +// CHECK-LABEL: @subview_then_subview(%arg0: index, %arg1: index) +// CHECK: %[[SUM:.*]] = arith.addi %arg0, %arg1 : index +// CHECK: amdaie.dma_cpy_nd +// CHECK-SAME: [0] [100] [1], +// CHECK-SAME: [%[[SUM]], 5] [10, 10] [20, 1] +func.func @subview_then_subview(%arg0 : index, %arg1 : index){ + %src = memref.alloc() : memref<20x20xf32> + %subview0 = memref.subview %src[%arg0, 2] [15, 15] [1, 1] : + memref<20x20xf32> to memref<15x15xf32, strided<[20, 1], offset: ?>> + %subview1 = memref.subview %subview0[%arg1, 3] [10, 10] [1, 1] : + memref<15x15xf32, strided<[20, 1], offset: ?>> to memref<10x10xf32, strided<[20, 1], offset: ?>> + %dst = memref.alloc() : memref<100xf32> + iree_linalg_ext.unpack %subview1 inner_dims_pos = [0] inner_tiles = [10] into %dst + : (memref<10x10xf32, strided<[20, 1], offset: ?>> memref<100xf32>) + return +} + +// ----- + +// CHECK-LABEL: @subview_then_expand_1(%arg0: index) +// CHECK: amdaie.dma_cpy_nd +// CHECK-SAME: [0, 0] [25, 4] [4, 1] +// We might want to change the offsets to be +// [%arg0 / 2, 0, %arg0 %2, 0] +// in the future, but as the offsets ultimately get collapsed into a single +// global cumulative offset, this would just be undone. 
+// CHECK-SAME: [0, 0, %arg0, 2] [5, 5, 2, 2] [40, 2, 20, 1] +func.func @subview_then_expand_1(%arg0 : index){ + %src = memref.alloc() : memref<20x20xf32> + %subview = memref.subview %src[%arg0, 2] [10, 10] [1, 1] : + memref<20x20xf32> to memref<10x10xf32, strided<[20, 1], offset: ?>> + %expanded = memref.expand_shape %subview [[0, 1], [2,3]] output_shape [5, 2, 5, 2] : + memref<10x10xf32, strided<[20, 1], offset: ?>> into memref<5x2x5x2xf32, strided<[40, 20, 2, 1], offset: ?>> + %dst = memref.alloc() : memref<25x4xf32> + iree_linalg_ext.unpack %expanded inner_dims_pos = [0, 1] inner_tiles = [5, 2] into %dst + : (memref<5x2x5x2xf32, strided<[40, 20, 2, 1], offset: ?>> memref<25x4xf32>) + return +}