@@ -39,10 +39,17 @@ func.func @arm_sme_tile_load_ver(%src : memref<?x?xi32>) {
39
39
// CHECK-SAME: %[[SRC:.*]]: memref<?x?xi32>) {
40
40
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
41
41
// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
42
+ // CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
42
43
// CHECK-DAG: %[[NUM_ROWS:.*]] = arith.constant 3 : index
44
+ // CHECK-DAG: %[[VSCALE:.*]] = vector.vscale
45
+ // CHECK-DAG: %[[NUM_TILE_SLICES:.*]] = arith.muli %[[C4]], %[[VSCALE]] : index
46
+ // CHECK-DAG: %[[NUM_ROWS_I64:.*]] = arith.index_cast %[[NUM_ROWS]] : index to i64
47
+ // CHECK-DAG: %[[NUM_TILE_SLICES_I64:.*]] = arith.index_cast %[[NUM_TILE_SLICES]] : index to i64
48
+ // CHECK-DAG: %[[LOOP_UPPER_BOUND_I64:.*]] = arith.minsi %[[NUM_ROWS_I64]], %[[NUM_TILE_SLICES_I64]] : i64
49
+ // CHECK-DAG: %[[LOOP_UPPER_BOUND:.*]] = arith.index_cast %[[LOOP_UPPER_BOUND_I64]] : i64 to index
43
50
// CHECK-DAG: %[[NUM_COLS:.*]] = vector.create_mask %c2 : vector<[4]xi1>
44
51
// CHECK-DAG: %[[TILE_ZERO:.*]] = arm_sme.zero : vector<[4]x[4]xi32>
45
- // CHECK-NEXT: scf.for %[[TILE_SLICE_INDEX:.*]] = %[[C0]] to %[[NUM_ROWS]] step %[[C1]] iter_args(%[[CURRENT_TILE:.*]] = %[[TILE_ZERO]]) -> (vector<[4]x[4]xi32>) {
52
+ // CHECK-NEXT: scf.for %[[TILE_SLICE_INDEX:.*]] = %[[C0]] to %[[LOOP_UPPER_BOUND]] step %[[C1]] iter_args(%[[CURRENT_TILE:.*]] = %[[TILE_ZERO]]) -> (vector<[4]x[4]xi32>) {
46
53
// CHECK-NEXT: %[[OFFSET:.*]] = arith.addi %[[C0]], %[[TILE_SLICE_INDEX]] : index
47
54
// CHECK-NEXT: %[[TILE_UPDATE:.*]] = arm_sme.load_tile_slice %[[SRC]]{{\[}}%[[OFFSET]], %[[C0]]], %[[NUM_COLS]], %[[CURRENT_TILE]], %[[TILE_SLICE_INDEX]] : memref<?x?xi32>, vector<[4]xi1>, vector<[4]x[4]xi32>
48
55
// CHECK-NEXT: scf.yield %[[TILE_UPDATE]] : vector<[4]x[4]xi32>
@@ -150,9 +157,16 @@ func.func @arm_sme_tile_store_ver(%tile : vector<[4]x[4]xi32>, %dest : memref<?x
150
157
// CHECK-SAME: %[[DEST:.*]]: memref<?x?xi32>) {
151
158
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
152
159
// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
160
+ // CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
153
161
// CHECK-DAG: %[[NUM_ROWS:.*]] = arith.constant 3 : index
162
+ // CHECK-DAG: %[[VSCALE:.*]] = vector.vscale
163
+ // CHECK-DAG: %[[NUM_TILE_SLICES:.*]] = arith.muli %[[C4]], %[[VSCALE]] : index
164
+ // CHECK-DAG: %[[NUM_ROWS_I64:.*]] = arith.index_cast %[[NUM_ROWS]] : index to i64
165
+ // CHECK-DAG: %[[NUM_TILE_SLICES_I64:.*]] = arith.index_cast %[[NUM_TILE_SLICES]] : index to i64
166
+ // CHECK-DAG: %[[LOOP_UPPER_BOUND_I64:.*]] = arith.minsi %[[NUM_ROWS_I64]], %[[NUM_TILE_SLICES_I64]] : i64
167
+ // CHECK-DAG: %[[LOOP_UPPER_BOUND:.*]] = arith.index_cast %[[LOOP_UPPER_BOUND_I64]] : i64 to index
154
168
// CHECK-DAG: %[[NUM_COLS:.*]] = vector.create_mask %c2 : vector<[4]xi1>
155
- // CHECK-NEXT: scf.for %[[TILE_SLICE_INDEX:.*]] = %[[C0]] to %[[NUM_ROWS]] step %[[C1]] {
169
+ // CHECK-NEXT: scf.for %[[TILE_SLICE_INDEX:.*]] = %[[C0]] to %[[LOOP_UPPER_BOUND]] step %[[C1]] {
156
170
// CHECK-NEXT: %[[OFFSET:.*]] = arith.addi %[[C0]], %[[TILE_SLICE_INDEX]] : index
157
171
// CHECK-NEXT: arm_sme.store_tile_slice %[[TILE]], %[[TILE_SLICE_INDEX]], %[[NUM_COLS]], %[[DEST]]{{\[}}%[[OFFSET]], %[[C0]]] : memref<?x?xi32>, vector<[4]xi1>, vector<[4]x[4]xi32>
158
172
func.func @arm_sme_tile_store_hor_with_mask(%tile : vector<[4]x[4]xi32>, %dest : memref<?x?xi32>) {
0 commit comments