@@ -67,13 +67,13 @@ func.func @negative_scalable_unit_dim(%in: memref<1x1x8x1xf32, strided<[3072, 8,
67
67
68
68
// -----
69
69
70
- func.func @contiguous_outer_dyn_inner_most (%a: index , %b: index , %memref: memref <?x?x8 x1 xf32 >) -> vector <8 x1 xf32 > {
70
+ func.func @contiguous_inner_most_dynamic_outer (%a: index , %b: index , %memref: memref <?x?x8 x1 xf32 >) -> vector <8 x1 xf32 > {
71
71
%c0 = arith.constant 0 : index
72
72
%pad = arith.constant 0.0 : f32
73
73
%v = vector.transfer_read %memref [%a , %b , %c0 , %c0 ], %pad {in_bounds = [true , true ]} : memref <?x?x8 x1 xf32 >, vector <8 x1 xf32 >
74
74
return %v : vector <8 x1 xf32 >
75
75
}
76
- // CHECK: func.func @contiguous_outer_dyn_inner_most(
76
+ // CHECK: func.func @contiguous_inner_most_dynamic_outer
77
77
// CHECK-SAME: %[[IDX0:[a-zA-Z0-9]+]]
78
78
// CHECK-SAME: %[[IDX1:[a-zA-Z0-9]+]]
79
79
// CHECK-SAME: %[[SRC:[a-zA-Z0-9]+]]
@@ -89,68 +89,154 @@ func.func @contiguous_outer_dyn_inner_most(%a: index, %b: index, %memref: memref
89
89
// CHECK: %[[RESULT:.+]] = vector.shape_cast %[[VEC]]
90
90
// CHECK: return %[[RESULT]]
91
91
92
+ // Same as the top example within this split, but with the outer vector
93
+ // dim scalable. Note that this example only makes sense when "8 = [8]" (i.e.
94
+ // vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
95
+
96
+ func.func @contiguous_inner_most_outer_dim_dyn_scalable_inner_dim (%a: index , %b: index , %memref: memref <?x?x8 x1 xf32 >) -> vector <[8 ]x1 xf32 > {
97
+ %c0 = arith.constant 0 : index
98
+ %pad = arith.constant 0.0 : f32
99
+ %v = vector.transfer_read %memref [%a , %b , %c0 , %c0 ], %pad {in_bounds = [true , true ]} : memref <?x?x8 x1 xf32 >, vector <[8 ]x1 xf32 >
100
+ return %v : vector <[8 ]x1 xf32 >
101
+ }
102
+ // CHECK-LABEL: func @contiguous_inner_most_outer_dim_dyn_scalable_inner_dim
103
+ // CHECK-SAME: %[[IDX0:[a-zA-Z0-9]+]]
104
+ // CHECK-SAME: %[[IDX1:[a-zA-Z0-9]+]]
105
+ // CHECK-SAME: %[[SRC:[a-zA-Z0-9]+]]
106
+ // CHECK: %[[VIEW:.+]] = memref.subview %[[SRC]]{{.*}} memref<?x?x8x1xf32> to memref<?x?x8xf32, strided<[?, 8, 1], offset: ?>>
107
+ // CHECK: %[[VEC_READ:.+]] = vector.transfer_read %[[VIEW]]
108
+ // CHECK-SAME: {in_bounds = [true]}
109
+ // CHECK-SAME: memref<?x?x8xf32, strided<[?, 8, 1], offset: ?>>, vector<[8]xf32>
110
+ // CHECK: vector.shape_cast %[[VEC_READ]]
111
+
92
112
// -----
93
113
94
- func.func @contiguous_inner_most_dim (%A: memref <16 x1 xf32 >, %i:index , %j:index ) -> (vector <8 x1 xf32 >) {
114
+ func.func @contiguous_inner_most_dim_non_zero_idxs (%A: memref <16 x1 xf32 >, %i:index , %j:index ) -> (vector <8 x1 xf32 >) {
95
115
%c0 = arith.constant 0 : index
96
116
%f0 = arith.constant 0.0 : f32
97
117
%1 = vector.transfer_read %A [%i , %j ], %f0 : memref <16 x1 xf32 >, vector <8 x1 xf32 >
98
118
return %1 : vector <8 x1 xf32 >
99
119
}
100
- // CHECK: func @contiguous_inner_most_dim (%[[SRC:.+]]: memref<16x1xf32>, %[[I:.+]]: index, %[[J:.+]]: index) -> vector<8x1xf32>
120
+ // CHECK: func @contiguous_inner_most_dim_non_zero_idxs(%[[SRC:.+]]: memref<16x1xf32>, %[[I:.+]]: index, %[[J:.+]]: index) -> vector<8x1xf32>
101
121
// CHECK: %[[SRC_0:.+]] = memref.subview %[[SRC]]
102
122
// CHECK-SAME: memref<16x1xf32> to memref<16xf32, strided<[1]>>
103
123
// CHECK: %[[V:.+]] = vector.transfer_read %[[SRC_0]]
104
- // CHECK: %[[RESULT]] = vector.shape_cast %[[V]] : vector<8xf32> to vector<8x1xf32>
124
+ // CHECK: %[[RESULT:.+]] = vector.shape_cast %[[V]] : vector<8xf32> to vector<8x1xf32>
105
125
// CHECK: return %[[RESULT]]
106
126
127
+ // Same as the top example within this split, but with the outer vector
128
+ // dim scalable. Note that this example only makes sense when "8 = [8]" (i.e.
129
+ // vscale = 1). NOTE(review): the read below sets no `in_bounds` attribute, so this assumption is not encoded in the IR - confirm intent.
130
+
131
+ func.func @contiguous_inner_most_dim_non_zero_idxs_scalable_inner_dim (%A: memref <16 x1 xf32 >, %i:index , %j:index ) -> (vector <[8 ]x1 xf32 >) {
132
+ %c0 = arith.constant 0 : index
133
+ %f0 = arith.constant 0.0 : f32
134
+ %1 = vector.transfer_read %A [%i , %j ], %f0 : memref <16 x1 xf32 >, vector <[8 ]x1 xf32 >
135
+ return %1 : vector <[8 ]x1 xf32 >
136
+ }
137
+ // CHECK-LABEL: func @contiguous_inner_most_dim_non_zero_idxs_scalable_inner_dim(
138
+ // CHECK-SAME: %[[SRC:.+]]: memref<16x1xf32>, %[[I:.+]]: index, %[[J:.+]]: index) -> vector<[8]x1xf32>
139
+ // CHECK: %[[SRC_0:.+]] = memref.subview %[[SRC]]
140
+ // CHECK-SAME: memref<16x1xf32> to memref<16xf32, strided<[1]>>
141
+ // CHECK: %[[V:.+]] = vector.transfer_read %[[SRC_0]]
142
+ // CHECK: %[[RESULT:.+]] = vector.shape_cast %[[V]] : vector<[8]xf32> to vector<[8]x1xf32>
143
+ // CHECK: return %[[RESULT]]
144
+
107
145
// -----
108
146
109
- func.func @contiguous_inner_most_dim_bounds (%A: memref <1000 x1 xf32 >, %i:index , %ii:index ) -> (vector <4 x1 xf32 >) {
147
+ func.func @contiguous_inner_most_dim_with_subview (%A: memref <1000 x1 xf32 >, %i:index , %ii:index ) -> (vector <4 x1 xf32 >) {
110
148
%c0 = arith.constant 0 : index
111
149
%cst = arith.constant 0.0 : f32
112
150
%0 = memref.subview %A [%i , 0 ] [40 , 1 ] [1 , 1 ] : memref <1000 x1 xf32 > to memref <40 x1 xf32 , strided <[1 , 1 ], offset : ?>>
113
151
%1 = vector.transfer_read %0 [%ii , %c0 ], %cst {in_bounds = [true , true ]} : memref <40 x1 xf32 , strided <[1 , 1 ], offset : ?>>, vector <4 x1 xf32 >
114
152
return %1 : vector <4 x1 xf32 >
115
153
}
116
- // CHECK: func @contiguous_inner_most_dim_bounds (%[[SRC:.+]]: memref<1000x1xf32>, %[[II:.+]]: index, %[[J:.+]]: index) -> vector<4x1xf32>
154
+ // CHECK: func @contiguous_inner_most_dim_with_subview(%[[SRC:.+]]: memref<1000x1xf32>, %[[II:.+]]: index, %[[J:.+]]: index) -> vector<4x1xf32>
117
155
// CHECK: %[[SRC_0:.+]] = memref.subview %[[SRC]]
118
156
// CHECK: %[[SRC_1:.+]] = memref.subview %[[SRC_0]]
119
157
// CHECK: %[[V:.+]] = vector.transfer_read %[[SRC_1]]
120
158
// CHECK-SAME: {in_bounds = [true]}
121
159
// CHECK-SAME: vector<4xf32>
122
160
161
+ // Same as the top example within this split, but with the outer vector
162
+ // dim scalable. Note that this example only makes sense when "4 = [4]" (i.e.
163
+ // vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
164
+
165
+ func.func @contiguous_inner_most_dim_with_subview_scalable_inner_dim (%A: memref <1000 x1 xf32 >, %i:index , %ii:index ) -> (vector <[4 ]x1 xf32 >) {
166
+ %c0 = arith.constant 0 : index
167
+ %cst = arith.constant 0.0 : f32
168
+ %0 = memref.subview %A [%i , 0 ] [40 , 1 ] [1 , 1 ] : memref <1000 x1 xf32 > to memref <40 x1 xf32 , strided <[1 , 1 ], offset : ?>>
169
+ %1 = vector.transfer_read %0 [%ii , %c0 ], %cst {in_bounds = [true , true ]} : memref <40 x1 xf32 , strided <[1 , 1 ], offset : ?>>, vector <[4 ]x1 xf32 >
170
+ return %1 : vector <[4 ]x1 xf32 >
171
+ }
172
+ // CHECK-LABEL: func @contiguous_inner_most_dim_with_subview_scalable_inner_dim
173
+ // CHECK-SAME: %[[SRC:.+]]: memref<1000x1xf32>
174
+ // CHECK: %[[SRC_0:.+]] = memref.subview %[[SRC]]
175
+ // CHECK: %[[SRC_1:.+]] = memref.subview %[[SRC_0]]
+ // CHECK: %[[V:.+]] = vector.transfer_read %[[SRC_1]]
176
+ // CHECK-SAME: {in_bounds = [true]}
177
+ // CHECK-SAME: vector<[4]xf32>
178
+
123
179
// -----
124
180
125
- func.func @contiguous_inner_most_dim_bounds_2d (%A: memref <1000 x1 x1 xf32 >, %i:index , %ii:index ) -> (vector <4 x1 x1 xf32 >) {
181
+ func.func @contiguous_inner_most_dim_with_subview_2d (%A: memref <1000 x1 x1 xf32 >, %i:index , %ii:index ) -> (vector <4 x1 x1 xf32 >) {
126
182
%c0 = arith.constant 0 : index
127
183
%cst = arith.constant 0.0 : f32
128
184
%0 = memref.subview %A [%i , 0 , 0 ] [40 , 1 , 1 ] [1 , 1 , 1 ] : memref <1000 x1 x1 xf32 > to memref <40 x1 x1 xf32 , strided <[1 , 1 , 1 ], offset : ?>>
129
185
%1 = vector.transfer_read %0 [%ii , %c0 , %c0 ], %cst {in_bounds = [true , true , true ]} : memref <40 x1 x1 xf32 , strided <[1 , 1 , 1 ], offset : ?>>, vector <4 x1 x1 xf32 >
130
186
return %1 : vector <4 x1 x1 xf32 >
131
187
}
132
- // CHECK: func @contiguous_inner_most_dim_bounds_2d (%[[SRC:.+]]: memref<1000x1x1xf32>, %[[II:.+]]: index, %[[J:.+]]: index) -> vector<4x1x1xf32>
188
+ // CHECK: func @contiguous_inner_most_dim_with_subview_2d(%[[SRC:.+]]: memref<1000x1x1xf32>, %[[II:.+]]: index, %[[J:.+]]: index) -> vector<4x1x1xf32>
133
189
// CHECK: %[[SRC_0:.+]] = memref.subview %[[SRC]]
134
190
// CHECK: %[[SRC_1:.+]] = memref.subview %[[SRC_0]]
135
191
// CHECK: %[[V:.+]] = vector.transfer_read %[[SRC_1]]
136
192
// CHECK-SAME: {in_bounds = [true]}
137
193
// CHECK-SAME: vector<4xf32>
138
194
195
+ // Same as the top example within this split, but with the outer vector
196
+ // dim scalable. Note that this example only makes sense when "4 = [4]" (i.e.
197
+ // vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
198
+
199
+ func.func @contiguous_inner_most_dim_with_subview_2d_scalable_inner_dim (%A: memref <1000 x1 x1 xf32 >, %i:index , %ii:index ) -> (vector <[4 ]x1 x1 xf32 >) {
200
+ %c0 = arith.constant 0 : index
201
+ %cst = arith.constant 0.0 : f32
202
+ %0 = memref.subview %A [%i , 0 , 0 ] [40 , 1 , 1 ] [1 , 1 , 1 ] : memref <1000 x1 x1 xf32 > to memref <40 x1 x1 xf32 , strided <[1 , 1 , 1 ], offset : ?>>
203
+ %1 = vector.transfer_read %0 [%ii , %c0 , %c0 ], %cst {in_bounds = [true , true , true ]} : memref <40 x1 x1 xf32 , strided <[1 , 1 , 1 ], offset : ?>>, vector <[4 ]x1 x1 xf32 >
204
+ return %1 : vector <[4 ]x1 x1 xf32 >
205
+ }
206
+ // CHECK-LABEL: func @contiguous_inner_most_dim_with_subview_2d_scalable_inner_dim(
207
+ // CHECK-SAME: %[[SRC:.+]]: memref<1000x1x1xf32>, %[[II:.+]]: index, %[[J:.+]]: index) -> vector<[4]x1x1xf32>
208
+ // CHECK: %[[SRC_0:.+]] = memref.subview %[[SRC]]
209
+ // CHECK: %[[SRC_1:.+]] = memref.subview %[[SRC_0]]
210
+ // CHECK: %[[V:.+]] = vector.transfer_read %[[SRC_1]]
211
+ // CHECK-SAME: {in_bounds = [true]}
212
+ // CHECK-SAME: vector<[4]xf32>
213
+ // CHECK: vector.shape_cast %[[V]]
214
+
139
215
// -----
140
216
141
- func.func @contiguous_inner_most_dim_out_of_bounds_2d (%arg0: memref <1 x1 xf32 >) -> vector <4 x8 xf32 > {
217
+ // NOTE: This is an out-of-bounds access.
218
+
219
+ func.func @negative_non_unit_inner_vec_dim (%arg0: memref <4 x1 xf32 >) -> vector <4 x8 xf32 > {
142
220
%c0 = arith.constant 0 : index
143
221
%cst = arith.constant 0.000000e+00 : f32
144
- %0 = vector.transfer_read %arg0 [%c0 , %c0 ], %cst : memref <1 x 1 x f32 >, vector <4 x8 xf32 >
222
+ %0 = vector.transfer_read %arg0 [%c0 , %c0 ], %cst : memref <4 x 1 x f32 >, vector <4 x8 xf32 >
145
223
return %0 : vector <4 x8 xf32 >
146
224
}
147
- // The inner most unit dim can not be dropped. In this context, we do not
148
- // generate rank-reduced memref.subview ops.
149
- // CHECK: func.func @contiguous_inner_most_dim_out_of_bounds_2d
150
- // CHECK-SAME: %[[SRC:[a-zA-Z0-9]+]]
225
+ // CHECK: func.func @negative_non_unit_inner_vec_dim
226
+ // CHECK-NOT: memref.subview
227
+ // CHECK: vector.transfer_read
228
+
229
+ // -----
230
+
231
+ func.func @negative_non_unit_inner_memref_dim (%arg0: memref <4 x8 xf32 >) -> vector <4 x1 xf32 > {
232
+ %c0 = arith.constant 0 : index
233
+ %cst = arith.constant 0.000000e+00 : f32
234
+ %0 = vector.transfer_read %arg0 [%c0 , %c0 ], %cst : memref <4 x8 xf32 >, vector <4 x1 xf32 >
235
+ return %0 : vector <4 x1 xf32 >
236
+ }
237
+ // CHECK: func.func @negative_non_unit_inner_memref_dim
151
238
// CHECK-NOT: memref.subview
152
- // CHECK: %[[READ:.+]] = vector.transfer_read %[[SRC]]
153
- // CHECK: return %[[READ]] : vector<4x8xf32>
239
+ // CHECK: vector.transfer_read
154
240
155
241
// -----
156
242
0 commit comments