@@ -67,13 +67,13 @@ func.func @negative_scalable_unit_dim(%in: memref<1x1x8x1xf32, strided<[3072, 8,
6767
6868// -----
6969
70- func.func @contiguous_outer_dyn_inner_most (%a: index , %b: index , %memref: memref <?x?x8 x1 xf32 >) -> vector <8 x1 xf32 > {
70+ func.func @contiguous_inner_most_dynamic_outer (%a: index , %b: index , %memref: memref <?x?x8 x1 xf32 >) -> vector <8 x1 xf32 > {
7171 %c0 = arith.constant 0 : index
7272 %pad = arith.constant 0.0 : f32
7373 %v = vector.transfer_read %memref [%a , %b , %c0 , %c0 ], %pad {in_bounds = [true , true ]} : memref <?x?x8 x1 xf32 >, vector <8 x1 xf32 >
7474 return %v : vector <8 x1 xf32 >
7575}
76- // CHECK: func.func @contiguous_outer_dyn_inner_most(
76+ // CHECK: func.func @contiguous_inner_most_dynamic_outer
7777// CHECK-SAME: %[[IDX0:[a-zA-Z0-9]+]]
7878// CHECK-SAME: %[[IDX1:[a-zA-Z0-9]+]]
7979// CHECK-SAME: %[[SRC:[a-zA-Z0-9]+]]
@@ -89,64 +89,140 @@ func.func @contiguous_outer_dyn_inner_most(%a: index, %b: index, %memref: memref
8989// CHECK: %[[RESULT:.+]] = vector.shape_cast %[[VEC]]
9090// CHECK: return %[[RESULT]]
9191
92+ // Same as the top example within this split, but with the inner vector
93+ // dim scalable. Note that this example only makes sense when "8 = [8]" (i.e.
94+ // vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
95+
96+ func.func @contiguous_inner_most_outer_dim_dyn_scalable_inner_dim (%a: index , %b: index , %memref: memref <?x?x8 x1 xf32 >) -> vector <[8 ]x1 xf32 > {
97+ %c0 = arith.constant 0 : index
98+ %pad = arith.constant 0.0 : f32
99+ %v = vector.transfer_read %memref [%a , %b , %c0 , %c0 ], %pad {in_bounds = [true , true ]} : memref <?x?x8 x1 xf32 >, vector <[8 ]x1 xf32 >
100+ return %v : vector <[8 ]x1 xf32 >
101+ }
102+ // CHECK-LABEL: func @contiguous_inner_most_outer_dim_dyn_scalable_inner_dim
103+ // CHECK-SAME: %[[IDX0:[a-zA-Z0-9]+]]
104+ // CHECK-SAME: %[[IDX1:[a-zA-Z0-9]+]]
105+ // CHECK-SAME: %[[SRC:[a-zA-Z0-9]+]]
106+ // CHECK: %[[VIEW:.+]] = memref.subview %[[SRC]]{{.*}} memref<?x?x8x1xf32> to memref<?x?x8xf32, strided<[?, 8, 1], offset: ?>>
107+ // CHECK: %[[VEC_READ:.+]] = vector.transfer_read %[[VIEW]]
108+ // CHECK-SAME: {in_bounds = [true]}
109+ // CHECK-SAME: memref<?x?x8xf32, strided<[?, 8, 1], offset: ?>>, vector<[8]xf32>
110+ // CHECK: vector.shape_cast %[[VEC_READ]]
111+
92112// -----
93113
94- func.func @contiguous_inner_most_dim (%A: memref <16 x1 xf32 >, %i:index , %j:index ) -> (vector <8 x1 xf32 >) {
114+ func.func @contiguous_inner_most_dim_non_zero_idxs (%A: memref <16 x1 xf32 >, %i:index , %j:index ) -> (vector <8 x1 xf32 >) {
95115 %c0 = arith.constant 0 : index
96116 %f0 = arith.constant 0.0 : f32
97117 %1 = vector.transfer_read %A [%i , %j ], %f0 : memref <16 x1 xf32 >, vector <8 x1 xf32 >
98118 return %1 : vector <8 x1 xf32 >
99119}
100- // CHECK: func @contiguous_inner_most_dim (%[[SRC:.+]]: memref<16x1xf32>, %[[I:.+]]: index, %[[J:.+]]: index) -> vector<8x1xf32>
120+ // CHECK: func @contiguous_inner_most_dim_non_zero_idxs (%[[SRC:.+]]: memref<16x1xf32>, %[[I:.+]]: index, %[[J:.+]]: index) -> vector<8x1xf32>
101121// CHECK: %[[SRC_0:.+]] = memref.subview %[[SRC]]
102122// CHECK-SAME: memref<16x1xf32> to memref<16xf32, strided<[1]>>
103123// CHECK: %[[V:.+]] = vector.transfer_read %[[SRC_0]]
104- // CHECK: %[[RESULT]] = vector.shape_cast %[[V]] : vector<8xf32> to vector<8x1xf32>
124+ // CHECK: %[[RESULT:.+]] = vector.shape_cast %[[V]] : vector<8xf32> to vector<8x1xf32>
105125// CHECK: return %[[RESULT]]
106126
127+ // Same as the top example within this split, but with the inner vector
128+ // dim scalable. Note that this example only makes sense when "8 = [8]" (i.e.
129+ // vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
130+
131+ func.func @contiguous_inner_most_dim_non_zero_idxs_scalable_inner_dim (%A: memref <16 x1 xf32 >, %i:index , %j:index ) -> (vector <[8 ]x1 xf32 >) {
132+ %c0 = arith.constant 0 : index
133+ %f0 = arith.constant 0.0 : f32
134+ %1 = vector.transfer_read %A [%i , %j ], %f0 : memref <16 x1 xf32 >, vector <[8 ]x1 xf32 >
135+ return %1 : vector <[8 ]x1 xf32 >
136+ }
137+ // CHECK-LABEL: func @contiguous_inner_most_dim_non_zero_idxs_scalable_inner_dim(
138+ // CHECK-SAME: %[[SRC:.+]]: memref<16x1xf32>, %[[I:.+]]: index, %[[J:.+]]: index) -> vector<[8]x1xf32>
139+ // CHECK: %[[SRC_0:.+]] = memref.subview %[[SRC]]
140+ // CHECK-SAME: memref<16x1xf32> to memref<16xf32, strided<[1]>>
141+ // CHECK: %[[V:.+]] = vector.transfer_read %[[SRC_0]]
142+ // CHECK: %[[RESULT:.+]] = vector.shape_cast %[[V]] : vector<[8]xf32> to vector<[8]x1xf32>
143+ // CHECK: return %[[RESULT]]
144+
107145// -----
108146
109- func.func @contiguous_inner_most_dim_bounds (%A: memref <1000 x1 xf32 >, %i:index , %ii:index ) -> (vector <4 x1 xf32 >) {
147+ func.func @contiguous_inner_most_dim_with_subview (%A: memref <1000 x1 xf32 >, %i:index , %ii:index ) -> (vector <4 x1 xf32 >) {
110148 %c0 = arith.constant 0 : index
111149 %cst = arith.constant 0.0 : f32
112150 %0 = memref.subview %A [%i , 0 ] [40 , 1 ] [1 , 1 ] : memref <1000 x1 xf32 > to memref <40 x1 xf32 , strided <[1 , 1 ], offset : ?>>
113151 %1 = vector.transfer_read %0 [%ii , %c0 ], %cst {in_bounds = [true , true ]} : memref <40 x1 xf32 , strided <[1 , 1 ], offset : ?>>, vector <4 x1 xf32 >
114152 return %1 : vector <4 x1 xf32 >
115153}
116- // CHECK: func @contiguous_inner_most_dim_bounds (%[[SRC:.+]]: memref<1000x1xf32>, %[[II:.+]]: index, %[[J:.+]]: index) -> vector<4x1xf32>
154+ // CHECK: func @contiguous_inner_most_dim_with_subview (%[[SRC:.+]]: memref<1000x1xf32>, %[[II:.+]]: index, %[[J:.+]]: index) -> vector<4x1xf32>
117155// CHECK: %[[SRC_0:.+]] = memref.subview %[[SRC]]
118156// CHECK: %[[SRC_1:.+]] = memref.subview %[[SRC_0]]
119157// CHECK: %[[V:.+]] = vector.transfer_read %[[SRC_1]]
120158// CHECK-SAME: {in_bounds = [true]}
121159// CHECK-SAME: vector<4xf32>
122160
161+ // Same as the top example within this split, but with the inner vector
162+ // dim scalable. Note that this example only makes sense when "4 = [4]" (i.e.
163+ // vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
164+
165+ func.func @contiguous_inner_most_dim_with_subview_scalable_inner_dim (%A: memref <1000 x?xf32 >, %i:index , %ii:index , %j:index ) -> (vector <[4 ]x1 xf32 >) {
166+ %c0 = arith.constant 0 : index
167+ %cst = arith.constant 0.0 : f32
168+ %0 = memref.subview %A [%i , 0 ] [40 , %j ] [1 , 1 ] : memref <1000 x?xf32 > to memref <40 x?xf32 , strided <[?, 1 ], offset : ?>>
169+ %1 = vector.transfer_read %0 [%ii , %c0 ], %cst {in_bounds = [true , true ]} : memref <40 x?xf32 , strided <[?, 1 ], offset : ?>>, vector <[4 ]x1 xf32 >
170+ return %1 : vector <[4 ]x1 xf32 >
171+ }
172+ // CHECK-LABEL: func @contiguous_inner_most_dim_with_subview_scalable_inner_dim
173+ // CHECK-SAME: %[[SRC:.+]]: memref<1000x?xf32>
174+ // CHECK: %[[SRC_0:.+]] = memref.subview %[[SRC]]
175+ // CHECK: %[[V:.+]] = vector.transfer_read %[[SRC_0]]
176+ // CHECK-SAME: {in_bounds = [true]}
177+ // CHECK-SAME: vector<[4]xf32>
178+
123179// -----
124180
125- func.func @contiguous_inner_most_dim_bounds_2d (%A: memref <1000 x1 x1 xf32 >, %i:index , %ii:index ) -> (vector <4 x1 x1 xf32 >) {
181+ func.func @contiguous_inner_most_dim_2d (%A: memref <1000 x1 x1 xf32 >, %i:index , %ii:index ) -> (vector <4 x1 x1 xf32 >) {
126182 %c0 = arith.constant 0 : index
127183 %cst = arith.constant 0.0 : f32
128184 %0 = memref.subview %A [%i , 0 , 0 ] [40 , 1 , 1 ] [1 , 1 , 1 ] : memref <1000 x1 x1 xf32 > to memref <40 x1 x1 xf32 , strided <[1 , 1 , 1 ], offset : ?>>
129185 %1 = vector.transfer_read %0 [%ii , %c0 , %c0 ], %cst {in_bounds = [true , true , true ]} : memref <40 x1 x1 xf32 , strided <[1 , 1 , 1 ], offset : ?>>, vector <4 x1 x1 xf32 >
130186 return %1 : vector <4 x1 x1 xf32 >
131187}
132- // CHECK: func @contiguous_inner_most_dim_bounds_2d (%[[SRC:.+]]: memref<1000x1x1xf32>, %[[II:.+]]: index, %[[J:.+]]: index) -> vector<4x1x1xf32>
188+ // CHECK: func @contiguous_inner_most_dim_2d (%[[SRC:.+]]: memref<1000x1x1xf32>, %[[II:.+]]: index, %[[J:.+]]: index) -> vector<4x1x1xf32>
133189// CHECK: %[[SRC_0:.+]] = memref.subview %[[SRC]]
134190// CHECK: %[[SRC_1:.+]] = memref.subview %[[SRC_0]]
135191// CHECK: %[[V:.+]] = vector.transfer_read %[[SRC_1]]
136192// CHECK-SAME: {in_bounds = [true]}
137193// CHECK-SAME: vector<4xf32>
138194
195+ // Same as the top example within this split, but with the inner vector
196+ // dim scalable. Note that this example only makes sense when "4 = [4]" (i.e.
197+ // vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
198+
199+ func.func @contiguous_inner_most_dim_2d_scalable_inner_dim (%A: memref <1000 x1 x1 xf32 >, %i:index , %ii:index ) -> (vector <[4 ]x1 x1 xf32 >) {
200+ %c0 = arith.constant 0 : index
201+ %cst = arith.constant 0.0 : f32
202+ %0 = memref.subview %A [%i , 0 , 0 ] [40 , 1 , 1 ] [1 , 1 , 1 ] : memref <1000 x1 x1 xf32 > to memref <40 x1 x1 xf32 , strided <[1 , 1 , 1 ], offset : ?>>
203+ %1 = vector.transfer_read %0 [%ii , %c0 , %c0 ], %cst {in_bounds = [true , true , true ]} : memref <40 x1 x1 xf32 , strided <[1 , 1 , 1 ], offset : ?>>, vector <[4 ]x1 x1 xf32 >
204+ return %1 : vector <[4 ]x1 x1 xf32 >
205+ }
206+ // CHECK-LABEL: func @contiguous_inner_most_dim_2d_scalable_inner_dim(
207+ // CHECK-SAME: %[[SRC:.+]]: memref<1000x1x1xf32>, %[[II:.+]]: index, %[[J:.+]]: index) -> vector<[4]x1x1xf32>
208+ // CHECK: %[[SRC_0:.+]] = memref.subview %[[SRC]]
209+ // CHECK: %[[SRC_1:.+]] = memref.subview %[[SRC_0]]
210+ // CHECK: %[[V:.+]] = vector.transfer_read %[[SRC_1]]
211+ // CHECK-SAME: {in_bounds = [true]}
212+ // CHECK-SAME: vector<[4]xf32>
213+ // CHECK: vector.shape_cast %[[V]]
214+
139215// -----
140216
141- func.func @contiguous_inner_most_dim_out_of_bounds_2d (%arg0: memref <1 x1 xf32 >) -> vector <4 x8 xf32 > {
217+ func.func @negative_out_of_bounds (%arg0: memref <1 x1 xf32 >) -> vector <4 x8 xf32 > {
142218 %c0 = arith.constant 0 : index
143219 %cst = arith.constant 0.000000e+00 : f32
144220 %0 = vector.transfer_read %arg0 [%c0 , %c0 ], %cst : memref <1 x1 xf32 >, vector <4 x8 xf32 >
145221 return %0 : vector <4 x8 xf32 >
146222}
147223// The inner most unit dim can not be dropped. In this context, we do not
148224// generate rank-reduced memref.subview ops.
149- // CHECK: func.func @contiguous_inner_most_dim_out_of_bounds_2d
225+ // CHECK: func.func @negative_out_of_bounds
150226// CHECK-SAME: %[[SRC:[a-zA-Z0-9]+]]
151227// CHECK-NOT: memref.subview
152228// CHECK: %[[READ:.+]] = vector.transfer_read %[[SRC]]
0 commit comments