@@ -449,6 +449,101 @@ func.func @negative_contiguous_inner_most_dim_non_zero_idx_out_of_bounds(%arg0:
 // CHECK-NOT: memref.shape_cast
 // CHECK: vector.transfer_write
 
+// Same as the top example within this split, but with the outer vector
+// dim scalable. Note that this example only makes sense when "8 = [8]" (i.e.
+// vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
+
+func.func @contiguous_inner_most_non_zero_idxs_scalable(%arg0: memref<16x1xf32>, %arg1: vector<[8]x1xf32>, %i: index) {
+  %c0 = arith.constant 0 : index
+  vector.transfer_write %arg1, %arg0[%i, %c0] {in_bounds = [true, true]} : vector<[8]x1xf32>, memref<16x1xf32>
+  return
+}
+// CHECK-LABEL: func.func @contiguous_inner_most_non_zero_idxs_scalable(
+// CHECK-SAME: %[[MEM:.*]]: memref<16x1xf32>,
+// CHECK-SAME: %[[VEC:.*]]: vector<[8]x1xf32>,
+// CHECK-SAME: %[[IDX:.*]]: index) {
+// CHECK: %[[SV:.*]] = memref.subview %[[MEM]][0, 0] [16, 1] [1, 1] : memref<16x1xf32> to memref<16xf32, strided<[1]>>
+// CHECK: %[[SC:.*]] = vector.shape_cast %[[VEC]] : vector<[8]x1xf32> to vector<[8]xf32>
+// CHECK: vector.transfer_write %[[SC]], %[[SV]]{{\[}}%[[IDX]]] {in_bounds = [true]} : vector<[8]xf32>, memref<16xf32, strided<[1]>>
+
+// -----
+
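+// The destination here is itself a subview with a dynamic offset. As the
+// CHECK lines below show, the trailing unit dim is still collapsed: a second,
+// rank-reducing memref.subview is created and the vector is shape_cast to 1-D.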
+func.func @contiguous_inner_most_dim_with_subview(%A: memref<1000x1xf32>, %i: index, %ii: index, %vec: vector<4x1xf32>) {
+  %c0 = arith.constant 0 : index
+  %cst = arith.constant 0.0 : f32
+  %0 = memref.subview %A[%i, 0] [40, 1] [1, 1] : memref<1000x1xf32> to memref<40x1xf32, strided<[1, 1], offset: ?>>
+  vector.transfer_write %vec, %0[%ii, %c0] {in_bounds = [true, true]} : vector<4x1xf32>, memref<40x1xf32, strided<[1, 1], offset: ?>>
+  return
+}
+
+// CHECK-LABEL: func.func @contiguous_inner_most_dim_with_subview(
+// CHECK-SAME: %[[MEM:.*]]: memref<1000x1xf32>,
+// CHECK-SAME: %[[IDX_1:.*]]: index, %[[IDX_2:.*]]: index,
+// CHECK-SAME: %[[VEC:.*]]: vector<4x1xf32>) {
+// CHECK: %[[SV_1:.*]] = memref.subview %[[MEM]]{{\[}}%[[IDX_1]], 0] [40, 1] [1, 1] : memref<1000x1xf32> to memref<40x1xf32, strided<[1, 1], offset: ?>>
+// CHECK: %[[SV_2:.*]] = memref.subview %[[SV_1]][0, 0] [40, 1] [1, 1] : memref<40x1xf32, strided<[1, 1], offset: ?>> to memref<40xf32, strided<[1], offset: ?>>
+// CHECK: %[[SC:.*]] = vector.shape_cast %[[VEC]] : vector<4x1xf32> to vector<4xf32>
+// CHECK: vector.transfer_write %[[SC]], %[[SV_2]]{{\[}}%[[IDX_2]]] {in_bounds = [true]} : vector<4xf32>, memref<40xf32, strided<[1], offset: ?>>
+
+// Same as the top example within this split, but with the outer vector
+// dim scalable. Note that this example only makes sense when "4 = [4]" (i.e.
+// vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
+
+func.func @contiguous_inner_most_dim_with_subview_scalable(%A: memref<1000x1xf32>, %i: index, %ii: index, %vec: vector<[4]x1xf32>) {
+  %c0 = arith.constant 0 : index
+  %cst = arith.constant 0.0 : f32
+  %0 = memref.subview %A[%i, 0] [40, 1] [1, 1] : memref<1000x1xf32> to memref<40x1xf32, strided<[1, 1], offset: ?>>
+  vector.transfer_write %vec, %0[%ii, %c0] {in_bounds = [true, true]} : vector<[4]x1xf32>, memref<40x1xf32, strided<[1, 1], offset: ?>>
+  return
+}
+
+// CHECK-LABEL: func.func @contiguous_inner_most_dim_with_subview_scalable
+// CHECK-SAME: %[[MEM:.*]]: memref<1000x1xf32>,
+// CHECK-SAME: %[[IDX_1:.*]]: index, %[[IDX_2:.*]]: index,
+// CHECK-SAME: %[[VEC:.*]]: vector<[4]x1xf32>) {
+// CHECK: %[[SV_1:.*]] = memref.subview %[[MEM]]{{\[}}%[[IDX_1]], 0] [40, 1] [1, 1] : memref<1000x1xf32> to memref<40x1xf32, strided<[1, 1], offset: ?>>
+// CHECK: %[[SV_2:.*]] = memref.subview %[[SV_1]][0, 0] [40, 1] [1, 1] : memref<40x1xf32, strided<[1, 1], offset: ?>> to memref<40xf32, strided<[1], offset: ?>>
+// CHECK: %[[SC:.*]] = vector.shape_cast %[[VEC]] : vector<[4]x1xf32> to vector<[4]xf32>
+// CHECK: vector.transfer_write %[[SC]], %[[SV_2]]{{\[}}%[[IDX_2]]] {in_bounds = [true]} : vector<[4]xf32>, memref<40xf32, strided<[1], offset: ?>>
+
+// -----
+
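+// Variant with two trailing unit dims; both are collapsed in one step
+// (memref<40x1x1xf32> -> memref<40xf32>, vector<4x1x1xf32> -> vector<4xf32>),
+// as per the CHECK lines below.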
+func.func @contiguous_inner_most_dim_with_subview_2d(%A: memref<1000x1x1xf32>, %i: index, %ii: index, %vec: vector<4x1x1xf32>) {
+  %c0 = arith.constant 0 : index
+  %cst = arith.constant 0.0 : f32
+  %0 = memref.subview %A[%i, 0, 0] [40, 1, 1] [1, 1, 1] : memref<1000x1x1xf32> to memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
+  vector.transfer_write %vec, %0[%ii, %c0, %c0] {in_bounds = [true, true, true]} : vector<4x1x1xf32>, memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
+  return
+}
+// CHECK-LABEL: func.func @contiguous_inner_most_dim_with_subview_2d(
+// CHECK-SAME: %[[MEM:.*]]: memref<1000x1x1xf32>,
+// CHECK-SAME: %[[IDX_1:.*]]: index, %[[IDX_2:.*]]: index,
+// CHECK-SAME: %[[VEC:.*]]: vector<4x1x1xf32>) {
+// CHECK: %[[SV_1:.*]] = memref.subview %[[MEM]]{{\[}}%[[IDX_1]], 0, 0] [40, 1, 1] [1, 1, 1] : memref<1000x1x1xf32> to memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
+// CHECK: %[[SV_2:.*]] = memref.subview %[[SV_1]][0, 0, 0] [40, 1, 1] [1, 1, 1] : memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>> to memref<40xf32, strided<[1], offset: ?>>
+// CHECK: %[[SC:.*]] = vector.shape_cast %[[VEC]] : vector<4x1x1xf32> to vector<4xf32>
+// CHECK: vector.transfer_write %[[SC]], %[[SV_2]]{{\[}}%[[IDX_2]]] {in_bounds = [true]} : vector<4xf32>, memref<40xf32, strided<[1], offset: ?>>
+
+// Same as the top example within this split, but with the outer vector
+// dim scalable. Note that this example only makes sense when "4 = [4]" (i.e.
+// vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
+
+func.func @contiguous_inner_most_dim_with_subview_2d_scalable(%A: memref<1000x1x1xf32>, %i: index, %ii: index, %vec: vector<[4]x1x1xf32>) {
+  %c0 = arith.constant 0 : index
+  %cst = arith.constant 0.0 : f32
+  %0 = memref.subview %A[%i, 0, 0] [40, 1, 1] [1, 1, 1] : memref<1000x1x1xf32> to memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
+  vector.transfer_write %vec, %0[%ii, %c0, %c0] {in_bounds = [true, true, true]} : vector<[4]x1x1xf32>, memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
+  return
+}
+// CHECK-LABEL: func.func @contiguous_inner_most_dim_with_subview_2d_scalable
+// CHECK-SAME: %[[MEM:.*]]: memref<1000x1x1xf32>,
+// CHECK-SAME: %[[IDX_1:.*]]: index, %[[IDX_2:.*]]: index,
+// CHECK-SAME: %[[VEC:.*]]: vector<[4]x1x1xf32>) {
+// CHECK: %[[SV_1:.*]] = memref.subview %[[MEM]]{{\[}}%[[IDX_1]], 0, 0] [40, 1, 1] [1, 1, 1] : memref<1000x1x1xf32> to memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
+// CHECK: %[[SV_2:.*]] = memref.subview %[[SV_1]][0, 0, 0] [40, 1, 1] [1, 1, 1] : memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>> to memref<40xf32, strided<[1], offset: ?>>
+// CHECK: %[[SC:.*]] = vector.shape_cast %[[VEC]] : vector<[4]x1x1xf32> to vector<[4]xf32>
+// CHECK: vector.transfer_write %[[SC]], %[[SV_2]]{{\[}}%[[IDX_2]]] {in_bounds = [true]} : vector<[4]xf32>, memref<40xf32, strided<[1], offset: ?>>
+
 
 // -----
 
 func.func @drop_inner_most_dim(%arg0: memref<1x512x16x1xf32, strided<[8192, 16, 1, 1], offset: ?>>, %arg1: vector<1x16x16x1xf32>, %arg2: index) {
@@ -471,6 +566,30 @@ func.func @drop_inner_most_dim(%arg0: memref<1x512x16x1xf32, strided<[8192, 16,
 
 // -----
 
+// NOTE: This is an out-of-bounds access.
+
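+// The inner *vector* dim is non-unit (8), so there is no trailing unit dim to
+// drop; the write below is expected to be left as-is.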
+func.func @negative_non_unit_inner_vec_dim(%arg0: memref<4x1xf32>, %vec: vector<4x8xf32>) {
+  %c0 = arith.constant 0 : index
+  vector.transfer_write %vec, %arg0[%c0, %c0] : vector<4x8xf32>, memref<4x1xf32>
+  return
+}
+// CHECK: func.func @negative_non_unit_inner_vec_dim
+// CHECK-NOT: memref.subview
+// CHECK: vector.transfer_write
+
+// -----
+
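+// Same idea, but here it is the inner *memref* dim that is non-unit (8), so
+// no rewrite is expected.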
+func.func @negative_non_unit_inner_memref_dim(%arg0: memref<4x8xf32>, %vec: vector<4x1xf32>) {
+  %c0 = arith.constant 0 : index
+  vector.transfer_write %vec, %arg0[%c0, %c0] : vector<4x1xf32>, memref<4x8xf32>
+  return
+}
+// CHECK: func.func @negative_non_unit_inner_memref_dim
+// CHECK-NOT: memref.subview
+// CHECK: vector.transfer_write
+
+// -----
+
 func.func @non_unit_strides(%arg0: memref<512x16x1xf32, strided<[8192, 16, 4], offset: ?>>, %arg1: vector<16x16x1xf32>, %arg2: index) {
   %c0 = arith.constant 0 : index
   vector.transfer_write %arg1, %arg0[%arg2, %c0, %c0]