-
Notifications
You must be signed in to change notification settings - Fork 13.7k
[mlir][linalg] Consolidate tests for scalable vectorization #141469
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[mlir][linalg] Consolidate tests for scalable vectorization #141469
Conversation
@llvm/pr-subscribers-mlir-linalg @llvm/pr-subscribers-mlir Author: Andrzej Warzyński (banach-space)

Changes: This patch moves scalable vectorization tests into an existing generic vectorization test file.
Rationale:
Notable changes beyond moving tests:
This patch contributes to the implementation of #141025 — please refer Patch is 49.44 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/141469.diff 2 Files Affected:
diff --git a/mlir/test/Dialect/Linalg/vectorization-scalable.mlir b/mlir/test/Dialect/Linalg/vectorization-scalable.mlir
deleted file mode 100644
index 227829238a3d7..0000000000000
--- a/mlir/test/Dialect/Linalg/vectorization-scalable.mlir
+++ /dev/null
@@ -1,355 +0,0 @@
-// RUN: mlir-opt %s -transform-interpreter -split-input-file | FileCheck %s
-
-func.func @vectorize_dynamic_identity(%arg0: tensor<?xf32>,
- %arg1: tensor<?xf32>,
- %arg2: tensor<?xf32>) -> tensor<?xf32> {
- %0 = linalg.generic { indexing_maps = [affine_map<(d0) -> (d0)>,
- affine_map<(d0) -> (d0)>,
- affine_map<(d0) -> (d0)>],
- iterator_types = ["parallel"] }
- ins(%arg0, %arg1 : tensor<?xf32>, tensor<?xf32>)
- outs(%arg2 : tensor<?xf32>) {
- ^bb(%in0: f32, %in1: f32, %out: f32) :
- %0 = arith.addf %in0, %in1 : f32
- linalg.yield %0 : f32
- } -> tensor<?xf32>
- return %0 : tensor<?xf32>
-}
-
-// CHECK-LABEL: @vectorize_dynamic_identity
-// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index
-// CHECK: %[[VAL_4:.*]] = tensor.dim %{{.*}}, %[[VAL_3]] : tensor<?xf32>
-// CHECK: %[[VAL_7:.*]] = vector.create_mask %[[VAL_4]] : vector<[4]xi1>
-// CHECK: %[[VAL_8:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %{{.*}} {in_bounds = [true]} : tensor<?xf32>, vector<[4]xf32> } : vector<[4]xi1> -> vector<[4]xf32>
-// CHECK: %[[VAL_10:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %{{.*}} {in_bounds = [true]} : tensor<?xf32>, vector<[4]xf32> } : vector<[4]xi1> -> vector<[4]xf32>
-// CHECK: %[[VAL_12:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %{{.*}} {in_bounds = [true]} : tensor<?xf32>, vector<[4]xf32> } : vector<[4]xi1> -> vector<[4]xf32>
-// CHECK: %[[VAL_13:.*]] = arith.addf %[[VAL_8]], %[[VAL_10]] : vector<[4]xf32>
-// CHECK: %[[VAL_14:.*]] = vector.mask %[[VAL_7]] { vector.transfer_write %{{.*}} {in_bounds = [true]} : vector<[4]xf32>, tensor<?xf32> } : vector<[4]xi1> -> tensor<?xf32>
-
-module attributes {transform.with_named_sequence} {
- transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- transform.structured.vectorize %0 vector_sizes [[4]] : !transform.any_op
- transform.yield
- }
-}
-
-// -----
-
-func.func @vectorize_partial_dynamic_identity(%arg0: tensor<8x?xf32>,
- %arg1: tensor<8x?xf32>,
- %arg2: tensor<8x?xf32>) -> tensor<8x?xf32> {
- %0 = linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
- affine_map<(d0, d1) -> (d0, d1)>,
- affine_map<(d0, d1) -> (d0, d1)>],
- iterator_types = ["parallel", "parallel"] }
- ins(%arg0, %arg1 : tensor<8x?xf32>, tensor<8x?xf32>)
- outs(%arg2 : tensor<8x?xf32>) {
- ^bb(%in0: f32, %in1: f32, %out: f32) :
- %0 = arith.addf %in0, %in1 : f32
- linalg.yield %0 : f32
- } -> tensor<8x?xf32>
- return %0 : tensor<8x?xf32>
-}
-
-// CHECK-LABEL: func.func @vectorize_partial_dynamic_identity(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<8x?xf32>, %[[VAL_1:.*]]: tensor<8x?xf32>, %[[VAL_2:.*]]: tensor<8x?xf32>) -> tensor<8x?xf32> {
-// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[VAL_4:.*]] = tensor.dim %[[VAL_0]], %[[VAL_3]] : tensor<8x?xf32>
-// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index
-// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0.000000e+00 : f32
-// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 8 : index
-// CHECK: %[[VAL_8:.*]] = vector.create_mask %[[VAL_7]], %[[VAL_4]] : vector<8x[32]xi1>
-// CHECK: %[[VAL_9:.*]] = vector.mask %[[VAL_8]] { vector.transfer_read %[[VAL_0]][%[[VAL_5]], %[[VAL_5]]], %[[VAL_6]] {in_bounds = [true, true]} : tensor<8x?xf32>, vector<8x[32]xf32> } : vector<8x[32]xi1> -> vector<8x[32]xf32>
-// CHECK: %[[VAL_10:.*]] = arith.constant 0.000000e+00 : f32
-// CHECK: %[[VAL_11:.*]] = vector.mask %[[VAL_8]] { vector.transfer_read %[[VAL_1]][%[[VAL_5]], %[[VAL_5]]], %[[VAL_10]] {in_bounds = [true, true]} : tensor<8x?xf32>, vector<8x[32]xf32> } : vector<8x[32]xi1> -> vector<8x[32]xf32>
-// CHECK: %[[VAL_12:.*]] = arith.constant 0.000000e+00 : f32
-// CHECK: %[[VAL_13:.*]] = vector.mask %[[VAL_8]] { vector.transfer_read %[[VAL_2]][%[[VAL_5]], %[[VAL_5]]], %[[VAL_12]] {in_bounds = [true, true]} : tensor<8x?xf32>, vector<8x[32]xf32> } : vector<8x[32]xi1> -> vector<8x[32]xf32>
-// CHECK: %[[VAL_14:.*]] = arith.addf %[[VAL_9]], %[[VAL_11]] : vector<8x[32]xf32>
-// CHECK: %[[VAL_15:.*]] = arith.constant 0 : index
-// CHECK: %[[VAL_16:.*]] = vector.mask %[[VAL_8]] { vector.transfer_write %[[VAL_14]], %[[VAL_2]][%[[VAL_15]], %[[VAL_15]]] {in_bounds = [true, true]} : vector<8x[32]xf32>, tensor<8x?xf32> } : vector<8x[32]xi1> -> tensor<8x?xf32>
-
-
-module attributes {transform.with_named_sequence} {
- transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- transform.structured.vectorize %0 vector_sizes [8, [32]] : !transform.any_op
- transform.yield
- }
-}
-
-// -----
-
-func.func @vectorize_static_shape_with_mask(%arg0: tensor<8x30xf32>,
- %arg1: tensor<8x30xf32>,
- %arg2: tensor<8x30xf32>) -> tensor<8x30xf32> {
- %0 = linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
- affine_map<(d0, d1) -> (d0, d1)>,
- affine_map<(d0, d1) -> (d0, d1)>],
- iterator_types = ["parallel", "parallel"] }
- ins(%arg0, %arg1 : tensor<8x30xf32>, tensor<8x30xf32>)
- outs(%arg2 : tensor<8x30xf32>) {
- ^bb(%in0: f32, %in1: f32, %out: f32) :
- %0 = arith.addf %in0, %in1 : f32
- linalg.yield %0 : f32
- } -> tensor<8x30xf32>
- return %0 : tensor<8x30xf32>
-}
-
-// CHECK-LABEL: func.func @vectorize_static_shape_with_mask(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<8x30xf32>, %[[VAL_1:.*]]: tensor<8x30xf32>, %[[VAL_2:.*]]: tensor<8x30xf32>) -> tensor<8x30xf32> {
-// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index
-// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0.000000e+00 : f32
-// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 8 : index
-// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 30 : index
-// CHECK: %[[VAL_7:.*]] = vector.create_mask %[[VAL_5]], %[[VAL_6]] : vector<8x[32]xi1>
-// CHECK: %[[VAL_8:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %[[VAL_0]][%[[VAL_3]], %[[VAL_3]]], %[[VAL_4]] {in_bounds = [true, true]} : tensor<8x30xf32>, vector<8x[32]xf32> } : vector<8x[32]xi1> -> vector<8x[32]xf32>
-// CHECK: %[[VAL_9:.*]] = arith.constant 0.000000e+00 : f32
-// CHECK: %[[VAL_10:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %[[VAL_1]][%[[VAL_3]], %[[VAL_3]]], %[[VAL_9]] {in_bounds = [true, true]} : tensor<8x30xf32>, vector<8x[32]xf32> } : vector<8x[32]xi1> -> vector<8x[32]xf32>
-// CHECK: %[[VAL_11:.*]] = arith.constant 0.000000e+00 : f32
-// CHECK: %[[VAL_12:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %[[VAL_2]][%[[VAL_3]], %[[VAL_3]]], %[[VAL_11]] {in_bounds = [true, true]} : tensor<8x30xf32>, vector<8x[32]xf32> } : vector<8x[32]xi1> -> vector<8x[32]xf32>
-// CHECK: %[[VAL_13:.*]] = arith.addf %[[VAL_8]], %[[VAL_10]] : vector<8x[32]xf32>
-// CHECK: %[[VAL_14:.*]] = arith.constant 0 : index
-// CHECK: %[[VAL_15:.*]] = vector.mask %[[VAL_7]] { vector.transfer_write %[[VAL_13]], %[[VAL_2]][%[[VAL_14]], %[[VAL_14]]] {in_bounds = [true, true]} : vector<8x[32]xf32>, tensor<8x30xf32> } : vector<8x[32]xi1> -> tensor<8x30xf32>
-
-module attributes {transform.with_named_sequence} {
- transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- transform.structured.vectorize %0 vector_sizes [8, [32]] : !transform.any_op
- transform.yield
- }
-}
-
-// -----
-
-// NOTE: Often, non-trailing scalable sizes are problematic - there are no
-// "scalable" arrays of vectors at the LLVM level (multi-dim vectors are
-// decomposed into arrays of aggregates). However, the trailing dim in this
-// case is 1 and that can be folded away later.
-
-func.func @vectorize_dynamic_fill_leading_scalable(%A : tensor<?x?xf32>, %arg0 : f32) -> tensor<?x?xf32> {
- %0 = linalg.fill ins(%arg0 : f32) outs(%A : tensor<?x?xf32>) -> tensor<?x?xf32>
- return %0 : tensor<?x?xf32>
-}
-
-// CHECK-LABEL: func.func @vectorize_dynamic_fill_leading_scalable
-// CHECK: %[[DIM0:.*]] = tensor.dim
-// CHECK: %[[DIM1:.*]] = tensor.dim
-// CHECK: %[[MASK:.*]] = vector.create_mask %[[DIM0]], %[[DIM1]] : vector<[8]x1xi1>
-// CHECK: %[[BCAST:.*]] = vector.broadcast %{{.*}} : f32 to vector<[8]x1xf32>
-// CHECK: vector.mask %[[MASK]] { vector.transfer_write %[[BCAST]], {{.*}} {in_bounds = [true, true]} : vector<[8]x1xf32>, tensor<?x?xf32> } : vector<[8]x1xi1>
-
-module attributes {transform.with_named_sequence} {
- transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["linalg.fill"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- transform.structured.vectorize %0 vector_sizes [[8], 1] : !transform.any_op
- transform.yield
- }
-}
-
-// -----
-
-#map = affine_map<(d0) -> (d0)>
-func.func @vectorize_linalg_index(%arg0: tensor<?xf32>, %arg1: tensor<?xf32>) -> tensor<?xf32> {
- %0 = linalg.generic {
- indexing_maps = [#map],
- iterator_types = ["parallel"]
- } outs(%arg1 : tensor<?xf32>) {
- ^bb0(%in: f32):
- %1 = linalg.index 0 : index
- %2 = tensor.extract %arg0[%1] : tensor<?xf32>
- linalg.yield %2 : f32
- } -> tensor<?xf32>
- return %0 : tensor<?xf32>
-}
-
-// CHECK-LABEL: @vectorize_linalg_index
-// CHECK-SAME: %[[SRC:.*]]: tensor<?xf32>, %[[DST:.*]]: tensor<?xf32>
-// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
-// CHECK: %[[DST_DIM0:.*]] = tensor.dim %[[DST]], %[[C0]] : tensor<?xf32>
-// CHECK: %[[MASK:.*]] = vector.create_mask %[[DST_DIM0]] : vector<[4]xi1>
-// CHECK-DAG: %[[STEP:.+]] = vector.step : vector<[4]xindex>
-// CHECK-DAG: %[[STEP_ELEMENT:.+]] = vector.extract %[[STEP]][0] : index from vector<[4]xindex>
-
-// CHECK: %[[READ:.*]] = vector.mask %[[MASK]] { vector.transfer_read %[[SRC]][%[[STEP_ELEMENT]]], %cst {in_bounds = [true]} : tensor<?xf32>, vector<[4]xf32> } : vector<[4]xi1> -> vector<[4]xf32>
-// CHECK: %[[OUT:.*]] = vector.mask %[[MASK]] { vector.transfer_write %[[READ]], %[[DST]]{{\[}}%[[C0]]] {in_bounds = [true]} : vector<[4]xf32>, tensor<?xf32> } : vector<[4]xi1> -> tensor<?xf32>
-// CHECK: return %[[OUT]] : tensor<?xf32>
-
-module attributes {transform.with_named_sequence} {
- transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- transform.structured.vectorize %0 vector_sizes [[4]] {vectorize_nd_extract} : !transform.any_op
-
- %func = transform.structured.match ops{["func.func"]} in %arg1
- : (!transform.any_op) -> !transform.any_op
- transform.apply_patterns to %func {
- transform.apply_patterns.linalg.tiling_canonicalization
- } : !transform.any_op
- transform.yield
- }
-}
-
-// -----
-
-func.func @vectorize_dynamic_reduction_scalable_1d(%arg0: tensor<?xf32>,
- %arg1: tensor<f32>) -> tensor<f32> {
-
- %0 = linalg.reduce ins(%arg0 : tensor<?xf32>) outs(%arg1 : tensor<f32>) dimensions = [0]
- (%in: f32, %init: f32) {
- %0 = arith.addf %in, %init : f32
- linalg.yield %0 : f32
- }
- return %0 : tensor<f32>
-}
-
-// CHECK-LABEL: func.func @vectorize_dynamic_reduction_scalable_1d(
-// CHECK-SAME: %[[ARG_0:.*]]: tensor<?xf32>, %[[ARG_1:.*]]: tensor<f32>) -> tensor<f32> {
-// CHECK: %[[C0_idx:.*]] = arith.constant 0 : index
-// CHECK: %[[DIM_A0_0:.*]] = tensor.dim %[[ARG_0]], %[[C0_idx]] : tensor<?xf32>
-// CHECK: %[[C0_idx:.*]] = arith.constant 0 : index
-// CHECK: %[[C0_f32:.*]] = arith.constant 0.000000e+00 : f32
-// CHECK: %[[MASK:.*]] = vector.create_mask %[[DIM_A0_0]] : vector<[4]xi1>
-// CHECK: %[[VEC_RD_0:.*]] = vector.mask %[[MASK]] { vector.transfer_read %[[ARG_0]][%[[C0_idx]]], %[[C0_f32]] {in_bounds = [true]} : tensor<?xf32>, vector<[4]xf32> } : vector<[4]xi1> -> vector<[4]xf32>
-// CHECK: %[[C0_F32:.*]] = arith.constant 0.000000e+00 : f32
-// CHECK: %[[VEC_RD_1:.*]] = vector.transfer_read %[[ARG_1]][], %[[C0_F32]] : tensor<f32>, vector<f32>
-// CHECK: %[[ACC_f32:.*]] = vector.extract %[[VEC_RD_1]][] : f32 from vector<f32>
-// CHECK: %[[REDUCE:.*]] = vector.mask %[[MASK]] { vector.multi_reduction <add>, %[[VEC_RD_0]], %[[ACC_f32]] [0] : vector<[4]xf32> to f32 } : vector<[4]xi1> -> f32
-// CHECK: %[[VEC_f32:.*]] = vector.broadcast %[[REDUCE]] : f32 to vector<f32>
-// CHECK: %{{.*}} = vector.transfer_write %[[VEC_f32]], %[[ARG_1]][] : vector<f32>, tensor<f32>
-
-module attributes {transform.with_named_sequence} {
- transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["linalg.reduce"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- transform.structured.vectorize %0 vector_sizes [[4]] : !transform.any_op
- transform.yield
- }
-}
-
-// -----
-
-// Note: scalable version of `vectorize_dynamic_reduction` in test/Dialect/Linalg/vectorization.mlir.
-func.func @vectorize_dynamic_reduction_scalable_2d(%arg0: tensor<?x?xf32>,
- %arg1: tensor<?xf32>) -> tensor<?xf32> {
- %0 = linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
- affine_map<(d0, d1) -> (d0)>],
- iterator_types = ["parallel", "reduction"] }
- ins(%arg0 : tensor<?x?xf32>)
- outs(%arg1 : tensor<?xf32>) {
- ^bb(%in: f32, %out: f32) :
- %0 = arith.addf %in, %out : f32
- linalg.yield %0 : f32
- } -> tensor<?xf32>
- return %0 : tensor<?xf32>
-}
-
-// CHECK-LABEL: func.func @vectorize_dynamic_reduction_scalable_2d(
-// CHECK-SAME: %[[ARG_0:.*]]: tensor<?x?xf32>, %[[ARG_1:.*]]: tensor<?xf32>) -> tensor<?xf32> {
-// CHECK: %[[C0_idx:.*]] = arith.constant 0 : index
-// CHECK: %[[DIM_A0_0:.*]] = tensor.dim %[[ARG_0]], %[[C0_idx]] : tensor<?x?xf32>
-// CHECK: %[[C1_idx:.*]] = arith.constant 1 : index
-// CHECK: %[[DIM_A0_1:.*]] = tensor.dim %[[ARG_0]], %[[C1_idx]] : tensor<?x?xf32>
-// CHECK: %[[C0_idx:.*]] = arith.constant 0 : index
-// CHECK: %[[C0_f32:.*]] = arith.constant 0.000000e+00 : f32
-// CHECK: %[[MASK_2d:.*]] = vector.create_mask %[[DIM_A0_0]], %[[DIM_A0_1]] : vector<4x[8]xi1>
-// CHECK: %[[VEC_RD_0:.*]] = vector.mask %[[MASK_2d]] { vector.transfer_read %[[ARG_0]][%[[C0_idx]], %[[C0_idx]]], %[[C0_f32]] {in_bounds = [true, true]} : tensor<?x?xf32>, vector<4x[8]xf32> } : vector<4x[8]xi1> -> vector<4x[8]xf32>
-// CHECK: %[[C0_f32:.*]] = arith.constant 0.000000e+00 : f32
-// CHECK: %[[MASK_1d:.*]] = vector.create_mask %[[DIM_A0_0]] : vector<4xi1>
-// CHECK: %[[VEC_RD_1:.*]] = vector.mask %[[MASK_1d]] { vector.transfer_read %[[ARG_1]][%[[C0_idx]]], %[[C0_f32]] {in_bounds = [true]} : tensor<?xf32>, vector<4xf32> } : vector<4xi1> -> vector<4xf32>
-// CHECK: %[[REDUCE:.*]] = vector.mask %[[MASK_2d]] { vector.multi_reduction <add>, %[[VEC_RD_0]], %[[VEC_RD_1]] [1] : vector<4x[8]xf32> to vector<4xf32> } : vector<4x[8]xi1> -> vector<4xf32>
-// CHECK: %[[C0_idx:.*]] = arith.constant 0 : index
-// CHECK: %{{.*}} = vector.mask %[[MASK_1d]] { vector.transfer_write %[[REDUCE]], %[[ARG_1]][%[[C0_idx]]] {in_bounds = [true]} : vector<4xf32>, tensor<?xf32> } : vector<4xi1> -> tensor<?xf32>
-
-module attributes {transform.with_named_sequence} {
- transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- transform.structured.vectorize %0 vector_sizes [4, [8]] : !transform.any_op
- transform.yield
- }
-}
-
-// -----
-
-func.func @vectorize_dynamic_matvec_trailing_reduction_dim(%arg0: tensor<?x?xf32>,
- %arg1: tensor<?xf32>,
- %arg2: tensor<?xf32>) {
- linalg.matvec ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?xf32>)
- outs(%arg2 : tensor<?xf32>) -> tensor<?xf32>
- return
-}
-
-// CHECK-LABEL: func.func @vectorize_dynamic_matvec_trailing_reduction_dim(
-// CHECK-SAME: %[[ARG_0:.*]]: tensor<?x?xf32>, %[[ARG_1:.*]]: tensor<?xf32>, %[[ARG_2:.*]]: tensor<?xf32>) {
-// CHECK: %[[C0_idx:.*]] = arith.constant 0 : index
-// CHECK: %[[DIM_A0_0:.*]] = tensor.dim %[[ARG_0]], %[[C0_idx]] : tensor<?x?xf32>
-// CHECK: %[[C1_idx:.*]] = arith.constant 1 : index
-// CHECK: %[[DIM_A0_1:.*]] = tensor.dim %[[ARG_0]], %[[C1_idx]] : tensor<?x?xf32>
-// CHECK: %[[C0_idx:.*]] = arith.constant 0 : index
-// CHECK: %[[C0_f32:.*]] = arith.constant 0.000000e+00 : f32
-// CHECK: %[[MASK_2d:.*]] = vector.create_mask %[[DIM_A0_0]], %[[DIM_A0_1]] : vector<4x[4]xi1>
-// CHECK: %[[VEC_RD_0:.*]] = vector.mask %[[MASK_2d]] { vector.transfer_read %[[ARG_0]][%[[C0_idx]], %[[C0_idx]]], %[[C0_f32]] {in_bounds = [true, true]} : tensor<?x?xf32>, vector<4x[4]xf32> } : vector<4x[4]xi1> -> vector<4x[4]xf32>
-// CHECK: %[[C0_f32:.*]] = arith.constant 0.000000e+00 : f32
-// CHECK: %[[MASK_d1:.*]] = vector.create_mask %[[DIM_A0_1]] : vector<[4]xi1>
-// CHECK: %[[VEC_RD_1:.*]] = vector.mask %[[MASK_d1]] { vector.transfer_read %[[ARG_1]][%[[C0_idx]]], %[[C0_f32]] {in_bounds = [true, true], permutation_map = #map} : tensor<?xf32>, vector<4x[4]xf32> } : vector<[4]xi1> -> vector<4x[4]xf32>
-// CHECK: %[[C0_f32:.*]] = arith.constant 0.000000e+00 : f32
-// CHECK: %[[MASK_d2:.*]] = vector.create_mask %[[DIM_A0_0]] : vector<4xi1>
-// CHECK: %[[VEC_RD_2:.*]] = vector.mask %[[MASK_d2]] { vector.transfer_read %[[ARG_2]][%[[C0_idx]]], %[[C0_f32]] {in_bounds = [true]} : tensor<?xf32>, vector<4xf32> } : vector<4xi1> -> vector<4xf32>
-// CHECK: %[[MUL:.*]] = arith.mulf %[[VEC_RD_0:.*]], %[[VEC_RD_1:.*]] : vector<4x[4]xf32>
-// CHECK: %[[REDUCE:.*]] = vector.mask %[[MASK_2d]] { vector.multi_reduction <add>, %[[MUL]], %[[VEC_RD_2]] [1] : vector<4x[4]xf32> to vector<4xf32> } : vector<4x[4]xi1> -> vector<4xf32>
-// CHECK: %[[C0_idx:.*]] = arith.constant 0 : index
-// CHECK: %{{.*}} = vector.mask %[[MASK_d2]] { vector.transfer_write %[[REDUCE]], %[[ARG_2]][%[[C0_idx]]] {in_bounds = [true]} : vector<4xf32>, tensor<?xf32> } : vector<4xi1> -> tensor<?xf32>
-
-module attributes {transform.with_named_sequence} {
- transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["linalg.matvec"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- transform.structured.vectorize %0 vector_sizes [4, [4]] : !transform.any_op
- transform.yield
- }
-}
-
-// -----
-
-func.func @vectorize_dynamic_generic_matvec_leading_parallel_dim(%arg0: tensor<?x?xf32>,
- %arg1: tensor<?xf32>,
- %arg2: tensor<?xf32>) -> tensor<?xf32> {
- %0 = linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
- affine_map<(d0, d1) -> (d1)>,
- ...
[truncated]
|
This patch moves scalable vectorization tests into an existing generic vectorization test file: * vectorization-scalable.mlir --> merged into vectorization.mlir Rationale: * Most tests in vectorization-scalable.mlir are variants of existing tests in vectorization.mlir. Keeping them together improves maintainability. * Consolidating tests makes it easier to spot gaps in coverage for regular vectorization. * In the Vector dialect, we don't separate tests for scalable vectors; this change aligns Linalg with that convention. Notable changes beyond moving tests: * Updated one of the two matrix-vector multiplication tests to use `linalg.matvec` instead of `linalg.generic`. CHECK lines remain unchanged. * Simplified the lone `linalg.index` test by removing an unnecessary `tensor.extract`. Also removed canonicalization patterns from the TD sequence for consistency with other tests. This patch contributes to the implementation of llvm#141025 — please refer to that ticket for full context.
0e103f6
to
940cd17
Compare
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: remove one blank line
} | ||
|
||
// CHECK-LABEL: @vectorize_dynamic_identity_scalable |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
optional: I personally don't like a blank line between the end of the function and the start of the check. I don't find any LLVM style guide about it, so I marked it optional. Also, I think different people have different style.
Sharing a rule that I learned from google c++ style guide: The more code that fits on one screen, the easier it is to follow and understand the control flow of the program.
, and I think it can be applied to checks. (https://google.github.io/styleguide/cppguide.html#Vertical_Whitespace)
Note: it already happens on my laptop monitor. E.g., the last line is cut in my laptop monitor in the below vectorize_dynamic_reduction_2d_scalable
test. I'll be able to see the full checks if the blank line is removed.

There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for sharing your perspective! I always appreciate when people explain what works for them - it's genuinely helpful. In this case, I’m just following the existing convention in the file, so I’d prefer not to diverge 😅
Regarding empty lines more generally, I find them useful for separating high-level logic. My reading of the Google C++ Style Guide supports that too:
Use whitespace purposefully to provide separation in that flow.
That said, what's considered “good density” of empty lines can be pretty subjective - definitely feels like personal preference territory. My main point is just that there are reasonable arguments on both sides.
Note: it already happens on my laptop monitor. E.g., the last line is cut in my laptop monitor in the below vectorize_dynamic_reduction_2d_scalable test.
That’s fair. But I also think there are broader factors affecting readability on smaller screens:
- The `CHECK` lines are super wide - manageable on big screens but noisy on laptops, especially with auto-wrap. Perhaps wide lines should be discouraged?
- There’s a lot of repetition in the output (e.g., 3x `arith.constant 0 : index` and 3x `arith.constant 0.000000e+00 : f32` lines). That’s something we’re trying to improve via constant caching: [mlir][linalg] Simplify vectorization test output using `-canonicalize -cse` #138265 (see my bottom comments there).
I'll be able to see the full checks if the blank line is removed.
True! Removing the blank line could help fit in one more line of code, which is definitely a positive. Maybe something like this could be added to the MLIR Testing Guide as a tip? (*)
(*) That page doesn’t render well - I’ve been meaning to tweak the layout when I get a moment.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The CHECK lines are super wide - manageable on big screens but noisy on laptops, especially with auto-wrap. Perhaps wide-lines should be discouraged?
I was not going to bring up this topic, but I can share what I learned recently:
https://mlir.llvm.org/getting_started/TestingGuide/#filecheck-best-practices
FileCheck tests should be as self-contained as possible and focus on testing the minimal set of functionalities needed.
IMO, people (including me) have added some unnecessary checks to the lit tests. There are several cases. E.g., sometimes I think maybe we should not check some types. E.g., I think we don't really care what the type is for `C1_IDX` and `DIM_A0_1`. It is hard to make a clear rule, though.
// CHECK: %[[C1_IDX:.*]] = arith.constant 1 : index
// CHECK: %[[DIM_A0_1:.*]] = tensor.dim %[[ARG_0]], %[[C1_IDX]] : tensor<?x?xf32>
We should also request changes for future checks from dumb tools. (Maybe there are good tools that can generate good checks.) Some existing VAL_XXX
checks are very dumb to me, and I appreciate that you spent plenty of time to fix them. :)
For some wide-line checks, e.g., masking, we may break them into a couple of lines using the `CHECK-SAME` trick, IMO.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I was not going to bring up this topic, but I can share what I learned recently:
https://mlir.llvm.org/getting_started/TestingGuide/#filecheck-best-practices
I try to implement it in all my tests, but I am also discovering that:
- Some MLIR outputs are just inherently long and there is little that can be done about it (some examples here: https://github.com/llvm/llvm-project/blob/main/mlir/test/Dialect/ArmNeon/lower-to-arm-neon.mlir).
minimal set of functionalities
can be hard to identify if you are new to some area. Folks with more experience will know better what's e.g. "trivial and does not require checking".
That said, I totally agree we can and should do better - including me 😅
For some wide-lines checks, e.g., masking, we may break them into a couple of lines using CHECK-SAME trick, IMO.
Yes, I really like this idea!
I think we don't really care what the type is for C1_IDX and DIM_A0_1
We usually don't. However, in many cases the only option is to either add the type (which caries useful info) or some arbitrary index. Basically:
// With type info:
// CHECK: %[[C0_IDX:.*]] = arith.constant 0 : index
(...)
// CHECK: %[[C0_f32:.*]] = arith.constant 0.000000e+00 : f32
vs:
// With an index:
// CHECK: %[[C0_0:.*]] = arith.constant 0 : index
(...)
// CHECK: %[[C0_1:.*]] = arith.constant 0.000000e+00 : f32
🤷🏻 In practice, I find that `%[[C0_1:.*]] = arith.constant 0.000000e+00 : f32` usually represents the padding value, so `%[[PAD:.*]] = arith.constant 0.000000e+00 : f32` often works.
That's quite a tangent here 😅 😂
Remove redundant empty line, fix capitalisation
1d50c6c
to
1c58b6e
Compare
) This patch moves scalable vectorization tests into an existing generic vectorization test file: * vectorization-scalable.mlir --> merged into vectorization.mlir Rationale: * Most tests in vectorization-scalable.mlir are variants of existing tests in vectorization.mlir. Keeping them together improves maintainability. * Consolidating tests makes it easier to spot gaps in coverage for regular vectorization. * In the Vector dialect, we don't separate tests for scalable vectors; this change aligns Linalg with that convention. Notable changes beyond moving tests: * Updated one of the two matrix-vector multiplication tests to use `linalg.matvec` instead of `linalg.generic`. CHECK lines remain unchanged. * Simplified the lone `linalg.index` test by removing an unnecessary `tensor.extract`. Also removed canonicalization patterns from the TD sequence for consistency with other tests. This patch contributes to the implementation of llvm#141025 — please refer to that ticket for full context.
This patch moves scalable vectorization tests into an existing generic
vectorization test file:
* vectorization-scalable.mlir --> merged into vectorization.mlir

Rationale:
* Most tests in vectorization-scalable.mlir are variants of existing
  tests in vectorization.mlir. Keeping them together improves
  maintainability.
* Consolidating tests makes it easier to spot gaps in coverage for
  regular vectorization.
* In the Vector dialect, we don't separate tests for scalable vectors;
  this change aligns Linalg with that convention.
Notable changes beyond moving tests:
* Updated one of the two matrix-vector multiplication tests to use
  `linalg.matvec` instead of `linalg.generic`. CHECK lines remain
  unchanged.
* Simplified the lone `linalg.index` test by removing an unnecessary
  `tensor.extract`. Also removed canonicalization patterns from the
  TD sequence for consistency with other tests.
This patch contributes to the implementation of #141025 — please refer
to that ticket for full context.