
[mlir][xegpu] Add SIMT distribution patterns for UpdateNdOffset, PrefetchNd and GPU Index Ops. #136743


Draft · wants to merge 78 commits into base: main
Commits (78) · changes shown from 3 commits
39dcf9d
save work
charithaintc Mar 18, 2025
2058773
moving all ops to region working
charithaintc Mar 20, 2025
14233fa
moving all ops to region working
charithaintc Mar 20, 2025
f599873
save work
charithaintc Mar 20, 2025
220ed1f
save work
charithaintc Mar 21, 2025
2a8070f
save work
charithaintc Mar 21, 2025
4838b52
extend sg_map from subgroup to workgroup
chencha3 Mar 21, 2025
cb26979
format code
chencha3 Mar 21, 2025
273fc40
remove changes to prefetch op
chencha3 Mar 21, 2025
504d274
refine the doc for TensorDesc
chencha3 Mar 21, 2025
90e0704
save work
charithaintc Mar 21, 2025
3abe7cb
save work
charithaintc Mar 21, 2025
7c87319
Merge branch 'main' into xegpu_simt_dist
charithaintc Mar 21, 2025
596c953
update doc
chencha3 Mar 21, 2025
2065764
save work
charithaintc Mar 21, 2025
899439b
refine docs
chencha3 Mar 24, 2025
8636d15
refine docs
chencha3 Mar 24, 2025
0190418
refine util
chencha3 Mar 24, 2025
32f9272
refine convert_layout docs
chencha3 Mar 24, 2025
fe11c79
save work
charithaintc Mar 24, 2025
6e1ef3e
save work
charithaintc Mar 24, 2025
55c272c
save work
charithaintc Mar 25, 2025
ee56a3e
Merge branch 'gpu_dialect_changes' into xegpu_simt_dist
charithaintc Mar 25, 2025
1ffe5c8
save work
charithaintc Mar 26, 2025
e5521f9
save work before merging with Chao's PR
charithaintc Mar 27, 2025
350b581
Merge branch 'users/chencha3/xegpu/extend_sg_map' into xegpu_simt_dist
charithaintc Mar 27, 2025
5700c81
merge xegpu changes
charithaintc Mar 29, 2025
1619fcf
Merge branch 'main' into xegpu_simt_dist
charithaintc Mar 31, 2025
2334a97
refactor names
charithaintc Mar 31, 2025
9bddeb6
drop ScopeAttr and refine 1D layout support
chencha3 Apr 1, 2025
784ab38
refine isEvenDistributed
chencha3 Apr 1, 2025
28cf69e
format code
chencha3 Apr 1, 2025
930f1ab
Merge branch 'main' into extend_sg_map
chencha3 Apr 1, 2025
9ed0f87
fix format issue
chencha3 Apr 1, 2025
3b389bf
add 1D layout examples
chencha3 Apr 1, 2025
589d217
refactor names
charithaintc Apr 2, 2025
8b647c4
Merge branch 'users/chencha3/xegpu/extend_sg_map' into xegpu_simt_dist
charithaintc Apr 2, 2025
c6ccef2
refactor
charithaintc Apr 2, 2025
cbd0af0
refine LayoutAttr verifier
chencha3 Apr 4, 2025
3fb4fd4
add unit test
chencha3 Apr 4, 2025
77fdfef
remove dump file
chencha3 Apr 4, 2025
2751332
fix typo
chencha3 Apr 4, 2025
2a16d11
Merge branch 'main' into extend_sg_map
chencha3 Apr 4, 2025
d281a14
fix an error after mering with main
chencha3 Apr 4, 2025
fb28ce8
new line at the end of file
chencha3 Apr 7, 2025
f464662
update doc
chencha3 Apr 8, 2025
eea3c35
Merge branch 'main' into extend_sg_map
chencha3 Apr 8, 2025
7acc56d
Merge branch 'users/chencha3/xegpu/extend_sg_map' into xegpu_simt_dist
charithaintc Apr 8, 2025
270b498
Merge branch 'main' into xegpu_simt_dist
charithaintc Apr 9, 2025
2a1d373
Switch to 1D representation for SIMT
chencha3 Apr 10, 2025
2159119
refine verfier for load_nd and store_nd
chencha3 Apr 10, 2025
21f50c0
fix issues
charithaintc Apr 10, 2025
35f9cbe
Merge branch 'main' into xegpu_simt_dist
charithaintc Apr 10, 2025
c81b2e0
fix issues
charithaintc Apr 10, 2025
03bfe08
Merge branch 'users/chencha3/xegpu/xegpu_simt_2d_to_1d' into xegpu_si…
charithaintc Apr 11, 2025
2f2ec10
fix issues
charithaintc Apr 14, 2025
2ae3543
fix issues
charithaintc Apr 14, 2025
4c63916
fix issues
charithaintc Apr 14, 2025
2d9cfa3
fix build issue
charithaintc Apr 15, 2025
775d039
refine verifier for gather/scatter
chencha3 Apr 15, 2025
5520ce1
update comments
chencha3 Apr 15, 2025
6abc12a
fix tests
charithaintc Apr 15, 2025
379e186
fix
charithaintc Apr 16, 2025
aa7dbe1
fix
charithaintc Apr 16, 2025
dce6d2a
Merge branch 'users/chencha3/xegpu/xegpu_simt_2d_to_1d' into xegpu_si…
charithaintc Apr 16, 2025
ca5c7e9
fix comments
charithaintc Apr 16, 2025
ed3119c
fix comments
charithaintc Apr 16, 2025
c898de6
fix comments
charithaintc Apr 17, 2025
55be710
fix comments
charithaintc Apr 17, 2025
6e8888a
fix
charithaintc Apr 18, 2025
6ae7aa0
fix
charithaintc Apr 18, 2025
2896b34
Merge branch 'main' into xegpu_simt_dist
charithaintc Apr 18, 2025
68b1750
fix
charithaintc Apr 18, 2025
5f1798d
save work
charithaintc Apr 21, 2025
9391696
Merge branch 'main' into xegpu_simt_dist
charithaintc Apr 22, 2025
b3e6dc5
save work
charithaintc Apr 22, 2025
08d9e7b
Merge branch 'xegpu_simt_dist' into distribute_scf
charithaintc Apr 22, 2025
6447c63
add prefetch support
charithaintc Apr 22, 2025
13 changes: 7 additions & 6 deletions mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
@@ -322,21 +322,22 @@ LogicalResult TensorDescType::verify(
 // ---------------------------------------------------------------------
 // Case 1: Regular loads/stores.
 // ---------------------------------------------------------------------
-// Distributed vector shape must be:
-//   [chunk_size / lane_data_size, lane_data_size]
-// If the tensor descriptor shape is 1D, first dimension is ignored (set to 1).
-//   [lane_data_size]
+// The following conditions must be met:
+//   * tensor_desc[0] == lane_layout[0]
+// Distributed vector is a 1D vector with shape:
+//   [chunk_size]
 // ---------------------------------------------------------------------
 // Case 2: Block loads/stores
 // ---------------------------------------------------------------------
 // Additional definitions:
 //   tensor_size = tensor_desc[0] * .. * tensor_desc[r-1] * array_length
 //   n_distribution_units = tensor_size / distribution_unit_size
+//   fragment_size = n_distribution_units * lane_data_size
 // Given above definitions, the following conditions must be met:
 //   * tensor_desc[0] % (lane_layout[0] × lane_data[0]) == 0
 //   * tensor_desc[1] % (lane_layout[1] × lane_data[1]) == 0
-// Distributed vector shape must be:
-//   [n_distribution_units, lane_data_size]
+// Distributed vector is a 1D vector with shape:
+//   [fragment_size]
 FailureOr<VectorType> TensorDescType::getDistributedVectorType() {
   auto layout = llvm::dyn_cast_if_present<LayoutAttr>(getLayout());
   // It only works for subgroup level layout, which only has lane_layout
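The Case 2 arithmetic in the comment above can be sketched as a standalone helper. This is an illustrative sketch, not the MLIR implementation: the function name `blockFragmentSize` is invented, and it assumes `distribution_unit_size = subgroup_size * lane_data_size` (with `subgroup_size` the product of `lane_layout` and `lane_data_size` the product of `lane_data`), a definition elided from the excerpt.

```cpp
#include <cstddef>
#include <cstdint>
#include <functional>
#include <numeric>
#include <optional>
#include <vector>

// Sketch of the Case 2 (block load/store) rule: check even distribution
// per dimension, then compute the per-lane 1D fragment size.
std::optional<int64_t>
blockFragmentSize(const std::vector<int64_t> &tdescShape,
                  const std::vector<int64_t> &laneLayout,
                  const std::vector<int64_t> &laneData, int64_t arrayLength) {
  auto product = [](const std::vector<int64_t> &v) {
    return std::accumulate(v.begin(), v.end(), int64_t{1},
                           std::multiplies<int64_t>());
  };
  // * tensor_desc[i] % (lane_layout[i] * lane_data[i]) == 0
  for (size_t i = 0; i < tdescShape.size(); ++i)
    if (tdescShape[i] % (laneLayout[i] * laneData[i]) != 0)
      return std::nullopt;
  int64_t tensorSize = product(tdescShape) * arrayLength; // tensor_size
  int64_t laneDataSize = product(laneData);               // lane_data_size
  // Assumed: distribution_unit_size = subgroup_size * lane_data_size.
  int64_t distributionUnitSize = product(laneLayout) * laneDataSize;
  int64_t nDistributionUnits = tensorSize / distributionUnitSize;
  return nDistributionUnits * laneDataSize; // fragment_size
}
```

For example, an 8x16 descriptor with lane_layout [1, 16] and lane_data [1, 1] yields a per-lane vector of 8 elements under these assumptions.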
63 changes: 20 additions & 43 deletions mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
@@ -295,7 +295,6 @@ LogicalResult LoadNdOp::verify() {
   }
 
   // Check SIMD mode.
-  // adjusted tensor descriptor shape tracks the expected shape of the result.
   auto tdescShape = getShapeOf(tdescTy);
   auto valueShape = getShapeOf(valueTy);
 
@@ -547,38 +546,27 @@ LogicalResult LoadGatherOp::verify() {
     return emitOpError("dim-0 of the Mask and TensorDesc should be the same.");
 
   auto chunkSize = tdescTy.getChunkSize();
-  // for SIMT code, the value should be 1D vector with size of chunkSize.
-  if (valueTy.getRank() == 1 && valueTy.getNumElements() != tdescShape[0]) {
-    if (valueTy.getNumElements() != chunkSize) {
+
+  // a valid shape for SIMT case
+  if (valueTy.getRank() == 1 && valueTy.getNumElements() == chunkSize) {
+    if (tdescTy.getLayoutAttr())
       return emitOpError()
-             << "Result shape " << makeString(valueShape)
-             << " is not a valid distribution for tensor descriptor "
-             << tdescTy;
-    } else { // valid SIMT code doesn't need LayoutAttr and TransposeAttr.
-      if (tdescTy.getLayoutAttr())
-        return emitOpError()
-               << "TensorDesc doesn't need LayoutAttr for SIMT code";
-      if (getTransposeAttr())
-        return emitOpError() << "doesn't need TransposeAttr for SIMT code";
-    }
-    return success();
-  } else if (valueTy.getRank() == 1 && tdescShape[0] == chunkSize) {
-    // for 1D vector and valueTy.getNumElements() == tdescShape[0] case,
-    // it is a valid SIMT code if chunkSize happens to be the same as
-    // subgroup size, e.g., tensor_desc<16x16xf16, chunkSize = 16>
+             << "TensorDesc doesn't need LayoutAttr for SIMT code";
+    if (getTransposeAttr())
+      return emitOpError() << "doesn't need TransposeAttr for SIMT code";
     return success();
   }
 
   // For SIMD code verification.
-  if (tdescTy.getRank() == 2) {
+  if (tdescTy.getRank() == 2 && valueTy.getRank() == 2) {
     if (!getTransposeAttr())
       return emitOpError("load of rank-2 tensor has to be transposed.");
     transpose({1, 0}, tdescShape);
   }
 
   if (tdescShape != valueShape)
     return emitOpError() << "Result shape " << makeString(valueShape)
-                         << " is not consistent with tensor descriptor "
+                         << " is neither a valid distribution for SIMT nor "
+                            "consistent with the tensor descriptor for SIMD "
                          << tdescTy;
   return success();
 }
@@ -613,38 +601,27 @@ LogicalResult StoreScatterOp::verify() {
     return emitOpError("dim-0 of the Mask and TensorDesc should be the same.");
 
   auto chunkSize = tdescTy.getChunkSize();
-  // for SIMT code, the value should be 1D vector with size of chunkSize.
-  if (valueTy.getRank() == 1 && valueTy.getNumElements() != tdescShape[0]) {
-    if (valueTy.getNumElements() != chunkSize) {
+
+  // a valid shape for SIMT case
+  if (valueTy.getRank() == 1 && valueTy.getNumElements() == chunkSize) {
+    if (tdescTy.getLayoutAttr())
       return emitOpError()
-             << "Value shape " << makeString(valueShape)
-             << " is not a valid distribution for tensor descriptor "
-             << tdescTy;
-    } else { // valid SIMT code doesn't need LayoutAttr and TransposeAttr.
-      if (tdescTy.getLayoutAttr())
-        return emitOpError()
-               << "TensorDesc doesn't need LayoutAttr for SIMT code";
-      if (getTransposeAttr())
-        return emitOpError() << "doesn't need TransposeAttr for SIMT code";
-    }
-    return success();
-  } else if (valueTy.getRank() == 1 && tdescShape[0] == chunkSize) {
-    // for 1D vector and valueTy.getNumElements() == tdescShape[0] case,
-    // it is a valid SIMT code if chunkSize happens to be the same as
-    // subgroup size, e.g., tensor_desc<16x16xf16, chunkSize = 16>
+             << "TensorDesc doesn't need LayoutAttr for SIMT code";
+    if (getTransposeAttr())
+      return emitOpError() << "doesn't need TransposeAttr for SIMT code";
     return success();
   }
 
   // for SIMD code verification.
-  if (tdescTy.getRank() == 2) {
+  if (tdescTy.getRank() == 2 && valueTy.getRank() == 2) {
     if (!getTransposeAttr())
       return emitOpError("Store of a rank-2 tensor has to be transposed.");
     transpose({1, 0}, tdescShape);
   }
 
   if (tdescShape != valueShape)
     return emitOpError() << "Value shape " << makeString(valueShape)
-                         << " is not consistent with tensor descriptor "
+                         << " is neither a valid distribution for SIMT nor "
+                            "consistent with the tensor descriptor for SIMD "
                          << tdescTy;
 
   return success();
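The rewritten gather/scatter verifiers above share one decision: accept a 1D value of exactly chunk_size elements as SIMT, otherwise fall back to the SIMD shape check and report a combined error. A simplified stand-in for that control flow follows; the names `verifyGatherScatterValue` and `VerifyResult` are invented for illustration, and the real verifiers additionally reject LayoutAttr/TransposeAttr in the SIMT case, which is omitted here.

```cpp
#include <cstddef>
#include <cstdint>
#include <utility>
#include <vector>

enum class VerifyResult { SimtOk, SimdOk, Error };

// Simplified stand-in for the updated LoadGatherOp/StoreScatterOp checks.
VerifyResult verifyGatherScatterValue(std::vector<int64_t> tdescShape,
                                      std::vector<int64_t> valueShape,
                                      int64_t chunkSize, bool hasTranspose) {
  int64_t numElements = 1;
  for (int64_t d : valueShape)
    numElements *= d;
  // SIMT: a 1D value of exactly chunk_size elements is valid as-is.
  if (valueShape.size() == 1 && numElements == chunkSize)
    return VerifyResult::SimtOk;
  // SIMD: a rank-2 access must be transposed before the shape comparison.
  if (tdescShape.size() == 2 && valueShape.size() == 2) {
    if (!hasTranspose)
      return VerifyResult::Error; // rank-2 tensor has to be transposed
    std::swap(tdescShape[0], tdescShape[1]);
  }
  // Otherwise the value is neither a valid SIMT distribution nor
  // consistent with the tensor descriptor for SIMD.
  return tdescShape == valueShape ? VerifyResult::SimdOk : VerifyResult::Error;
}
```

With the descriptor from the negative tests in invalid.mlir (4x2, chunk_size = 2), a vector<6xf32> value reaches the combined error path, while a vector<2xf32> value is accepted as SIMT.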
4 changes: 2 additions & 2 deletions mlir/test/Dialect/XeGPU/invalid.mlir
@@ -255,7 +255,7 @@ func.func @test_load_gather_simt_1(%src: ui64) {
   %0 = arith.constant dense<1>: vector<4xi1>
   %cst = arith.constant dense<[0, 8, 16, 24]> : vector<4xindex>
   %1 = xegpu.create_tdesc %src, %cst : ui64, vector<4xindex> -> !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr<chunk_size = 2>>
-  // expected-error@+1 {{Result shape [6] is not a valid distribution for tensor descriptor}}
+  // expected-error@+1 {{Result shape [6] is neither a valid distribution for SIMT nor consistent with the tensor descriptor for SIMD}}
   %2 = xegpu.load %1, %0 <{l1_hint = #xegpu.cache_hint<cached>}> : !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr<chunk_size = 2>>, vector<4xi1> -> vector<6xf32>
   return
 }
@@ -266,7 +266,7 @@ func.func @test_store_scatter_simt_1(%src: ui64) {
   %cst = arith.constant dense<[0, 8, 16, 24]> : vector<4xindex>
   %val = arith.constant dense<2.9>: vector<6xf32>
   %1 = xegpu.create_tdesc %src, %cst : ui64, vector<4xindex> -> !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr<chunk_size = 2>>
-  // expected-error@+1 {{Value shape [6] is not a valid distribution for tensor descriptor}}
+  // expected-error@+1 {{Value shape [6] is neither a valid distribution for SIMT nor consistent with the tensor descriptor for SIMD}}
   xegpu.store %val, %1, %0 <{l1_hint = #xegpu.cache_hint<cached>}> : vector<6xf32>, !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr<chunk_size = 2>>, vector<4xi1>
   return
 }
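For contrast with the two negative tests above, here is a sketch of a shape the updated verifier should accept in SIMT mode. This is hypothetical, adapted from the load test by resizing the result to chunk_size; it is not taken from the PR's test suite.

```mlir
// Per-lane result is a 1D vector of chunk_size (= 2) elements; the
// descriptor carries no LayoutAttr and the op no TransposeAttr.
%2 = xegpu.load %1, %0 <{l1_hint = #xegpu.cache_hint<cached>}>
    : !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr<chunk_size = 2>>,
      vector<4xi1> -> vector<2xf32>
```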