-
Notifications
You must be signed in to change notification settings - Fork 13.3k
[MLIR][XeGPU] refine verifier for TensorDescType #137226
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
[MLIR][XeGPU] refine verifier for TensorDescType #137226
Conversation
@llvm/pr-subscribers-mlir-gpu @llvm/pr-subscribers-mlir Author: Chao Chen (chencha3) ChangesThis PR updates the verifier of TensorDescType after the extension of LayoutAttr in #132425. Full diff: https://github.com/llvm/llvm-project/pull/137226.diff 5 Files Affected:
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h
index d6c51d20571fd..8e2784f40ad39 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h
@@ -25,12 +25,14 @@ class TensorDescType;
} // namespace xegpu
} // namespace mlir
-#include <mlir/Dialect/XeGPU/IR/XeGPUDialect.h.inc>
#include <mlir/Dialect/XeGPU/IR/XeGPUEnums.h.inc>
#define GET_ATTRDEF_CLASSES
#include <mlir/Dialect/XeGPU/IR/XeGPUAttrs.h.inc>
#define GET_TYPEDEF_CLASSES
#include <mlir/Dialect/XeGPU/IR/XeGPUTypes.h.inc>
+
+#include <mlir/Dialect/XeGPU/IR/XeGPUDialect.h.inc>
+
#define GET_OP_CLASSES
#include <mlir/Dialect/XeGPU/IR/XeGPU.h.inc>
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td
index fb5a1e6f1db0c..549018b61d6fb 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td
@@ -36,6 +36,12 @@ def XeGPU_Dialect : Dialect {
let useDefaultTypePrinterParser = true;
let useDefaultAttributePrinterParser = true;
+
+ let extraClassDeclaration = [{
+ /// Checks if the given shape can be evenly distributed based on the layout
+ /// and data factors provided by the LayoutAttr.
+ static bool isEvenlyDistributable(llvm::ArrayRef<int64_t> shape, xegpu::LayoutAttr attr);
+ }];
}
#endif // MLIR_DIALECT_XEGPU_IR_XEGPUDIALECT_TD
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
index b865b80f0075e..8694d2f950dd9 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
@@ -6,6 +6,7 @@
//
//===----------------------------------------------------------------------===//
+#include "mlir/Dialect/Utils/IndexingUtils.h"
#include "mlir/Dialect/XeGPU/IR/XeGPU.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/DialectImplementation.h"
@@ -30,6 +31,61 @@ void XeGPUDialect::initialize() {
>();
}
+bool XeGPUDialect::isEvenlyDistributable(llvm::ArrayRef<int64_t> shape,
+ xegpu::LayoutAttr attr) {
+ assert(attr && "Layout attribute is missing.");
+
+ auto getSubShapeOrNull =
+ [&](llvm::ArrayRef<int64_t> shape, DenseI32ArrayAttr layout,
+ DenseI32ArrayAttr data,
+ bool use_rr = true) -> std::optional<SmallVector<int64_t>> {
+ llvm::SmallVector<int64_t> newShape(shape);
+ if (layout) {
+ auto vec = llvm::to_vector_of<int64_t>(layout.asArrayRef());
+ if (vec.size() != shape.size())
+ return std::nullopt;
+ auto ratio = computeShapeRatio(shape, vec);
+ if (!ratio.has_value())
+ return std::nullopt;
+ newShape = ratio.value();
+ }
+
+ if (data) {
+ auto vec = llvm::to_vector_of<int64_t>(data.asArrayRef());
+ if (vec.size() != shape.size())
+ return std::nullopt;
+ auto ratio = computeShapeRatio(newShape, vec);
+ if (!ratio.has_value() && use_rr)
+ ratio = computeShapeRatio(vec, newShape);
+ if (!ratio.has_value())
+ return std::nullopt;
+
+ // if data is not null, we always return it for next phase.
+ newShape = vec;
+ }
+ return newShape;
+ };
+
+ // check the sgLayout and sgData
+ auto maybeSgShape =
+ getSubShapeOrNull(shape, attr.getSgLayout(), attr.getSgData());
+ if (!maybeSgShape)
+ return false;
+ auto sgShape = maybeSgShape.value();
+
+ // check InstData, it neither have layout nor need round-robin
+ auto maybeInstShape =
+ getSubShapeOrNull(sgShape, nullptr, attr.getInstData(), false);
+ if (!maybeInstShape)
+ return false;
+ auto instShape = maybeInstShape.value();
+
+ // check LaneLayout and LaneData
+ auto maybeLaneShape = getSubShapeOrNull(instShape, attr.getLaneLayout(),
+ attr.getLaneData(), false);
+ return maybeLaneShape.has_value();
+}
+
//===----------------------------------------------------------------------===//
// XeGPU_BlockTensorDescAttr
//===----------------------------------------------------------------------===//
@@ -241,7 +297,7 @@ LogicalResult TensorDescType::verify(
llvm::ArrayRef<int64_t> shape, mlir::Type elementType,
mlir::Attribute encoding, mlir::Attribute layout) {
size_t rank = shape.size();
- // Low-pressure types are packed in 32-bit units.
+ // Low-precision types are packed in 32-bit units.
int32_t packingFactor = 32 / elementType.getIntOrFloatBitWidth();
if (rank != 1 && rank != 2)
return emitError() << "expected 1D or 2D tensor";
@@ -268,23 +324,21 @@ LogicalResult TensorDescType::verify(
}
}
- if (auto blockAttr =
- mlir::dyn_cast_if_present<BlockTensorDescAttr>(encoding)) {
+ auto blockAttr = mlir::dyn_cast_if_present<BlockTensorDescAttr>(encoding);
+ if (blockAttr) {
MemorySpaceAttr memorySpaceAttr = blockAttr.getMemorySpace();
if (rank == 2 && memorySpaceAttr &&
memorySpaceAttr.getValue() == MemorySpace::SLM)
return emitError() << "SLM is not supported for 2D block tensor";
}
- if (auto layoutAttr = llvm::dyn_cast_if_present<LayoutAttr>(layout)) {
-
+ auto layoutAttr = llvm::dyn_cast_if_present<LayoutAttr>(layout);
+ if (layoutAttr) {
if (rank != (size_t)layoutAttr.getRank())
return emitError() << "expected layout rank to match tensor rank";
- ArrayRef<int32_t> laneLayout = layoutAttr.getLaneLayout().asArrayRef();
- ArrayRef<int32_t> laneData = layoutAttr.getLaneData().asArrayRef();
-
- if (scatterAttr) {
+ auto laneData = layoutAttr.getLaneData();
+ if (scatterAttr && laneData) {
// Validate subgroup mapping rules for scattered tensors.
// A work-item's slice of the tensor with shape [sg_size] or
// [sg_size, chunk_size] will be [1] or [1, 32/element_ty_bit_width]
@@ -294,20 +348,19 @@ LogicalResult TensorDescType::verify(
if (rank > 1 && laneData[0] != 1)
return emitError()
<< "cannot map over non-contiguous scattered row elements";
- if (laneData.back() != packingFactor)
+ if (laneData[rank - 1] != packingFactor)
return emitError() << "work item data mapping must match the number of "
"contiguous elements";
}
- for (size_t i = 0; i < shape.size(); ++i) {
- uint32_t numElemPerWi = laneLayout[i] * laneData[i];
- if (shape[i] < numElemPerWi || shape[i] % numElemPerWi != 0)
- return emitError() << "cannot distribute " << shape[i] << " over "
- << laneLayout[i] << " work items with "
- << laneData[i] << " elements each";
+ if (!XeGPUDialect::isEvenlyDistributable(shape, layoutAttr)) {
+ std::string shapeStr;
+ llvm::raw_string_ostream stream(shapeStr);
+ llvm::interleaveComma(shape, stream);
+ return emitError() << "cannot distribute [" << shapeStr << "] using "
+ << layoutAttr;
}
}
-
return success();
}
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
index e0e25365220b5..f9d7e013826ed 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
@@ -73,34 +73,6 @@ static bool isWriteHintOrNone(const CachePolicyAttr &attr) {
kind == CachePolicy::WRITE_BACK || kind == CachePolicy::WRITE_THROUGH;
}
-// Checks if the given shape is evenly distributed based on the layout
-// and data factors provided by the LayoutAttr. The function ensures that
-// each dimension of the shape can be evenly divided by the corresponding
-// data factor, and the resulting quotient can be evenly divided by the
-// layout factor. Returns `true` if the shape is evenly distributed,
-// otherwise `false`.
-static bool isEvenDistributed(llvm::ArrayRef<int64_t> shape,
- xegpu::LayoutAttr attr) {
- assert(attr && "Layout attribute is missing.");
- llvm::SmallVector<int32_t> defaults(shape.size(), 1);
- llvm::ArrayRef<int32_t> layout, data;
- if (auto sg_layout = attr.getSgLayout()) {
- layout = sg_layout.asArrayRef();
- auto sg_data = attr.getSgData();
- data = sg_data ? sg_data.asArrayRef() : defaults;
- } else {
- layout = attr.getLaneLayout().asArrayRef();
- auto lane_data = attr.getLaneData();
- data = lane_data ? lane_data.asArrayRef() : defaults;
- }
- for (auto [dimSize, dataFactor, layoutFactor] :
- llvm::zip_equal(shape, data, layout)) {
- if (dimSize % dataFactor != 0 || (dimSize / dataFactor) % layoutFactor != 0)
- return false;
- }
- return true;
-}
-
static LogicalResult
isValidGatherScatterParams(Type maskTy, VectorType valueTy,
TensorDescType tdescTy, UnitAttr transposeAttr,
@@ -685,10 +657,10 @@ LogicalResult ConvertLayoutOp::verify() {
"expected srcMap and resMap be WgLayout or SgLayout at the same time.");
auto shape = getSource().getType().getShape();
- if (!isEvenDistributed(shape, srcMap))
+ if (!XeGPUDialect::isEvenlyDistributable(shape, srcMap))
return emitOpError("invalid srcMap, data cannot be evenly distributed.");
- if (!isEvenDistributed(shape, resMap))
+ if (!XeGPUDialect::isEvenlyDistributable(shape, resMap))
return emitOpError("invalid resMap, data cannot be evenly distributed.");
return mlir::success();
diff --git a/mlir/test/Dialect/XeGPU/invalid.mlir b/mlir/test/Dialect/XeGPU/invalid.mlir
index 67ed89e11b4c9..2fd4d6280649c 100644
--- a/mlir/test/Dialect/XeGPU/invalid.mlir
+++ b/mlir/test/Dialect/XeGPU/invalid.mlir
@@ -404,7 +404,7 @@ func.func @tensor_desc_1D_invalid_map_data(%src: memref<24x32xf32>) {
// -----
func.func @tensor_desc_invalid_map_layout(%src: memref<24x32xf32>) {
%0 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf32> ->
- // expected-error@+1 {{cannot distribute 8 over 16 work items with 1 elements each}}
+ // expected-error@+1 {{cannot distribute [4, 8] using #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>}}
!xegpu.tensor_desc<4x8xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
return
}
@@ -412,7 +412,7 @@ func.func @tensor_desc_invalid_map_layout(%src: memref<24x32xf32>) {
// -----
func.func @tensor_desc_invalid_map_layout_1(%src: memref<24x32xf32>) {
%0 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf32> ->
- // expected-error@+1 {{cannot distribute 4 over 8 work items with 1 elements each}}
+ // expected-error@+1 {{cannot distribute [4, 8] using #xegpu.layout<lane_layout = [8, 2], lane_data = [1, 1]>}}
!xegpu.tensor_desc<4x8xf32, #xegpu.layout<lane_layout = [8, 2], lane_data = [1, 1]>>
return
}
@@ -420,7 +420,7 @@ func.func @tensor_desc_invalid_map_layout_1(%src: memref<24x32xf32>) {
// -----
func.func @tensor_desc_invalid_map_data(%src: memref<24x32xf32>) {
%0 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf32> ->
- // expected-error@+1 {{cannot distribute 4 over 2 work items with 4 elements each}}
+ // expected-error@+1 {{cannot distribute [4, 8] using #xegpu.layout<lane_layout = [2, 8], lane_data = [4, 1]>}}
!xegpu.tensor_desc<4x8xf32, #xegpu.layout<lane_layout = [2, 8], lane_data = [4, 1]>>
return
}
@@ -428,7 +428,7 @@ func.func @tensor_desc_invalid_map_data(%src: memref<24x32xf32>) {
// -----
func.func @tensor_desc_invalid_map_data_1(%src: memref<24x32xf32>) {
%0 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf32> ->
- // expected-error@+1 {{cannot distribute 4 over 8 work items with 1 elements each}}
+ // expected-error@+1 {{cannot distribute [4, 8] using #xegpu.layout<lane_layout = [8, 2], lane_data = [1, 2]>}}
!xegpu.tensor_desc<4x8xf32, #xegpu.layout<lane_layout = [8, 2], lane_data = [1, 2]>>
return
}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
let extraClassDeclaration = [{ | ||
/// Checks if the given shape can be evenly distributed based on the layout | ||
/// and data factors provided by the LayoutAttr. | ||
static bool isEvenlyDistributable(llvm::ArrayRef<int64_t> shape, xegpu::LayoutAttr attr); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This can be moved to XeGPU/Utils after #135271
xegpu::LayoutAttr attr) { | ||
assert(attr && "Layout attribute is missing."); | ||
|
||
auto getSubShapeOrNull = |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would rename this function. to smth like tryDistribute
because it works on both sg_layout/data and lane_layout/data.
auto vec = llvm::to_vector_of<int64_t>(layout.asArrayRef()); | ||
if (vec.size() != shape.size()) | ||
return std::nullopt; | ||
auto ratio = computeShapeRatio(shape, vec); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
maybe tryComputeShapeRatio
here?
if (vec.size() != shape.size()) | ||
return std::nullopt; | ||
auto ratio = computeShapeRatio(newShape, vec); | ||
if (!ratio.has_value() && use_rr) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
please document what use_rr mean?
This PR updates the verifier of TensorDescType after the extension of LayoutAttr in #132425.