-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[mlir][gpu] Make offset and width in gpu.rotate as attributes #150901
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
`offset` and `width` must be constants and there are constraints on their values. Update the operation definition to use attributes instead of operands.
|
@llvm/pr-subscribers-mlir-spirv @llvm/pr-subscribers-mlir-gpu Author: Hsiangkai Wang (Hsiangkai) Changes
Full diff: https://github.com/llvm/llvm-project/pull/150901.diff 6 Files Affected:
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index 1dbaf5db7b618..170616f03be2c 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -1368,12 +1368,12 @@ def GPU_ShuffleOp : GPU_Op<
def GPU_RotateOp : GPU_Op<
"rotate", [Pure, AllTypesMatch<["value", "rotateResult"]>]>,
- Arguments<(ins AnyIntegerOrFloatOr1DVector:$value, I32:$offset, I32:$width)>,
+ Arguments<(ins AnyIntegerOrFloatOr1DVector:$value, I32Attr:$offset, I32Attr:$width)>,
Results<(outs AnyIntegerOrFloatOr1DVector:$rotateResult, I1:$valid)> {
let summary = "Rotate values within a subgroup.";
let description = [{
The "rotate" op moves values across lanes in a subgroup (a.k.a., local
- invocations) within the same subgroup. The `width` argument specifies the
+ invocations) within the same subgroup. The `width` attribute specifies the
number of lanes that participate in the rotation, and must be uniform across
all participating lanes. Further, the first `width` lanes of the subgroup
must be active.
@@ -1394,9 +1394,7 @@ def GPU_RotateOp : GPU_Op<
example:
```mlir
- %offset = arith.constant 1 : i32
- %width = arith.constant 16 : i32
- %1, %2 = gpu.rotate %0, %offset, %width : f32
+ %1, %2 = gpu.rotate %0, 1, 16 : f32
```
For lane `k`, returns the value from lane `(k + cst1) % width`.
@@ -1406,11 +1404,6 @@ def GPU_RotateOp : GPU_Op<
$value `,` $offset `,` $width attr-dict `:` type($value)
}];
- let builders = [
- // Helper function that creates a rotate with constant offset/width.
- OpBuilder<(ins "Value":$value, "int32_t":$offset, "int32_t":$width)>
- ];
-
let hasVerifier = 1;
}
diff --git a/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp b/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp
index 75e65632b0cb7..a156fdd3d383e 100644
--- a/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp
+++ b/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp
@@ -507,25 +507,25 @@ LogicalResult GPURotateConversion::matchAndRewrite(
getTypeConverter<SPIRVTypeConverter>()->getTargetEnv();
unsigned subgroupSize =
targetEnv.getAttr().getResourceLimits().getSubgroupSize();
- IntegerAttr widthAttr;
- if (!matchPattern(rotateOp.getWidth(), m_Constant(&widthAttr)) ||
- widthAttr.getValue().getZExtValue() > subgroupSize)
+ unsigned width = rotateOp.getWidth();
+ if (width > subgroupSize)
return rewriter.notifyMatchFailure(
- rotateOp,
- "rotate width is not a constant or larger than target subgroup size");
+ rotateOp, "rotate width is larger than target subgroup size");
Location loc = rotateOp.getLoc();
auto scope = rewriter.getAttr<spirv::ScopeAttr>(spirv::Scope::Subgroup);
+ Value offsetVal = arith::ConstantOp::create(rewriter, loc, adaptor.getOffsetAttr());
+ Value widthVal = arith::ConstantOp::create(rewriter, loc, adaptor.getWidthAttr());
Value rotateResult = spirv::GroupNonUniformRotateKHROp::create(
- rewriter, loc, scope, adaptor.getValue(), adaptor.getOffset(),
- adaptor.getWidth());
+ rewriter, loc, scope, adaptor.getValue(), offsetVal, widthVal);
Value validVal;
- if (widthAttr.getValue().getZExtValue() == subgroupSize) {
+ if (width == subgroupSize) {
validVal = spirv::ConstantOp::getOne(rewriter.getI1Type(), loc, rewriter);
} else {
+ IntegerAttr widthAttr = adaptor.getWidthAttr();
Value laneId = gpu::LaneIdOp::create(rewriter, loc, widthAttr);
validVal = arith::CmpIOp::create(rewriter, loc, arith::CmpIPredicate::ult,
- laneId, adaptor.getWidth());
+ laneId, widthVal);
}
rewriter.replaceOp(rotateOp, {rotateResult, validVal});
diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
index d186a480c0ce5..abd2ba5680471 100644
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -1395,40 +1395,15 @@ void ShuffleOp::build(OpBuilder &builder, OperationState &result, Value value,
// RotateOp
//===----------------------------------------------------------------------===//
-void RotateOp::build(OpBuilder &builder, OperationState &result, Value value,
- int32_t offset, int32_t width) {
- build(builder, result, value,
- arith::ConstantOp::create(builder, result.location,
- builder.getI32IntegerAttr(offset)),
- arith::ConstantOp::create(builder, result.location,
- builder.getI32IntegerAttr(width)));
-}
-
LogicalResult RotateOp::verify() {
- auto offsetConstOp = getOffset().getDefiningOp<arith::ConstantOp>();
- if (!offsetConstOp)
- return emitOpError() << "offset is not a constant value";
-
- auto offsetIntAttr =
- llvm::dyn_cast<mlir::IntegerAttr>(offsetConstOp.getValue());
-
- auto widthConstOp = getWidth().getDefiningOp<arith::ConstantOp>();
- if (!widthConstOp)
- return emitOpError() << "width is not a constant value";
-
- auto widthIntAttr =
- llvm::dyn_cast<mlir::IntegerAttr>(widthConstOp.getValue());
-
- llvm::APInt offsetValue = offsetIntAttr.getValue();
- llvm::APInt widthValue = widthIntAttr.getValue();
+ uint32_t offset = getOffset();
+ uint32_t width = getWidth();
- if (!widthValue.isPowerOf2())
+ if (!llvm::isPowerOf2_32(width))
return emitOpError() << "width must be a power of two";
- if (offsetValue.sge(widthValue) || offsetValue.slt(0)) {
- int64_t widthValueInt = widthValue.getSExtValue();
- return emitOpError() << "offset must be in the range [0, " << widthValueInt
- << ")";
+ if (offset >= width) {
+ return emitOpError() << "offset must be in the range [0, " << width << ")";
}
return success();
diff --git a/mlir/test/Conversion/GPUToSPIRV/rotate.mlir b/mlir/test/Conversion/GPUToSPIRV/rotate.mlir
index b96dd37219b46..c71d22017698a 100644
--- a/mlir/test/Conversion/GPUToSPIRV/rotate.mlir
+++ b/mlir/test/Conversion/GPUToSPIRV/rotate.mlir
@@ -10,16 +10,14 @@ gpu.module @kernels {
// CHECK-LABEL: spirv.func @rotate()
gpu.func @rotate() kernel
attributes {spirv.entry_point_abi = #spirv.entry_point_abi<workgroup_size = [16, 1, 1]>} {
- %offset = arith.constant 4 : i32
- %width = arith.constant 16 : i32
%val = arith.constant 42.0 : f32
+ // CHECK: %[[VAL:.+]] = spirv.Constant 4.200000e+01 : f32
// CHECK: %[[OFFSET:.+]] = spirv.Constant 4 : i32
// CHECK: %[[WIDTH:.+]] = spirv.Constant 16 : i32
- // CHECK: %[[VAL:.+]] = spirv.Constant 4.200000e+01 : f32
// CHECK: %{{.+}} = spirv.GroupNonUniformRotateKHR <Subgroup> %[[VAL]], %[[OFFSET]], cluster_size(%[[WIDTH]]) : f32, i32, i32 -> f32
// CHECK: %{{.+}} = spirv.Constant true
- %result, %valid = gpu.rotate %val, %offset, %width : f32
+ %result, %valid = gpu.rotate %val, 4, 16 : f32
gpu.return
}
}
@@ -38,18 +36,16 @@ gpu.module @kernels {
// CHECK-LABEL: spirv.func @rotate_width_less_than_subgroup_size()
gpu.func @rotate_width_less_than_subgroup_size() kernel
attributes {spirv.entry_point_abi = #spirv.entry_point_abi<workgroup_size = [16, 1, 1]>} {
- %offset = arith.constant 4 : i32
- %width = arith.constant 8 : i32
%val = arith.constant 42.0 : f32
+ // CHECK: %[[VAL:.+]] = spirv.Constant 4.200000e+01 : f32
// CHECK: %[[OFFSET:.+]] = spirv.Constant 4 : i32
// CHECK: %[[WIDTH:.+]] = spirv.Constant 8 : i32
- // CHECK: %[[VAL:.+]] = spirv.Constant 4.200000e+01 : f32
// CHECK: %{{.+}} = spirv.GroupNonUniformRotateKHR <Subgroup> %[[VAL]], %[[OFFSET]], cluster_size(%[[WIDTH]]) : f32, i32, i32 -> f32
// CHECK: %[[INVOCATION_ID_ADDR:.+]] = spirv.mlir.addressof @__builtin__SubgroupLocalInvocationId__
// CHECK: %[[INVOCATION_ID:.+]] = spirv.Load "Input" %[[INVOCATION_ID_ADDR]]
// CHECK: %{{.+}} = spirv.ULessThan %[[INVOCATION_ID]], %[[WIDTH]]
- %result, %valid = gpu.rotate %val, %offset, %width : f32
+ %result, %valid = gpu.rotate %val, 4, 8 : f32
gpu.return
}
}
@@ -67,34 +63,10 @@ module attributes {
gpu.module @kernels {
gpu.func @rotate_with_bigger_than_subgroup_size() kernel
attributes {spirv.entry_point_abi = #spirv.entry_point_abi<workgroup_size = [16, 1, 1]>} {
- %offset = arith.constant 4 : i32
- %width = arith.constant 32 : i32
%val = arith.constant 42.0 : f32
// expected-error @+1 {{failed to legalize operation 'gpu.rotate'}}
- %result, %valid = gpu.rotate %val, %offset, %width : f32
- gpu.return
- }
-}
-
-}
-
-// -----
-
-module attributes {
- gpu.container_module,
- spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader, GroupNonUniformRotateKHR], []>,
- #spirv.resource_limits<subgroup_size = 16>>
-} {
-
-gpu.module @kernels {
- gpu.func @rotate_non_const_width(%width: i32) kernel
- attributes {spirv.entry_point_abi = #spirv.entry_point_abi<workgroup_size = [16, 1, 1]>} {
- %offset = arith.constant 4 : i32
- %val = arith.constant 42.0 : f32
-
- // expected-error @+1 {{'gpu.rotate' op width is not a constant value}}
- %result, %valid = gpu.rotate %val, %offset, %width : f32
+ %result, %valid = gpu.rotate %val, 4, 32 : f32
gpu.return
}
}
diff --git a/mlir/test/Dialect/GPU/invalid.mlir b/mlir/test/Dialect/GPU/invalid.mlir
index 162ff0662e91e..0d2dc6fa34b02 100644
--- a/mlir/test/Dialect/GPU/invalid.mlir
+++ b/mlir/test/Dialect/GPU/invalid.mlir
@@ -479,20 +479,16 @@ func.func @shuffle_unsupported_type_vec(%arg0 : vector<[4]xf32>, %arg1 : i32, %a
// -----
func.func @rotate_mismatching_type(%arg0 : f32) {
- %offset = arith.constant 4 : i32
- %width = arith.constant 16 : i32
// expected-error@+1 {{op failed to verify that all of {value, rotateResult} have same type}}
- %rotate, %valid = "gpu.rotate"(%arg0, %offset, %width) : (f32, i32, i32) -> (i32, i1)
+ %rotate, %valid = "gpu.rotate"(%arg0) { offset = 4 : i32, width = 16 : i32 } : (f32) -> (i32, i1)
return
}
// -----
func.func @rotate_unsupported_type(%arg0 : index) {
- %offset = arith.constant 4 : i32
- %width = arith.constant 16 : i32
// expected-error@+1 {{op operand #0 must be Integer or Float or fixed-length vector of Integer or Float values of ranks 1, but got 'index'}}
- %rotate, %valid = gpu.rotate %arg0, %offset, %width : index
+ %rotate, %valid = gpu.rotate %arg0, 4, 16 : index
return
}
@@ -502,55 +498,31 @@ func.func @rotate_unsupported_type_vec(%arg0 : vector<[4]xf32>) {
%offset = arith.constant 4 : i32
%width = arith.constant 16 : i32
// expected-error@+1 {{op operand #0 must be Integer or Float or fixed-length vector of Integer or Float values of ranks 1, but got 'vector<[4]xf32>'}}
- %rotate, %valid = gpu.rotate %arg0, %offset, %width : vector<[4]xf32>
+ %rotate, %valid = gpu.rotate %arg0, 4, 16 : vector<[4]xf32>
return
}
// -----
func.func @rotate_unsupported_width(%arg0 : f32) {
- %offset = arith.constant 4 : i32
- %width = arith.constant 15 : i32
// expected-error@+1 {{op width must be a power of two}}
- %rotate, %valid = "gpu.rotate"(%arg0, %offset, %width) : (f32, i32, i32) -> (f32, i1)
+ %rotate, %valid = "gpu.rotate"(%arg0) { offset = 4 : i32, width = 15 : i32 } : (f32) -> (f32, i1)
return
}
// -----
func.func @rotate_unsupported_offset(%arg0 : f32) {
- %offset = arith.constant 16 : i32
- %width = arith.constant 16 : i32
// expected-error@+1 {{op offset must be in the range [0, 16)}}
- %rotate, %valid = "gpu.rotate"(%arg0, %offset, %width) : (f32, i32, i32) -> (f32, i1)
+ %rotate, %valid = "gpu.rotate"(%arg0) { offset = 16 : i32, width = 16 : i32 }: (f32) -> (f32, i1)
return
}
// -----
func.func @rotate_unsupported_offset_minus(%arg0 : f32) {
- %offset = arith.constant -1 : i32
- %width = arith.constant 16 : i32
// expected-error@+1 {{op offset must be in the range [0, 16)}}
- %rotate, %valid = "gpu.rotate"(%arg0, %offset, %width) : (f32, i32, i32) -> (f32, i1)
- return
-}
-
-// -----
-
-func.func @rotate_offset_non_constant(%arg0 : f32, %offset : i32) {
- %width = arith.constant 16 : i32
- // expected-error@+1 {{op offset is not a constant value}}
- %rotate, %valid = "gpu.rotate"(%arg0, %offset, %width) : (f32, i32, i32) -> (f32, i1)
- return
-}
-
-// -----
-
-func.func @rotate_width_non_constant(%arg0 : f32, %width : i32) {
- %offset = arith.constant 0 : i32
- // expected-error@+1 {{op width is not a constant value}}
- %rotate, %valid = "gpu.rotate"(%arg0, %offset, %width) : (f32, i32, i32) -> (f32, i1)
+ %rotate, %valid = "gpu.rotate"(%arg0) { offset = -1 : i32, width = 16 : i32 } : (f32) -> (f32, i1)
return
}
diff --git a/mlir/test/Dialect/GPU/ops.mlir b/mlir/test/Dialect/GPU/ops.mlir
index 2aef80f73feb3..ee1fdfa4d02f0 100644
--- a/mlir/test/Dialect/GPU/ops.mlir
+++ b/mlir/test/Dialect/GPU/ops.mlir
@@ -140,9 +140,8 @@ module attributes {gpu.container_module} {
// CHECK: gpu.shuffle idx %{{.*}}, %{{.*}}, %{{.*}} : f32
%shfl3, %pred3 = gpu.shuffle idx %arg0, %offset, %width : f32
- // CHECK: gpu.rotate %{{.*}}, %{{.*}}, %{{.*}} : f32
- %rotate_width = arith.constant 16 : i32
- %rotate, %pred4 = gpu.rotate %arg0, %offset, %rotate_width : f32
+ // CHECK: gpu.rotate %{{.*}}, 3, 16 : f32
+ %rotate, %pred4 = gpu.rotate %arg0, 3, 16 : f32
"gpu.barrier"() : () -> ()
|
|
@llvm/pr-subscribers-mlir Author: Hsiangkai Wang (Hsiangkai) Changes
Full diff: https://github.com/llvm/llvm-project/pull/150901.diff 6 Files Affected:
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index 1dbaf5db7b618..170616f03be2c 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -1368,12 +1368,12 @@ def GPU_ShuffleOp : GPU_Op<
def GPU_RotateOp : GPU_Op<
"rotate", [Pure, AllTypesMatch<["value", "rotateResult"]>]>,
- Arguments<(ins AnyIntegerOrFloatOr1DVector:$value, I32:$offset, I32:$width)>,
+ Arguments<(ins AnyIntegerOrFloatOr1DVector:$value, I32Attr:$offset, I32Attr:$width)>,
Results<(outs AnyIntegerOrFloatOr1DVector:$rotateResult, I1:$valid)> {
let summary = "Rotate values within a subgroup.";
let description = [{
The "rotate" op moves values across lanes in a subgroup (a.k.a., local
- invocations) within the same subgroup. The `width` argument specifies the
+ invocations) within the same subgroup. The `width` attribute specifies the
number of lanes that participate in the rotation, and must be uniform across
all participating lanes. Further, the first `width` lanes of the subgroup
must be active.
@@ -1394,9 +1394,7 @@ def GPU_RotateOp : GPU_Op<
example:
```mlir
- %offset = arith.constant 1 : i32
- %width = arith.constant 16 : i32
- %1, %2 = gpu.rotate %0, %offset, %width : f32
+ %1, %2 = gpu.rotate %0, 1, 16 : f32
```
For lane `k`, returns the value from lane `(k + cst1) % width`.
@@ -1406,11 +1404,6 @@ def GPU_RotateOp : GPU_Op<
$value `,` $offset `,` $width attr-dict `:` type($value)
}];
- let builders = [
- // Helper function that creates a rotate with constant offset/width.
- OpBuilder<(ins "Value":$value, "int32_t":$offset, "int32_t":$width)>
- ];
-
let hasVerifier = 1;
}
diff --git a/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp b/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp
index 75e65632b0cb7..a156fdd3d383e 100644
--- a/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp
+++ b/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp
@@ -507,25 +507,25 @@ LogicalResult GPURotateConversion::matchAndRewrite(
getTypeConverter<SPIRVTypeConverter>()->getTargetEnv();
unsigned subgroupSize =
targetEnv.getAttr().getResourceLimits().getSubgroupSize();
- IntegerAttr widthAttr;
- if (!matchPattern(rotateOp.getWidth(), m_Constant(&widthAttr)) ||
- widthAttr.getValue().getZExtValue() > subgroupSize)
+ unsigned width = rotateOp.getWidth();
+ if (width > subgroupSize)
return rewriter.notifyMatchFailure(
- rotateOp,
- "rotate width is not a constant or larger than target subgroup size");
+ rotateOp, "rotate width is larger than target subgroup size");
Location loc = rotateOp.getLoc();
auto scope = rewriter.getAttr<spirv::ScopeAttr>(spirv::Scope::Subgroup);
+ Value offsetVal = arith::ConstantOp::create(rewriter, loc, adaptor.getOffsetAttr());
+ Value widthVal = arith::ConstantOp::create(rewriter, loc, adaptor.getWidthAttr());
Value rotateResult = spirv::GroupNonUniformRotateKHROp::create(
- rewriter, loc, scope, adaptor.getValue(), adaptor.getOffset(),
- adaptor.getWidth());
+ rewriter, loc, scope, adaptor.getValue(), offsetVal, widthVal);
Value validVal;
- if (widthAttr.getValue().getZExtValue() == subgroupSize) {
+ if (width == subgroupSize) {
validVal = spirv::ConstantOp::getOne(rewriter.getI1Type(), loc, rewriter);
} else {
+ IntegerAttr widthAttr = adaptor.getWidthAttr();
Value laneId = gpu::LaneIdOp::create(rewriter, loc, widthAttr);
validVal = arith::CmpIOp::create(rewriter, loc, arith::CmpIPredicate::ult,
- laneId, adaptor.getWidth());
+ laneId, widthVal);
}
rewriter.replaceOp(rotateOp, {rotateResult, validVal});
diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
index d186a480c0ce5..abd2ba5680471 100644
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -1395,40 +1395,15 @@ void ShuffleOp::build(OpBuilder &builder, OperationState &result, Value value,
// RotateOp
//===----------------------------------------------------------------------===//
-void RotateOp::build(OpBuilder &builder, OperationState &result, Value value,
- int32_t offset, int32_t width) {
- build(builder, result, value,
- arith::ConstantOp::create(builder, result.location,
- builder.getI32IntegerAttr(offset)),
- arith::ConstantOp::create(builder, result.location,
- builder.getI32IntegerAttr(width)));
-}
-
LogicalResult RotateOp::verify() {
- auto offsetConstOp = getOffset().getDefiningOp<arith::ConstantOp>();
- if (!offsetConstOp)
- return emitOpError() << "offset is not a constant value";
-
- auto offsetIntAttr =
- llvm::dyn_cast<mlir::IntegerAttr>(offsetConstOp.getValue());
-
- auto widthConstOp = getWidth().getDefiningOp<arith::ConstantOp>();
- if (!widthConstOp)
- return emitOpError() << "width is not a constant value";
-
- auto widthIntAttr =
- llvm::dyn_cast<mlir::IntegerAttr>(widthConstOp.getValue());
-
- llvm::APInt offsetValue = offsetIntAttr.getValue();
- llvm::APInt widthValue = widthIntAttr.getValue();
+ uint32_t offset = getOffset();
+ uint32_t width = getWidth();
- if (!widthValue.isPowerOf2())
+ if (!llvm::isPowerOf2_32(width))
return emitOpError() << "width must be a power of two";
- if (offsetValue.sge(widthValue) || offsetValue.slt(0)) {
- int64_t widthValueInt = widthValue.getSExtValue();
- return emitOpError() << "offset must be in the range [0, " << widthValueInt
- << ")";
+ if (offset >= width) {
+ return emitOpError() << "offset must be in the range [0, " << width << ")";
}
return success();
diff --git a/mlir/test/Conversion/GPUToSPIRV/rotate.mlir b/mlir/test/Conversion/GPUToSPIRV/rotate.mlir
index b96dd37219b46..c71d22017698a 100644
--- a/mlir/test/Conversion/GPUToSPIRV/rotate.mlir
+++ b/mlir/test/Conversion/GPUToSPIRV/rotate.mlir
@@ -10,16 +10,14 @@ gpu.module @kernels {
// CHECK-LABEL: spirv.func @rotate()
gpu.func @rotate() kernel
attributes {spirv.entry_point_abi = #spirv.entry_point_abi<workgroup_size = [16, 1, 1]>} {
- %offset = arith.constant 4 : i32
- %width = arith.constant 16 : i32
%val = arith.constant 42.0 : f32
+ // CHECK: %[[VAL:.+]] = spirv.Constant 4.200000e+01 : f32
// CHECK: %[[OFFSET:.+]] = spirv.Constant 4 : i32
// CHECK: %[[WIDTH:.+]] = spirv.Constant 16 : i32
- // CHECK: %[[VAL:.+]] = spirv.Constant 4.200000e+01 : f32
// CHECK: %{{.+}} = spirv.GroupNonUniformRotateKHR <Subgroup> %[[VAL]], %[[OFFSET]], cluster_size(%[[WIDTH]]) : f32, i32, i32 -> f32
// CHECK: %{{.+}} = spirv.Constant true
- %result, %valid = gpu.rotate %val, %offset, %width : f32
+ %result, %valid = gpu.rotate %val, 4, 16 : f32
gpu.return
}
}
@@ -38,18 +36,16 @@ gpu.module @kernels {
// CHECK-LABEL: spirv.func @rotate_width_less_than_subgroup_size()
gpu.func @rotate_width_less_than_subgroup_size() kernel
attributes {spirv.entry_point_abi = #spirv.entry_point_abi<workgroup_size = [16, 1, 1]>} {
- %offset = arith.constant 4 : i32
- %width = arith.constant 8 : i32
%val = arith.constant 42.0 : f32
+ // CHECK: %[[VAL:.+]] = spirv.Constant 4.200000e+01 : f32
// CHECK: %[[OFFSET:.+]] = spirv.Constant 4 : i32
// CHECK: %[[WIDTH:.+]] = spirv.Constant 8 : i32
- // CHECK: %[[VAL:.+]] = spirv.Constant 4.200000e+01 : f32
// CHECK: %{{.+}} = spirv.GroupNonUniformRotateKHR <Subgroup> %[[VAL]], %[[OFFSET]], cluster_size(%[[WIDTH]]) : f32, i32, i32 -> f32
// CHECK: %[[INVOCATION_ID_ADDR:.+]] = spirv.mlir.addressof @__builtin__SubgroupLocalInvocationId__
// CHECK: %[[INVOCATION_ID:.+]] = spirv.Load "Input" %[[INVOCATION_ID_ADDR]]
// CHECK: %{{.+}} = spirv.ULessThan %[[INVOCATION_ID]], %[[WIDTH]]
- %result, %valid = gpu.rotate %val, %offset, %width : f32
+ %result, %valid = gpu.rotate %val, 4, 8 : f32
gpu.return
}
}
@@ -67,34 +63,10 @@ module attributes {
gpu.module @kernels {
gpu.func @rotate_with_bigger_than_subgroup_size() kernel
attributes {spirv.entry_point_abi = #spirv.entry_point_abi<workgroup_size = [16, 1, 1]>} {
- %offset = arith.constant 4 : i32
- %width = arith.constant 32 : i32
%val = arith.constant 42.0 : f32
// expected-error @+1 {{failed to legalize operation 'gpu.rotate'}}
- %result, %valid = gpu.rotate %val, %offset, %width : f32
- gpu.return
- }
-}
-
-}
-
-// -----
-
-module attributes {
- gpu.container_module,
- spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader, GroupNonUniformRotateKHR], []>,
- #spirv.resource_limits<subgroup_size = 16>>
-} {
-
-gpu.module @kernels {
- gpu.func @rotate_non_const_width(%width: i32) kernel
- attributes {spirv.entry_point_abi = #spirv.entry_point_abi<workgroup_size = [16, 1, 1]>} {
- %offset = arith.constant 4 : i32
- %val = arith.constant 42.0 : f32
-
- // expected-error @+1 {{'gpu.rotate' op width is not a constant value}}
- %result, %valid = gpu.rotate %val, %offset, %width : f32
+ %result, %valid = gpu.rotate %val, 4, 32 : f32
gpu.return
}
}
diff --git a/mlir/test/Dialect/GPU/invalid.mlir b/mlir/test/Dialect/GPU/invalid.mlir
index 162ff0662e91e..0d2dc6fa34b02 100644
--- a/mlir/test/Dialect/GPU/invalid.mlir
+++ b/mlir/test/Dialect/GPU/invalid.mlir
@@ -479,20 +479,16 @@ func.func @shuffle_unsupported_type_vec(%arg0 : vector<[4]xf32>, %arg1 : i32, %a
// -----
func.func @rotate_mismatching_type(%arg0 : f32) {
- %offset = arith.constant 4 : i32
- %width = arith.constant 16 : i32
// expected-error@+1 {{op failed to verify that all of {value, rotateResult} have same type}}
- %rotate, %valid = "gpu.rotate"(%arg0, %offset, %width) : (f32, i32, i32) -> (i32, i1)
+ %rotate, %valid = "gpu.rotate"(%arg0) { offset = 4 : i32, width = 16 : i32 } : (f32) -> (i32, i1)
return
}
// -----
func.func @rotate_unsupported_type(%arg0 : index) {
- %offset = arith.constant 4 : i32
- %width = arith.constant 16 : i32
// expected-error@+1 {{op operand #0 must be Integer or Float or fixed-length vector of Integer or Float values of ranks 1, but got 'index'}}
- %rotate, %valid = gpu.rotate %arg0, %offset, %width : index
+ %rotate, %valid = gpu.rotate %arg0, 4, 16 : index
return
}
@@ -502,55 +498,31 @@ func.func @rotate_unsupported_type_vec(%arg0 : vector<[4]xf32>) {
%offset = arith.constant 4 : i32
%width = arith.constant 16 : i32
// expected-error@+1 {{op operand #0 must be Integer or Float or fixed-length vector of Integer or Float values of ranks 1, but got 'vector<[4]xf32>'}}
- %rotate, %valid = gpu.rotate %arg0, %offset, %width : vector<[4]xf32>
+ %rotate, %valid = gpu.rotate %arg0, 4, 16 : vector<[4]xf32>
return
}
// -----
func.func @rotate_unsupported_width(%arg0 : f32) {
- %offset = arith.constant 4 : i32
- %width = arith.constant 15 : i32
// expected-error@+1 {{op width must be a power of two}}
- %rotate, %valid = "gpu.rotate"(%arg0, %offset, %width) : (f32, i32, i32) -> (f32, i1)
+ %rotate, %valid = "gpu.rotate"(%arg0) { offset = 4 : i32, width = 15 : i32 } : (f32) -> (f32, i1)
return
}
// -----
func.func @rotate_unsupported_offset(%arg0 : f32) {
- %offset = arith.constant 16 : i32
- %width = arith.constant 16 : i32
// expected-error@+1 {{op offset must be in the range [0, 16)}}
- %rotate, %valid = "gpu.rotate"(%arg0, %offset, %width) : (f32, i32, i32) -> (f32, i1)
+ %rotate, %valid = "gpu.rotate"(%arg0) { offset = 16 : i32, width = 16 : i32 }: (f32) -> (f32, i1)
return
}
// -----
func.func @rotate_unsupported_offset_minus(%arg0 : f32) {
- %offset = arith.constant -1 : i32
- %width = arith.constant 16 : i32
// expected-error@+1 {{op offset must be in the range [0, 16)}}
- %rotate, %valid = "gpu.rotate"(%arg0, %offset, %width) : (f32, i32, i32) -> (f32, i1)
- return
-}
-
-// -----
-
-func.func @rotate_offset_non_constant(%arg0 : f32, %offset : i32) {
- %width = arith.constant 16 : i32
- // expected-error@+1 {{op offset is not a constant value}}
- %rotate, %valid = "gpu.rotate"(%arg0, %offset, %width) : (f32, i32, i32) -> (f32, i1)
- return
-}
-
-// -----
-
-func.func @rotate_width_non_constant(%arg0 : f32, %width : i32) {
- %offset = arith.constant 0 : i32
- // expected-error@+1 {{op width is not a constant value}}
- %rotate, %valid = "gpu.rotate"(%arg0, %offset, %width) : (f32, i32, i32) -> (f32, i1)
+ %rotate, %valid = "gpu.rotate"(%arg0) { offset = -1 : i32, width = 16 : i32 } : (f32) -> (f32, i1)
return
}
diff --git a/mlir/test/Dialect/GPU/ops.mlir b/mlir/test/Dialect/GPU/ops.mlir
index 2aef80f73feb3..ee1fdfa4d02f0 100644
--- a/mlir/test/Dialect/GPU/ops.mlir
+++ b/mlir/test/Dialect/GPU/ops.mlir
@@ -140,9 +140,8 @@ module attributes {gpu.container_module} {
// CHECK: gpu.shuffle idx %{{.*}}, %{{.*}}, %{{.*}} : f32
%shfl3, %pred3 = gpu.shuffle idx %arg0, %offset, %width : f32
- // CHECK: gpu.rotate %{{.*}}, %{{.*}}, %{{.*}} : f32
- %rotate_width = arith.constant 16 : i32
- %rotate, %pred4 = gpu.rotate %arg0, %offset, %rotate_width : f32
+ // CHECK: gpu.rotate %{{.*}}, 3, 16 : f32
+ %rotate, %pred4 = gpu.rotate %arg0, 3, 16 : f32
"gpu.barrier"() : () -> ()
|
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
IgWod-IMG
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would it be possible to use ConfinedAttr even if it's just for a part of the verification? (see: https://mlir.llvm.org/docs/DefiningDialects/Operations/#confining-attributes)
| uint32_t width = getWidth(); | ||
|
|
||
| if (!widthValue.isPowerOf2()) | ||
| if (!llvm::isPowerOf2_32(width)) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It seems that ConfinedAttr would work here as there is IntPowerOf2 constraint.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for your suggestions. Updated.
kuhar
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for fixing this!
offsetandwidthmust be constants and there are constraints on their values. Update the operation definition to use attributes instead of operands.