Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 5 additions & 10 deletions mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -1368,12 +1368,14 @@ def GPU_ShuffleOp : GPU_Op<

def GPU_RotateOp : GPU_Op<
"rotate", [Pure, AllTypesMatch<["value", "rotateResult"]>]>,
Arguments<(ins AnyIntegerOrFloatOr1DVector:$value, I32:$offset, I32:$width)>,
Arguments<(ins AnyIntegerOrFloatOr1DVector:$value,
ConfinedAttr<I32Attr, [IntMinValue<0>]>:$offset,
ConfinedAttr<I32Attr, [IntPowerOf2]>:$width)>,
Results<(outs AnyIntegerOrFloatOr1DVector:$rotateResult, I1:$valid)> {
let summary = "Rotate values within a subgroup.";
let description = [{
The "rotate" op moves values across lanes in a subgroup (a.k.a., local
invocations) within the same subgroup. The `width` argument specifies the
invocations) within the same subgroup. The `width` attribute specifies the
number of lanes that participate in the rotation, and must be uniform across
all participating lanes. Further, the first `width` lanes of the subgroup
must be active.
Expand All @@ -1394,9 +1396,7 @@ def GPU_RotateOp : GPU_Op<
example:

```mlir
%offset = arith.constant 1 : i32
%width = arith.constant 16 : i32
%1, %2 = gpu.rotate %0, %offset, %width : f32
%1, %2 = gpu.rotate %0, 1, 16 : f32
```

For lane `k`, returns the value from lane `(k + cst1) % width`.
Expand All @@ -1406,11 +1406,6 @@ def GPU_RotateOp : GPU_Op<
$value `,` $offset `,` $width attr-dict `:` type($value)
}];

let builders = [
// Helper function that creates a rotate with constant offset/width.
OpBuilder<(ins "Value":$value, "int32_t":$offset, "int32_t":$width)>
];

let hasVerifier = 1;
}

Expand Down
20 changes: 11 additions & 9 deletions mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -507,25 +507,27 @@ LogicalResult GPURotateConversion::matchAndRewrite(
getTypeConverter<SPIRVTypeConverter>()->getTargetEnv();
unsigned subgroupSize =
targetEnv.getAttr().getResourceLimits().getSubgroupSize();
IntegerAttr widthAttr;
if (!matchPattern(rotateOp.getWidth(), m_Constant(&widthAttr)) ||
widthAttr.getValue().getZExtValue() > subgroupSize)
unsigned width = rotateOp.getWidth();
if (width > subgroupSize)
return rewriter.notifyMatchFailure(
rotateOp,
"rotate width is not a constant or larger than target subgroup size");
rotateOp, "rotate width is larger than target subgroup size");

Location loc = rotateOp.getLoc();
auto scope = rewriter.getAttr<spirv::ScopeAttr>(spirv::Scope::Subgroup);
Value offsetVal =
arith::ConstantOp::create(rewriter, loc, adaptor.getOffsetAttr());
Value widthVal =
arith::ConstantOp::create(rewriter, loc, adaptor.getWidthAttr());
Value rotateResult = spirv::GroupNonUniformRotateKHROp::create(
rewriter, loc, scope, adaptor.getValue(), adaptor.getOffset(),
adaptor.getWidth());
rewriter, loc, scope, adaptor.getValue(), offsetVal, widthVal);
Value validVal;
if (widthAttr.getValue().getZExtValue() == subgroupSize) {
if (width == subgroupSize) {
validVal = spirv::ConstantOp::getOne(rewriter.getI1Type(), loc, rewriter);
} else {
IntegerAttr widthAttr = adaptor.getWidthAttr();
Value laneId = gpu::LaneIdOp::create(rewriter, loc, widthAttr);
validVal = arith::CmpIOp::create(rewriter, loc, arith::CmpIPredicate::ult,
laneId, adaptor.getWidth());
laneId, widthVal);
}

rewriter.replaceOp(rotateOp, {rotateResult, validVal});
Expand Down
36 changes: 4 additions & 32 deletions mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1395,40 +1395,12 @@ void ShuffleOp::build(OpBuilder &builder, OperationState &result, Value value,
// RotateOp
//===----------------------------------------------------------------------===//

void RotateOp::build(OpBuilder &builder, OperationState &result, Value value,
int32_t offset, int32_t width) {
build(builder, result, value,
arith::ConstantOp::create(builder, result.location,
builder.getI32IntegerAttr(offset)),
arith::ConstantOp::create(builder, result.location,
builder.getI32IntegerAttr(width)));
}

LogicalResult RotateOp::verify() {
auto offsetConstOp = getOffset().getDefiningOp<arith::ConstantOp>();
if (!offsetConstOp)
return emitOpError() << "offset is not a constant value";

auto offsetIntAttr =
llvm::dyn_cast<mlir::IntegerAttr>(offsetConstOp.getValue());

auto widthConstOp = getWidth().getDefiningOp<arith::ConstantOp>();
if (!widthConstOp)
return emitOpError() << "width is not a constant value";

auto widthIntAttr =
llvm::dyn_cast<mlir::IntegerAttr>(widthConstOp.getValue());

llvm::APInt offsetValue = offsetIntAttr.getValue();
llvm::APInt widthValue = widthIntAttr.getValue();

if (!widthValue.isPowerOf2())
return emitOpError() << "width must be a power of two";
uint32_t offset = getOffset();
uint32_t width = getWidth();

if (offsetValue.sge(widthValue) || offsetValue.slt(0)) {
int64_t widthValueInt = widthValue.getSExtValue();
return emitOpError() << "offset must be in the range [0, " << widthValueInt
<< ")";
if (offset >= width) {
return emitOpError() << "offset must be in the range [0, " << width << ")";
}

return success();
Expand Down
38 changes: 5 additions & 33 deletions mlir/test/Conversion/GPUToSPIRV/rotate.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,14 @@ gpu.module @kernels {
// CHECK-LABEL: spirv.func @rotate()
gpu.func @rotate() kernel
attributes {spirv.entry_point_abi = #spirv.entry_point_abi<workgroup_size = [16, 1, 1]>} {
%offset = arith.constant 4 : i32
%width = arith.constant 16 : i32
%val = arith.constant 42.0 : f32

// CHECK: %[[VAL:.+]] = spirv.Constant 4.200000e+01 : f32
// CHECK: %[[OFFSET:.+]] = spirv.Constant 4 : i32
// CHECK: %[[WIDTH:.+]] = spirv.Constant 16 : i32
// CHECK: %[[VAL:.+]] = spirv.Constant 4.200000e+01 : f32
// CHECK: %{{.+}} = spirv.GroupNonUniformRotateKHR <Subgroup> %[[VAL]], %[[OFFSET]], cluster_size(%[[WIDTH]]) : f32, i32, i32 -> f32
// CHECK: %{{.+}} = spirv.Constant true
%result, %valid = gpu.rotate %val, %offset, %width : f32
%result, %valid = gpu.rotate %val, 4, 16 : f32
gpu.return
}
}
Expand All @@ -38,18 +36,16 @@ gpu.module @kernels {
// CHECK-LABEL: spirv.func @rotate_width_less_than_subgroup_size()
gpu.func @rotate_width_less_than_subgroup_size() kernel
attributes {spirv.entry_point_abi = #spirv.entry_point_abi<workgroup_size = [16, 1, 1]>} {
%offset = arith.constant 4 : i32
%width = arith.constant 8 : i32
%val = arith.constant 42.0 : f32

// CHECK: %[[VAL:.+]] = spirv.Constant 4.200000e+01 : f32
// CHECK: %[[OFFSET:.+]] = spirv.Constant 4 : i32
// CHECK: %[[WIDTH:.+]] = spirv.Constant 8 : i32
// CHECK: %[[VAL:.+]] = spirv.Constant 4.200000e+01 : f32
// CHECK: %{{.+}} = spirv.GroupNonUniformRotateKHR <Subgroup> %[[VAL]], %[[OFFSET]], cluster_size(%[[WIDTH]]) : f32, i32, i32 -> f32
// CHECK: %[[INVOCATION_ID_ADDR:.+]] = spirv.mlir.addressof @__builtin__SubgroupLocalInvocationId__
// CHECK: %[[INVOCATION_ID:.+]] = spirv.Load "Input" %[[INVOCATION_ID_ADDR]]
// CHECK: %{{.+}} = spirv.ULessThan %[[INVOCATION_ID]], %[[WIDTH]]
%result, %valid = gpu.rotate %val, %offset, %width : f32
%result, %valid = gpu.rotate %val, 4, 8 : f32
gpu.return
}
}
Expand All @@ -67,34 +63,10 @@ module attributes {
gpu.module @kernels {
gpu.func @rotate_with_bigger_than_subgroup_size() kernel
attributes {spirv.entry_point_abi = #spirv.entry_point_abi<workgroup_size = [16, 1, 1]>} {
%offset = arith.constant 4 : i32
%width = arith.constant 32 : i32
%val = arith.constant 42.0 : f32

// expected-error @+1 {{failed to legalize operation 'gpu.rotate'}}
%result, %valid = gpu.rotate %val, %offset, %width : f32
gpu.return
}
}

}

// -----

module attributes {
gpu.container_module,
spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader, GroupNonUniformRotateKHR], []>,
#spirv.resource_limits<subgroup_size = 16>>
} {

gpu.module @kernels {
gpu.func @rotate_non_const_width(%width: i32) kernel
attributes {spirv.entry_point_abi = #spirv.entry_point_abi<workgroup_size = [16, 1, 1]>} {
%offset = arith.constant 4 : i32
%val = arith.constant 42.0 : f32

// expected-error @+1 {{'gpu.rotate' op width is not a constant value}}
%result, %valid = gpu.rotate %val, %offset, %width : f32
%result, %valid = gpu.rotate %val, 4, 32 : f32
gpu.return
}
}
Expand Down
44 changes: 8 additions & 36 deletions mlir/test/Dialect/GPU/invalid.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -479,20 +479,16 @@ func.func @shuffle_unsupported_type_vec(%arg0 : vector<[4]xf32>, %arg1 : i32, %a
// -----

func.func @rotate_mismatching_type(%arg0 : f32) {
%offset = arith.constant 4 : i32
%width = arith.constant 16 : i32
// expected-error@+1 {{op failed to verify that all of {value, rotateResult} have same type}}
%rotate, %valid = "gpu.rotate"(%arg0, %offset, %width) : (f32, i32, i32) -> (i32, i1)
%rotate, %valid = "gpu.rotate"(%arg0) { offset = 4 : i32, width = 16 : i32 } : (f32) -> (i32, i1)
return
}

// -----

func.func @rotate_unsupported_type(%arg0 : index) {
%offset = arith.constant 4 : i32
%width = arith.constant 16 : i32
// expected-error@+1 {{op operand #0 must be Integer or Float or fixed-length vector of Integer or Float values of ranks 1, but got 'index'}}
%rotate, %valid = gpu.rotate %arg0, %offset, %width : index
%rotate, %valid = gpu.rotate %arg0, 4, 16 : index
return
}

Expand All @@ -502,55 +498,31 @@ func.func @rotate_unsupported_type_vec(%arg0 : vector<[4]xf32>) {
%offset = arith.constant 4 : i32
%width = arith.constant 16 : i32
// expected-error@+1 {{op operand #0 must be Integer or Float or fixed-length vector of Integer or Float values of ranks 1, but got 'vector<[4]xf32>'}}
%rotate, %valid = gpu.rotate %arg0, %offset, %width : vector<[4]xf32>
%rotate, %valid = gpu.rotate %arg0, 4, 16 : vector<[4]xf32>
return
}

// -----

func.func @rotate_unsupported_width(%arg0 : f32) {
%offset = arith.constant 4 : i32
%width = arith.constant 15 : i32
// expected-error@+1 {{op width must be a power of two}}
%rotate, %valid = "gpu.rotate"(%arg0, %offset, %width) : (f32, i32, i32) -> (f32, i1)
// expected-error@+1 {{'gpu.rotate' op attribute 'width' failed to satisfy constraint: 32-bit signless integer attribute whose value is a power of two > 0}}
%rotate, %valid = "gpu.rotate"(%arg0) { offset = 4 : i32, width = 15 : i32 } : (f32) -> (f32, i1)
return
}

// -----

func.func @rotate_unsupported_offset(%arg0 : f32) {
%offset = arith.constant 16 : i32
%width = arith.constant 16 : i32
// expected-error@+1 {{op offset must be in the range [0, 16)}}
%rotate, %valid = "gpu.rotate"(%arg0, %offset, %width) : (f32, i32, i32) -> (f32, i1)
%rotate, %valid = "gpu.rotate"(%arg0) { offset = 16 : i32, width = 16 : i32 }: (f32) -> (f32, i1)
return
}

// -----

func.func @rotate_unsupported_offset_minus(%arg0 : f32) {
%offset = arith.constant -1 : i32
%width = arith.constant 16 : i32
// expected-error@+1 {{op offset must be in the range [0, 16)}}
%rotate, %valid = "gpu.rotate"(%arg0, %offset, %width) : (f32, i32, i32) -> (f32, i1)
return
}

// -----

func.func @rotate_offset_non_constant(%arg0 : f32, %offset : i32) {
%width = arith.constant 16 : i32
// expected-error@+1 {{op offset is not a constant value}}
%rotate, %valid = "gpu.rotate"(%arg0, %offset, %width) : (f32, i32, i32) -> (f32, i1)
return
}

// -----

func.func @rotate_width_non_constant(%arg0 : f32, %width : i32) {
%offset = arith.constant 0 : i32
// expected-error@+1 {{op width is not a constant value}}
%rotate, %valid = "gpu.rotate"(%arg0, %offset, %width) : (f32, i32, i32) -> (f32, i1)
// expected-error@+1 {{'gpu.rotate' op attribute 'offset' failed to satisfy constraint: 32-bit signless integer attribute whose minimum value is 0}}
%rotate, %valid = "gpu.rotate"(%arg0) { offset = -1 : i32, width = 16 : i32 } : (f32) -> (f32, i1)
return
}

Expand Down
5 changes: 2 additions & 3 deletions mlir/test/Dialect/GPU/ops.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -140,9 +140,8 @@ module attributes {gpu.container_module} {
// CHECK: gpu.shuffle idx %{{.*}}, %{{.*}}, %{{.*}} : f32
%shfl3, %pred3 = gpu.shuffle idx %arg0, %offset, %width : f32

// CHECK: gpu.rotate %{{.*}}, %{{.*}}, %{{.*}} : f32
%rotate_width = arith.constant 16 : i32
%rotate, %pred4 = gpu.rotate %arg0, %offset, %rotate_width : f32
// CHECK: gpu.rotate %{{.*}}, 3, 16 : f32
%rotate, %pred4 = gpu.rotate %arg0, 3, 16 : f32

"gpu.barrier"() : () -> ()

Expand Down