Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions docs/ReleaseNotes.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ The included licenses apply to the following files:
- Header file `dxcpix.h` was added to the release package.
- Moved Linear Algebra (Cooperative Vector) DXIL Opcodes to experimental Shader Model 6.10
- Added support for `long long` and `unsigned long long` compile-time constant evaluation, fixes [#7952](https://github.com/microsoft/DirectXShaderCompiler/issues/7952).
- Implemented GetGroupWaveIndex and GetGroupWaveCount in experimental Shader Model 6.10
- [proposal](https://github.com/microsoft/hlsl-specs/blob/main/proposals/0048-group-wave-index.md)
- GetGroupWaveIndex: New intrinsic for Compute, Mesh, Amplification and Node shaders which returns the index of the wave within the thread group that the thread is executing in.
- GetGroupWaveCount: New intrinsic for Compute, Mesh, Amplification and Node shaders which returns the total number of waves executing within the thread group.

### Version 1.8.2505

Expand Down
2 changes: 1 addition & 1 deletion lib/DXIL/DxilOperations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3938,7 +3938,7 @@ void OP::GetMinShaderModelAndMask(OpCode C, bool bWithTranslation,
if ((2147483649 <= op && op <= 2147483650)) {
major = 6;
minor = 10;
mask = SFLAG(Compute) | SFLAG(Mesh) | SFLAG(Amplification) | SFLAG(Library);
mask = SFLAG(Compute) | SFLAG(Mesh) | SFLAG(Amplification) | SFLAG(Node);
return;
}
// Instructions: ClusterID=2147483651, TriangleObjectPosition=2147483655
Expand Down
4 changes: 4 additions & 0 deletions lib/DXIL/DxilShaderFlags.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -729,6 +729,10 @@ ShaderFlags ShaderFlags::CollectShaderFlags(const Function *F,
if (OP::BarrierRequiresGroup(CI))
requiresGroup = true;
break;
case DXIL::OpCode::GetGroupWaveIndex:
case DXIL::OpCode::GetGroupWaveCount:
requiresGroup = true;
break;
default:
// Normal opcodes.
break;
Expand Down
6 changes: 4 additions & 2 deletions lib/HLSL/HLOperationLower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7515,9 +7515,9 @@ constexpr IntrinsicLower gLowerTable[] = {

{IntrinsicOp::IOP_isnormal, TrivialIsSpecialFloat, DXIL::OpCode::IsNormal},

{IntrinsicOp::IOP_GetGroupWaveCount, EmptyLower,
{IntrinsicOp::IOP_GetGroupWaveCount, TranslateWaveToVal,
DXIL::OpCode::GetGroupWaveCount},
{IntrinsicOp::IOP_GetGroupWaveIndex, EmptyLower,
{IntrinsicOp::IOP_GetGroupWaveIndex, TranslateWaveToVal,
DXIL::OpCode::GetGroupWaveIndex},

{IntrinsicOp::IOP_ClusterID, EmptyLower, DXIL::OpCode::ClusterID},
Expand Down Expand Up @@ -7616,6 +7616,8 @@ static void TranslateBuiltinIntrinsic(CallInst *CI,
bool &Translated) {
unsigned opcode = hlsl::GetHLOpcode(CI);
const IntrinsicLower &lower = gLowerTable[opcode];
DXASSERT((unsigned)lower.IntriOpcode == opcode,
"Intrinsic lowering table index must match intrinsic opcode.");
Value *Result = lower.LowerFunc(CI, lower.IntriOpcode, lower.DxilOpcode,
helper, pObjHelper, Translated);
if (Result)
Expand Down
2 changes: 2 additions & 0 deletions tools/clang/lib/SPIRV/DeclResultIdMapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4239,6 +4239,8 @@ SpirvVariable *DeclResultIdMapper::getBuiltinVar(spv::BuiltIn builtIn,
case spv::BuiltIn::LocalInvocationIndex:
case spv::BuiltIn::RemainingRecursionLevelsAMDX:
case spv::BuiltIn::ShaderIndexAMDX:
case spv::BuiltIn::SubgroupId:
case spv::BuiltIn::NumSubgroups:
sc = spv::StorageClass::Input;
break;
case spv::BuiltIn::TaskCountNV:
Expand Down
16 changes: 16 additions & 0 deletions tools/clang/lib/SPIRV/SpirvEmitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9469,6 +9469,22 @@ SpirvEmitter::processIntrinsicCallExpr(const CallExpr *callExpr) {
case hlsl::IntrinsicOp::IOP_WaveActiveCountBits:
retVal = processWaveCountBits(callExpr, spv::GroupOperation::Reduce);
break;
case hlsl::IntrinsicOp::IOP_GetGroupWaveIndex: {
featureManager.requestTargetEnv(SPV_ENV_VULKAN_1_1, "GetGroupWaveIndex",
srcLoc);
const QualType retType = callExpr->getCallReturnType(astContext);
auto *var =
declIdMapper.getBuiltinVar(spv::BuiltIn::SubgroupId, retType, srcLoc);
retVal = spvBuilder.createLoad(retType, var, srcLoc, srcRange);
} break;
case hlsl::IntrinsicOp::IOP_GetGroupWaveCount: {
featureManager.requestTargetEnv(SPV_ENV_VULKAN_1_1, "GetGroupWaveCount",
srcLoc);
const QualType retType = callExpr->getCallReturnType(astContext);
auto *var =
declIdMapper.getBuiltinVar(spv::BuiltIn::NumSubgroups, retType, srcLoc);
retVal = spvBuilder.createLoad(retType, var, srcLoc, srcRange);
} break;
case hlsl::IntrinsicOp::IOP_WaveActiveUSum:
case hlsl::IntrinsicOp::IOP_WaveActiveSum:
case hlsl::IntrinsicOp::IOP_WaveActiveUProduct:
Expand Down
20 changes: 20 additions & 0 deletions tools/clang/test/CodeGenSPIRV/sm6_10.group-wave-count.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// RUN: %dxc -T cs_6_10 -E main -fspv-target-env=vulkan1.1 -fcgl %s -spirv | FileCheck %s

// CHECK: ; Version: 1.3

RWStructuredBuffer<uint> output: register(u0);

// CHECK: OpCapability GroupNonUniform

// CHECK: OpEntryPoint GLCompute
// CHECK-SAME: %NumSubgroups

// CHECK: OpDecorate %NumSubgroups BuiltIn NumSubgroups

// CHECK: %NumSubgroups = OpVariable %_ptr_Input_uint Input

// Compute entry point: every thread writes the result of GetGroupWaveCount()
// into output at its SV_DispatchThreadID x component. The FileCheck patterns
// in this file expect that call to become a load of the NumSubgroups builtin.
[numthreads(64, 1, 1)]
void main(uint3 id: SV_DispatchThreadID) {
// CHECK: OpLoad %uint %NumSubgroups
output[id.x] = GetGroupWaveCount();
}
20 changes: 20 additions & 0 deletions tools/clang/test/CodeGenSPIRV/sm6_10.group-wave-index.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// RUN: %dxc -T cs_6_10 -E main -fspv-target-env=vulkan1.1 -fcgl %s -spirv | FileCheck %s

// CHECK: ; Version: 1.3

RWStructuredBuffer<uint> output: register(u0);

// CHECK: OpCapability GroupNonUniform

// CHECK: OpEntryPoint GLCompute
// CHECK-SAME: %SubgroupId

// CHECK: OpDecorate %SubgroupId BuiltIn SubgroupId

// CHECK: %SubgroupId = OpVariable %_ptr_Input_uint Input

// Compute entry point: every thread writes the result of GetGroupWaveIndex()
// into output at its SV_DispatchThreadID x component. The FileCheck patterns
// in this file expect that call to become a load of the SubgroupId builtin.
[numthreads(64, 1, 1)]
void main(uint3 id: SV_DispatchThreadID) {
// CHECK: OpLoad %uint %SubgroupId
output[id.x] = GetGroupWaveIndex();
}
92 changes: 92 additions & 0 deletions tools/clang/test/DXC/Passes/DxilGen/group-wave-index.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s

; CHECK: call i32 @dx.op.getGroupWaveIndex(i32 -2147483647)
; CHECK: call i32 @dx.op.getGroupWaveCount(i32 -2147483646)

; Generated from:
; utils/hct/ExtractIRForPassTest.py -p dxilgen -o tools/clang/test/DXC/Passes/DxilGen/group-wave-index.ll tools/clang/test/HLSLFileCheckLit/hlsl/intrinsics/wave/group-wave-index.hlsl -- -T cs_6_10 -E main
; Debug info manually stripped.

target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64"
target triple = "dxil-ms-dx"

%"class.RWStructuredBuffer<unsigned int>" = type { i32 }
%dx.types.Handle = type { i8* }
%dx.types.ResourceProperties = type { i32, i32 }

@"\01?output0@@3V?$RWStructuredBuffer@I@@A" = external global %"class.RWStructuredBuffer<unsigned int>", align 4

; Function Attrs: nounwind
; High-level (pre-dxilgen) form of the compute entry point. It makes two
; dx.hl.op intrinsic calls and stores each result through a subscript on the
; output0 RWStructuredBuffer (indices 0 and 16). The patterns at the top of
; this test expect dxilgen to turn the two calls into
; dx.op.getGroupWaveIndex and dx.op.getGroupWaveCount.
define void @main(<3 x i32> %id) #0 {
entry:
; NOTE(review): presumably HL opcode 396 is GetGroupWaveIndex and 395 is
; GetGroupWaveCount, going by the call order in the originating HLSL -
; confirm against the intrinsic opcode enum.
%0 = call i32 @"dx.hl.op.rn.i32 (i32)"(i32 396)
%1 = call i32 @"dx.hl.op.rn.i32 (i32)"(i32 395)
; First store: create and annotate a handle for output0, subscript element 0,
; and store %0.
%2 = load %"class.RWStructuredBuffer<unsigned int>", %"class.RWStructuredBuffer<unsigned int>"* @"\01?output0@@3V?$RWStructuredBuffer@I@@A"
%3 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer<unsigned int>\22)"(i32 0, %"class.RWStructuredBuffer<unsigned int>" %2)
%4 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer<unsigned int>\22)"(i32 14, %dx.types.Handle %3, %dx.types.ResourceProperties { i32 4108, i32 4 }, %"class.RWStructuredBuffer<unsigned int>" zeroinitializer)
%5 = call i32* @"dx.hl.subscript.[].rn.i32* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %4, i32 0)
store i32 %0, i32* %5
; Second store: same handle sequence, subscript element 16, and store %1.
%6 = load %"class.RWStructuredBuffer<unsigned int>", %"class.RWStructuredBuffer<unsigned int>"* @"\01?output0@@3V?$RWStructuredBuffer@I@@A"
%7 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer<unsigned int>\22)"(i32 0, %"class.RWStructuredBuffer<unsigned int>" %6)
%8 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer<unsigned int>\22)"(i32 14, %dx.types.Handle %7, %dx.types.ResourceProperties { i32 4108, i32 4 }, %"class.RWStructuredBuffer<unsigned int>" zeroinitializer)
%9 = call i32* @"dx.hl.subscript.[].rn.i32* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %8, i32 16)
store i32 %1, i32* %9
ret void
}

; Function Attrs: nounwind
declare void @llvm.lifetime.start(i64, i8* nocapture) #0

; Function Attrs: nounwind
declare void @llvm.lifetime.end(i64, i8* nocapture) #0

; Function Attrs: nounwind readnone
declare i32 @"dx.hl.op.rn.i32 (i32)"(i32) #1

; Function Attrs: nounwind readnone
declare i32* @"dx.hl.subscript.[].rn.i32* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1

; Function Attrs: nounwind readnone
declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer<unsigned int>\22)"(i32, %"class.RWStructuredBuffer<unsigned int>") #1

; Function Attrs: nounwind readnone
declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer<unsigned int>\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer<unsigned int>") #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }

!pauseresume = !{!1}
!llvm.ident = !{!2}
!dx.version = !{!3}
!dx.valver = !{!3}
!dx.shaderModel = !{!4}
!dx.typeAnnotations = !{!5, !11}
!dx.entryPoints = !{!18}
!dx.fnprops = !{!23}
!dx.options = !{!24, !25}

!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"}
!2 = !{!"dxc(private) 1.8.0.5134 (Group-Wave-Intrinsics, 84e7262d3)"}
!3 = !{i32 1, i32 10}
!4 = !{!"cs", i32 6, i32 10}
!5 = !{i32 0, %"class.RWStructuredBuffer<unsigned int>" undef, !6}
!6 = !{i32 4, !7, !8}
!7 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 5}
!8 = !{i32 0, !9}
!9 = !{!10}
!10 = !{i32 0, i32 undef}
!11 = !{i32 1, void (<3 x i32>)* @main, !12}
!12 = !{!13, !15}
!13 = !{i32 1, !14, !14}
!14 = !{}
!15 = !{i32 0, !16, !17}
!16 = !{i32 4, !"SV_DispatchThreadID", i32 7, i32 5, i32 13, i32 3}
!17 = !{i32 0}
!18 = !{void (<3 x i32>)* @main, !"main", null, !19, null}
!19 = !{null, !20, null, null}
!20 = !{!21}
!21 = !{i32 0, %"class.RWStructuredBuffer<unsigned int>"* @"\01?output0@@3V?$RWStructuredBuffer@I@@A", !"output0", i32 0, i32 0, i32 1, i32 12, i1 false, i1 false, i1 false, !22}
!22 = !{i32 1, i32 4}
!23 = !{void (<3 x i32>)* @main, i32 5, i32 1, i32 1, i32 1}
!24 = !{i32 64}
!25 = !{i32 -1}
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// REQUIRES: dxil-1-10

// RUN: %dxc -T cs_6_10 -E main -fcgl %s | FileCheck %s --check-prefix=FCGL
// RUN: %dxc -T cs_6_10 -E main %s | FileCheck %s

// FCGL: call i32 @"dx.hl.op.rn.i32 (i32)"(i32 396)
// FCGL: call i32 @"dx.hl.op.rn.i32 (i32)"(i32 395)

// CHECK: %[[Index:[^ ]+]] = call i32 @dx.op.getGroupWaveIndex(i32 -2147483647) ; GetGroupWaveIndex()
// CHECK: %[[Count:[^ ]+]] = call i32 @dx.op.getGroupWaveCount(i32 -2147483646) ; GetGroupWaveCount()
// CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %{{[^,]+}}, i32 0, i32 0, i32 %[[Index]], i32 undef, i32 undef, i32 undef, i8 1, i32 4)
// CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %{{[^,]+}}, i32 16, i32 0, i32 %[[Count]], i32 undef, i32 undef, i32 undef, i8 1, i32 4)

RWStructuredBuffer<uint> output0 : register(u0);

// Compute entry point: calls GetGroupWaveIndex() and GetGroupWaveCount() once
// each and stores the results to output0[0] and output0[16] respectively.
[numthreads(1, 1, 1)]
void main(uint3 id: SV_DispatchThreadID) {
uint waveIdx = GetGroupWaveIndex();
uint waveCount = GetGroupWaveCount();

// The indices 0 and 16 are the ones matched by the rawBufferStore patterns
// at the top of this test.
output0[0] = waveIdx;
output0[16] = waveCount;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// REQUIRES: dxil-1-10

// RUN: not %dxc -T lib_6_10 %s 2>&1 | FileCheck %s

// CHECK-DAG: error: Function requires a visible group, but is called from a shader without one.

// Record type for the node input consumed by ThreadNode; holds a single uint.
struct InputRecord {
uint value;
};

// Record type for the node output written by ThreadNode; holds a single uint.
struct OutputRecord {
uint value;
};

RWStructuredBuffer<uint> output : register(u0);

// Thread launch - no thread group, should FAIL
// Node shader using the "thread" launch mode that calls both
// GetGroupWaveIndex() and GetGroupWaveCount(). This is a negative test: the
// compile is run with `not` and is expected to emit the
// "Function requires a visible group" error matched at the top of the file.
[Shader("node")]
[NodeLaunch("thread")]
void ThreadNode(
RWThreadNodeInputRecord<InputRecord> inputData,
[MaxRecords(1)] NodeOutput<OutputRecord> outputData) {
uint waveIdx = GetGroupWaveIndex();
uint waveCount = GetGroupWaveCount();
// Write the sum to one output record so both intrinsic results are used.
ThreadNodeOutputRecords<OutputRecord> outRec = outputData.GetThreadNodeOutputRecords(1);
outRec.Get().value = waveIdx + waveCount;
outRec.OutputComplete();
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// REQUIRES: dxil-1-10

// RUN: not %dxc -T lib_6_10 %s 2>&1 | FileCheck %s

// CHECK-DAG: error: Opcode GetGroupWaveCount not valid in shader model lib_6_10(callable).
// CHECK-DAG: error: Opcode GetGroupWaveCount not valid in shader model lib_6_10(intersection).
// CHECK-DAG: error: Opcode GetGroupWaveCount not valid in shader model lib_6_10(anyhit).
// CHECK-DAG: error: Opcode GetGroupWaveCount not valid in shader model lib_6_10(miss).
// CHECK-DAG: error: Opcode GetGroupWaveCount not valid in shader model lib_6_10(closesthit).
// CHECK-DAG: error: Opcode GetGroupWaveCount not valid in shader model lib_6_10(raygeneration).
// CHECK-DAG: error: Opcode GetGroupWaveIndex not valid in shader model lib_6_10(callable).
// CHECK-DAG: error: Opcode GetGroupWaveIndex not valid in shader model lib_6_10(intersection).
// CHECK-DAG: error: Opcode GetGroupWaveIndex not valid in shader model lib_6_10(anyhit).
// CHECK-DAG: error: Opcode GetGroupWaveIndex not valid in shader model lib_6_10(miss).
// CHECK-DAG: error: Opcode GetGroupWaveIndex not valid in shader model lib_6_10(closesthit).
// CHECK-DAG: error: Opcode GetGroupWaveIndex not valid in shader model lib_6_10(raygeneration).

// Ray payload with a single float. Its access qualifiers let the closesthit,
// miss, anyhit and caller stages write the value and only the caller read it.
struct [raypayload] Payload {
float value : write(closesthit, miss, anyhit, caller) : read(caller);
};
// Attribute struct taken as the `in` parameter of the closesthit and anyhit
// entry points in this test.
struct Attributes {
float2 barycentrics;
};

RWStructuredBuffer<uint> output : register(u0);

// raygeneration entry point that calls both group-wave intrinsics and stores
// their sum to output[0]. Negative test: the diagnostics matched at the top of
// the file expect both opcodes to be reported as not valid for raygeneration.
[shader("raygeneration")]
void RayGenMain() {
uint waveIdx = GetGroupWaveIndex();
uint waveCount = GetGroupWaveCount();
output[0] = waveIdx + waveCount;
}

// closesthit entry point that calls both group-wave intrinsics and writes
// their sum into the payload. Negative test: the diagnostics matched at the
// top of the file expect both opcodes to be reported as not valid for
// closesthit.
[shader("closesthit")]
void ClosestHitMain(inout Payload payload, in Attributes attribs) {
uint waveIdx = GetGroupWaveIndex();
uint waveCount = GetGroupWaveCount();
payload.value = waveIdx + waveCount;
}

// miss entry point that calls both group-wave intrinsics and writes their sum
// into the payload. Negative test: the diagnostics matched at the top of the
// file expect both opcodes to be reported as not valid for miss.
[shader("miss")]
void MissMain(inout Payload payload) {
uint waveIdx = GetGroupWaveIndex();
uint waveCount = GetGroupWaveCount();
payload.value = waveIdx + waveCount;
}

// anyhit entry point that calls both group-wave intrinsics and writes their
// sum into the payload. Negative test: the diagnostics matched at the top of
// the file expect both opcodes to be reported as not valid for anyhit.
[shader("anyhit")]
void AnyHitMain(inout Payload payload, in Attributes attribs) {
uint waveIdx = GetGroupWaveIndex();
uint waveCount = GetGroupWaveCount();
payload.value = waveIdx + waveCount;
}

// intersection entry point that calls both group-wave intrinsics and stores
// their sum to output[0]. Negative test: the diagnostics matched at the top of
// the file expect both opcodes to be reported as not valid for intersection.
[shader("intersection")]
void IntersectionMain() {
uint waveIdx = GetGroupWaveIndex();
uint waveCount = GetGroupWaveCount();
output[0] = waveIdx + waveCount;
}

// callable entry point that calls both group-wave intrinsics and writes their
// sum into the payload. Negative test: the diagnostics matched at the top of
// the file expect both opcodes to be reported as not valid for callable.
[shader("callable")]
void CallableMain(inout Payload payload) {
uint waveIdx = GetGroupWaveIndex();
uint waveCount = GetGroupWaveCount();
payload.value = waveIdx + waveCount;
}
Loading