Skip to content

Commit f695c8d

Browse files
authored
[DirectX][SPIRV] Fix the lowering of dot4add (#140315)
There were some issues with these ops:

- The overload wasn't being specified (`dx.op.dot4AddPacked` vs `dx.op.dot4AddPacked.i32`).
- The versioning wasn't correct (these ops were added in SM 6.4).
- The argument order was off: while the HLSL function has the accumulator as the last argument, the DXIL op lists it first.

This fixes the DXIL.td definition and adjusts the LLVM DX and SPIRV intrinsics to match the argument order in DXIL rather than the argument order in HLSL.

Fixes #139018
1 parent 2f05451 commit f695c8d

File tree

11 files changed

+140
-95
lines changed

11 files changed

+140
-95
lines changed

clang/lib/CodeGen/CGHLSLBuiltins.cpp

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -412,24 +412,28 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
412412
ArrayRef<Value *>{Op0, Op1}, nullptr, "hlsl.dot");
413413
}
414414
case Builtin::BI__builtin_hlsl_dot4add_i8packed: {
415-
Value *A = EmitScalarExpr(E->getArg(0));
416-
Value *B = EmitScalarExpr(E->getArg(1));
417-
Value *C = EmitScalarExpr(E->getArg(2));
415+
Value *X = EmitScalarExpr(E->getArg(0));
416+
Value *Y = EmitScalarExpr(E->getArg(1));
417+
Value *Acc = EmitScalarExpr(E->getArg(2));
418418

419419
Intrinsic::ID ID = CGM.getHLSLRuntime().getDot4AddI8PackedIntrinsic();
420+
// Note that the argument order disagrees between the builtin and the
421+
// intrinsic here.
420422
return Builder.CreateIntrinsic(
421-
/*ReturnType=*/C->getType(), ID, ArrayRef<Value *>{A, B, C}, nullptr,
422-
"hlsl.dot4add.i8packed");
423+
/*ReturnType=*/Acc->getType(), ID, ArrayRef<Value *>{Acc, X, Y},
424+
nullptr, "hlsl.dot4add.i8packed");
423425
}
424426
case Builtin::BI__builtin_hlsl_dot4add_u8packed: {
425-
Value *A = EmitScalarExpr(E->getArg(0));
426-
Value *B = EmitScalarExpr(E->getArg(1));
427-
Value *C = EmitScalarExpr(E->getArg(2));
427+
Value *X = EmitScalarExpr(E->getArg(0));
428+
Value *Y = EmitScalarExpr(E->getArg(1));
429+
Value *Acc = EmitScalarExpr(E->getArg(2));
428430

429431
Intrinsic::ID ID = CGM.getHLSLRuntime().getDot4AddU8PackedIntrinsic();
432+
// Note that the argument order disagrees between the builtin and the
433+
// intrinsic here.
430434
return Builder.CreateIntrinsic(
431-
/*ReturnType=*/C->getType(), ID, ArrayRef<Value *>{A, B, C}, nullptr,
432-
"hlsl.dot4add.u8packed");
435+
/*ReturnType=*/Acc->getType(), ID, ArrayRef<Value *>{Acc, X, Y},
436+
nullptr, "hlsl.dot4add.u8packed");
433437
}
434438
case Builtin::BI__builtin_hlsl_elementwise_firstbithigh: {
435439
Value *X = EmitScalarExpr(E->getArg(0));
Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,24 @@
1-
// RUN: %clang_cc1 -finclude-default-header -triple \
2-
// RUN: dxil-pc-shadermodel6.3-compute %s -emit-llvm -disable-llvm-passes -o - | \
3-
// RUN: FileCheck %s -DTARGET=dx
4-
// RUN: %clang_cc1 -finclude-default-header -triple \
5-
// RUN: spirv-pc-vulkan-compute %s -emit-llvm -disable-llvm-passes -o - | \
6-
// RUN: FileCheck %s -DTARGET=spv
1+
// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.4-compute %s -emit-llvm -o - | FileCheck %s -DTARGET=dx
2+
// RUN: %clang_cc1 -finclude-default-header -triple spirv-pc-vulkan-compute %s -emit-llvm -o - | FileCheck %s -DTARGET=spv
73

84
// Test basic lowering to runtime function call.
95

106
// CHECK-LABEL: test
11-
int test(uint a, uint b, int c) {
12-
// CHECK: %[[RET:.*]] = call [[TY:i32]] @llvm.[[TARGET]].dot4add.i8packed([[TY]] %[[#]], [[TY]] %[[#]], [[TY]] %[[#]])
13-
// CHECK: ret [[TY]] %[[RET]]
14-
return dot4add_i8packed(a, b, c);
7+
int test(uint x, uint y, int acc) {
8+
// CHECK: [[X_ADDR:%.*]] = alloca i32, align 4
9+
// CHECK: [[Y_ADDR:%.*]] = alloca i32, align 4
10+
// CHECK: [[ACC_ADDR:%.*]] = alloca i32, align 4
11+
// CHECK: store i32 %x, ptr [[X_ADDR]], align 4
12+
// CHECK: store i32 %y, ptr [[Y_ADDR]], align 4
13+
// CHECK: store i32 %acc, ptr [[ACC_ADDR]], align 4
14+
// CHECK: [[X0:%.*]] = load i32, ptr [[X_ADDR]], align 4
15+
// CHECK: [[Y0:%.*]] = load i32, ptr [[Y_ADDR]], align 4
16+
// CHECK: [[ACC0:%.*]] = load i32, ptr [[ACC_ADDR]], align 4
17+
// CHECK: call i32 @llvm.[[TARGET]].dot4add.i8packed(i32 [[ACC0]], i32 [[X0]], i32 [[Y0]])
18+
return dot4add_i8packed(x, y, acc);
1519
}
1620

17-
// CHECK: declare [[TY]] @llvm.[[TARGET]].dot4add.i8packed([[TY]], [[TY]], [[TY]])
21+
[numthreads(1,1,1)]
22+
void main() {
23+
test(0, 0, 0);
24+
}
Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,24 @@
1-
2-
// RUN: %clang_cc1 -finclude-default-header -triple \
3-
// RUN: dxil-pc-shadermodel6.3-compute %s -emit-llvm -disable-llvm-passes -o - | \
4-
// RUN: FileCheck %s -DTARGET=dx
5-
// RUN: %clang_cc1 -finclude-default-header -triple \
6-
// RUN: spirv-pc-vulkan-compute %s -emit-llvm -disable-llvm-passes -o - | \
7-
// RUN: FileCheck %s -DTARGET=spv
1+
// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.4-compute %s -emit-llvm -o - | FileCheck %s -DTARGET=dx
2+
// RUN: %clang_cc1 -finclude-default-header -triple spirv-pc-vulkan-compute %s -emit-llvm -o - | FileCheck %s -DTARGET=spv
83

94
// Test basic lowering to runtime function call.
105

11-
// CHECK-LABEL: define {{.*}}test
12-
uint test(uint a, uint b, uint c) {
13-
// CHECK: %[[RET:.*]] = call [[TY:i32]] @llvm.[[TARGET]].dot4add.u8packed([[TY]] %[[#]], [[TY]] %[[#]], [[TY]] %[[#]])
14-
// CHECK: ret [[TY]] %[[RET]]
15-
return dot4add_u8packed(a, b, c);
6+
// CHECK-LABEL: test
7+
int test(uint x, uint y, int acc) {
8+
// CHECK: [[X_ADDR:%.*]] = alloca i32, align 4
9+
// CHECK: [[Y_ADDR:%.*]] = alloca i32, align 4
10+
// CHECK: [[ACC_ADDR:%.*]] = alloca i32, align 4
11+
// CHECK: store i32 %x, ptr [[X_ADDR]], align 4
12+
// CHECK: store i32 %y, ptr [[Y_ADDR]], align 4
13+
// CHECK: store i32 %acc, ptr [[ACC_ADDR]], align 4
14+
// CHECK: [[X0:%.*]] = load i32, ptr [[X_ADDR]], align 4
15+
// CHECK: [[Y0:%.*]] = load i32, ptr [[Y_ADDR]], align 4
16+
// CHECK: [[ACC0:%.*]] = load i32, ptr [[ACC_ADDR]], align 4
17+
// CHECK: call i32 @llvm.[[TARGET]].dot4add.u8packed(i32 [[ACC0]], i32 [[X0]], i32 [[Y0]])
18+
return dot4add_u8packed(x, y, acc);
1619
}
1720

18-
// CHECK: declare [[TY]] @llvm.[[TARGET]].dot4add.u8packed([[TY]], [[TY]], [[TY]])
21+
[numthreads(1,1,1)]
22+
void main() {
23+
test(0, 0, 0);
24+
}

llvm/lib/Target/DirectX/DXIL.td

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1119,19 +1119,21 @@ def Dot4AddI8Packed : DXILOp<163, dot4AddPacked> {
11191119
"accumulate to i32";
11201120
let intrinsics = [IntrinSelect<int_dx_dot4add_i8packed>];
11211121
let arguments = [Int32Ty, Int32Ty, Int32Ty];
1122-
let result = Int32Ty;
1123-
let stages = [Stages<DXIL1_0, [all_stages]>];
1124-
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
1122+
let result = OverloadTy;
1123+
let overloads = [Overloads<DXIL1_4, [Int32Ty]>];
1124+
let stages = [Stages<DXIL1_4, [all_stages]>];
1125+
let attributes = [Attributes<DXIL1_4, [ReadNone]>];
11251126
}
11261127

11271128
def Dot4AddU8Packed : DXILOp<164, dot4AddPacked> {
11281129
let Doc = "unsigned dot product of 4 x i8 vectors packed into i32, with "
11291130
"accumulate to i32";
11301131
let intrinsics = [IntrinSelect<int_dx_dot4add_u8packed>];
11311132
let arguments = [Int32Ty, Int32Ty, Int32Ty];
1132-
let result = Int32Ty;
1133-
let stages = [Stages<DXIL1_0, [all_stages]>];
1134-
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
1133+
let result = OverloadTy;
1134+
let overloads = [Overloads<DXIL1_4, [Int32Ty]>];
1135+
let stages = [Stages<DXIL1_4, [all_stages]>];
1136+
let attributes = [Attributes<DXIL1_4, [ReadNone]>];
11351137
}
11361138

11371139
def AnnotateHandle : DXILOp<216, annotateHandle> {

llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2021,20 +2021,24 @@ bool SPIRVInstructionSelector::selectDot4AddPacked(Register ResVReg,
20212021
assert(I.getOperand(4).isReg());
20222022
MachineBasicBlock &BB = *I.getParent();
20232023

2024+
Register Acc = I.getOperand(2).getReg();
2025+
Register X = I.getOperand(3).getReg();
2026+
Register Y = I.getOperand(4).getReg();
2027+
20242028
auto DotOp = Signed ? SPIRV::OpSDot : SPIRV::OpUDot;
20252029
Register Dot = MRI->createVirtualRegister(GR.getRegClass(ResType));
20262030
bool Result = BuildMI(BB, I, I.getDebugLoc(), TII.get(DotOp))
20272031
.addDef(Dot)
20282032
.addUse(GR.getSPIRVTypeID(ResType))
2029-
.addUse(I.getOperand(2).getReg())
2030-
.addUse(I.getOperand(3).getReg())
2033+
.addUse(X)
2034+
.addUse(Y)
20312035
.constrainAllUses(TII, TRI, RBI);
20322036

20332037
return Result && BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpIAddS))
20342038
.addDef(ResVReg)
20352039
.addUse(GR.getSPIRVTypeID(ResType))
20362040
.addUse(Dot)
2037-
.addUse(I.getOperand(4).getReg())
2041+
.addUse(Acc)
20382042
.constrainAllUses(TII, TRI, RBI);
20392043
}
20402044

@@ -2052,8 +2056,10 @@ bool SPIRVInstructionSelector::selectDot4AddPackedExpansion(
20522056

20532057
bool Result = true;
20542058

2055-
// Acc = C
2056-
Register Acc = I.getOperand(4).getReg();
2059+
Register Acc = I.getOperand(2).getReg();
2060+
Register X = I.getOperand(3).getReg();
2061+
Register Y = I.getOperand(4).getReg();
2062+
20572063
SPIRVType *EltType = GR.getOrCreateSPIRVIntegerType(8, I, TII);
20582064
auto ExtractOp =
20592065
Signed ? SPIRV::OpBitFieldSExtract : SPIRV::OpBitFieldUExtract;
@@ -2067,7 +2073,7 @@ bool SPIRVInstructionSelector::selectDot4AddPackedExpansion(
20672073
BuildMI(BB, I, I.getDebugLoc(), TII.get(ExtractOp))
20682074
.addDef(AElt)
20692075
.addUse(GR.getSPIRVTypeID(ResType))
2070-
.addUse(I.getOperand(2).getReg())
2076+
.addUse(X)
20712077
.addUse(GR.getOrCreateConstInt(i * 8, I, EltType, TII, ZeroAsNull))
20722078
.addUse(GR.getOrCreateConstInt(8, I, EltType, TII, ZeroAsNull))
20732079
.constrainAllUses(TII, TRI, RBI);
@@ -2078,7 +2084,7 @@ bool SPIRVInstructionSelector::selectDot4AddPackedExpansion(
20782084
BuildMI(BB, I, I.getDebugLoc(), TII.get(ExtractOp))
20792085
.addDef(BElt)
20802086
.addUse(GR.getSPIRVTypeID(ResType))
2081-
.addUse(I.getOperand(3).getReg())
2087+
.addUse(Y)
20822088
.addUse(GR.getOrCreateConstInt(i * 8, I, EltType, TII, ZeroAsNull))
20832089
.addUse(GR.getOrCreateConstInt(8, I, EltType, TII, ZeroAsNull))
20842090
.constrainAllUses(TII, TRI, RBI);

llvm/test/CodeGen/DirectX/dot4add_i8packed.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-compute %s | FileCheck %s
1+
; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.4-compute %s | FileCheck %s
22

3-
define void @main(i32 %a, i32 %b, i32 %c) {
3+
define void @main(i32 %acc, i32 %x, i32 %y) {
44
entry:
5-
; CHECK: call i32 @dx.op.dot4AddPacked(i32 163, i32 %a, i32 %b, i32 %c) #[[#ATTR:]]
6-
%0 = call i32 @llvm.dx.dot4add.i8packed(i32 %a, i32 %b, i32 %c)
5+
; CHECK: call i32 @dx.op.dot4AddPacked.i32(i32 163, i32 %acc, i32 %x, i32 %y) #[[#ATTR:]]
6+
%0 = call i32 @llvm.dx.dot4add.i8packed(i32 %acc, i32 %x, i32 %y)
77
ret void
88
}
99

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
; RUN: not opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-compute %s 2>&1 | FileCheck %s
2+
3+
; CHECK: in function f
4+
; CHECK-SAME: Cannot create Dot4AddI8Packed operation: No valid overloads for DXIL version 1.3
5+
6+
define void @f(i32 %acc, i32 %x, i32 %y) {
7+
entry:
8+
%0 = call i32 @llvm.dx.dot4add.i8packed(i32 %acc, i32 %x, i32 %y)
9+
ret void
10+
}

llvm/test/CodeGen/DirectX/dot4add_u8packed.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-compute %s | FileCheck %s
1+
; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.4-compute %s | FileCheck %s
22

3-
define void @main(i32 %a, i32 %b, i32 %c) {
3+
define void @main(i32 %acc, i32 %x, i32 %y) {
44
entry:
5-
; CHECK: call i32 @dx.op.dot4AddPacked(i32 164, i32 %a, i32 %b, i32 %c) #[[#ATTR:]]
6-
%0 = call i32 @llvm.dx.dot4add.u8packed(i32 %a, i32 %b, i32 %c)
5+
; CHECK: call i32 @dx.op.dot4AddPacked.i32(i32 164, i32 %acc, i32 %x, i32 %y) #[[#ATTR:]]
6+
%0 = call i32 @llvm.dx.dot4add.u8packed(i32 %acc, i32 %x, i32 %y)
77
ret void
88
}
99

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
; RUN: not opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-compute %s 2>&1 | FileCheck %s
2+
3+
; CHECK: in function f
4+
; CHECK-SAME: Cannot create Dot4AddU8Packed operation: No valid overloads for DXIL version 1.3
5+
6+
define void @f(i32 %acc, i32 %x, i32 %y) {
7+
entry:
8+
%0 = call i32 @llvm.dx.dot4add.u8packed(i32 %acc, i32 %x, i32 %y)
9+
ret void
10+
}

llvm/test/CodeGen/SPIRV/hlsl-intrinsics/dot4add_i8packed.ll

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -17,49 +17,49 @@
1717
; CHECK-EXP-DAG: %[[#twentyfour:]] = OpConstant %[[#int_8]] 24
1818

1919
; CHECK-LABEL: Begin function test_dot
20-
define noundef i32 @test_dot(i32 noundef %a, i32 noundef %b, i32 noundef %c) {
20+
define noundef i32 @test_dot(i32 noundef %acc, i32 noundef %x, i32 noundef %y) {
2121
entry:
22-
; CHECK: %[[#A:]] = OpFunctionParameter %[[#int_32]]
23-
; CHECK: %[[#B:]] = OpFunctionParameter %[[#int_32]]
24-
; CHECK: %[[#C:]] = OpFunctionParameter %[[#int_32]]
22+
; CHECK: %[[#ACC:]] = OpFunctionParameter %[[#int_32]]
23+
; CHECK: %[[#X:]] = OpFunctionParameter %[[#int_32]]
24+
; CHECK: %[[#Y:]] = OpFunctionParameter %[[#int_32]]
2525

2626
; Test that we use the dot product op when capabilities allow
2727

28-
; CHECK-DOT: %[[#DOT:]] = OpSDot %[[#int_32]] %[[#A]] %[[#B]]
29-
; CHECK-DOT: %[[#RES:]] = OpIAdd %[[#int_32]] %[[#DOT]] %[[#C]]
28+
; CHECK-DOT: %[[#DOT:]] = OpSDot %[[#int_32]] %[[#X]] %[[#Y]]
29+
; CHECK-DOT: %[[#RES:]] = OpIAdd %[[#int_32]] %[[#DOT]] %[[#ACC]]
3030

3131
; Test expansion is used when spirv dot product capabilities aren't available:
3232

3333
; First element of the packed vector
34-
; CHECK-EXP: %[[#A0:]] = OpBitFieldSExtract %[[#int_32]] %[[#A]] %[[#zero]] %[[#eight]]
35-
; CHECK-EXP: %[[#B0:]] = OpBitFieldSExtract %[[#int_32]] %[[#B]] %[[#zero]] %[[#eight]]
36-
; CHECK-EXP: %[[#MUL0:]] = OpIMul %[[#int_32]] %[[#A0]] %[[#B0]]
34+
; CHECK-EXP: %[[#X0:]] = OpBitFieldSExtract %[[#int_32]] %[[#X]] %[[#zero]] %[[#eight]]
35+
; CHECK-EXP: %[[#Y0:]] = OpBitFieldSExtract %[[#int_32]] %[[#Y]] %[[#zero]] %[[#eight]]
36+
; CHECK-EXP: %[[#MUL0:]] = OpIMul %[[#int_32]] %[[#X0]] %[[#Y0]]
3737
; CHECK-EXP: %[[#MASK0:]] = OpBitFieldSExtract %[[#int_32]] %[[#MUL0]] %[[#zero]] %[[#eight]]
38-
; CHECK-EXP: %[[#ACC0:]] = OpIAdd %[[#int_32]] %[[#C]] %[[#MASK0]]
38+
; CHECK-EXP: %[[#ACC0:]] = OpIAdd %[[#int_32]] %[[#ACC]] %[[#MASK0]]
3939

4040
; Second element of the packed vector
41-
; CHECK-EXP: %[[#A1:]] = OpBitFieldSExtract %[[#int_32]] %[[#A]] %[[#eight]] %[[#eight]]
42-
; CHECK-EXP: %[[#B1:]] = OpBitFieldSExtract %[[#int_32]] %[[#B]] %[[#eight]] %[[#eight]]
43-
; CHECK-EXP: %[[#MUL1:]] = OpIMul %[[#int_32]] %[[#A1]] %[[#B1]]
41+
; CHECK-EXP: %[[#X1:]] = OpBitFieldSExtract %[[#int_32]] %[[#X]] %[[#eight]] %[[#eight]]
42+
; CHECK-EXP: %[[#Y1:]] = OpBitFieldSExtract %[[#int_32]] %[[#Y]] %[[#eight]] %[[#eight]]
43+
; CHECK-EXP: %[[#MUL1:]] = OpIMul %[[#int_32]] %[[#X1]] %[[#Y1]]
4444
; CHECK-EXP: %[[#MASK1:]] = OpBitFieldSExtract %[[#int_32]] %[[#MUL1]] %[[#zero]] %[[#eight]]
4545
; CHECK-EXP: %[[#ACC1:]] = OpIAdd %[[#int_32]] %[[#ACC0]] %[[#MASK1]]
4646

4747
; Third element of the packed vector
48-
; CHECK-EXP: %[[#A2:]] = OpBitFieldSExtract %[[#int_32]] %[[#A]] %[[#sixteen]] %[[#eight]]
49-
; CHECK-EXP: %[[#B2:]] = OpBitFieldSExtract %[[#int_32]] %[[#B]] %[[#sixteen]] %[[#eight]]
50-
; CHECK-EXP: %[[#MUL2:]] = OpIMul %[[#int_32]] %[[#A2]] %[[#B2]]
48+
; CHECK-EXP: %[[#X2:]] = OpBitFieldSExtract %[[#int_32]] %[[#X]] %[[#sixteen]] %[[#eight]]
49+
; CHECK-EXP: %[[#Y2:]] = OpBitFieldSExtract %[[#int_32]] %[[#Y]] %[[#sixteen]] %[[#eight]]
50+
; CHECK-EXP: %[[#MUL2:]] = OpIMul %[[#int_32]] %[[#X2]] %[[#Y2]]
5151
; CHECK-EXP: %[[#MASK2:]] = OpBitFieldSExtract %[[#int_32]] %[[#MUL2]] %[[#zero]] %[[#eight]]
5252
; CHECK-EXP: %[[#ACC2:]] = OpIAdd %[[#int_32]] %[[#ACC1]] %[[#MASK2]]
5353

5454
; Fourth element of the packed vector
55-
; CHECK-EXP: %[[#A3:]] = OpBitFieldSExtract %[[#int_32]] %[[#A]] %[[#twentyfour]] %[[#eight]]
56-
; CHECK-EXP: %[[#B3:]] = OpBitFieldSExtract %[[#int_32]] %[[#B]] %[[#twentyfour]] %[[#eight]]
57-
; CHECK-EXP: %[[#MUL3:]] = OpIMul %[[#int_32]] %[[#A3]] %[[#B3]]
55+
; CHECK-EXP: %[[#X3:]] = OpBitFieldSExtract %[[#int_32]] %[[#X]] %[[#twentyfour]] %[[#eight]]
56+
; CHECK-EXP: %[[#Y3:]] = OpBitFieldSExtract %[[#int_32]] %[[#Y]] %[[#twentyfour]] %[[#eight]]
57+
; CHECK-EXP: %[[#MUL3:]] = OpIMul %[[#int_32]] %[[#X3]] %[[#Y3]]
5858
; CHECK-EXP: %[[#MASK3:]] = OpBitFieldSExtract %[[#int_32]] %[[#MUL3]] %[[#zero]] %[[#eight]]
5959

6060
; CHECK-EXP: %[[#RES:]] = OpIAdd %[[#int_32]] %[[#ACC2]] %[[#MASK3]]
6161
; CHECK: OpReturnValue %[[#RES]]
62-
%spv.dot = call i32 @llvm.spv.dot4add.i8packed(i32 %a, i32 %b, i32 %c)
62+
%spv.dot = call i32 @llvm.spv.dot4add.i8packed(i32 %acc, i32 %x, i32 %y)
6363

6464
ret i32 %spv.dot
6565
}

llvm/test/CodeGen/SPIRV/hlsl-intrinsics/dot4add_u8packed.ll

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -17,49 +17,49 @@
1717
; CHECK-EXP-DAG: %[[#twentyfour:]] = OpConstant %[[#int_8]] 24
1818

1919
; CHECK-LABEL: Begin function test_dot
20-
define noundef i32 @test_dot(i32 noundef %a, i32 noundef %b, i32 noundef %c) {
20+
define noundef i32 @test_dot(i32 noundef %acc, i32 noundef %x, i32 noundef %y) {
2121
entry:
22-
; CHECK: %[[#A:]] = OpFunctionParameter %[[#int_32]]
23-
; CHECK: %[[#B:]] = OpFunctionParameter %[[#int_32]]
24-
; CHECK: %[[#C:]] = OpFunctionParameter %[[#int_32]]
22+
; CHECK: %[[#ACC:]] = OpFunctionParameter %[[#int_32]]
23+
; CHECK: %[[#X:]] = OpFunctionParameter %[[#int_32]]
24+
; CHECK: %[[#Y:]] = OpFunctionParameter %[[#int_32]]
2525

2626
; Test that we use the dot product op when capabilities allow
2727

28-
; CHECK-DOT: %[[#DOT:]] = OpUDot %[[#int_32]] %[[#A]] %[[#B]]
29-
; CHECK-DOT: %[[#RES:]] = OpIAdd %[[#int_32]] %[[#DOT]] %[[#C]]
28+
; CHECK-DOT: %[[#DOT:]] = OpUDot %[[#int_32]] %[[#X]] %[[#Y]]
29+
; CHECK-DOT: %[[#RES:]] = OpIAdd %[[#int_32]] %[[#DOT]] %[[#ACC]]
3030

3131
; Test expansion is used when spirv dot product capabilities aren't available:
3232

3333
; First element of the packed vector
34-
; CHECK-EXP: %[[#A0:]] = OpBitFieldUExtract %[[#int_32]] %[[#A]] %[[#zero]] %[[#eight]]
35-
; CHECK-EXP: %[[#B0:]] = OpBitFieldUExtract %[[#int_32]] %[[#B]] %[[#zero]] %[[#eight]]
36-
; CHECK-EXP: %[[#MUL0:]] = OpIMul %[[#int_32]] %[[#A0]] %[[#B0]]
34+
; CHECK-EXP: %[[#X0:]] = OpBitFieldUExtract %[[#int_32]] %[[#X]] %[[#zero]] %[[#eight]]
35+
; CHECK-EXP: %[[#Y0:]] = OpBitFieldUExtract %[[#int_32]] %[[#Y]] %[[#zero]] %[[#eight]]
36+
; CHECK-EXP: %[[#MUL0:]] = OpIMul %[[#int_32]] %[[#X0]] %[[#Y0]]
3737
; CHECK-EXP: %[[#MASK0:]] = OpBitFieldUExtract %[[#int_32]] %[[#MUL0]] %[[#zero]] %[[#eight]]
38-
; CHECK-EXP: %[[#ACC0:]] = OpIAdd %[[#int_32]] %[[#C]] %[[#MASK0]]
38+
; CHECK-EXP: %[[#ACC0:]] = OpIAdd %[[#int_32]] %[[#ACC]] %[[#MASK0]]
3939

4040
; Second element of the packed vector
41-
; CHECK-EXP: %[[#A1:]] = OpBitFieldUExtract %[[#int_32]] %[[#A]] %[[#eight]] %[[#eight]]
42-
; CHECK-EXP: %[[#B1:]] = OpBitFieldUExtract %[[#int_32]] %[[#B]] %[[#eight]] %[[#eight]]
43-
; CHECK-EXP: %[[#MUL1:]] = OpIMul %[[#int_32]] %[[#A1]] %[[#B1]]
41+
; CHECK-EXP: %[[#X1:]] = OpBitFieldUExtract %[[#int_32]] %[[#X]] %[[#eight]] %[[#eight]]
42+
; CHECK-EXP: %[[#Y1:]] = OpBitFieldUExtract %[[#int_32]] %[[#Y]] %[[#eight]] %[[#eight]]
43+
; CHECK-EXP: %[[#MUL1:]] = OpIMul %[[#int_32]] %[[#X1]] %[[#Y1]]
4444
; CHECK-EXP: %[[#MASK1:]] = OpBitFieldUExtract %[[#int_32]] %[[#MUL1]] %[[#zero]] %[[#eight]]
4545
; CHECK-EXP: %[[#ACC1:]] = OpIAdd %[[#int_32]] %[[#ACC0]] %[[#MASK1]]
4646

4747
; Third element of the packed vector
48-
; CHECK-EXP: %[[#A2:]] = OpBitFieldUExtract %[[#int_32]] %[[#A]] %[[#sixteen]] %[[#eight]]
49-
; CHECK-EXP: %[[#B2:]] = OpBitFieldUExtract %[[#int_32]] %[[#B]] %[[#sixteen]] %[[#eight]]
50-
; CHECK-EXP: %[[#MUL2:]] = OpIMul %[[#int_32]] %[[#A2]] %[[#B2]]
48+
; CHECK-EXP: %[[#X2:]] = OpBitFieldUExtract %[[#int_32]] %[[#X]] %[[#sixteen]] %[[#eight]]
49+
; CHECK-EXP: %[[#Y2:]] = OpBitFieldUExtract %[[#int_32]] %[[#Y]] %[[#sixteen]] %[[#eight]]
50+
; CHECK-EXP: %[[#MUL2:]] = OpIMul %[[#int_32]] %[[#X2]] %[[#Y2]]
5151
; CHECK-EXP: %[[#MASK2:]] = OpBitFieldUExtract %[[#int_32]] %[[#MUL2]] %[[#zero]] %[[#eight]]
5252
; CHECK-EXP: %[[#ACC2:]] = OpIAdd %[[#int_32]] %[[#ACC1]] %[[#MASK2]]
5353

5454
; Fourth element of the packed vector
55-
; CHECK-EXP: %[[#A3:]] = OpBitFieldUExtract %[[#int_32]] %[[#A]] %[[#twentyfour]] %[[#eight]]
56-
; CHECK-EXP: %[[#B3:]] = OpBitFieldUExtract %[[#int_32]] %[[#B]] %[[#twentyfour]] %[[#eight]]
57-
; CHECK-EXP: %[[#MUL3:]] = OpIMul %[[#int_32]] %[[#A3]] %[[#B3]]
55+
; CHECK-EXP: %[[#X3:]] = OpBitFieldUExtract %[[#int_32]] %[[#X]] %[[#twentyfour]] %[[#eight]]
56+
; CHECK-EXP: %[[#Y3:]] = OpBitFieldUExtract %[[#int_32]] %[[#Y]] %[[#twentyfour]] %[[#eight]]
57+
; CHECK-EXP: %[[#MUL3:]] = OpIMul %[[#int_32]] %[[#X3]] %[[#Y3]]
5858
; CHECK-EXP: %[[#MASK3:]] = OpBitFieldUExtract %[[#int_32]] %[[#MUL3]] %[[#zero]] %[[#eight]]
5959

6060
; CHECK-EXP: %[[#RES:]] = OpIAdd %[[#int_32]] %[[#ACC2]] %[[#MASK3]]
6161
; CHECK: OpReturnValue %[[#RES]]
62-
%spv.dot = call i32 @llvm.spv.dot4add.u8packed(i32 %a, i32 %b, i32 %c)
62+
%spv.dot = call i32 @llvm.spv.dot4add.u8packed(i32 %acc, i32 %x, i32 %y)
6363

6464
ret i32 %spv.dot
6565
}

0 commit comments

Comments
 (0)