-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[DirectX] Add support for typedBufferLoad and Store for RWBuffer<double2> and RWBuffer<double> #139996
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-backend-directx Author: Sarah Spall (spall) Changes: typedBufferLoad of double/double2 is expanded to a typedBufferLoad of a <2 x i32>/<4 x i32> and asdouble. Full diff: https://github.com/llvm/llvm-project/pull/139996.diff 5 Files Affected:
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index cff8d637dcb87..bfa41b36166aa 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -70,6 +70,15 @@ static bool isIntrinsicExpansion(Function &F) {
case Intrinsic::vector_reduce_add:
case Intrinsic::vector_reduce_fadd:
return true;
+ case Intrinsic::dx_resource_load_typedbuffer: // want to transform double and
+ // double2
+ return F.getReturnType()
+ ->getStructElementType(0)
+ ->getScalarType()
+ ->isDoubleTy();
+ case Intrinsic::dx_resource_store_typedbuffer: // want to transform double and
+ // double2
+ return F.getFunctionType()->getParamType(2)->getScalarType()->isDoubleTy();
}
return false;
}
@@ -532,6 +541,80 @@ static Value *expandRadiansIntrinsic(CallInst *Orig) {
return Builder.CreateFMul(X, PiOver180);
}
+/// Expand a dx.resource.load.typedbuffer call whose result is {double, i1} or
+/// {<2 x double>, i1} into a load of {<2 x i32>, i1} / {<4 x i32>, i1}
+/// followed by one dx.asdouble per (low, high) i32 pair.
+///
+/// Every user of \p Orig must be an ExtractValueInst with a single index.
+/// Extracts of element 0 are rewired to the rebuilt double value(s); extracts
+/// of element 1 are rewired to the check bit of the new load. Those extracts
+/// are erased here; \p Orig itself is left for the caller to erase.
+static void expandTypedBufferLoadIntrinsic(CallInst *Orig) {
+  IRBuilder<> Builder(Orig);
+
+  Type *BufferTy = Orig->getType()->getStructElementType(0);
+  assert(BufferTy->getScalarType()->isDoubleTy() &&
+         "Only double and double2 loads are expanded");
+  const bool IsVector = BufferTy->isVectorTy();
+  assert((!IsVector ||
+          cast<FixedVectorType>(BufferTy)->getNumElements() == 2) &&
+         "TypedBufferLoad double vector has wrong size");
+
+  // Each double is read back as two i32 halves.
+  const unsigned ExtractNum = IsVector ? 4 : 2;
+  Type *Ty = VectorType::get(Builder.getInt32Ty(), ExtractNum, false);
+
+  Type *LoadType = StructType::get(Ty, Builder.getInt1Ty());
+  auto *Load =
+      Builder.CreateIntrinsic(LoadType, Intrinsic::dx_resource_load_typedbuffer,
+                              {Orig->getOperand(0), Orig->getOperand(1)});
+
+  // Pull the i32 vector out of the {data, check-bit} struct.
+  Value *Extract = Builder.CreateExtractValue(Load, {0});
+
+  SmallVector<Value *> ExtractElements;
+  for (unsigned I = 0; I < ExtractNum; ++I)
+    ExtractElements.push_back(
+        Builder.CreateExtractElement(Extract, static_cast<uint64_t>(I)));
+
+  // Combine each (low, high) pair into a double. For double2 the doubles are
+  // inserted into a vector; for a scalar the single double is the result.
+  Value *Result = PoisonValue::get(BufferTy);
+  for (unsigned I = 0; I < ExtractNum; I += 2) {
+    Value *Dbl =
+        Builder.CreateIntrinsic(Builder.getDoubleTy(), Intrinsic::dx_asdouble,
+                                {ExtractElements[I], ExtractElements[I + 1]});
+    if (IsVector)
+      Result =
+          Builder.CreateInsertElement(Result, Dbl, static_cast<uint64_t>(I / 2));
+    else
+      Result = Dbl;
+  }
+
+  // Rewire every extractvalue of the original result. Erasing an extract
+  // removes it from Orig's use list, so the loop ends once all are handled.
+  Value *CheckBit = nullptr;
+  while (!Orig->use_empty()) {
+    auto *OldExtract = dyn_cast<ExtractValueInst>(Orig->user_back());
+    if (!OldExtract)
+      llvm_unreachable("TypedBufferLoad's only users should be ExtractValueInst");
+    assert(OldExtract->getNumIndices() == 1 &&
+           "Unexpected extractvalue of TypedBufferLoad result");
+
+    if (OldExtract->getIndices()[0] == 0) {
+      // Use of the loaded value(s).
+      OldExtract->replaceAllUsesWith(Result);
+    } else {
+      // Use of the check bit; create the replacement extract only on demand.
+      assert(OldExtract->getIndices()[0] == 1 &&
+             "Unexpected index into TypedBufferLoad result");
+      if (!CheckBit)
+        CheckBit = Builder.CreateExtractValue(Load, {1});
+      OldExtract->replaceAllUsesWith(CheckBit);
+    }
+    OldExtract->eraseFromParent();
+  }
+}
+
+/// Expand a dx.resource.store.typedbuffer call whose stored value is a double
+/// or <2 x double> into dx.splitdouble followed by a store of <2 x i32> /
+/// <4 x i32>, with each double laid out as (low bits, high bits).
+///
+/// The replacement store is inserted before \p Orig; \p Orig itself is left
+/// for the caller to erase.
+static void expandTypedBufferStoreIntrinsic(CallInst *Orig) {
+  IRBuilder<> Builder(Orig);
+
+  Type *DataTy = Orig->getFunctionType()->getParamType(2);
+  assert(DataTy->getScalarType()->isDoubleTy() &&
+         "Only double and double2 stores are expanded");
+  const bool IsVector = DataTy->isVectorTy();
+  assert((!IsVector ||
+          cast<FixedVectorType>(DataTy)->getNumElements() == 2) &&
+         "TypedBufferStore double vector has wrong size");
+
+  // splitdouble yields i32 halves for a scalar, <2 x i32> halves for double2.
+  Type *SplitElementTy = Builder.getInt32Ty();
+  if (IsVector)
+    SplitElementTy = VectorType::get(SplitElementTy, 2, false);
+
+  // Split the double(s) into low and high 32-bit parts.
+  auto *SplitTy = llvm::StructType::get(SplitElementTy, SplitElementTy);
+  Value *Split = Builder.CreateIntrinsic(SplitTy, Intrinsic::dx_splitdouble,
+                                         Orig->getOperand(2));
+  Value *LowBits = Builder.CreateExtractValue(Split, 0);
+  Value *HighBits = Builder.CreateExtractValue(Split, 1);
+
+  // Build the i32 vector that is actually stored.
+  Value *Val;
+  if (IsVector) {
+    // LowBits = <l0, l1>, HighBits = <h0, h1>; interleave to <l0, h0, l1, h1>.
+    Val = Builder.CreateShuffleVector(LowBits, HighBits, {0, 2, 1, 3});
+  } else {
+    Val = PoisonValue::get(VectorType::get(SplitElementTy, 2, false));
+    Val = Builder.CreateInsertElement(Val, LowBits, (uint64_t)0);
+    Val = Builder.CreateInsertElement(Val, HighBits, 1);
+  }
+
+  Builder.CreateIntrinsic(Builder.getVoidTy(),
+                          Intrinsic::dx_resource_store_typedbuffer,
+                          {Orig->getOperand(0), Orig->getOperand(1), Val});
+}
+
static Intrinsic::ID getMaxForClamp(Intrinsic::ID ClampIntrinsic) {
if (ClampIntrinsic == Intrinsic::dx_uclamp)
return Intrinsic::umax;
@@ -660,6 +743,14 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
case Intrinsic::dx_radians:
Result = expandRadiansIntrinsic(Orig);
break;
+ case Intrinsic::dx_resource_load_typedbuffer:
+ expandTypedBufferLoadIntrinsic(Orig);
+ Orig->eraseFromParent();
+ return true;
+ case Intrinsic::dx_resource_store_typedbuffer:
+ expandTypedBufferStoreIntrinsic(Orig);
+ Orig->eraseFromParent();
+ return true;
case Intrinsic::usub_sat:
Result = expandUsubSat(Orig);
break;
diff --git a/llvm/test/CodeGen/DirectX/BufferLoad.ll b/llvm/test/CodeGen/DirectX/BufferLoad.ll
index 6d5146a9026ce..96bfbb8db95ce 100644
--- a/llvm/test/CodeGen/DirectX/BufferLoad.ll
+++ b/llvm/test/CodeGen/DirectX/BufferLoad.ll
@@ -197,4 +197,36 @@ define void @loadv4i16() {
ret void
}
+define void @loadf64() {
+ ; show dxil op lower can handle typedbuffer load where target is double but load type is <2 x i32>
+ ; CHECK: [[B1:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 1 }, i32 1, i1 false) #0
+ %buffer = call target("dx.TypedBuffer", double, 1, 0, 0)
+ @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f64_1_0_0t(
+ i32 0, i32 1, i32 1, i32 0, i1 false)
+
+ ; CHECK: [[BA:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[B1]], %dx.types.ResourceProperties { i32 4106, i32 266 }) #0
+ %load = call { <2 x i32>, i1 } @llvm.dx.resource.load.typedbuffer(
+ target("dx.TypedBuffer", double, 1, 0, 0) %buffer, i32 0)
+
+; CHECK: call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 68, %dx.types.Handle [[BA]], i32 0, i32 undef) #1
+ %val = extractvalue { <2 x i32>, i1 } %load, 0
+ ret void
+}
+
+define void @loadv2f64() {
+ ; show dxil op lower can handle typedbuffer load where target is double2 but load type is <4 x i32>
+ ; CHECK: [[B1:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 1 }, i32 1, i1 false) #0
+ %buffer = call target("dx.TypedBuffer", <2 x double>, 1, 0, 0)
+ @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2f64_1_0_0t(
+ i32 0, i32 1, i32 1, i32 0, i1 false)
+
+ ; CHECK: [[BA:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[B1]], %dx.types.ResourceProperties { i32 4106, i32 522 }) #0
+ %load = call { <4 x i32>, i1 } @llvm.dx.resource.load.typedbuffer(
+ target("dx.TypedBuffer", <2 x double>, 1, 0, 0) %buffer, i32 0)
+
+ ; CHECK: call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 68, %dx.types.Handle [[BA]], i32 0, i32 undef) #1
+ %val = extractvalue { <4 x i32>, i1 } %load, 0
+ ret void
+}
+
; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(read) {{.*}}}
diff --git a/llvm/test/CodeGen/DirectX/BufferLoadDouble.ll b/llvm/test/CodeGen/DirectX/BufferLoadDouble.ll
new file mode 100644
index 0000000000000..53ed74b9868e4
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/BufferLoadDouble.ll
@@ -0,0 +1,58 @@
+; RUN: opt -S -dxil-intrinsic-expansion %s | FileCheck %s
+
+target triple = "dxil-pc-shadermodel6.6-compute"
+
+define void @loadf64() {
+ ; check the handle from binding is unchanged
+ ; CHECK: [[B:%.*]] = call target("dx.TypedBuffer", double, 1, 0, 0)
+ ; CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f64_1_0_0t(
+ ; CHECK-SAME: i32 0, i32 1, i32 1, i32 0, i1 false)
+ %buffer = call target("dx.TypedBuffer", double, 1, 0, 0)
+ @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f64_1_0_0t(
+ i32 0, i32 1, i32 1, i32 0, i1 false)
+
+ ; check we load an <2 x i32> instead of a double
+ ; CHECK: [[L0:%.*]] = call { <2 x i32>, i1 }
+ ; CHECK-SAME: @llvm.dx.resource.load.typedbuffer.v2i32.tdx.TypedBuffer_f64_1_0_0t(
+ ; CHECK-SAME: target("dx.TypedBuffer", double, 1, 0, 0) [[B]], i32 0)
+ %load0 = call {double, i1} @llvm.dx.resource.load.typedbuffer(
+ target("dx.TypedBuffer", double, 1, 0, 0) %buffer, i32 0)
+
+ ; check we extract the two i32 and construct a double
+ ; CHECK: [[D0:%.*]] = extractvalue { <2 x i32>, i1 } [[L0]], 0
+ ; CHECK: [[Lo:%.*]] = extractelement <2 x i32> [[D0]], i64 0
+ ; CHECK: [[Hi:%.*]] = extractelement <2 x i32> [[D0]], i64 1
+ ; CHECK: call double @llvm.dx.asdouble.i32(i32 [[Lo]], i32 [[Hi]])
+ %data0 = extractvalue {double, i1} %load0, 0
+ ret void
+}
+
+define void @loadv2f64() {
+ ; check the handle from binding is unchanged
+ ; CHECK: [[B:%.*]] = call target("dx.TypedBuffer", <2 x double>, 1, 0, 0)
+ ; CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2f64_1_0_0t(
+ ; CHECK-SAME: i32 0, i32 1, i32 1, i32 0, i1 false)
+ %buffer = call target("dx.TypedBuffer", <2 x double>, 1, 0, 0)
+ @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2f64_1_0_0t(
+ i32 0, i32 1, i32 1, i32 0, i1 false)
+
+ ; check we load an <4 x i32> instead of a double2
+ ; CHECK: [[L0:%.*]] = call { <4 x i32>, i1 }
+ ; CHECK-SAME: @llvm.dx.resource.load.typedbuffer.v4i32.tdx.TypedBuffer_v2f64_1_0_0t(
+ ; CHECK-SAME: target("dx.TypedBuffer", <2 x double>, 1, 0, 0) [[B]], i32 0)
+ %load0 = call { <2 x double>, i1 } @llvm.dx.resource.load.typedbuffer(
+ target("dx.TypedBuffer", <2 x double>, 1, 0, 0) %buffer, i32 0)
+
+ ; check we extract the 4 i32 and construct a <2 x double>
+ ; CHECK: [[D0:%.*]] = extractvalue { <4 x i32>, i1 } [[L0]], 0
+ ; CHECK: [[Lo1:%.*]] = extractelement <4 x i32> [[D0]], i64 0
+ ; CHECK: [[Hi1:%.*]] = extractelement <4 x i32> [[D0]], i64 1
+ ; CHECK: [[Lo2:%.*]] = extractelement <4 x i32> [[D0]], i64 2
+ ; CHECK: [[Hi2:%.*]] = extractelement <4 x i32> [[D0]], i64 3
+ ; CHECK: [[Dbl1:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo1]], i32 [[Hi1]])
+ ; CHECK: [[Vec:%.*]] = insertelement <2 x double> poison, double [[Dbl1]], i64 0
+ ; CHECK: [[Dbl2:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo2]], i32 [[Hi2]])
+ ; CHECK: insertelement <2 x double> [[Vec]], double [[Dbl2]], i64 1
+ %data0 = extractvalue { <2 x double>, i1 } %load0, 0
+ ret void
+}
diff --git a/llvm/test/CodeGen/DirectX/BufferStore.ll b/llvm/test/CodeGen/DirectX/BufferStore.ll
index 363a3c723bfd5..e21047c9296d1 100644
--- a/llvm/test/CodeGen/DirectX/BufferStore.ll
+++ b/llvm/test/CodeGen/DirectX/BufferStore.ll
@@ -161,3 +161,44 @@ define void @store_scalarized_floats(float %data0, float %data1, float %data2, f
ret void
}
+
+define void @storef64(<2 x i32> %0) {
+ ; CHECK: [[B1:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217,
+ ; CHECK: [[BA:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[B1]]
+
+ %buffer = tail call target("dx.TypedBuffer", double, 1, 0, 0)
+ @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f64_1_0_0t(
+ i32 0, i32 0, i32 1, i32 0, i1 false)
+
+ ; The temporary casts should all have been cleaned up
+ ; CHECK-NOT: %dx.resource.casthandle
+
+ ; Match the extracted lanes through FileCheck captures rather than literal SSA numbers
+ ; CHECK: [[D0:%.*]] = extractelement <2 x i32> %0, i32 0
+ ; CHECK: [[D1:%.*]] = extractelement <2 x i32> %0, i32 1
+ ; CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle [[BA]], i32 0, i32 undef, i32 [[D0]], i32 [[D1]], i32 [[D0]], i32 [[D0]], i8 15)
+ call void @llvm.dx.resource.store.typedbuffer(
+ target("dx.TypedBuffer", double, 1, 0, 0) %buffer, i32 0, <2 x i32> %0)
+ ret void
+}
+
+define void @storev2f64(<4 x i32> %0) {
+ ; CHECK: [[B1:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217,
+ ; CHECK: [[BA:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[B1]]
+
+ %buffer = tail call target("dx.TypedBuffer", <2 x double>, 1, 0, 0)
+ @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2f64_1_0_0t(
+ i32 0, i32 0, i32 1, i32 0, i1 false)
+
+ ; The temporary casts should all have been cleaned up
+ ; CHECK-NOT: %dx.resource.casthandle
+
+ ; CHECK: [[D0:%.*]] = extractelement <4 x i32> %0, i32 0
+ ; CHECK: [[D1:%.*]] = extractelement <4 x i32> %0, i32 1
+ ; CHECK: [[D2:%.*]] = extractelement <4 x i32> %0, i32 2
+ ; CHECK: [[D3:%.*]] = extractelement <4 x i32> %0, i32 3
+ ; CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle [[BA]], i32 0, i32 undef, i32 [[D0]], i32 [[D1]], i32 [[D2]], i32 [[D3]], i8 15)
+ call void @llvm.dx.resource.store.typedbuffer(
+ target("dx.TypedBuffer", <2 x double>, 1, 0, 0) %buffer, i32 0,
+ <4 x i32> %0)
+ ret void
+}
diff --git a/llvm/test/CodeGen/DirectX/BufferStoreDouble.ll b/llvm/test/CodeGen/DirectX/BufferStoreDouble.ll
new file mode 100644
index 0000000000000..bb4dbb5efb593
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/BufferStoreDouble.ll
@@ -0,0 +1,47 @@
+; RUN: opt -S -dxil-intrinsic-expansion %s | FileCheck %s
+
+target triple = "dxil-pc-shadermodel6.6-compute"
+
+define void @storef64(double %0) {
+ ; CHECK: [[B:%.*]] = tail call target("dx.TypedBuffer", double, 1, 0, 0)
+ ; CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f64_1_0_0t(
+ ; CHECK-SAME: i32 0, i32 0, i32 1, i32 0, i1 false)
+ %buffer = tail call target("dx.TypedBuffer", double, 1, 0, 0)
+ @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f64_1_0_0t(
+ i32 0, i32 0, i32 1, i32 0, i1 false)
+
+ ; check we split the double and store the lo and hi bits
+ ; CHECK: [[SD:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double %0)
+ ; CHECK: [[Lo:%.*]] = extractvalue { i32, i32 } [[SD]], 0
+ ; CHECK: [[Hi:%.*]] = extractvalue { i32, i32 } [[SD]], 1
+ ; CHECK: [[Vec1:%.*]] = insertelement <2 x i32> poison, i32 [[Lo]], i64 0
+ ; CHECK: [[Vec2:%.*]] = insertelement <2 x i32> [[Vec1]], i32 [[Hi]], i64 1
+ ; CHECK: call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_f64_1_0_0t.v2i32(
+ ; CHECK-SAME: target("dx.TypedBuffer", double, 1, 0, 0) [[B]], i32 0, <2 x i32> [[Vec2]])
+ call void @llvm.dx.resource.store.typedbuffer(
+ target("dx.TypedBuffer", double, 1, 0, 0) %buffer, i32 0,
+ double %0)
+ ret void
+}
+
+
+define void @storev2f64(<2 x double> %0) {
+ ; CHECK: [[B:%.*]] = tail call target("dx.TypedBuffer", <2 x double>, 1, 0, 0)
+ ; CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2f64_1_0_0t(
+ ; CHECK-SAME: i32 0, i32 0, i32 1, i32 0, i1 false)
+ %buffer = tail call target("dx.TypedBuffer", <2 x double>, 1, 0, 0)
+ @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2f64_1_0_0t(
+ i32 0, i32 0, i32 1, i32 0, i1 false)
+
+ ; CHECK: [[SD:%.*]] = call { <2 x i32>, <2 x i32> }
+ ; CHECK-SAME: @llvm.dx.splitdouble.v2i32(<2 x double> %0)
+ ; CHECK: [[Lo:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[SD]], 0
+ ; CHECK: [[Hi:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[SD]], 1
+ ; CHECK: [[Vec:%.*]] = shufflevector <2 x i32> [[Lo]], <2 x i32> [[Hi]], <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+ ; CHECK: call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_v2f64_1_0_0t.v4i32(
+ ; CHECK-SAME: target("dx.TypedBuffer", <2 x double>, 1, 0, 0) [[B]], i32 0, <4 x i32> [[Vec]])
+ call void @llvm.dx.resource.store.typedbuffer(
+ target("dx.TypedBuffer", <2 x double>, 1, 0, 0) %buffer, i32 0,
+ <2 x double> %0)
+ ret void
+}
|
You can test this locally with the following command: git diff -U0 --pickaxe-regex -S '([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)' 'HEAD~1' HEAD llvm/test/CodeGen/DirectX/BufferLoadDouble.ll llvm/test/CodeGen/DirectX/BufferStoreDouble.ll llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp llvm/test/CodeGen/DirectX/BufferLoad.ll llvm/test/CodeGen/DirectX/BufferStore.ll The following files introduce new uses of undef:
Undef is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields `undef`. In tests, avoid using `undef`. For example, this is considered a bad practice: define void @fn() {
...
br i1 undef, ...
} Please use the following instead: define void @fn(i1 %cond) {
...
br i1 %cond, ...
} Please refer to the Undefined Behavior Manual for more information. |
typedBufferLoad of double/double2 is expanded to a typedBufferLoad of a <2 x i32>/<4 x i32> and asdouble
typedBufferStore of a double/double2 is expanded to a splitdouble and a typedBufferStore of a <2 x i32>/<4 x i32>
Add tests showing result of intrinsic expansion for typedBufferLoad and typedBufferStore
Add tests showing dxil op lowering can handle typedBufferLoad and typedBufferStore where the target type doesn't match the typedBufferLoad and typedBufferStore type
Closes #104423