-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[DirectX] add GEP i8 legalization #142475
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
The i8 legalization code in DXILLegalizePass's `fixI8UseChain` needs to be updated to check for i8 geps. It seems like there are i8 GEPs being left around after we remove all the other i8 instructions and this is causing problem on validation. Since this is cleaning up a missed GEP The approach is to assume the getPointerOperand is to an alloca we further will check if this is an array alloca then do some byte offset arithmetic to figure out the memory index to use. Finally we will emit the new gep and cleanup the old one. Finally needed to update upcastI8AllocasAndUses to account for loads off of GEPs instead of just loads from the alloca.
@llvm/pr-subscribers-clang @llvm/pr-subscribers-backend-directx Author: Farzon Lotfi (farzonl) Changesfixes #140415 The i8 legalization code in DXILLegalizePass's Since this is cleaning up a missed GEP The approach is to assume the getPointerOperand is to an alloca we further will check if this is an array alloca then do some byte offset arithmetic to figure out the memory index to use. Finally we will emit the new gep and cleanup the old one. Finally needed to update upcastI8AllocasAndUses to account for loads off of GEPs instead of just loads from the alloca. Full diff: https://github.com/llvm/llvm-project/pull/142475.diff 2 Files Affected:
diff --git a/llvm/lib/Target/DirectX/DXILLegalizePass.cpp b/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
index 23883c936a20d..f6a80ebfc8435 100644
--- a/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
+++ b/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
@@ -95,6 +95,8 @@ static void fixI8UseChain(Instruction &I,
Type *ElementType = NewOperands[0]->getType();
if (auto *AI = dyn_cast<AllocaInst>(NewOperands[0]))
ElementType = AI->getAllocatedType();
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(NewOperands[0]))
+ ElementType = GEP->getSourceElementType();
LoadInst *NewLoad = Builder.CreateLoad(ElementType, NewOperands[0]);
ReplacedValues[Load] = NewLoad;
ToRemove.push_back(Load);
@@ -164,6 +166,36 @@ static void fixI8UseChain(Instruction &I,
if (AdjustedCast)
Cast->replaceAllUsesWith(AdjustedCast);
}
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
+ if (!GEP->getType()->isPointerTy() ||
+ !GEP->getSourceElementType()->isIntegerTy(8))
+ return;
+
+ Value *BasePtr = GEP->getPointerOperand();
+ if (ReplacedValues.count(BasePtr))
+ BasePtr = ReplacedValues[BasePtr];
+
+ Type *ElementType = BasePtr->getType();
+ if (auto *AI = dyn_cast<AllocaInst>(BasePtr))
+ ElementType = AI->getAllocatedType();
+ if (auto *ArrTy = dyn_cast<ArrayType>(ElementType))
+ ElementType = ArrTy->getArrayElementType();
+
+ ConstantInt *Offset = dyn_cast<ConstantInt>(GEP->getOperand(1));
+ // Note: the only way to convert an i8 offset to an i32 offset without
+ // emitting code Would be to emit code. We sould expect this value to be a
+ // ConstantInt since Offsets are very regulalrly converted.
+ assert(Offset && "Offset is expected to be a ConstantInt");
+ uint32_t ByteOffset = Offset->getZExtValue();
+ uint32_t ElemSize = GEP->getDataLayout().getTypeAllocSize(ElementType);
+ assert(ElemSize > 0 && "ElementSize must be set");
+ uint32_t Index = ByteOffset / ElemSize;
+ Value *NewGEP =
+ Builder.CreateGEP(ElementType, BasePtr, Builder.getInt32(Index),
+ GEP->getName(), GEP->getNoWrapFlags());
+ ReplacedValues[GEP] = NewGEP;
+ ToRemove.push_back(GEP);
+ }
}
static void upcastI8AllocasAndUses(Instruction &I,
@@ -175,15 +207,12 @@ static void upcastI8AllocasAndUses(Instruction &I,
Type *SmallestType = nullptr;
- for (User *U : AI->users()) {
- auto *Load = dyn_cast<LoadInst>(U);
- if (!Load)
- continue;
+ auto ProcessLoad = [&](LoadInst *Load) {
for (User *LU : Load->users()) {
Type *Ty = nullptr;
- if (auto *Cast = dyn_cast<CastInst>(LU))
+ if (auto *Cast = dyn_cast<CastInst>(LU)) {
Ty = Cast->getType();
- if (CallInst *CI = dyn_cast<CallInst>(LU)) {
+ } else if (auto *CI = dyn_cast<CallInst>(LU)) {
if (CI->getIntrinsicID() == Intrinsic::memset)
Ty = Type::getInt32Ty(CI->getContext());
}
@@ -191,9 +220,22 @@ static void upcastI8AllocasAndUses(Instruction &I,
if (!Ty)
continue;
- if (!SmallestType ||
- Ty->getPrimitiveSizeInBits() < SmallestType->getPrimitiveSizeInBits())
+ if (!SmallestType || Ty->getPrimitiveSizeInBits() <
+ SmallestType->getPrimitiveSizeInBits()) {
SmallestType = Ty;
+ }
+ }
+ };
+
+ for (User *U : AI->users()) {
+ if (auto *Load = dyn_cast<LoadInst>(U))
+ ProcessLoad(Load);
+ else if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
+ for (User *GU : GEP->users()) {
+ if (auto *Load = dyn_cast<LoadInst>(GU)) {
+ ProcessLoad(Load);
+ }
+ }
}
}
diff --git a/llvm/test/CodeGen/DirectX/legalize-i8.ll b/llvm/test/CodeGen/DirectX/legalize-i8.ll
index 2602be778cd86..d1d76ccf5c76c 100644
--- a/llvm/test/CodeGen/DirectX/legalize-i8.ll
+++ b/llvm/test/CodeGen/DirectX/legalize-i8.ll
@@ -106,3 +106,57 @@ define i32 @all_imm() {
%2 = sext i8 %1 to i32
ret i32 %2
}
+
+define i32 @scalar_i8_geps() {
+ ; CHECK-LABEL: define i32 @scalar_i8_geps(
+ ; CHECK-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4
+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw i32, ptr [[ALLOCA]], i32 0
+ ; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4
+ ; CHECK-NEXT: ret i32 [[LOAD]]
+ %1 = alloca i8, align 4
+ %2 = getelementptr inbounds nuw i8, ptr %1, i32 0
+ %3 = load i8, ptr %2
+ %4 = sext i8 %3 to i32
+ ret i32 %4
+}
+
+define i32 @i8_geps_index0() {
+ ; CHECK-LABEL: define i32 @i8_geps_index0(
+ ; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [2 x i32], align 8
+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw i32, ptr [[ALLOCA]], i32 0
+ ; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4
+ ; CHECK-NEXT: ret i32 [[LOAD]]
+ %1 = alloca [2 x i32], align 8
+ %2 = getelementptr inbounds nuw i8, ptr %1, i32 0
+ %3 = load i8, ptr %2
+ %4 = sext i8 %3 to i32
+ ret i32 %4
+}
+
+define i32 @i8_geps_index1() {
+ ; CHECK-LABEL: define i32 @i8_geps_index1(
+ ; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [2 x i32], align 8
+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw i32, ptr [[ALLOCA]], i32 1
+ ; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[GEP]]
+ ; CHECK-NEXT: ret i32 [[LOAD]]
+ %1 = alloca [2 x i32], align 8
+ %2 = getelementptr inbounds nuw i8, ptr %1, i32 4
+ %3 = load i8, ptr %2
+ %4 = sext i8 %3 to i32
+ ret i32 %4
+}
+
+define i32 @i8_gep_store() {
+ ; CHECK-LABEL: define i32 @i8_gep_store(
+ ; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [2 x i32], align 8
+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw i32, ptr [[ALLOCA]], i32 1
+ ; CHECK-NEXT: store i32 1, ptr [[GEP]], align 4
+ ; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[GEP]]
+ ; CHECK-NEXT: ret i32 [[LOAD]]
+ %1 = alloca [2 x i32], align 8
+ %2 = getelementptr inbounds nuw i8, ptr %1, i32 4
+ store i8 1, ptr %2
+ %3 = load i8, ptr %2
+ %4 = sext i8 %3 to i32
+ ret i32 %4
+}
|
|
||
ConstantInt *Offset = dyn_cast<ConstantInt>(GEP->getOperand(1)); | ||
// Note: the only way to convert an i8 offset to an i32 offset without | ||
// emitting code Would be to emit code. We sould expect this value to be a |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
should
This PR appears to fail to compile this shader: https://godbolt.org/z/9MPc8Wca5 Stack trace:
|
Found a shader where Clang emits an i8 GEP that fails to legalize with this PR
This shader now compiles, but it currently fails DXIL validation with the message
I think the i8 legalization is producing an incorrect GEP. I see the pass converting these instructions: %2 = getelementptr inbounds nuw i8, ptr addrspace(3) @g, i32 4
%3 = load float, ptr addrspace(3) %2, align 4 into %2 = load float, ptr addrspace(3) getelementptr inbounds nuw (ptr addrspace(3), ptr addrspace(3) @g, i32 1), align 4 but I would have expected this instead: %2 = load float, ptr addrspace(3) getelementptr inbounds nuw ([2 x float], ptr addrspace(3) @g, i32 0, i32 1), align 4 because the global is defined as %arrayidx.i = getelementptr inbounds nuw [2 x float], ptr addrspace(3) @g, i32 0, i32 %1
store float 0.000000e+00, ptr addrspace(3) %arrayidx.i, align 4, !tbaa !5 |
the bug I was looking at used allocas, this one uses a globals its an easy fix to add:
However during my testing I found another gap. we still need to figure out how to support constExpr GEPs.
|
Incorporating that change still results in a shader that doesn't pass validation due to the same message: The DXIL Legalize pass now emits: @g = local_unnamed_addr addrspace(3) global [2 x float] zeroinitializer, align 4
...
define void @CSMain() local_unnamed_addr #0 {
entry:
...
%2 = load float, ptr addrspace(3) getelementptr inbounds nuw (float, ptr addrspace(3) @g, i32 1), align 4 I would expect the %2 = load float, ptr addrspace(3) getelementptr inbounds nuw ([2 x float], ptr addrspace(3) @g, i32 0, i32 1), align 4 Since vectors have been scalarized and arrays have been flattened, this should be straightforward to fix. |
…with constexpr GEPs
221c685
to
74b05e0
Compare
fixes #140415
The i8 legalization code in DXILLegalizePass's
fixI8UseChain
needs to be updated to check for i8 geps.It seems like there are i8 GEPs being left around after we remove all the other i8 instructions and this is causing problem on validation.
Since this is cleaning up a missed GEP The approach is to assume the getPointerOperand is to an alloca we further will check if this is an array alloca then do some byte offset arithmetic to figure out the memory index to use. Finally we will emit the new gep and cleanup the old one.
Finally needed to update upcastI8AllocasAndUses to account for loads off of GEPs instead of just loads from the alloca.