Skip to content

Commit 5411ebd

Browse files
authored
[DirectX] add GEP i8 legalization (#142475)
Fixes #140415. The i8 legalization code in DXILLegalizePass's `fixI8UseChain` needs to be updated to check for i8 GEPs. Some i8 GEPs were being left behind after all the other i8 instructions had been removed, which caused problems during validation. Since this cleans up a missed GEP, the approach is to assume that the GEP's pointer operand (`getPointerOperand`) is an alloca; we then check whether it is an array alloca and do some byte-offset arithmetic to work out the memory index to use. We then emit the new GEP and clean up the old one. Finally, `upcastI8AllocasAndUses` needed to be updated to account for loads off of GEPs instead of just loads from the alloca.
1 parent 2eb9655 commit 5411ebd

File tree

2 files changed

+172
-9
lines changed

2 files changed

+172
-9
lines changed

llvm/lib/Target/DirectX/DXILLegalizePass.cpp

Lines changed: 100 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -87,20 +87,63 @@ static void fixI8UseChain(Instruction &I,
8787
return;
8888
}
8989

90-
if (auto *Load = dyn_cast<LoadInst>(&I)) {
91-
if (!I.getType()->isIntegerTy(8))
92-
return;
90+
if (auto *Load = dyn_cast<LoadInst>(&I);
91+
Load && I.getType()->isIntegerTy(8)) {
9392
SmallVector<Value *> NewOperands;
9493
ProcessOperands(NewOperands);
9594
Type *ElementType = NewOperands[0]->getType();
9695
if (auto *AI = dyn_cast<AllocaInst>(NewOperands[0]))
9796
ElementType = AI->getAllocatedType();
97+
if (auto *GEP = dyn_cast<GetElementPtrInst>(NewOperands[0])) {
98+
ElementType = GEP->getSourceElementType();
99+
if (ElementType->isArrayTy())
100+
ElementType = ElementType->getArrayElementType();
101+
}
98102
LoadInst *NewLoad = Builder.CreateLoad(ElementType, NewOperands[0]);
99103
ReplacedValues[Load] = NewLoad;
100104
ToRemove.push_back(Load);
101105
return;
102106
}
103107

108+
if (auto *Load = dyn_cast<LoadInst>(&I);
109+
Load && isa<ConstantExpr>(Load->getPointerOperand())) {
110+
auto *CE = dyn_cast<ConstantExpr>(Load->getPointerOperand());
111+
if (!(CE->getOpcode() == Instruction::GetElementPtr))
112+
return;
113+
auto *GEP = dyn_cast<GEPOperator>(CE);
114+
if (!GEP->getSourceElementType()->isIntegerTy(8))
115+
return;
116+
117+
Type *ElementType = Load->getType();
118+
ConstantInt *Offset = dyn_cast<ConstantInt>(GEP->getOperand(1));
119+
uint32_t ByteOffset = Offset->getZExtValue();
120+
uint32_t ElemSize = Load->getDataLayout().getTypeAllocSize(ElementType);
121+
uint32_t Index = ByteOffset / ElemSize;
122+
123+
Value *PtrOperand = GEP->getPointerOperand();
124+
Type *GEPType = GEP->getPointerOperandType();
125+
126+
if (auto *GV = dyn_cast<GlobalVariable>(PtrOperand))
127+
GEPType = GV->getValueType();
128+
if (auto *AI = dyn_cast<AllocaInst>(PtrOperand))
129+
GEPType = AI->getAllocatedType();
130+
131+
if (auto *ArrTy = dyn_cast<ArrayType>(GEPType))
132+
GEPType = ArrTy;
133+
else
134+
GEPType = ArrayType::get(ElementType, 1); // its a scalar
135+
136+
Value *NewGEP = Builder.CreateGEP(
137+
GEPType, PtrOperand, {Builder.getInt32(0), Builder.getInt32(Index)},
138+
GEP->getName(), GEP->getNoWrapFlags());
139+
140+
LoadInst *NewLoad = Builder.CreateLoad(ElementType, NewGEP);
141+
ReplacedValues[Load] = NewLoad;
142+
Load->replaceAllUsesWith(NewLoad);
143+
ToRemove.push_back(Load);
144+
return;
145+
}
146+
104147
if (auto *BO = dyn_cast<BinaryOperator>(&I)) {
105148
if (!I.getType()->isIntegerTy(8))
106149
return;
@@ -155,6 +198,7 @@ static void fixI8UseChain(Instruction &I,
155198
Cast->replaceAllUsesWith(Replacement);
156199
return;
157200
}
201+
158202
Value *AdjustedCast = nullptr;
159203
if (Cast->getOpcode() == Instruction::ZExt)
160204
AdjustedCast = Builder.CreateZExtOrTrunc(Replacement, Cast->getType());
@@ -164,6 +208,45 @@ static void fixI8UseChain(Instruction &I,
164208
if (AdjustedCast)
165209
Cast->replaceAllUsesWith(AdjustedCast);
166210
}
211+
if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
212+
if (!GEP->getType()->isPointerTy() ||
213+
!GEP->getSourceElementType()->isIntegerTy(8))
214+
return;
215+
216+
Value *BasePtr = GEP->getPointerOperand();
217+
if (ReplacedValues.count(BasePtr))
218+
BasePtr = ReplacedValues[BasePtr];
219+
220+
Type *ElementType = BasePtr->getType();
221+
222+
if (auto *AI = dyn_cast<AllocaInst>(BasePtr))
223+
ElementType = AI->getAllocatedType();
224+
if (auto *GV = dyn_cast<GlobalVariable>(BasePtr))
225+
ElementType = GV->getValueType();
226+
227+
Type *GEPType = ElementType;
228+
if (auto *ArrTy = dyn_cast<ArrayType>(ElementType))
229+
ElementType = ArrTy->getArrayElementType();
230+
else
231+
GEPType = ArrayType::get(ElementType, 1); // its a scalar
232+
233+
ConstantInt *Offset = dyn_cast<ConstantInt>(GEP->getOperand(1));
234+
// Note: i8 to i32 offset conversion without emitting IR requires constant
235+
// ints. Since offset conversion is common, we can safely assume Offset is
236+
// always a ConstantInt, so no need to have a conditional bail out on
237+
// nullptr, instead assert this is the case.
238+
assert(Offset && "Offset is expected to be a ConstantInt");
239+
uint32_t ByteOffset = Offset->getZExtValue();
240+
uint32_t ElemSize = GEP->getDataLayout().getTypeAllocSize(ElementType);
241+
assert(ElemSize > 0 && "ElementSize must be set");
242+
uint32_t Index = ByteOffset / ElemSize;
243+
Value *NewGEP = Builder.CreateGEP(
244+
GEPType, BasePtr, {Builder.getInt32(0), Builder.getInt32(Index)},
245+
GEP->getName(), GEP->getNoWrapFlags());
246+
ReplacedValues[GEP] = NewGEP;
247+
GEP->replaceAllUsesWith(NewGEP);
248+
ToRemove.push_back(GEP);
249+
}
167250
}
168251

169252
static void upcastI8AllocasAndUses(Instruction &I,
@@ -175,15 +258,12 @@ static void upcastI8AllocasAndUses(Instruction &I,
175258

176259
Type *SmallestType = nullptr;
177260

178-
for (User *U : AI->users()) {
179-
auto *Load = dyn_cast<LoadInst>(U);
180-
if (!Load)
181-
continue;
261+
auto ProcessLoad = [&](LoadInst *Load) {
182262
for (User *LU : Load->users()) {
183263
Type *Ty = nullptr;
184-
if (auto *Cast = dyn_cast<CastInst>(LU))
264+
if (CastInst *Cast = dyn_cast<CastInst>(LU))
185265
Ty = Cast->getType();
186-
if (CallInst *CI = dyn_cast<CallInst>(LU)) {
266+
else if (CallInst *CI = dyn_cast<CallInst>(LU)) {
187267
if (CI->getIntrinsicID() == Intrinsic::memset)
188268
Ty = Type::getInt32Ty(CI->getContext());
189269
}
@@ -195,6 +275,17 @@ static void upcastI8AllocasAndUses(Instruction &I,
195275
Ty->getPrimitiveSizeInBits() < SmallestType->getPrimitiveSizeInBits())
196276
SmallestType = Ty;
197277
}
278+
};
279+
280+
for (User *U : AI->users()) {
281+
if (auto *Load = dyn_cast<LoadInst>(U))
282+
ProcessLoad(Load);
283+
else if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
284+
for (User *GU : GEP->users()) {
285+
if (auto *Load = dyn_cast<LoadInst>(GU))
286+
ProcessLoad(Load);
287+
}
288+
}
198289
}
199290

200291
if (!SmallestType)

llvm/test/CodeGen/DirectX/legalize-i8.ll

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,3 +106,75 @@ define i32 @all_imm() {
106106
%2 = sext i8 %1 to i32
107107
ret i32 %2
108108
}
109+
110+
define i32 @scalar_i8_geps() {
111+
; CHECK-LABEL: define i32 @scalar_i8_geps(
112+
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4
113+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw [1 x i32], ptr [[ALLOCA]], i32 0, i32 0
114+
; CHECK: [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4
115+
; CHECK-NEXT: ret i32 [[LOAD]]
116+
%1 = alloca i8, align 4
117+
%2 = getelementptr inbounds nuw i8, ptr %1, i32 0
118+
%3 = load i8, ptr %2
119+
%4 = sext i8 %3 to i32
120+
ret i32 %4
121+
}
122+
123+
define i32 @i8_geps_index0() {
124+
; CHECK-LABEL: define i32 @i8_geps_index0(
125+
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [2 x i32], align 8
126+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw [2 x i32], ptr [[ALLOCA]], i32 0, i32 0
127+
; CHECK: [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4
128+
; CHECK-NEXT: ret i32 [[LOAD]]
129+
%1 = alloca [2 x i32], align 8
130+
%2 = getelementptr inbounds nuw i8, ptr %1, i32 0
131+
%3 = load i8, ptr %2
132+
%4 = sext i8 %3 to i32
133+
ret i32 %4
134+
}
135+
136+
define i32 @i8_geps_index1() {
137+
; CHECK-LABEL: define i32 @i8_geps_index1(
138+
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [2 x i32], align 8
139+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw [2 x i32], ptr [[ALLOCA]], i32 0, i32 1
140+
; CHECK: [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4
141+
; CHECK-NEXT: ret i32 [[LOAD]]
142+
%1 = alloca [2 x i32], align 8
143+
%2 = getelementptr inbounds nuw i8, ptr %1, i32 4
144+
%3 = load i8, ptr %2
145+
%4 = sext i8 %3 to i32
146+
ret i32 %4
147+
}
148+
149+
define i32 @i8_gep_store() {
150+
; CHECK-LABEL: define i32 @i8_gep_store(
151+
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [2 x i32], align 8
152+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw [2 x i32], ptr [[ALLOCA]], i32 0, i32 1
153+
; CHECK-NEXT: store i32 1, ptr [[GEP]], align 4
154+
; CHECK: [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4
155+
; CHECK-NEXT: ret i32 [[LOAD]]
156+
%1 = alloca [2 x i32], align 8
157+
%2 = getelementptr inbounds nuw i8, ptr %1, i32 4
158+
store i8 1, ptr %2
159+
%3 = load i8, ptr %2
160+
%4 = sext i8 %3 to i32
161+
ret i32 %4
162+
}
163+
164+
@g = local_unnamed_addr addrspace(3) global [2 x float] zeroinitializer, align 4
165+
define float @i8_gep_global_index() {
166+
; CHECK-LABEL: define float @i8_gep_global_index(
167+
; CHECK-NEXT: [[LOAD:%.*]] = load float, ptr addrspace(3) getelementptr inbounds nuw ([2 x float], ptr addrspace(3) @g, i32 0, i32 1), align 4
168+
; CHECK-NEXT: ret float [[LOAD]]
169+
%1 = getelementptr inbounds nuw i8, ptr addrspace(3) @g, i32 4
170+
%2 = load float, ptr addrspace(3) %1, align 4
171+
ret float %2
172+
}
173+
174+
define float @i8_gep_global_constexpr() {
175+
; CHECK-LABEL: define float @i8_gep_global_constexpr(
176+
; CHECK-NEXT: [[LOAD:%.*]] = load float, ptr addrspace(3) getelementptr inbounds nuw ([2 x float], ptr addrspace(3) @g, i32 0, i32 1), align 4
177+
; CHECK-NEXT: ret float [[LOAD]]
178+
%1 = load float, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @g, i32 4), align 4
179+
ret float %1
180+
}

0 commit comments

Comments
 (0)