Skip to content

Commit 90ab192

Browse files
authored
Merge pull request #12 from RadeonOpenCompute/addrspacecast-fixes
Addrspacecast fixes
2 parents 81d8ce9 + 6860c9e commit 90ab192

File tree

4 files changed

+59
-3
lines changed

4 files changed

+59
-3
lines changed

lib/Analysis/Loads.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,10 @@ static bool isDereferenceableAndAlignedPointer(
9191
// then the GEP (== Base + Offset == k_0 * Align + k_1 * Align) is also
9292
// aligned to Align bytes.
9393

94-
return isDereferenceableAndAlignedPointer(Base, Align, Offset + Size, DL,
94+
// Offset and Size may have different bit widths if we have visited an
95+
// addrspacecast, so we can't do arithmetic directly on the APInt values.
96+
return isDereferenceableAndAlignedPointer(Base, Align,
97+
Offset + Size.getSExtValue(), DL,
9598
CtxI, DT, Visited);
9699
}
97100

lib/Analysis/ValueTracking.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2833,11 +2833,17 @@ Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
28332833
break;
28342834

28352835
if (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
2836-
APInt GEPOffset(BitWidth, 0);
2836+
// If one of the values we have visited is an addrspacecast, then
2837+
// the pointer type of this GEP may be different from the type
2838+
// of the Ptr parameter which was passed to this function. This
2839+
// means when we construct GEPOffset, we need to use the size
2840+
// of GEP's pointer type rather than the size of the original
2841+
// pointer type.
2842+
APInt GEPOffset(DL.getPointerTypeSizeInBits(Ptr->getType()), 0);
28372843
if (!GEP->accumulateConstantOffset(DL, GEPOffset))
28382844
break;
28392845

2840-
ByteOffset += GEPOffset;
2846+
ByteOffset += GEPOffset.getSExtValue();
28412847

28422848
Ptr = GEP->getPointerOperand();
28432849
} else if (Operator::getOpcode(Ptr) == Instruction::BitCast ||
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
; RUN: opt < %s -licm -S | FileCheck %s
2+
3+
target datalayout = "e-p:32:32-p1:64:64-p4:64:64"
4+
5+
; Make sure isDereferenceableAndAlignedPointer() doesn't crash when
6+
; walking pointer defs with an addrspacecast that changes pointer size.
7+
; CHECK-LABEL: @addrspacecast_crash
8+
define void @addrspacecast_crash() {
9+
bb:
10+
%tmp = alloca [256 x i32]
11+
br label %bb1
12+
13+
bb1:
14+
%tmp2 = getelementptr inbounds [256 x i32], [256 x i32]* %tmp, i32 0, i32 36
15+
%tmp3 = bitcast i32* %tmp2 to <4 x i32>*
16+
%tmp4 = addrspacecast <4 x i32>* %tmp3 to <4 x i32> addrspace(4)*
17+
%tmp5 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp4
18+
%tmp6 = xor <4 x i32> %tmp5, undef
19+
store <4 x i32> %tmp6, <4 x i32> addrspace(1)* undef
20+
br label %bb1
21+
}
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
; RUN: opt -gvn -S < %s | FileCheck %s
2+
3+
; Make sure we don't crash when analyzing an addrspacecast in
4+
; GetPointerBaseWithConstantOffset()
5+
6+
target datalayout = "e-p:32:32-p4:64:64"
7+
8+
define i32 @addrspacecast-crash() {
9+
; CHECK-LABEL: @addrspacecast-crash
10+
; CHECK: %tmp = alloca [25 x i64]
11+
; CHECK: %tmp1 = getelementptr inbounds [25 x i64], [25 x i64]* %tmp, i32 0, i32 0
12+
; CHECK: %tmp2 = addrspacecast i64* %tmp1 to <8 x i64> addrspace(4)*
13+
; CHECK: store <8 x i64> zeroinitializer, <8 x i64> addrspace(4)* %tmp2
14+
; CHECK-NOT: load
15+
bb:
16+
%tmp = alloca [25 x i64]
17+
%tmp1 = getelementptr inbounds [25 x i64], [25 x i64]* %tmp, i32 0, i32 0
18+
%tmp2 = addrspacecast i64* %tmp1 to <8 x i64> addrspace(4)*
19+
%tmp3 = getelementptr inbounds <8 x i64>, <8 x i64> addrspace(4)* %tmp2, i64 0
20+
store <8 x i64> zeroinitializer, <8 x i64> addrspace(4)* %tmp3
21+
%tmp4 = getelementptr inbounds [25 x i64], [25 x i64]* %tmp, i32 0, i32 0
22+
%tmp5 = addrspacecast i64* %tmp4 to i32 addrspace(4)*
23+
%tmp6 = getelementptr inbounds i32, i32 addrspace(4)* %tmp5, i64 10
24+
%tmp7 = load i32, i32 addrspace(4)* %tmp6, align 4
25+
ret i32 %tmp7
26+
}

0 commit comments

Comments
 (0)