Skip to content

Commit 37ea3b3

Browse files
committed
Revert "Reapply "[AMDGPU] Make getAssumedAddrSpace return AS1 for pointer kernel arguments (#137488)""
This reverts commit 4efc13f.
1 parent 628a3f0 commit 37ea3b3

File tree

7 files changed

+68
-94
lines changed

7 files changed

+68
-94
lines changed

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -977,10 +977,6 @@ bool AMDGPUTargetMachine::isNoopAddrSpaceCast(unsigned SrcAS,
977977
}
978978

979979
unsigned AMDGPUTargetMachine::getAssumedAddrSpace(const Value *V) const {
980-
if (auto *Arg = dyn_cast<Argument>(V);
981-
Arg && AMDGPU::isKernelCC(Arg->getParent()) && !Arg->hasByRefAttr())
982-
return AMDGPUAS::GLOBAL_ADDRESS;
983-
984980
const auto *LD = dyn_cast<LoadInst>(V);
985981
if (!LD) // TODO: Handle invariant load like constant.
986982
return AMDGPUAS::UNKNOWN_ADDRESS_SPACE;

llvm/lib/Transforms/IPO/AttributorAttributes.cpp

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12592,18 +12592,29 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
1259212592
}
1259312593

1259412594
ChangeStatus updateImpl(Attributor &A) override {
12595+
unsigned FlatAS = A.getInfoCache().getFlatAddressSpace().value();
1259512596
uint32_t OldAddressSpace = AssumedAddressSpace;
1259612597

1259712598
auto CheckAddressSpace = [&](Value &Obj) {
1259812599
if (isa<UndefValue>(&Obj))
1259912600
return true;
12601+
// If an argument in flat address space only has addrspace cast uses, and
12602+
// those casts are same, then we take the dst addrspace.
1260012603
if (auto *Arg = dyn_cast<Argument>(&Obj)) {
12601-
auto *TTI =
12602-
A.getInfoCache().getAnalysisResultForFunction<TargetIRAnalysis>(
12603-
*Arg->getParent());
12604-
unsigned AssumedAS = TTI->getAssumedAddrSpace(Arg);
12605-
if (AssumedAS != ~0U)
12606-
return takeAddressSpace(AssumedAS);
12604+
if (Arg->getType()->getPointerAddressSpace() == FlatAS) {
12605+
unsigned CastAddrSpace = FlatAS;
12606+
for (auto *U : Arg->users()) {
12607+
auto *ASCI = dyn_cast<AddrSpaceCastInst>(U);
12608+
if (!ASCI)
12609+
return takeAddressSpace(Obj.getType()->getPointerAddressSpace());
12610+
if (CastAddrSpace != FlatAS &&
12611+
CastAddrSpace != ASCI->getDestAddressSpace())
12612+
return false;
12613+
CastAddrSpace = ASCI->getDestAddressSpace();
12614+
}
12615+
if (CastAddrSpace != FlatAS)
12616+
return takeAddressSpace(CastAddrSpace);
12617+
}
1260712618
}
1260812619
return takeAddressSpace(Obj.getType()->getPointerAddressSpace());
1260912620
};

llvm/test/CodeGen/AMDGPU/aa-as-infer.ll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -246,7 +246,8 @@ define void @foo(ptr addrspace(3) %val) {
246246
define void @kernel_argument_promotion_pattern_intra_procedure(ptr %p, i32 %val) {
247247
; CHECK-LABEL: define void @kernel_argument_promotion_pattern_intra_procedure(
248248
; CHECK-SAME: ptr [[P:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
249-
; CHECK-NEXT: store i32 [[VAL]], ptr [[P]], align 4
249+
; CHECK-NEXT: [[P_CAST_0:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
250+
; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(1) [[P_CAST_0]], align 4
250251
; CHECK-NEXT: ret void
251252
;
252253
%p.cast.0 = addrspacecast ptr %p to ptr addrspace(1)
@@ -258,7 +259,8 @@ define void @kernel_argument_promotion_pattern_intra_procedure(ptr %p, i32 %val)
258259
define internal void @use_argument_after_promotion(ptr %p, i32 %val) {
259260
; CHECK-LABEL: define internal void @use_argument_after_promotion(
260261
; CHECK-SAME: ptr [[P:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
261-
; CHECK-NEXT: store i32 [[VAL]], ptr [[P]], align 4
262+
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
263+
; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(1) [[TMP1]], align 4
262264
; CHECK-NEXT: ret void
263265
;
264266
store i32 %val, ptr %p

llvm/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -66,9 +66,7 @@ define amdgpu_kernel void @store_global_from_flat(ptr %generic_scalar) #0 {
6666
define amdgpu_kernel void @store_group_from_flat(ptr %generic_scalar) #0 {
6767
; CHECK-LABEL: define amdgpu_kernel void @store_group_from_flat(
6868
; CHECK-SAME: ptr [[GENERIC_SCALAR:%.*]]) #[[ATTR0]] {
69-
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[GENERIC_SCALAR]] to ptr addrspace(1)
70-
; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(1) [[TMP1]] to ptr
71-
; CHECK-NEXT: [[_TMP0:%.*]] = addrspacecast ptr [[TMP2]] to ptr addrspace(3)
69+
; CHECK-NEXT: [[_TMP0:%.*]] = addrspacecast ptr [[GENERIC_SCALAR]] to ptr addrspace(3)
7270
; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(3) [[_TMP0]], align 4
7371
; CHECK-NEXT: ret void
7472
;
@@ -80,9 +78,7 @@ define amdgpu_kernel void @store_group_from_flat(ptr %generic_scalar) #0 {
8078
define amdgpu_kernel void @store_private_from_flat(ptr %generic_scalar) #0 {
8179
; CHECK-LABEL: define amdgpu_kernel void @store_private_from_flat(
8280
; CHECK-SAME: ptr [[GENERIC_SCALAR:%.*]]) #[[ATTR0]] {
83-
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[GENERIC_SCALAR]] to ptr addrspace(1)
84-
; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(1) [[TMP1]] to ptr
85-
; CHECK-NEXT: [[_TMP0:%.*]] = addrspacecast ptr [[TMP2]] to ptr addrspace(5)
81+
; CHECK-NEXT: [[_TMP0:%.*]] = addrspacecast ptr [[GENERIC_SCALAR]] to ptr addrspace(5)
8682
; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(5) [[_TMP0]], align 4
8783
; CHECK-NEXT: ret void
8884
;
@@ -140,10 +136,8 @@ define amdgpu_kernel void @load_store_private(ptr addrspace(5) nocapture %input,
140136
define amdgpu_kernel void @load_store_flat(ptr nocapture %input, ptr nocapture %output) #0 {
141137
; CHECK-LABEL: define amdgpu_kernel void @load_store_flat(
142138
; CHECK-SAME: ptr captures(none) [[INPUT:%.*]], ptr captures(none) [[OUTPUT:%.*]]) #[[ATTR0]] {
143-
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(1)
144-
; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[OUTPUT]] to ptr addrspace(1)
145-
; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[TMP1]], align 4
146-
; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(1) [[TMP2]], align 4
139+
; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[INPUT]], align 4
140+
; CHECK-NEXT: store i32 [[VAL]], ptr [[OUTPUT]], align 4
147141
; CHECK-NEXT: ret void
148142
;
149143
%val = load i32, ptr %input, align 4

llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,7 @@ define amdgpu_kernel void @memset_global_to_flat_no_md(ptr addrspace(1) %global.
4848
define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group(ptr %dest, ptr addrspace(3) %src.group.ptr, i64 %size) #0 {
4949
; CHECK-LABEL: define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group(
5050
; CHECK-SAME: ptr [[DEST:%.*]], ptr addrspace(3) [[SRC_GROUP_PTR:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] {
51-
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DEST]] to ptr addrspace(1)
52-
; CHECK-NEXT: call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) align 4 [[TMP1]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
51+
; CHECK-NEXT: call void @llvm.memcpy.p0.p3.i64(ptr align 4 [[DEST]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
5352
; CHECK-NEXT: ret void
5453
;
5554
%cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr
@@ -60,8 +59,7 @@ define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group(ptr %dest,
6059
define amdgpu_kernel void @memcpy_inline_flat_to_flat_replace_src_with_group(ptr %dest, ptr addrspace(3) %src.group.ptr) #0 {
6160
; CHECK-LABEL: define amdgpu_kernel void @memcpy_inline_flat_to_flat_replace_src_with_group(
6261
; CHECK-SAME: ptr [[DEST:%.*]], ptr addrspace(3) [[SRC_GROUP_PTR:%.*]]) #[[ATTR0]] {
63-
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DEST]] to ptr addrspace(1)
64-
; CHECK-NEXT: call void @llvm.memcpy.inline.p1.p3.i64(ptr addrspace(1) align 4 [[TMP1]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 42, i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
62+
; CHECK-NEXT: call void @llvm.memcpy.inline.p0.p3.i64(ptr align 4 [[DEST]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 42, i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
6563
; CHECK-NEXT: ret void
6664
;
6765
%cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr
@@ -72,8 +70,7 @@ define amdgpu_kernel void @memcpy_inline_flat_to_flat_replace_src_with_group(ptr
7270
define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_with_group(ptr addrspace(3) %dest.group.ptr, ptr %src.ptr, i64 %size) #0 {
7371
; CHECK-LABEL: define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_with_group(
7472
; CHECK-SAME: ptr addrspace(3) [[DEST_GROUP_PTR:%.*]], ptr [[SRC_PTR:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] {
75-
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[SRC_PTR]] to ptr addrspace(1)
76-
; CHECK-NEXT: call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) align 4 [[DEST_GROUP_PTR]], ptr addrspace(1) align 4 [[TMP1]], i64 [[SIZE]], i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
73+
; CHECK-NEXT: call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) align 4 [[DEST_GROUP_PTR]], ptr align 4 [[SRC_PTR]], i64 [[SIZE]], i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
7774
; CHECK-NEXT: ret void
7875
;
7976
%cast.dest = addrspacecast ptr addrspace(3) %dest.group.ptr to ptr
@@ -119,8 +116,7 @@ define amdgpu_kernel void @memcpy_group_to_flat_replace_dest_global(ptr addrspac
119116
define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct(ptr %dest, ptr addrspace(3) %src.group.ptr, i64 %size) #0 {
120117
; CHECK-LABEL: define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct(
121118
; CHECK-SAME: ptr [[DEST:%.*]], ptr addrspace(3) [[SRC_GROUP_PTR:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] {
122-
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DEST]] to ptr addrspace(1)
123-
; CHECK-NEXT: call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) align 4 [[TMP1]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false), !tbaa.struct [[TBAA_STRUCT8:![0-9]+]]
119+
; CHECK-NEXT: call void @llvm.memcpy.p0.p3.i64(ptr align 4 [[DEST]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false), !tbaa.struct [[TBAA_STRUCT8:![0-9]+]]
124120
; CHECK-NEXT: ret void
125121
;
126122
%cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr
@@ -131,8 +127,7 @@ define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_tbaa_struc
131127
define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_no_md(ptr %dest, ptr addrspace(3) %src.group.ptr, i64 %size) #0 {
132128
; CHECK-LABEL: define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_no_md(
133129
; CHECK-SAME: ptr [[DEST:%.*]], ptr addrspace(3) [[SRC_GROUP_PTR:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] {
134-
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DEST]] to ptr addrspace(1)
135-
; CHECK-NEXT: call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) align 4 [[TMP1]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false)
130+
; CHECK-NEXT: call void @llvm.memcpy.p0.p3.i64(ptr align 4 [[DEST]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false)
136131
; CHECK-NEXT: ret void
137132
;
138133
%cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr
@@ -143,10 +138,8 @@ define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_no_md(ptr
143138
define amdgpu_kernel void @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md(ptr %dest0, ptr %dest1, ptr addrspace(3) %src.group.ptr, i64 %size) #0 {
144139
; CHECK-LABEL: define amdgpu_kernel void @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md(
145140
; CHECK-SAME: ptr [[DEST0:%.*]], ptr [[DEST1:%.*]], ptr addrspace(3) [[SRC_GROUP_PTR:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] {
146-
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DEST0]] to ptr addrspace(1)
147-
; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DEST1]] to ptr addrspace(1)
148-
; CHECK-NEXT: call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) align 4 [[TMP1]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false)
149-
; CHECK-NEXT: call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) align 4 [[TMP2]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false)
141+
; CHECK-NEXT: call void @llvm.memcpy.p0.p3.i64(ptr align 4 [[DEST0]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false)
142+
; CHECK-NEXT: call void @llvm.memcpy.p0.p3.i64(ptr align 4 [[DEST1]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false)
150143
; CHECK-NEXT: ret void
151144
;
152145
%cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr
@@ -169,8 +162,7 @@ define amdgpu_kernel void @memcpy_group_flat_to_flat_self(ptr addrspace(3) %grou
169162
define amdgpu_kernel void @memmove_flat_to_flat_replace_src_with_group(ptr %dest, ptr addrspace(3) %src.group.ptr, i64 %size) #0 {
170163
; CHECK-LABEL: define amdgpu_kernel void @memmove_flat_to_flat_replace_src_with_group(
171164
; CHECK-SAME: ptr [[DEST:%.*]], ptr addrspace(3) [[SRC_GROUP_PTR:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] {
172-
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DEST]] to ptr addrspace(1)
173-
; CHECK-NEXT: call void @llvm.memmove.p1.p3.i64(ptr addrspace(1) align 4 [[TMP1]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
165+
; CHECK-NEXT: call void @llvm.memmove.p0.p3.i64(ptr align 4 [[DEST]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
174166
; CHECK-NEXT: ret void
175167
;
176168
%cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr

llvm/test/Transforms/OpenMP/barrier_removal.ll

Lines changed: 7 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -682,18 +682,11 @@ m:
682682
}
683683

684684
define internal void @write_then_barrier0(ptr %p) {
685-
; MODULE-LABEL: define {{[^@]+}}@write_then_barrier0
686-
; MODULE-SAME: (ptr [[P:%.*]]) {
687-
; MODULE-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
688-
; MODULE-NEXT: store i32 0, ptr addrspace(1) [[TMP1]], align 4
689-
; MODULE-NEXT: call void @aligned_barrier()
690-
; MODULE-NEXT: ret void
691-
;
692-
; CGSCC-LABEL: define {{[^@]+}}@write_then_barrier0
693-
; CGSCC-SAME: (ptr [[P:%.*]]) {
694-
; CGSCC-NEXT: store i32 0, ptr [[P]], align 4
695-
; CGSCC-NEXT: call void @aligned_barrier()
696-
; CGSCC-NEXT: ret void
685+
; CHECK-LABEL: define {{[^@]+}}@write_then_barrier0
686+
; CHECK-SAME: (ptr [[P:%.*]]) {
687+
; CHECK-NEXT: store i32 0, ptr [[P]], align 4
688+
; CHECK-NEXT: call void @aligned_barrier()
689+
; CHECK-NEXT: ret void
697690
;
698691
store i32 0, ptr %p
699692
call void @aligned_barrier()
@@ -702,8 +695,7 @@ define internal void @write_then_barrier0(ptr %p) {
702695
define internal void @barrier_then_write0(ptr %p) {
703696
; MODULE-LABEL: define {{[^@]+}}@barrier_then_write0
704697
; MODULE-SAME: (ptr [[P:%.*]]) {
705-
; MODULE-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
706-
; MODULE-NEXT: store i32 0, ptr addrspace(1) [[TMP1]], align 4
698+
; MODULE-NEXT: store i32 0, ptr [[P]], align 4
707699
; MODULE-NEXT: ret void
708700
;
709701
; CGSCC-LABEL: define {{[^@]+}}@barrier_then_write0
@@ -719,8 +711,7 @@ define internal void @barrier_then_write0(ptr %p) {
719711
define internal void @barrier_then_write_then_barrier0(ptr %p) {
720712
; MODULE-LABEL: define {{[^@]+}}@barrier_then_write_then_barrier0
721713
; MODULE-SAME: (ptr [[P:%.*]]) {
722-
; MODULE-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
723-
; MODULE-NEXT: store i32 0, ptr addrspace(1) [[TMP1]], align 4
714+
; MODULE-NEXT: store i32 0, ptr [[P]], align 4
724715
; MODULE-NEXT: call void @aligned_barrier()
725716
; MODULE-NEXT: ret void
726717
;

0 commit comments

Comments
 (0)