Skip to content

Commit c24a60e

Browse files
committed
[AMDGPU] Make getAssumedAddrSpace return AS1 for pointer kernel arguments
1 parent c5a17e6 commit c24a60e

File tree

7 files changed: 94 additions and 68 deletions.

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -977,6 +977,10 @@ bool AMDGPUTargetMachine::isNoopAddrSpaceCast(unsigned SrcAS,
977977
}
978978

979979
unsigned AMDGPUTargetMachine::getAssumedAddrSpace(const Value *V) const {
980+
if (auto *Arg = dyn_cast<Argument>(V);
981+
Arg && AMDGPU::isKernelCC(Arg->getParent()) && !Arg->hasByRefAttr())
982+
return AMDGPUAS::GLOBAL_ADDRESS;
983+
980984
const auto *LD = dyn_cast<LoadInst>(V);
981985
if (!LD) // TODO: Handle invariant load like constant.
982986
return AMDGPUAS::UNKNOWN_ADDRESS_SPACE;

llvm/lib/Transforms/IPO/AttributorAttributes.cpp

Lines changed: 6 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -12592,29 +12592,18 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
1259212592
}
1259312593

1259412594
ChangeStatus updateImpl(Attributor &A) override {
12595-
unsigned FlatAS = A.getInfoCache().getFlatAddressSpace().value();
1259612595
uint32_t OldAddressSpace = AssumedAddressSpace;
1259712596

1259812597
auto CheckAddressSpace = [&](Value &Obj) {
1259912598
if (isa<UndefValue>(&Obj))
1260012599
return true;
12601-
// If an argument in flat address space only has addrspace cast uses, and
12602-
// those casts are same, then we take the dst addrspace.
1260312600
if (auto *Arg = dyn_cast<Argument>(&Obj)) {
12604-
if (Arg->getType()->getPointerAddressSpace() == FlatAS) {
12605-
unsigned CastAddrSpace = FlatAS;
12606-
for (auto *U : Arg->users()) {
12607-
auto *ASCI = dyn_cast<AddrSpaceCastInst>(U);
12608-
if (!ASCI)
12609-
return takeAddressSpace(Obj.getType()->getPointerAddressSpace());
12610-
if (CastAddrSpace != FlatAS &&
12611-
CastAddrSpace != ASCI->getDestAddressSpace())
12612-
return false;
12613-
CastAddrSpace = ASCI->getDestAddressSpace();
12614-
}
12615-
if (CastAddrSpace != FlatAS)
12616-
return takeAddressSpace(CastAddrSpace);
12617-
}
12601+
auto *TTI =
12602+
A.getInfoCache().getAnalysisResultForFunction<TargetIRAnalysis>(
12603+
*Arg->getParent());
12604+
unsigned AssumedAS = TTI->getAssumedAddrSpace(Arg);
12605+
if (AssumedAS != ~0U)
12606+
return takeAddressSpace(AssumedAS);
1261812607
}
1261912608
return takeAddressSpace(Obj.getType()->getPointerAddressSpace());
1262012609
};

llvm/test/CodeGen/AMDGPU/aa-as-infer.ll

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -246,8 +246,7 @@ define void @foo(ptr addrspace(3) %val) {
246246
define void @kernel_argument_promotion_pattern_intra_procedure(ptr %p, i32 %val) {
247247
; CHECK-LABEL: define void @kernel_argument_promotion_pattern_intra_procedure(
248248
; CHECK-SAME: ptr [[P:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
249-
; CHECK-NEXT: [[P_CAST_0:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
250-
; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(1) [[P_CAST_0]], align 4
249+
; CHECK-NEXT: store i32 [[VAL]], ptr [[P]], align 4
251250
; CHECK-NEXT: ret void
252251
;
253252
%p.cast.0 = addrspacecast ptr %p to ptr addrspace(1)
@@ -259,8 +258,7 @@ define void @kernel_argument_promotion_pattern_intra_procedure(ptr %p, i32 %val)
259258
define internal void @use_argument_after_promotion(ptr %p, i32 %val) {
260259
; CHECK-LABEL: define internal void @use_argument_after_promotion(
261260
; CHECK-SAME: ptr [[P:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
262-
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
263-
; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(1) [[TMP1]], align 4
261+
; CHECK-NEXT: store i32 [[VAL]], ptr [[P]], align 4
264262
; CHECK-NEXT: ret void
265263
;
266264
store i32 %val, ptr %p

llvm/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll

Lines changed: 10 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -66,7 +66,9 @@ define amdgpu_kernel void @store_global_from_flat(ptr %generic_scalar) #0 {
6666
define amdgpu_kernel void @store_group_from_flat(ptr %generic_scalar) #0 {
6767
; CHECK-LABEL: define amdgpu_kernel void @store_group_from_flat(
6868
; CHECK-SAME: ptr [[GENERIC_SCALAR:%.*]]) #[[ATTR0]] {
69-
; CHECK-NEXT: [[_TMP0:%.*]] = addrspacecast ptr [[GENERIC_SCALAR]] to ptr addrspace(3)
69+
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[GENERIC_SCALAR]] to ptr addrspace(1)
70+
; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(1) [[TMP1]] to ptr
71+
; CHECK-NEXT: [[_TMP0:%.*]] = addrspacecast ptr [[TMP2]] to ptr addrspace(3)
7072
; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(3) [[_TMP0]], align 4
7173
; CHECK-NEXT: ret void
7274
;
@@ -78,7 +80,9 @@ define amdgpu_kernel void @store_group_from_flat(ptr %generic_scalar) #0 {
7880
define amdgpu_kernel void @store_private_from_flat(ptr %generic_scalar) #0 {
7981
; CHECK-LABEL: define amdgpu_kernel void @store_private_from_flat(
8082
; CHECK-SAME: ptr [[GENERIC_SCALAR:%.*]]) #[[ATTR0]] {
81-
; CHECK-NEXT: [[_TMP0:%.*]] = addrspacecast ptr [[GENERIC_SCALAR]] to ptr addrspace(5)
83+
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[GENERIC_SCALAR]] to ptr addrspace(1)
84+
; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(1) [[TMP1]] to ptr
85+
; CHECK-NEXT: [[_TMP0:%.*]] = addrspacecast ptr [[TMP2]] to ptr addrspace(5)
8286
; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(5) [[_TMP0]], align 4
8387
; CHECK-NEXT: ret void
8488
;
@@ -136,8 +140,10 @@ define amdgpu_kernel void @load_store_private(ptr addrspace(5) nocapture %input,
136140
define amdgpu_kernel void @load_store_flat(ptr nocapture %input, ptr nocapture %output) #0 {
137141
; CHECK-LABEL: define amdgpu_kernel void @load_store_flat(
138142
; CHECK-SAME: ptr captures(none) [[INPUT:%.*]], ptr captures(none) [[OUTPUT:%.*]]) #[[ATTR0]] {
139-
; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[INPUT]], align 4
140-
; CHECK-NEXT: store i32 [[VAL]], ptr [[OUTPUT]], align 4
143+
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(1)
144+
; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[OUTPUT]] to ptr addrspace(1)
145+
; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[TMP1]], align 4
146+
; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(1) [[TMP2]], align 4
141147
; CHECK-NEXT: ret void
142148
;
143149
%val = load i32, ptr %input, align 4

llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll

Lines changed: 16 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -48,7 +48,8 @@ define amdgpu_kernel void @memset_global_to_flat_no_md(ptr addrspace(1) %global.
4848
define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group(ptr %dest, ptr addrspace(3) %src.group.ptr, i64 %size) #0 {
4949
; CHECK-LABEL: define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group(
5050
; CHECK-SAME: ptr [[DEST:%.*]], ptr addrspace(3) [[SRC_GROUP_PTR:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] {
51-
; CHECK-NEXT: call void @llvm.memcpy.p0.p3.i64(ptr align 4 [[DEST]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
51+
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DEST]] to ptr addrspace(1)
52+
; CHECK-NEXT: call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) align 4 [[TMP1]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
5253
; CHECK-NEXT: ret void
5354
;
5455
%cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr
@@ -59,7 +60,8 @@ define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group(ptr %dest,
5960
define amdgpu_kernel void @memcpy_inline_flat_to_flat_replace_src_with_group(ptr %dest, ptr addrspace(3) %src.group.ptr) #0 {
6061
; CHECK-LABEL: define amdgpu_kernel void @memcpy_inline_flat_to_flat_replace_src_with_group(
6162
; CHECK-SAME: ptr [[DEST:%.*]], ptr addrspace(3) [[SRC_GROUP_PTR:%.*]]) #[[ATTR0]] {
62-
; CHECK-NEXT: call void @llvm.memcpy.inline.p0.p3.i64(ptr align 4 [[DEST]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 42, i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
63+
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DEST]] to ptr addrspace(1)
64+
; CHECK-NEXT: call void @llvm.memcpy.inline.p1.p3.i64(ptr addrspace(1) align 4 [[TMP1]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 42, i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
6365
; CHECK-NEXT: ret void
6466
;
6567
%cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr
@@ -70,7 +72,8 @@ define amdgpu_kernel void @memcpy_inline_flat_to_flat_replace_src_with_group(ptr
7072
define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_with_group(ptr addrspace(3) %dest.group.ptr, ptr %src.ptr, i64 %size) #0 {
7173
; CHECK-LABEL: define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_with_group(
7274
; CHECK-SAME: ptr addrspace(3) [[DEST_GROUP_PTR:%.*]], ptr [[SRC_PTR:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] {
73-
; CHECK-NEXT: call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) align 4 [[DEST_GROUP_PTR]], ptr align 4 [[SRC_PTR]], i64 [[SIZE]], i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
75+
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[SRC_PTR]] to ptr addrspace(1)
76+
; CHECK-NEXT: call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) align 4 [[DEST_GROUP_PTR]], ptr addrspace(1) align 4 [[TMP1]], i64 [[SIZE]], i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
7477
; CHECK-NEXT: ret void
7578
;
7679
%cast.dest = addrspacecast ptr addrspace(3) %dest.group.ptr to ptr
@@ -116,7 +119,8 @@ define amdgpu_kernel void @memcpy_group_to_flat_replace_dest_global(ptr addrspac
116119
define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct(ptr %dest, ptr addrspace(3) %src.group.ptr, i64 %size) #0 {
117120
; CHECK-LABEL: define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct(
118121
; CHECK-SAME: ptr [[DEST:%.*]], ptr addrspace(3) [[SRC_GROUP_PTR:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] {
119-
; CHECK-NEXT: call void @llvm.memcpy.p0.p3.i64(ptr align 4 [[DEST]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false), !tbaa.struct [[TBAA_STRUCT8:![0-9]+]]
122+
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DEST]] to ptr addrspace(1)
123+
; CHECK-NEXT: call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) align 4 [[TMP1]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false), !tbaa.struct [[TBAA_STRUCT8:![0-9]+]]
120124
; CHECK-NEXT: ret void
121125
;
122126
%cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr
@@ -127,7 +131,8 @@ define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_tbaa_struc
127131
define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_no_md(ptr %dest, ptr addrspace(3) %src.group.ptr, i64 %size) #0 {
128132
; CHECK-LABEL: define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_no_md(
129133
; CHECK-SAME: ptr [[DEST:%.*]], ptr addrspace(3) [[SRC_GROUP_PTR:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] {
130-
; CHECK-NEXT: call void @llvm.memcpy.p0.p3.i64(ptr align 4 [[DEST]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false)
134+
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DEST]] to ptr addrspace(1)
135+
; CHECK-NEXT: call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) align 4 [[TMP1]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false)
131136
; CHECK-NEXT: ret void
132137
;
133138
%cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr
@@ -138,8 +143,10 @@ define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_no_md(ptr
138143
define amdgpu_kernel void @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md(ptr %dest0, ptr %dest1, ptr addrspace(3) %src.group.ptr, i64 %size) #0 {
139144
; CHECK-LABEL: define amdgpu_kernel void @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md(
140145
; CHECK-SAME: ptr [[DEST0:%.*]], ptr [[DEST1:%.*]], ptr addrspace(3) [[SRC_GROUP_PTR:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] {
141-
; CHECK-NEXT: call void @llvm.memcpy.p0.p3.i64(ptr align 4 [[DEST0]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false)
142-
; CHECK-NEXT: call void @llvm.memcpy.p0.p3.i64(ptr align 4 [[DEST1]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false)
146+
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DEST0]] to ptr addrspace(1)
147+
; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DEST1]] to ptr addrspace(1)
148+
; CHECK-NEXT: call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) align 4 [[TMP1]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false)
149+
; CHECK-NEXT: call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) align 4 [[TMP2]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false)
143150
; CHECK-NEXT: ret void
144151
;
145152
%cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr
@@ -162,7 +169,8 @@ define amdgpu_kernel void @memcpy_group_flat_to_flat_self(ptr addrspace(3) %grou
162169
define amdgpu_kernel void @memmove_flat_to_flat_replace_src_with_group(ptr %dest, ptr addrspace(3) %src.group.ptr, i64 %size) #0 {
163170
; CHECK-LABEL: define amdgpu_kernel void @memmove_flat_to_flat_replace_src_with_group(
164171
; CHECK-SAME: ptr [[DEST:%.*]], ptr addrspace(3) [[SRC_GROUP_PTR:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] {
165-
; CHECK-NEXT: call void @llvm.memmove.p0.p3.i64(ptr align 4 [[DEST]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
172+
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DEST]] to ptr addrspace(1)
173+
; CHECK-NEXT: call void @llvm.memmove.p1.p3.i64(ptr addrspace(1) align 4 [[TMP1]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
166174
; CHECK-NEXT: ret void
167175
;
168176
%cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr

llvm/test/Transforms/OpenMP/barrier_removal.ll

Lines changed: 16 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -682,11 +682,18 @@ m:
682682
}
683683

684684
define internal void @write_then_barrier0(ptr %p) {
685-
; CHECK-LABEL: define {{[^@]+}}@write_then_barrier0
686-
; CHECK-SAME: (ptr [[P:%.*]]) {
687-
; CHECK-NEXT: store i32 0, ptr [[P]], align 4
688-
; CHECK-NEXT: call void @aligned_barrier()
689-
; CHECK-NEXT: ret void
685+
; MODULE-LABEL: define {{[^@]+}}@write_then_barrier0
686+
; MODULE-SAME: (ptr [[P:%.*]]) {
687+
; MODULE-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
688+
; MODULE-NEXT: store i32 0, ptr addrspace(1) [[TMP1]], align 4
689+
; MODULE-NEXT: call void @aligned_barrier()
690+
; MODULE-NEXT: ret void
691+
;
692+
; CGSCC-LABEL: define {{[^@]+}}@write_then_barrier0
693+
; CGSCC-SAME: (ptr [[P:%.*]]) {
694+
; CGSCC-NEXT: store i32 0, ptr [[P]], align 4
695+
; CGSCC-NEXT: call void @aligned_barrier()
696+
; CGSCC-NEXT: ret void
690697
;
691698
store i32 0, ptr %p
692699
call void @aligned_barrier()
@@ -695,7 +702,8 @@ define internal void @write_then_barrier0(ptr %p) {
695702
define internal void @barrier_then_write0(ptr %p) {
696703
; MODULE-LABEL: define {{[^@]+}}@barrier_then_write0
697704
; MODULE-SAME: (ptr [[P:%.*]]) {
698-
; MODULE-NEXT: store i32 0, ptr [[P]], align 4
705+
; MODULE-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
706+
; MODULE-NEXT: store i32 0, ptr addrspace(1) [[TMP1]], align 4
699707
; MODULE-NEXT: ret void
700708
;
701709
; CGSCC-LABEL: define {{[^@]+}}@barrier_then_write0
@@ -711,7 +719,8 @@ define internal void @barrier_then_write0(ptr %p) {
711719
define internal void @barrier_then_write_then_barrier0(ptr %p) {
712720
; MODULE-LABEL: define {{[^@]+}}@barrier_then_write_then_barrier0
713721
; MODULE-SAME: (ptr [[P:%.*]]) {
714-
; MODULE-NEXT: store i32 0, ptr [[P]], align 4
722+
; MODULE-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
723+
; MODULE-NEXT: store i32 0, ptr addrspace(1) [[TMP1]], align 4
715724
; MODULE-NEXT: call void @aligned_barrier()
716725
; MODULE-NEXT: ret void
717726
;

0 commit comments

Comments
 (0)