Skip to content

Commit 80c0a95

Browse files
committed
[WIP][AMDGPU][Attributor] Infer inreg attribute in AMDGPUAttributor
1 parent 0551926 commit 80c0a95

File tree

2 files changed

+148
-1
lines changed

2 files changed

+148
-1
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Lines changed: 82 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1009,6 +1009,82 @@ struct AAAMDGPUNoAGPR
10091009

10101010
const char AAAMDGPUNoAGPR::ID = 0;
10111011

1012+
struct AAAMDGPUInreg
1013+
: public IRAttribute<Attribute::InReg,
1014+
StateWrapper<BooleanState, AbstractAttribute>,
1015+
AAAMDGPUInreg> {
1016+
AAAMDGPUInreg(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
1017+
1018+
/// Create an abstract attribute view for the position \p IRP.
1019+
static AAAMDGPUInreg &createForPosition(const IRPosition &IRP, Attributor &A);
1020+
1021+
/// See AbstractAttribute::getName()
1022+
const std::string getName() const override { return "AAAMDGPUInreg"; }
1023+
1024+
const std::string getAsStr(Attributor *A) const override {
1025+
return getAssumed() ? "inreg" : "non-inreg";
1026+
}
1027+
1028+
void trackStatistics() const override {}
1029+
1030+
/// See AbstractAttribute::getIdAddr()
1031+
const char *getIdAddr() const override { return &ID; }
1032+
1033+
/// This function should return true if the type of the \p AA is AAAMDGPUInreg
1034+
static bool classof(const AbstractAttribute *AA) {
1035+
return (AA->getIdAddr() == &ID);
1036+
}
1037+
1038+
/// Unique ID (due to the unique address)
1039+
static const char ID;
1040+
};
1041+
1042+
const char AAAMDGPUInreg::ID = 0;
1043+
1044+
namespace {
1045+
1046+
struct AAAMDGPUInregArgument : public AAAMDGPUInreg {
1047+
AAAMDGPUInregArgument(const IRPosition &IRP, Attributor &A)
1048+
: AAAMDGPUInreg(IRP, A) {}
1049+
1050+
void initialize(Attributor &A) override {
1051+
if (getAssociatedArgument()->hasAttribute(Attribute::InReg))
1052+
indicateOptimisticFixpoint();
1053+
}
1054+
1055+
ChangeStatus updateImpl(Attributor &A) override {
1056+
const auto &InfoCache =
1057+
static_cast<AMDGPUInformationCache &>(A.getInfoCache());
1058+
unsigned ArgNo = getAssociatedArgument()->getArgNo();
1059+
1060+
auto Pred = [&](AbstractCallSite ACS) -> bool {
1061+
CallBase *CB = ACS.getInstruction();
1062+
auto TTI = InfoCache.TM.getTargetTransformInfo(*CB->getFunction());
1063+
Value *V = CB->getArgOperandUse(ArgNo);
1064+
return TTI.isAlwaysUniform(V);
1065+
};
1066+
1067+
bool UsedAssumedInformation = false;
1068+
if (!A.checkForAllCallSites(Pred, *this, /*RequireAllCallSites=*/true,
1069+
UsedAssumedInformation))
1070+
return indicatePessimisticFixpoint();
1071+
1072+
return ChangeStatus::UNCHANGED;
1073+
}
1074+
};
1075+
1076+
} // namespace
1077+
1078+
/// Allocate the AAAMDGPUInreg implementation that matches the kind of \p IRP
/// from the Attributor's allocator. Only argument positions are supported;
/// any other position kind is treated as unreachable.
AAAMDGPUInreg &AAAMDGPUInreg::createForPosition(const IRPosition &IRP,
                                                Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_ARGUMENT)
    return *new (A.Allocator) AAAMDGPUInregArgument(IRP, A);

  llvm_unreachable("not a valid position for AAAMDGPUInreg");
}
1087+
10121088
static void addPreloadKernArgHint(Function &F, TargetMachine &TM) {
10131089
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
10141090
for (unsigned I = 0;
@@ -1041,7 +1117,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
10411117
&AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID, &AACallEdges::ID,
10421118
&AAPointerInfo::ID, &AAPotentialConstantValues::ID,
10431119
&AAUnderlyingObjects::ID, &AAAddressSpace::ID, &AAIndirectCallInfo::ID,
1044-
&AAInstanceInfo::ID});
1120+
&AAInstanceInfo::ID, &AAAMDGPUInreg::ID});
10451121

10461122
AttributorConfig AC(CGUpdater);
10471123
AC.IsClosedWorldModule = Options.IsClosedWorld;
@@ -1091,6 +1167,11 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
10911167
IRPosition::value(*SI->getPointerOperand()));
10921168
}
10931169
}
1170+
1171+
if (F.getCallingConv() != CallingConv::AMDGPU_KERNEL) {
1172+
for (auto &Arg : F.args())
1173+
A.getOrCreateAAFor<AAAMDGPUInreg>(IRPosition::argument(Arg));
1174+
}
10941175
}
10951176

10961177
ChangeStatus Change = A.run();
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
2+
; RUN: opt -S -mtriple=amdgcn-unknown-unknown -passes=amdgpu-attributor %s -o - | FileCheck %s
3+
4+
@g1 = protected addrspace(1) externally_initialized global i32 0, align 4
5+
@g2 = protected addrspace(1) externally_initialized global i32 0, align 4
6+
7+
;.
8+
; CHECK: @g1 = protected addrspace(1) externally_initialized global i32 0, align 4
9+
; CHECK: @g2 = protected addrspace(1) externally_initialized global i32 0, align 4
10+
;.
11+
define internal fastcc void @f(i32 %x.0.val, ptr nocapture noundef readonly %y) {
12+
; CHECK-LABEL: define {{[^@]+}}@f
13+
; CHECK-SAME: (i32 [[X_0_VAL:%.*]], ptr nocapture noundef readonly [[Y:%.*]]) #[[ATTR0:[0-9]+]] {
14+
; CHECK-NEXT: entry:
15+
; CHECK-NEXT: store i32 [[X_0_VAL]], ptr addrspace(1) @g1, align 4
16+
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[Y]], align 4
17+
; CHECK-NEXT: store i32 [[LOAD]], ptr addrspace(1) @g2, align 4
18+
; CHECK-NEXT: ret void
19+
;
20+
entry:
21+
store i32 %x.0.val, ptr addrspace(1) @g1, align 4
22+
%load = load i32, ptr %y, align 4
23+
store i32 %load, ptr addrspace(1) @g2, align 4
24+
ret void
25+
}
26+
27+
define protected amdgpu_kernel void @kernel(ptr addrspace(1) %x2, i32 %z) {
28+
; CHECK-LABEL: define {{[^@]+}}@kernel
29+
; CHECK-SAME: (ptr addrspace(1) [[X2:%.*]], i32 [[Z:%.*]]) #[[ATTR1:[0-9]+]] {
30+
; CHECK-NEXT: entry:
31+
; CHECK-NEXT: [[X2_CAST:%.*]] = addrspacecast ptr addrspace(1) [[X2]] to ptr
32+
; CHECK-NEXT: [[QUEUE_PTR:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.queue.ptr()
33+
; CHECK-NEXT: [[QUEUE_PTR_CAST:%.*]] = addrspacecast ptr addrspace(4) [[QUEUE_PTR]] to ptr
34+
; CHECK-NEXT: [[IMPLICITARG_PTR:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
35+
; CHECK-NEXT: [[IMPLICITARG_PTR_CAST:%.*]] = addrspacecast ptr addrspace(4) [[IMPLICITARG_PTR]] to ptr
36+
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[Z]], 0
37+
; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], ptr [[QUEUE_PTR_CAST]], ptr [[X2_CAST]]
38+
; CHECK-NEXT: [[COND_VAL:%.*]] = load i32, ptr [[COND]], align 4
39+
; CHECK-NEXT: tail call fastcc void @f(i32 [[COND_VAL]], ptr noundef [[IMPLICITARG_PTR_CAST]])
40+
; CHECK-NEXT: [[DOTVAL:%.*]] = load i32, ptr addrspace(4) [[QUEUE_PTR]], align 4
41+
; CHECK-NEXT: tail call fastcc void @f(i32 [[DOTVAL]], ptr noundef [[IMPLICITARG_PTR_CAST]])
42+
; CHECK-NEXT: ret void
43+
;
44+
entry:
45+
%x2.cast = addrspacecast ptr addrspace(1) %x2 to ptr
46+
%queue.ptr = tail call ptr addrspace(4) @llvm.amdgcn.queue.ptr()
47+
%queue.ptr.cast = addrspacecast ptr addrspace(4) %queue.ptr to ptr
48+
%implicitarg.ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
49+
%implicitarg.ptr.cast = addrspacecast ptr addrspace(4) %implicitarg.ptr to ptr
50+
%cmp = icmp sgt i32 %z, 0
51+
%cond = select i1 %cmp, ptr %queue.ptr.cast, ptr %x2.cast
52+
%cond.val = load i32, ptr %cond, align 4
53+
tail call fastcc void @f(i32 %cond.val, ptr noundef %implicitarg.ptr.cast)
54+
%.val = load i32, ptr addrspace(4) %queue.ptr, align 4
55+
tail call fastcc void @f(i32 %.val, ptr noundef %implicitarg.ptr.cast)
56+
ret void
57+
}
58+
59+
declare align 4 ptr addrspace(4) @llvm.amdgcn.queue.ptr()
60+
61+
declare align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
62+
;.
63+
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
64+
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
65+
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
66+
;.

0 commit comments

Comments
 (0)