Skip to content

Commit 7a68449

Browse files
committed
Reapply "[Attributor][AMDGPU] Enable AAIndirectCallInfo for AMDAttributor (llvm#100952)"
This reverts commit 874cd10.
1 parent 47bf996 commit 7a68449

File tree

8 files changed

+95
-13
lines changed

8 files changed

+95
-13
lines changed

llvm/include/llvm/Transforms/IPO/Attributor.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1448,7 +1448,7 @@ struct AttributorConfig {
14481448
/// Callback function to determine if the targets of an indirect call should be made
14491449
/// direct call targets (with an if-cascade).
14501450
std::function<bool(Attributor &A, const AbstractAttribute &AA, CallBase &CB,
1451-
Function &AssummedCallee)>
1451+
Function &AssumedCallee, unsigned NumAssumedCallees)>
14521452
IndirectCalleeSpecializationCallback = nullptr;
14531453

14541454
/// Helper to update an underlying call graph and to delete functions.
@@ -1718,10 +1718,11 @@ struct Attributor {
17181718
/// Return true if we should specialize the call site \p CB for the potential
17191719
/// callee \p Callee. \p NumAssumedCallees is the number of assumed callees of \p CB.
17201720
bool shouldSpecializeCallSiteForCallee(const AbstractAttribute &AA,
1721-
CallBase &CB, Function &Callee) {
1721+
CallBase &CB, Function &Callee,
1722+
unsigned NumAssumedCallees) {
17221723
return Configuration.IndirectCalleeSpecializationCallback
1723-
? Configuration.IndirectCalleeSpecializationCallback(*this, AA,
1724-
CB, Callee)
1724+
? Configuration.IndirectCalleeSpecializationCallback(
1725+
*this, AA, CB, Callee, NumAssumedCallees)
17251726
: true;
17261727
}
17271728

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "GCNSubtarget.h"
1515
#include "Utils/AMDGPUBaseInfo.h"
1616
#include "llvm/Analysis/CycleAnalysis.h"
17+
#include "llvm/Analysis/TargetTransformInfo.h"
1718
#include "llvm/CodeGen/TargetPassConfig.h"
1819
#include "llvm/IR/IntrinsicsAMDGPU.h"
1920
#include "llvm/IR/IntrinsicsR600.h"
@@ -1038,12 +1039,25 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM) {
10381039
&AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
10391040
&AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID, &AACallEdges::ID,
10401041
&AAPointerInfo::ID, &AAPotentialConstantValues::ID,
1041-
&AAUnderlyingObjects::ID, &AAAddressSpace::ID});
1042+
&AAUnderlyingObjects::ID, &AAAddressSpace::ID, &AAIndirectCallInfo::ID,
1043+
&AAInstanceInfo::ID});
10421044

10431045
AttributorConfig AC(CGUpdater);
10441046
AC.Allowed = &Allowed;
10451047
AC.IsModulePass = true;
10461048
AC.DefaultInitializeLiveInternals = false;
1049+
AC.IndirectCalleeSpecializationCallback =
1050+
[&TM](Attributor &A, const AbstractAttribute &AA, CallBase &CB,
1051+
Function &Callee, unsigned NumAssumedCallees) {
1052+
if (AMDGPU::isEntryFunctionCC(Callee.getCallingConv()))
1053+
return false;
1054+
// A call site with a single assumed callee can always be specialized.
1055+
if (NumAssumedCallees == 1)
1056+
return true;
1057+
// Otherwise only specialize when the called operand is always uniform.
1058+
const auto &TTI = TM.getTargetTransformInfo(*CB.getCaller());
1059+
return TTI.isAlwaysUniform(CB.getCalledOperand());
1060+
};
10471061
AC.IPOAmendableCB = [](const Function &F) {
10481062
return F.getCallingConv() == CallingConv::AMDGPU_KERNEL;
10491063
};

llvm/lib/Transforms/IPO/Attributor.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3836,7 +3836,7 @@ static bool runAttributorOnFunctions(InformationCache &InfoCache,
38363836
if (MaxSpecializationPerCB.getNumOccurrences()) {
38373837
AC.IndirectCalleeSpecializationCallback =
38383838
[&](Attributor &, const AbstractAttribute &AA, CallBase &CB,
3839-
Function &Callee) {
3839+
Function &Callee, unsigned) {
38403840
if (MaxSpecializationPerCB == 0)
38413841
return false;
38423842
auto &Set = IndirectCalleeTrackingMap[&CB];

llvm/lib/Transforms/IPO/AttributorAttributes.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12357,7 +12357,8 @@ struct AAIndirectCallInfoCallSite : public AAIndirectCallInfo {
1235712357
SmallVector<Function *, 8> SkippedAssumedCallees;
1235812358
SmallVector<std::pair<CallInst *, Instruction *>> NewCalls;
1235912359
for (Function *NewCallee : AssumedCallees) {
12360-
if (!A.shouldSpecializeCallSiteForCallee(*this, *CB, *NewCallee)) {
12360+
if (!A.shouldSpecializeCallSiteForCallee(*this, *CB, *NewCallee,
12361+
AssumedCallees.size())) {
1236112362
SkippedAssumedCallees.push_back(NewCallee);
1236212363
SpecializedForAllCallees = false;
1236312364
continue;

llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ define internal void @direct() {
1515
; CHECK-NEXT: [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5)
1616
; CHECK-NEXT: store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8
1717
; CHECK-NEXT: [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8
18-
; CHECK-NEXT: call void [[FP]]()
18+
; CHECK-NEXT: call void @indirect()
1919
; CHECK-NEXT: ret void
2020
;
2121
%fptr = alloca ptr, addrspace(5)
@@ -36,5 +36,5 @@ define amdgpu_kernel void @test_direct_indirect_call() {
3636
}
3737
;.
3838
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
39-
; CHECK: attributes #[[ATTR1]] = { "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
39+
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
4040
;.

llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ define amdgpu_kernel void @test_simple_indirect_call() #0 {
2727
; ATTRIBUTOR_GCN-NEXT: [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5)
2828
; ATTRIBUTOR_GCN-NEXT: store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8
2929
; ATTRIBUTOR_GCN-NEXT: [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8
30-
; ATTRIBUTOR_GCN-NEXT: call void [[FP]]()
30+
; ATTRIBUTOR_GCN-NEXT: call void @indirect()
3131
; ATTRIBUTOR_GCN-NEXT: ret void
3232
;
3333
%fptr = alloca ptr, addrspace(5)
@@ -43,5 +43,5 @@ attributes #0 = { "amdgpu-no-dispatch-id" }
4343
; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-calls" "amdgpu-no-dispatch-id" "amdgpu-stack-objects" }
4444
;.
4545
; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
46-
; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "uniform-work-group-size"="false" }
46+
; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
4747
;.
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
2+
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor %s | FileCheck --check-prefixes=CHECK,OW %s
3+
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor -attributor-assume-closed-world=1 %s | FileCheck --check-prefixes=CHECK,CW %s
4+
5+
target datalayout = "A5"
6+
7+
@G = global i32 0, align 4
8+
9+
;.
10+
; CHECK: @G = global i32 0, align 4
11+
;.
12+
define void @bar() {
13+
; CHECK-LABEL: define {{[^@]+}}@bar
14+
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
15+
; CHECK-NEXT: entry:
16+
; CHECK-NEXT: store i32 1, ptr @G, align 4
17+
; CHECK-NEXT: ret void
18+
;
19+
entry:
20+
store i32 1, ptr @G, align 4
21+
ret void
22+
}
23+
24+
define ptr @helper() {
25+
; CHECK-LABEL: define {{[^@]+}}@helper
26+
; CHECK-SAME: () #[[ATTR0]] {
27+
; CHECK-NEXT: entry:
28+
; CHECK-NEXT: ret ptr @bar
29+
;
30+
entry:
31+
ret ptr @bar
32+
}
33+
34+
define amdgpu_kernel void @foo(ptr noundef %fp) {
35+
; OW-LABEL: define {{[^@]+}}@foo
36+
; OW-SAME: (ptr noundef [[FP:%.*]]) #[[ATTR1:[0-9]+]] {
37+
; OW-NEXT: entry:
38+
; OW-NEXT: [[FP_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
39+
; OW-NEXT: store ptr [[FP]], ptr addrspace(5) [[FP_ADDR]], align 8
40+
; OW-NEXT: call void [[FP]]()
41+
; OW-NEXT: ret void
42+
;
43+
; CW-LABEL: define {{[^@]+}}@foo
44+
; CW-SAME: (ptr noundef [[FP:%.*]]) #[[ATTR1:[0-9]+]] {
45+
; CW-NEXT: entry:
46+
; CW-NEXT: [[FP_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
47+
; CW-NEXT: store ptr [[FP]], ptr addrspace(5) [[FP_ADDR]], align 8
48+
; CW-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(5) [[FP_ADDR]], align 8
49+
; CW-NEXT: call void @bar()
50+
; CW-NEXT: ret void
51+
;
52+
entry:
53+
%fp.addr = alloca ptr, addrspace(5)
54+
store ptr %fp, ptr addrspace(5) %fp.addr
55+
%load = load ptr, ptr addrspace(5) %fp.addr
56+
call void %load()
57+
ret void
58+
}
59+
60+
;.
61+
; OW: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
62+
; OW: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" }
63+
;.
64+
; CW: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
65+
; CW: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
66+
;.

llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ define amdgpu_kernel void @test_simple_indirect_call() {
3636
; ATTRIBUTOR_GCN-NEXT: [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5)
3737
; ATTRIBUTOR_GCN-NEXT: store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8
3838
; ATTRIBUTOR_GCN-NEXT: [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8
39-
; ATTRIBUTOR_GCN-NEXT: call void [[FP]]()
39+
; ATTRIBUTOR_GCN-NEXT: call void @indirect()
4040
; ATTRIBUTOR_GCN-NEXT: ret void
4141
;
4242
; GFX9-LABEL: test_simple_indirect_call:
@@ -81,7 +81,7 @@ define amdgpu_kernel void @test_simple_indirect_call() {
8181
; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-calls" "amdgpu-stack-objects" }
8282
;.
8383
; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
84-
; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" }
84+
; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
8585
;.
8686
; AKF_GCN: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
8787
;.

0 commit comments

Comments
 (0)