-
Notifications
You must be signed in to change notification settings - Fork 12.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
AMDGPU: Drop amdgpu-no-lds-kernel-id attribute in LDS lowering #71481
Conversation
@llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) ChangesThis is in preparation for moving the run of AMDGPUAttributor earlier. Other options include teaching the attributor to avoid adding it in cases Patch is 38.01 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/71481.diff 27 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 2c29710f8c8cb46..403014db56171ac 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -17,6 +17,7 @@
namespace llvm {
class AMDGPUTargetMachine;
+class GCNTargetMachine;
class TargetMachine;
// GlobalISel passes
@@ -86,8 +87,8 @@ extern char &AMDGPUMachineCFGStructurizerID;
void initializeAMDGPUAlwaysInlinePass(PassRegistry&);
Pass *createAMDGPUAnnotateKernelFeaturesPass();
-Pass *createAMDGPUAttributorPass();
-void initializeAMDGPUAttributorPass(PassRegistry &);
+Pass *createAMDGPUAttributorLegacyPass();
+void initializeAMDGPUAttributorLegacyPass(PassRegistry &);
void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &);
extern char &AMDGPUAnnotateKernelFeaturesID;
@@ -262,6 +263,15 @@ class AMDGPULowerKernelArgumentsPass
PreservedAnalyses run(Function &, FunctionAnalysisManager &);
};
+class AMDGPUAttributorPass : public PassInfoMixin<AMDGPUAttributorPass> {
+private:
+ TargetMachine &TM;
+
+public:
+ AMDGPUAttributorPass(TargetMachine &TM) : TM(TM){};
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
FunctionPass *createAMDGPUAnnotateUniformValues();
ModulePass *createAMDGPUPrintfRuntimeBinding();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index d7dc37066b1fd38..5fd9e571282dbcb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -933,9 +933,53 @@ static void addPreloadKernArgHint(Function &F, TargetMachine &TM) {
}
}
-class AMDGPUAttributor : public ModulePass {
+static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM) {
+ SetVector<Function *> Functions;
+ for (Function &F : M) {
+ if (!F.isIntrinsic())
+ Functions.insert(&F);
+ }
+
+ CallGraphUpdater CGUpdater;
+ BumpPtrAllocator Allocator;
+ AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, TM);
+ DenseSet<const char *> Allowed(
+ {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
+ &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
+ &AAAMDWavesPerEU::ID, &AACallEdges::ID, &AAPointerInfo::ID,
+ &AAPotentialConstantValues::ID, &AAUnderlyingObjects::ID});
+
+ AttributorConfig AC(CGUpdater);
+ AC.Allowed = &Allowed;
+ AC.IsModulePass = true;
+ AC.DefaultInitializeLiveInternals = false;
+ AC.IPOAmendableCB = [](const Function &F) {
+ return F.getCallingConv() == CallingConv::AMDGPU_KERNEL;
+ };
+
+ Attributor A(Functions, InfoCache, AC);
+
+ for (Function &F : M) {
+ if (!F.isIntrinsic()) {
+ A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
+ A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(F));
+ CallingConv::ID CC = F.getCallingConv();
+ if (!AMDGPU::isEntryFunctionCC(CC)) {
+ A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(F));
+ A.getOrCreateAAFor<AAAMDWavesPerEU>(IRPosition::function(F));
+ } else if (CC == CallingConv::AMDGPU_KERNEL) {
+ addPreloadKernArgHint(F, TM);
+ }
+ }
+ }
+
+ ChangeStatus Change = A.run();
+ return Change == ChangeStatus::CHANGED;
+}
+
+class AMDGPUAttributorLegacy : public ModulePass {
public:
- AMDGPUAttributor() : ModulePass(ID) {}
+ AMDGPUAttributorLegacy() : ModulePass(ID) {}
/// doInitialization - Virtual method overridden by subclasses to do
/// any necessary initialization before any pass is run.
@@ -949,48 +993,8 @@ class AMDGPUAttributor : public ModulePass {
}
bool runOnModule(Module &M) override {
- SetVector<Function *> Functions;
AnalysisGetter AG(this);
- for (Function &F : M) {
- if (!F.isIntrinsic())
- Functions.insert(&F);
- }
-
- CallGraphUpdater CGUpdater;
- BumpPtrAllocator Allocator;
- AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM);
- DenseSet<const char *> Allowed(
- {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
- &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
- &AAAMDWavesPerEU::ID, &AACallEdges::ID, &AAPointerInfo::ID,
- &AAPotentialConstantValues::ID, &AAUnderlyingObjects::ID});
-
- AttributorConfig AC(CGUpdater);
- AC.Allowed = &Allowed;
- AC.IsModulePass = true;
- AC.DefaultInitializeLiveInternals = false;
- AC.IPOAmendableCB = [](const Function &F) {
- return F.getCallingConv() == CallingConv::AMDGPU_KERNEL;
- };
-
- Attributor A(Functions, InfoCache, AC);
-
- for (Function &F : M) {
- if (!F.isIntrinsic()) {
- A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
- A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(F));
- CallingConv::ID CC = F.getCallingConv();
- if (!AMDGPU::isEntryFunctionCC(CC)) {
- A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(F));
- A.getOrCreateAAFor<AAAMDWavesPerEU>(IRPosition::function(F));
- } else if (CC == CallingConv::AMDGPU_KERNEL) {
- addPreloadKernArgHint(F, *TM);
- }
- }
- }
-
- ChangeStatus Change = A.run();
- return Change == ChangeStatus::CHANGED;
+ return runImpl(M, AG, *TM);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -1003,11 +1007,25 @@ class AMDGPUAttributor : public ModulePass {
};
} // namespace
-char AMDGPUAttributor::ID = 0;
+PreservedAnalyses llvm::AMDGPUAttributorPass::run(Module &M,
+ ModuleAnalysisManager &AM) {
-Pass *llvm::createAMDGPUAttributorPass() { return new AMDGPUAttributor(); }
-INITIALIZE_PASS_BEGIN(AMDGPUAttributor, DEBUG_TYPE, "AMDGPU Attributor", false,
- false)
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ AnalysisGetter AG(FAM);
+
+ // TODO: Probably preserves CFG
+ return runImpl(M, AG, TM) ? PreservedAnalyses::none()
+ : PreservedAnalyses::all();
+}
+
+char AMDGPUAttributorLegacy::ID = 0;
+
+Pass *llvm::createAMDGPUAttributorLegacyPass() {
+ return new AMDGPUAttributorLegacy();
+}
+INITIALIZE_PASS_BEGIN(AMDGPUAttributorLegacy, DEBUG_TYPE, "AMDGPU Attributor",
+ false, false)
INITIALIZE_PASS_DEPENDENCY(CycleInfoWrapperPass);
-INITIALIZE_PASS_END(AMDGPUAttributor, DEBUG_TYPE, "AMDGPU Attributor", false,
- false)
+INITIALIZE_PASS_END(AMDGPUAttributorLegacy, DEBUG_TYPE, "AMDGPU Attributor",
+ false, false)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
index efe40e52065426d..64d72b6ff50363b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@@ -1028,6 +1028,42 @@ class AMDGPULowerModuleLDS {
return N;
}
+ /// Strip "amdgpu-no-lds-kernel-id" from any functions where we may have
+ /// introduced its use. If AMDGPUAttributor ran prior to the pass, we inferred
+ /// the lack of llvm.amdgcn.lds.kernel.id calls.
+ void removeNoLdsKernelIdFromReachable(CallGraph &CG, Function *KernelRoot) {
+ KernelRoot->removeFnAttr("amdgpu-no-lds-kernel-id");
+
+ SmallVector<Function *> Tmp({CG[KernelRoot]->getFunction()});
+ if (!Tmp.back())
+ return;
+
+ do {
+ Function *F = Tmp.pop_back_val();
+
+ for (auto &N : *CG[F]) {
+ if (!N.second)
+ continue;
+
+ Function *Callee = N.second->getFunction();
+ if (!Callee) {
+ // If we see any indirect calls, assume nothing about potential
+ // targets.
+ // TODO: This could be refined to possible LDS global users.
+ for (auto &N : *CG.getCallsExternalNode()) {
+ Function *PotentialCallee = N.second->getFunction();
+ Tmp.push_back(PotentialCallee);
+ }
+
+ continue;
+ }
+
+ Callee->removeFnAttr("amdgpu-no-lds-kernel-id");
+ Tmp.push_back(Callee);
+ }
+ } while (!Tmp.empty());
+ }
+
DenseMap<Function *, GlobalVariable *> lowerDynamicLDSVariables(
Module &M, LDSUsesInfoTy &LDSUsesInfo,
DenseSet<Function *> const &KernelsThatIndirectlyAllocateDynamicLDS,
@@ -1177,6 +1213,13 @@ class AMDGPULowerModuleLDS {
M, TableLookupVariablesOrdered, OrderedKernels, KernelToReplacement);
replaceUsesInInstructionsWithTableLookup(M, TableLookupVariablesOrdered,
LookupTable);
+
+ // Strip amdgpu-no-lds-kernel-id from all functions reachable from the
+ // kernel. We may have inferred this wasn't used prior to the pass.
+ //
+ // TODO: We could filter out subgraphs that do not access LDS globals.
+ for (Function *F : KernelsThatAllocateTableLDS)
+ removeNoLdsKernelIdFromReachable(CG, F);
}
DenseMap<Function *, GlobalVariable *> KernelToCreatedDynamicLDS =
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 375df27206f7b41..f9095e684515a72 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -381,7 +381,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeSILoadStoreOptimizerPass(*PR);
initializeAMDGPUCtorDtorLoweringLegacyPass(*PR);
initializeAMDGPUAlwaysInlinePass(*PR);
- initializeAMDGPUAttributorPass(*PR);
+ initializeAMDGPUAttributorLegacyPass(*PR);
initializeAMDGPUAnnotateKernelFeaturesPass(*PR);
initializeAMDGPUAnnotateUniformValuesPass(*PR);
initializeAMDGPUArgumentUsageInfoPass(*PR);
@@ -610,6 +610,10 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
PB.registerPipelineParsingCallback(
[this](StringRef PassName, ModulePassManager &PM,
ArrayRef<PassBuilder::PipelineElement>) {
+ if (PassName == "amdgpu-attributor") {
+ PM.addPass(AMDGPUAttributorPass(*this));
+ return true;
+ }
if (PassName == "amdgpu-unify-metadata") {
PM.addPass(AMDGPUUnifyMetadataPass());
return true;
@@ -1021,7 +1025,7 @@ void AMDGPUPassConfig::addIRPasses() {
// AMDGPUAttributor infers lack of llvm.amdgcn.lds.kernel.id calls, so run
// after their introduction
if (TM.getOptLevel() > CodeGenOptLevel::None)
- addPass(createAMDGPUAttributorPass());
+ addPass(createAMDGPUAttributorLegacyPass());
if (TM.getOptLevel() > CodeGenOptLevel::None)
addPass(createInferAddressSpacesPass());
diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
index c4e40d46e3179b0..089799383294ecf 100644
--- a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefixes=HSA,AKF_HSA %s
-; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-attributor < %s | FileCheck -check-prefixes=HSA,ATTRIBUTOR_HSA %s
+; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -passes=amdgpu-attributor < %s | FileCheck -check-prefixes=HSA,ATTRIBUTOR_HSA %s
declare void @llvm.memcpy.p1.p4.i32(ptr addrspace(1) nocapture, ptr addrspace(4) nocapture, i32, i1) #0
diff --git a/llvm/test/CodeGen/AMDGPU/annotate-existing-abi-attributes.ll b/llvm/test/CodeGen/AMDGPU/annotate-existing-abi-attributes.ll
index 84d97a86e5e5df8..28722021e0448f2 100644
--- a/llvm/test/CodeGen/AMDGPU/annotate-existing-abi-attributes.ll
+++ b/llvm/test/CodeGen/AMDGPU/annotate-existing-abi-attributes.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
-; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-attributor %s | FileCheck %s
+; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -passes=amdgpu-attributor %s | FileCheck %s
; Check handling for pre-existing attributes on function declarations
diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
index a516d9ecdaf0e4c..1f90c0d03a85661 100644
--- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefixes=AKF_HSA %s
-; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-attributor < %s | FileCheck -check-prefixes=ATTRIBUTOR_HSA %s
+; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -passes=amdgpu-attributor < %s | FileCheck -check-prefixes=ATTRIBUTOR_HSA %s
; TODO: The test contains UB which is refined by the Attributor and should be removed.
diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
index aa3243404056d94..9e5df73d69a07b9 100644
--- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
+++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefixes=HSA,AKF_HSA %s
-; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-attributor < %s | FileCheck -check-prefixes=HSA,ATTRIBUTOR_HSA %s
+; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -passes=amdgpu-attributor < %s | FileCheck -check-prefixes=HSA,ATTRIBUTOR_HSA %s
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll
index 24339d526aa0338..6c5e58c74033945 100644
--- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll
+++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefixes=CHECK,AKF_CHECK %s
-; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-attributor < %s | FileCheck -check-prefixes=CHECK,ATTRIBUTOR_CHECK %s
+; RUN: opt -S -mtriple=amdgcn-unknown-unknown -passes=amdgpu-attributor < %s | FileCheck -check-prefixes=CHECK,ATTRIBUTOR_CHECK %s
declare i32 @llvm.r600.read.tgid.x() #0
declare i32 @llvm.r600.read.tgid.y() #0
diff --git a/llvm/test/CodeGen/AMDGPU/attributor-loop-issue-58639.ll b/llvm/test/CodeGen/AMDGPU/attributor-loop-issue-58639.ll
index 60da0445927743a..2b9f579e6a1839e 100644
--- a/llvm/test/CodeGen/AMDGPU/attributor-loop-issue-58639.ll
+++ b/llvm/test/CodeGen/AMDGPU/attributor-loop-issue-58639.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor %s | FileCheck %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor %s | FileCheck %s
%0 = type { ptr, ptr }
diff --git a/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll
index c20df45f10a2950..0c034192869b184 100644
--- a/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor < %s | FileCheck %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor < %s | FileCheck %s
define internal void @indirect() {
; CHECK-LABEL: define {{[^@]+}}@indirect
diff --git a/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll b/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll
index fde1ca5ce02b741..0069370cc9721ca 100644
--- a/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll
+++ b/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-annotate-kernel-features %s | FileCheck -check-prefix=AKF_GCN %s
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor %s | FileCheck -check-prefix=ATTRIBUTOR_GCN %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor %s | FileCheck -check-prefix=ATTRIBUTOR_GCN %s
define internal void @indirect() {
; AKF_GCN-LABEL: define {{[^@]+}}@indirect() {
diff --git a/llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll b/llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll
index c355023270c19e4..d5590754d78bc70 100644
--- a/llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll
+++ b/llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
-; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-attributor | FileCheck -check-prefixes=CHECK,V4 %s
-; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-attributor | FileCheck -check-prefixes=CHECK,V5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | opt -S -mtriple=amdgcn-unknown-unknown -passes=amdgpu-attributor | FileCheck -check-prefixes=CHECK,V4 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | opt -S -mtriple=amdgcn-unknown-unknown -passes=amdgpu-attributor | FileCheck -check-prefixes=CHECK,V5 %s
declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #0
diff --git a/llvm/test/CodeGen/AMDGPU/pal-simple-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/pal-simple-indirect-call.ll
index c4af5f258243e60..2e9f09ad41813d9 100644
--- a/llvm/test/CodeGen/AMDGPU/pal-simple-indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/pal-simple-indirect-call.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; Check that no attributes are added to graphics functions
; RUN: opt -S -mtriple=amdgcn-amd-amdpal -amdgpu-annotate-kernel-features %s | FileCheck -check-prefixes=AKF_GCN %s
-; RUN: opt -S -mtriple=amdgcn-amd-amdpal -amdgpu-attributor %s | FileCheck -check-prefixes=ATTRIBUTOR_GCN %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdpal -passes=amdgpu-attributor %s | FileCheck -check-prefixes=ATTRIBUTOR_GCN %s
; Check that it doesn't crash
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s
diff --git a/llvm/test/CodeGen/AMDGPU/preload-kernargs-inreg-hints.ll b/llvm/test/CodeGen/AMDGPU/preload-kernargs-inreg-hints.ll
index 1238fa8c49a928b..e7488e059ee9df8 100644
--- a/llvm/test/CodeGen/AMDGPU/preload-kernargs-inreg-hints.ll
+++ b/llvm/test/CodeGen/AMDGPU/preload-kernargs-inreg-hints.ll
@@ -1,9 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature
-; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -amdgpu-attributor -S < %s | FileCheck -check-prefix=NO-PRELOAD %s
-; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -amdgpu-kernarg-preload-count=1 -amdgpu-attributor -S < %s | FileCheck -check-prefix=PRELOAD-1 %s
-; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -amdgpu-kernarg-preload-count=3 -amdgpu-attributor -S < %s | FileCheck -check-prefix=PRELOAD-3 %s
-; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -amdgpu-kernarg-preload-count=16 -amdgpu-attributor -S < %s | FileCheck -check-prefix=PRELOAD-16 %s
-; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -amdgpu-kernarg-preload-count=20 -amdgpu-attributor -S < %s | FileCheck ...
[truncated]
|
d274f3c
to
e29616b
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it an option to locally create an attributor and run it instead of writing our own callstack traversal? That seems cleaner in terms of reusing functionality. In general, the attributor encapsulates everything about attributes, so if it is possible to depend on the attributor for this cleanup, that sounds better to me. It might even benefit from future enhancements in tracking potential callees at an indirect call.
But the attributor was not designed to remove attributes, right? It will need to ignore existing attributes when initializing the known state?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
See comment below. Also, can we add a statistics counter or remark, I want a way to quantify how often this hits to decide if we should improve this.
continue; | ||
|
||
Function *Callee = N.second->getFunction(); | ||
if (!Callee) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You only need to do this once, this might visit the external nodes many times. Similarly, this will not handle recursions well.
It's basically never. LDS used outside of kernels is barely used, and then it needs to be in something that won't be inlined anyway |
…l-id Prepare to move where AMDGPUAttributor runs. Currently it runs after LDS lowering, where the lack of kernel ID intrinsics is inferred.
This is in preparation for moving the run of AMDGPUAttributor earlier. Currently it infers the lack of the corresponding intrinsic calls, so if we introduce new ones we need to remove the attribute from any possible transitive callers. This is more conservative than necessary, we could try to identify specific subgraphs where LDS globals are not used. Other options include teaching the attributor to avoid adding it in cases where the lowering may choose the table, but this seems more complex. Alternatively could add a second run which doesn't seem worth it.
e29616b
to
f2eadcb
Compare
ping |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LG.
I would like to teach the AMDGPU attributor about LDS to avoid this, but we can do that later.
…71481) This is in preparation for moving the run of AMDGPUAttributor earlier. Currently it infers the lack of the corresponding intrinsic calls, so if we introduce new ones we need to remove the attribute from any possible transitive callers. This is more conservative than necessary, we could try to identify specific subgraphs where LDS globals are not used. Other options include teaching the attributor to avoid adding it in cases where the lowering may choose the table, but this seems more complex. Alternatively could add a second run which doesn't seem worth it. Depends llvm#71349
This is in preparation for moving the run of AMDGPUAttributor earlier.
Currently it infers the lack of the corresponding intrinsic calls,
so if we introduce new ones we need to remove the attribute from any
possible transitive callers. This is more conservative than necessary,
we could try to identify specific subgraphs where LDS globals are not
used.
Other options include teaching the attributor to avoid adding it in cases
where the lowering may choose the table, but this seems more complex.
Alternatively could add a second run which doesn't seem worth it.
Depends #71349