-
Notifications
You must be signed in to change notification settings - Fork 13.7k
AMDGPU: Respect amdgpu-no-agpr in functions and with calls #128147
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
AMDGPU: Respect amdgpu-no-agpr in functions and with calls #128147
Conversation
@llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes: Remove the MIR scan to detect whether AGPRs are used or not, and the special case for callable functions. Also adds an xfail-ish test where the register allocator asserts after allocation fails. Future work should reintroduce a more refined MIR scan to estimate AGPR pressure. Patch is 41.47 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/128147.diff — 8 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index c5efb89d8b2db..a83fc2d188de2 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -64,6 +64,10 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
}
MayNeedAGPRs = ST.hasMAIInsts();
+ if (ST.hasGFX90AInsts() &&
+ ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() &&
+ !mayUseAGPRs(F))
+ MayNeedAGPRs = false; // We will select all MAI with VGPR operands.
if (AMDGPU::isChainCC(CC)) {
// Chain functions don't receive an SP from their caller, but are free to
@@ -98,13 +102,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
ImplicitArgPtr = true;
} else {
ImplicitArgPtr = false;
- MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
- MaxKernArgAlign);
-
- if (ST.hasGFX90AInsts() &&
- ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() &&
- !mayUseAGPRs(F))
- MayNeedAGPRs = false; // We will select all MAI with VGPR operands.
+ MaxKernArgAlign =
+ std::max(ST.getAlignmentForImplicitArgPtr(), MaxKernArgAlign);
}
if (!AMDGPU::isGraphics(CC) ||
@@ -783,44 +782,3 @@ bool SIMachineFunctionInfo::initializeBaseYamlFields(
bool SIMachineFunctionInfo::mayUseAGPRs(const Function &F) const {
return !F.hasFnAttribute("amdgpu-no-agpr");
}
-
-bool SIMachineFunctionInfo::usesAGPRs(const MachineFunction &MF) const {
- if (UsesAGPRs)
- return *UsesAGPRs;
-
- if (!mayNeedAGPRs()) {
- UsesAGPRs = false;
- return false;
- }
-
- if (!AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv()) ||
- MF.getFrameInfo().hasCalls()) {
- UsesAGPRs = true;
- return true;
- }
-
- const MachineRegisterInfo &MRI = MF.getRegInfo();
-
- for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
- const Register Reg = Register::index2VirtReg(I);
- const TargetRegisterClass *RC = MRI.getRegClassOrNull(Reg);
- if (RC && SIRegisterInfo::isAGPRClass(RC)) {
- UsesAGPRs = true;
- return true;
- }
- if (!RC && !MRI.use_empty(Reg) && MRI.getType(Reg).isValid()) {
- // Defer caching UsesAGPRs, function might not yet been regbank selected.
- return true;
- }
- }
-
- for (MCRegister Reg : AMDGPU::AGPR_32RegClass) {
- if (MRI.isPhysRegUsed(Reg)) {
- UsesAGPRs = true;
- return true;
- }
- }
-
- UsesAGPRs = false;
- return false;
-}
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 2e2716f1ce888..740f752bc93b7 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -494,8 +494,6 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
// scheduler stage.
unsigned MaxMemoryClusterDWords = DefaultMemoryClusterDWordsLimit;
- mutable std::optional<bool> UsesAGPRs;
-
MCPhysReg getNextUserSGPR() const;
MCPhysReg getNextSystemSGPR() const;
@@ -1126,9 +1124,6 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
// has a call which may use it.
bool mayUseAGPRs(const Function &F) const;
- // \returns true if a function needs or may need AGPRs.
- bool usesAGPRs(const MachineFunction &MF) const;
-
/// \returns Default/requested number of work groups for this function.
SmallVector<unsigned> getMaxNumWorkGroups() const { return MaxNumWorkGroups; }
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 71c720ed09b5f..924aa45559366 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -585,7 +585,7 @@ SIRegisterInfo::getMaxNumVectorRegs(const MachineFunction &MF) const {
// TODO: it shall be possible to estimate maximum AGPR/VGPR pressure and split
// register file accordingly.
if (ST.hasGFX90AInsts()) {
- if (MFI->usesAGPRs(MF)) {
+ if (MFI->mayNeedAGPRs()) {
MaxNumVGPRs /= 2;
MaxNumAGPRs = MaxNumVGPRs;
} else {
diff --git a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers-assertion-after-ra-failure.xfail.ll b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers-assertion-after-ra-failure.xfail.ll
new file mode 100644
index 0000000000000..feae79b377174
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers-assertion-after-ra-failure.xfail.ll
@@ -0,0 +1,23 @@
+; REQUIRES: asserts
+; RUN: not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -filetype=null %s 2>&1 | FileCheck -check-prefix=CRASH %s
+
+; CRASH: error: <unknown>:0:0: no registers from class available to allocate in function 'no_free_vgprs_at_agpr_to_agpr_copy'
+; CRASH: Assertion failed: (valid() && "Cannot access invalid iterator")
+
+define void @no_free_vgprs_at_agpr_to_agpr_copy(float %v0, float %v1) #0 {
+ %asm = call { <32 x i32>, <16 x float> } asm sideeffect "; def $0 $1", "=${v[0:31]},=${a[0:15]}"()
+ %vgpr0 = extractvalue { <32 x i32>, <16 x float> } %asm, 0
+ %agpr0 = extractvalue { <32 x i32>, <16 x float> } %asm, 1
+ %mfma = call <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float %v0, float %v1, <16 x float> %agpr0, i32 0, i32 0, i32 0)
+ %agpr1 = call i32 asm sideeffect "; copy ", "={a1},a,~{a[0:15]}"(<16 x float> %agpr0)
+ %agpr2 = call i32 asm sideeffect "; copy ", "={a2},a,{a[0:15]}"(i32 %agpr1, <16 x float> %mfma)
+ call void asm sideeffect "; use $0 $1", "{a3},{v[0:31]}"(i32 %agpr2, <32 x i32> %vgpr0)
+ ret void
+}
+
+declare <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float, float, <16 x float>, i32 immarg, i32 immarg, i32 immarg) #1
+declare noundef i32 @llvm.amdgcn.workitem.id.x() #2
+
+attributes #0 = { "amdgpu-no-agpr" "amdgpu-waves-per-eu"="6,6" }
+attributes #1 = { convergent nocallback nofree nosync nounwind willreturn memory(none) }
+attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
diff --git a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
index 4ce46bbaf45ac..d1b01eeee11a4 100644
--- a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
+++ b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
@@ -240,7 +240,7 @@ define void @no_free_vgprs_at_agpr_to_agpr_copy(float %v0, float %v1) #0 {
}
; Check that we do make use of v32 if there are no AGPRs present in the function
-define amdgpu_kernel void @no_agpr_no_reserve(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @no_agpr_no_reserve(ptr addrspace(1) %arg) #5 {
; GFX908-LABEL: no_agpr_no_reserve:
; GFX908: ; %bb.0:
; GFX908-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
@@ -1144,5 +1144,6 @@ declare i32 @llvm.amdgcn.workitem.id.x() #2
attributes #0 = { "amdgpu-waves-per-eu"="6,6" }
attributes #1 = { convergent nounwind readnone willreturn }
attributes #2 = { nounwind readnone willreturn }
-attributes #3 = { "amdgpu-waves-per-eu"="7,7" }
+attributes #3 = { "amdgpu-waves-per-eu"="7,7" "amdgpu-no-agpr" }
attributes #4 = { "amdgpu-waves-per-eu"="6,6" "amdgpu-flat-work-group-size"="1024,1024" }
+attributes #5 = { "amdgpu-waves-per-eu"="6,6" "amdgpu-no-agpr" }
diff --git a/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll b/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll
index 322686b0144a0..f6f78f134fc1f 100644
--- a/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll
+++ b/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll
@@ -94,9 +94,20 @@ bb3:
ret void
}
-; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_nonentry:
+; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_nonentry_noagpr:
+; GFX908: v_mfma_f32_32x32x1{{.*}} a[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9:]+}}, a[{{[0-9:]+}}]
+; GFX90A: v_mfma_f32_32x32x1{{.*}} v[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9:]+}}, v[{{[0-9:]+}}]
+define void @test_mfma_f32_32x32x1f32_nonentry_noagpr(ptr addrspace(1) %arg) #0 {
+bb:
+ %in.1 = load <32 x float>, ptr addrspace(1) %arg
+ %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %in.1, i32 0, i32 0, i32 0)
+ store <32 x float> %mai.1, ptr addrspace(1) %arg
+ ret void
+}
+
+; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_nonentry_with_agpr:
; GCN: v_mfma_f32_32x32x1{{.*}} a[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9:]+}}, a[{{[0-9:]+}}]
-define void @test_mfma_f32_32x32x1f32_nonentry(ptr addrspace(1) %arg) #0 {
+define void @test_mfma_f32_32x32x1f32_nonentry_with_agpr(ptr addrspace(1) %arg) #3 {
bb:
%in.1 = load <32 x float>, ptr addrspace(1) %arg
%mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %in.1, i32 0, i32 0, i32 0)
@@ -109,3 +120,4 @@ declare void @foo()
attributes #0 = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="2" "amdgpu-no-agpr" }
attributes #1 = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="2" }
attributes #2 = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-no-agpr" }
+attributes #3 = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="2" }
diff --git a/llvm/test/CodeGen/AMDGPU/spill-regpressure-less.mir b/llvm/test/CodeGen/AMDGPU/spill-regpressure-less.mir
index ed57caadea5c5..583d1f9e4a9fd 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-regpressure-less.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-regpressure-less.mir
@@ -6,7 +6,7 @@
ret void
}
- attributes #0 = { "amdgpu-waves-per-eu"="8,8" }
+ attributes #0 = { "amdgpu-waves-per-eu"="8,8" "amdgpu-no-agpr" }
...
---
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-agpr-limit-gfx90a.ll b/llvm/test/CodeGen/AMDGPU/vgpr-agpr-limit-gfx90a.ll
index 040799435db4a..03978e68e81b4 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-agpr-limit-gfx90a.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-agpr-limit-gfx90a.ll
@@ -518,6 +518,522 @@ define internal void @use256vgprs() {
ret void
}
+define internal void @use256vgprs_no_agpr() "amdgpu-no-agpr" {
+ %v0 = call i32 asm sideeffect "; def $0", "=v"()
+ %v1 = call i32 asm sideeffect "; def $0", "=v"()
+ %v2 = call i32 asm sideeffect "; def $0", "=v"()
+ %v3 = call i32 asm sideeffect "; def $0", "=v"()
+ %v4 = call i32 asm sideeffect "; def $0", "=v"()
+ %v5 = call i32 asm sideeffect "; def $0", "=v"()
+ %v6 = call i32 asm sideeffect "; def $0", "=v"()
+ %v7 = call i32 asm sideeffect "; def $0", "=v"()
+ %v8 = call i32 asm sideeffect "; def $0", "=v"()
+ %v9 = call i32 asm sideeffect "; def $0", "=v"()
+ %v10 = call i32 asm sideeffect "; def $0", "=v"()
+ %v11 = call i32 asm sideeffect "; def $0", "=v"()
+ %v12 = call i32 asm sideeffect "; def $0", "=v"()
+ %v13 = call i32 asm sideeffect "; def $0", "=v"()
+ %v14 = call i32 asm sideeffect "; def $0", "=v"()
+ %v15 = call i32 asm sideeffect "; def $0", "=v"()
+ %v16 = call i32 asm sideeffect "; def $0", "=v"()
+ %v17 = call i32 asm sideeffect "; def $0", "=v"()
+ %v18 = call i32 asm sideeffect "; def $0", "=v"()
+ %v19 = call i32 asm sideeffect "; def $0", "=v"()
+ %v20 = call i32 asm sideeffect "; def $0", "=v"()
+ %v21 = call i32 asm sideeffect "; def $0", "=v"()
+ %v22 = call i32 asm sideeffect "; def $0", "=v"()
+ %v23 = call i32 asm sideeffect "; def $0", "=v"()
+ %v24 = call i32 asm sideeffect "; def $0", "=v"()
+ %v25 = call i32 asm sideeffect "; def $0", "=v"()
+ %v26 = call i32 asm sideeffect "; def $0", "=v"()
+ %v27 = call i32 asm sideeffect "; def $0", "=v"()
+ %v28 = call i32 asm sideeffect "; def $0", "=v"()
+ %v29 = call i32 asm sideeffect "; def $0", "=v"()
+ %v30 = call i32 asm sideeffect "; def $0", "=v"()
+ %v31 = call i32 asm sideeffect "; def $0", "=v"()
+ %v32 = call i32 asm sideeffect "; def $0", "=v"()
+ %v33 = call i32 asm sideeffect "; def $0", "=v"()
+ %v34 = call i32 asm sideeffect "; def $0", "=v"()
+ %v35 = call i32 asm sideeffect "; def $0", "=v"()
+ %v36 = call i32 asm sideeffect "; def $0", "=v"()
+ %v37 = call i32 asm sideeffect "; def $0", "=v"()
+ %v38 = call i32 asm sideeffect "; def $0", "=v"()
+ %v39 = call i32 asm sideeffect "; def $0", "=v"()
+ %v40 = call i32 asm sideeffect "; def $0", "=v"()
+ %v41 = call i32 asm sideeffect "; def $0", "=v"()
+ %v42 = call i32 asm sideeffect "; def $0", "=v"()
+ %v43 = call i32 asm sideeffect "; def $0", "=v"()
+ %v44 = call i32 asm sideeffect "; def $0", "=v"()
+ %v45 = call i32 asm sideeffect "; def $0", "=v"()
+ %v46 = call i32 asm sideeffect "; def $0", "=v"()
+ %v47 = call i32 asm sideeffect "; def $0", "=v"()
+ %v48 = call i32 asm sideeffect "; def $0", "=v"()
+ %v49 = call i32 asm sideeffect "; def $0", "=v"()
+ %v50 = call i32 asm sideeffect "; def $0", "=v"()
+ %v51 = call i32 asm sideeffect "; def $0", "=v"()
+ %v52 = call i32 asm sideeffect "; def $0", "=v"()
+ %v53 = call i32 asm sideeffect "; def $0", "=v"()
+ %v54 = call i32 asm sideeffect "; def $0", "=v"()
+ %v55 = call i32 asm sideeffect "; def $0", "=v"()
+ %v56 = call i32 asm sideeffect "; def $0", "=v"()
+ %v57 = call i32 asm sideeffect "; def $0", "=v"()
+ %v58 = call i32 asm sideeffect "; def $0", "=v"()
+ %v59 = call i32 asm sideeffect "; def $0", "=v"()
+ %v60 = call i32 asm sideeffect "; def $0", "=v"()
+ %v61 = call i32 asm sideeffect "; def $0", "=v"()
+ %v62 = call i32 asm sideeffect "; def $0", "=v"()
+ %v63 = call i32 asm sideeffect "; def $0", "=v"()
+ %v64 = call i32 asm sideeffect "; def $0", "=v"()
+ %v65 = call i32 asm sideeffect "; def $0", "=v"()
+ %v66 = call i32 asm sideeffect "; def $0", "=v"()
+ %v67 = call i32 asm sideeffect "; def $0", "=v"()
+ %v68 = call i32 asm sideeffect "; def $0", "=v"()
+ %v69 = call i32 asm sideeffect "; def $0", "=v"()
+ %v70 = call i32 asm sideeffect "; def $0", "=v"()
+ %v71 = call i32 asm sideeffect "; def $0", "=v"()
+ %v72 = call i32 asm sideeffect "; def $0", "=v"()
+ %v73 = call i32 asm sideeffect "; def $0", "=v"()
+ %v74 = call i32 asm sideeffect "; def $0", "=v"()
+ %v75 = call i32 asm sideeffect "; def $0", "=v"()
+ %v76 = call i32 asm sideeffect "; def $0", "=v"()
+ %v77 = call i32 asm sideeffect "; def $0", "=v"()
+ %v78 = call i32 asm sideeffect "; def $0", "=v"()
+ %v79 = call i32 asm sideeffect "; def $0", "=v"()
+ %v80 = call i32 asm sideeffect "; def $0", "=v"()
+ %v81 = call i32 asm sideeffect "; def $0", "=v"()
+ %v82 = call i32 asm sideeffect "; def $0", "=v"()
+ %v83 = call i32 asm sideeffect "; def $0", "=v"()
+ %v84 = call i32 asm sideeffect "; def $0", "=v"()
+ %v85 = call i32 asm sideeffect "; def $0", "=v"()
+ %v86 = call i32 asm sideeffect "; def $0", "=v"()
+ %v87 = call i32 asm sideeffect "; def $0", "=v"()
+ %v88 = call i32 asm sideeffect "; def $0", "=v"()
+ %v89 = call i32 asm sideeffect "; def $0", "=v"()
+ %v90 = call i32 asm sideeffect "; def $0", "=v"()
+ %v91 = call i32 asm sideeffect "; def $0", "=v"()
+ %v92 = call i32 asm sideeffect "; def $0", "=v"()
+ %v93 = call i32 asm sideeffect "; def $0", "=v"()
+ %v94 = call i32 asm sideeffect "; def $0", "=v"()
+ %v95 = call i32 asm sideeffect "; def $0", "=v"()
+ %v96 = call i32 asm sideeffect "; def $0", "=v"()
+ %v97 = call i32 asm sideeffect "; def $0", "=v"()
+ %v98 = call i32 asm sideeffect "; def $0", "=v"()
+ %v99 = call i32 asm sideeffect "; def $0", "=v"()
+ %v100 = call i32 asm sideeffect "; def $0", "=v"()
+ %v101 = call i32 asm sideeffect "; def $0", "=v"()
+ %v102 = call i32 asm sideeffect "; def $0", "=v"()
+ %v103 = call i32 asm sideeffect "; def $0", "=v"()
+ %v104 = call i32 asm sideeffect "; def $0", "=v"()
+ %v105 = call i32 asm sideeffect "; def $0", "=v"()
+ %v106 = call i32 asm sideeffect "; def $0", "=v"()
+ %v107 = call i32 asm sideeffect "; def $0", "=v"()
+ %v108 = call i32 asm sideeffect "; def $0", "=v"()
+ %v109 = call i32 asm sideeffect "; def $0", "=v"()
+ %v110 = call i32 asm sideeffect "; def $0", "=v"()
+ %v111 = call i32 asm sideeffect "; def $0", "=v"()
+ %v112 = call i32 asm sideeffect "; def $0", "=v"()
+ %v113 = call i32 asm sideeffect "; def $0", "=v"()
+ %v114 = call i32 asm sideeffect "; def $0", "=v"()
+ %v115 = call i32 asm sideeffect "; def $0", "=v"()
+ %v116 = call i32 asm sideeffect "; def $0", "=v"()
+ %v117 = call i32 asm sideeffect "; def $0", "=v"()
+ %v118 = call i32 asm sideeffect "; def $0", "=v"()
+ %v119 = call i32 asm sideeffect "; def $0", "=v"()
+ %v120 = call i32 asm sideeffect "; def $0", "=v"()
+ %v121 = call i32 asm sideeffect "; def $0", "=v"()
+ %v122 = call i32 asm sideeffect "; def $0", "=v"()
+ %v123 = call i32 asm sideeffect "; def $0", "=v"()
+ %v124 = call i32 asm sideeffect "; def $0", "=v"()
+ %v125 = call i32 asm sideeffect "; def $0", "=v"()
+ %v126 = call i32 asm sideeffect "; def $0", "=v"()
+ %v127 = call i32 asm sideeffect "; def $0", "=v"()
+ %v128 = call i32 asm sideeffect "; def $0", "=v"()
+ %v129 = call i32 asm sideeffect "; def $0", "=v"()
+ %v130 = call i32 asm sideeffect "; def $0", "=v"()
+ %v131 = call i32 asm sideeffect "; def $0", "=v"()
+ %v132 = call i32 asm sideeffect "; def $0", "=v"()
+ %v133 = call i32 asm sideeffect "; def $0", "=v"()
+ %v134 = call i32 asm sideeffect "; def $0", "=v"()
+ %v135 = call i32 asm sideeffect "; def $0", "=v"()
+ %v136 = call i32 asm sideeffect "; def $0", "=v"()
+ %v137 = call i32 asm sideeffect "; def $0", "=v"()
+ %v138 = call i32 asm sideeffect "; def $0", "=v"()
+ %v139 = call i32 asm sideeffect "; def $0", "=v"()
+ %v140 = call i32 asm sideeffect "; def $0", "=v"()
+ %v141 = call i32 asm sideeffect "; def $0", "=v"()
+ %v142 = call i32 asm sideeffect "; def $0", "=v"()
+ %v143 = call i32 asm sideeffect "; def $0", "=v"()
+ %v144 = call i32 asm sideeffect "; def $0", "=v"()
+ %v145 = call i32 asm sideeffect "; def $0", "=v"()
+ %v146 = call i32 asm sideeffect "; def $0", "=v"()
+ %v147 = call i32 asm sideeffect "; def $0", "=v"()
+ %v148 = call i32 asm sideeffect "; def $0", "=v"()
+ %v149 = call i32 asm sideeffect "; def $0", "=v"()
+ %v150 = call i32 asm sideeffect "; def $0", "=v"()
+ %v151 = call i32 asm sideeffect "; def $0", "=v"()
+ %v152 = call i32 asm sideeffect "; def $0", "=v"()
+ %v153 = call i32 asm sideeffect "; def $0", "=v"()
+ %v154 = call i32 asm sideeffect "; def $0", "=v"()
+ %v155 = call i32 asm sideeffect "; def $0", "=v"()
+ %v156 = call i32 asm sideeffect "; def $0", "=v"()
+ %v157 = call i32 asm sideeffect "; def $0", "=v"()
+ %v158 = call i32 asm sideeffect "; def $0", "=v"()
+ %v159 = call i32 asm sideeffect "; def $0", "=v"()
+ %v160 = call i32 asm sideeffect "; def $0", "=v"()
+ %v161 = call i32 asm sideeffect "; def $0", "=v"()
+ %v162 = call i32 asm sideeffect "; def $0", "=v"()
+ %v163 = call i32 asm sideeffect "; def $0", "=v"()
+ %v164 = call i32 asm sideeffect "; def $0", "=v"()
+ %v165 = call i32 asm sideeffect "; def $0", "=v"()
+ %v166 = call i32 asm sideeffect "; def $0", "=v"()
+ %v167 = call i32 asm sideeffect "; def $0", "=v"()
+ %v168 = call i32 asm sideeffect "; def $0", "=v"()
+ %v169 = call i32 asm sideeffect "; def $0", "=v"()
+ %v170 = call i32 asm sideeffect "; def $0", "=v"()
+ %v171 = call i32 asm sideeffect "; def $0", "=v"()
+ %v172 = call i32 asm sideeffect "; def $0", "=v"()
+ %v173 = call i32 asm sideeffect "; def $0", "=v"()
+ %v174 = call i32 asm sideeffect "; def $0", "=v"()
+ %v175 = call i32 asm sideeffect "; def $0", "=v"()
+ %v176 = call i32 asm sideeffect "; def $0", "=v"()
+ %v177 = call i32 asm sideeffect "; def $0", "=v"()
+ %v178 = call i32 asm sideeffect "; def $0", "=v"()
+ %v179 = call i32 asm sideeffect "; def $0", "=v"()
+ %v180 = call i32 asm sideeffect "; def $0", "=v"()
+ %v181 = call i32 asm sideeffect "; def $0", "=v"()
+ %v182 = call i32 asm sideeffect "; def $0", "=v"()
+ %v183 = call i32 asm sideeffect "; def $0", "=v"()
+ %v184 = call i32 asm sideeffect "; def $0", "=v"()
+ %v185 = call i32 asm sideeffect "; def $0", "=v"()
+ %v186 = call i32 asm sideeffect "; def $0", "=v"()
+ %v187 = call i32 asm sideeffect "; def $0", "=v"()
+ %v188 = call i32 asm sideeffect "; def $0", "=v"()
+ %v189 = call i32 asm sideeffect "; def $0", "=v"()
+ %v190 = call i32 asm sideeffect "; def $0", "=v"()
+ %v191 = call i32 asm sideeffect "; def $0", "=v"()
+ %v192 = call i32 asm sideeffect "; def $0", "=v"()
+ %v193 = call i32 asm sideeffect "; def $0", "=v"()
+ %v194 = call i32 asm sideeffect "; def $0", "=v"()
+ %v195 = call i32 asm sideeffect "; def $0", "=v"()
+ %v196 = call i...
[truncated]
|
Remove the MIR scan to detect whether AGPRs are used or not, and the special case for callable functions. This behavior was confusing, and not overridable. The amdgpu-no-agpr attribute was intended to avoid this imprecise heuristic for how many AGPRs to allocate. It was also too confusing to make this interact with the pending amdgpu-num-agpr replacement for amdgpu-no-agpr. Also adds an xfail-ish test where the register allocator asserts after allocation fails which I ran into. Future work should reintroduce a more refined MIR scan to estimate AGPR pressure for how to split AGPRs and VGPRs.
167fd5a
to
2ba691b
Compare
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/81/builds/5029 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/186/builds/6778 Here is the relevant piece of the build log for the reference
|
Remove the MIR scan to detect whether AGPRs are used or not,
and the special case for callable functions. This behavior was
confusing, and not overridable. The amdgpu-no-agpr attribute was
intended to avoid this imprecise heuristic for how many AGPRs to
allocate. It was also too confusing to make this interact with
the pending amdgpu-num-agpr replacement for amdgpu-no-agpr.
Also adds an xfail-ish test where the register allocator asserts
after allocation fails which I ran into.
Future work should reintroduce a more refined MIR scan to estimate
AGPR pressure for how to split AGPRs and VGPRs.