Skip to content

AMDGPU: Respect amdgpu-no-agpr in functions and with calls #128147

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 6 additions & 48 deletions llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,10 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
}

MayNeedAGPRs = ST.hasMAIInsts();
if (ST.hasGFX90AInsts() &&
ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() &&
!mayUseAGPRs(F))
MayNeedAGPRs = false; // We will select all MAI with VGPR operands.

if (AMDGPU::isChainCC(CC)) {
// Chain functions don't receive an SP from their caller, but are free to
Expand Down Expand Up @@ -98,13 +102,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
ImplicitArgPtr = true;
} else {
ImplicitArgPtr = false;
MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
MaxKernArgAlign);

if (ST.hasGFX90AInsts() &&
ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() &&
!mayUseAGPRs(F))
MayNeedAGPRs = false; // We will select all MAI with VGPR operands.
MaxKernArgAlign =
std::max(ST.getAlignmentForImplicitArgPtr(), MaxKernArgAlign);
}

if (!AMDGPU::isGraphics(CC) ||
Expand Down Expand Up @@ -783,44 +782,3 @@ bool SIMachineFunctionInfo::initializeBaseYamlFields(
/// \returns true unless \p F carries the "amdgpu-no-agpr" function attribute.
bool SIMachineFunctionInfo::mayUseAGPRs(const Function &F) const {
  const bool HasNoAGPRAttr = F.hasFnAttribute("amdgpu-no-agpr");
  return !HasNoAGPRAttr;
}

bool SIMachineFunctionInfo::usesAGPRs(const MachineFunction &MF) const {
if (UsesAGPRs)
return *UsesAGPRs;

if (!mayNeedAGPRs()) {
UsesAGPRs = false;
return false;
}

if (!AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv()) ||
MF.getFrameInfo().hasCalls()) {
UsesAGPRs = true;
return true;
}

const MachineRegisterInfo &MRI = MF.getRegInfo();

for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
const Register Reg = Register::index2VirtReg(I);
const TargetRegisterClass *RC = MRI.getRegClassOrNull(Reg);
if (RC && SIRegisterInfo::isAGPRClass(RC)) {
UsesAGPRs = true;
return true;
}
if (!RC && !MRI.use_empty(Reg) && MRI.getType(Reg).isValid()) {
// Defer caching UsesAGPRs, function might not yet been regbank selected.
return true;
}
}

for (MCRegister Reg : AMDGPU::AGPR_32RegClass) {
if (MRI.isPhysRegUsed(Reg)) {
UsesAGPRs = true;
return true;
}
}

UsesAGPRs = false;
return false;
}
5 changes: 0 additions & 5 deletions llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -494,8 +494,6 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
// scheduler stage.
unsigned MaxMemoryClusterDWords = DefaultMemoryClusterDWordsLimit;

mutable std::optional<bool> UsesAGPRs;

MCPhysReg getNextUserSGPR() const;

MCPhysReg getNextSystemSGPR() const;
Expand Down Expand Up @@ -1126,9 +1124,6 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
// has a call which may use it.
bool mayUseAGPRs(const Function &F) const;

// \returns true if a function needs or may need AGPRs.
bool usesAGPRs(const MachineFunction &MF) const;

/// \returns Default/requested number of work groups for this function.
SmallVector<unsigned> getMaxNumWorkGroups() const { return MaxNumWorkGroups; }

Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -585,7 +585,7 @@ SIRegisterInfo::getMaxNumVectorRegs(const MachineFunction &MF) const {
// TODO: it shall be possible to estimate maximum AGPR/VGPR pressure and split
// register file accordingly.
if (ST.hasGFX90AInsts()) {
if (MFI->usesAGPRs(MF)) {
if (MFI->mayNeedAGPRs()) {
MaxNumVGPRs /= 2;
MaxNumAGPRs = MaxNumVGPRs;
} else {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
; REQUIRES: asserts
; RUN: not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -filetype=null %s 2>&1 | FileCheck -check-prefix=CRASH %s

; CRASH: error: <unknown>:0:0: no registers from class available to allocate in function 'no_free_vgprs_at_agpr_to_agpr_copy'
; CRASH: Cannot access invalid iterator

; Attribute group #0 marks this function "amdgpu-no-agpr", yet the inline asm
; below uses AGPR constraints (a[0:15], a1, a2, a3) together with v[0:31].
; The RUN line expects llc to fail on this input.
define void @no_free_vgprs_at_agpr_to_agpr_copy(float %v0, float %v1) #0 {
; Define a 32-element value in v[0:31] and a 16-element value in a[0:15].
%asm = call { <32 x i32>, <16 x float> } asm sideeffect "; def $0 $1", "=${v[0:31]},=${a[0:15]}"()
%vgpr0 = extractvalue { <32 x i32>, <16 x float> } %asm, 0
%agpr0 = extractvalue { <32 x i32>, <16 x float> } %asm, 1
; The a[0:15] value is passed as the third operand of the MFMA intrinsic.
%mfma = call <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float %v0, float %v1, <16 x float> %agpr0, i32 0, i32 0, i32 0)
%agpr1 = call i32 asm sideeffect "; copy ", "={a1},a,~{a[0:15]}"(<16 x float> %agpr0)
%agpr2 = call i32 asm sideeffect "; copy ", "={a2},a,{a[0:15]}"(i32 %agpr1, <16 x float> %mfma)
; Use the v[0:31] value so it stays live across the asm statements above.
call void asm sideeffect "; use $0 $1", "{a3},{v[0:31]}"(i32 %agpr2, <32 x i32> %vgpr0)
ret void
}

declare <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float, float, <16 x float>, i32 immarg, i32 immarg, i32 immarg) #1
declare noundef i32 @llvm.amdgcn.workitem.id.x() #2

attributes #0 = { "amdgpu-no-agpr" "amdgpu-waves-per-eu"="6,6" }
attributes #1 = { convergent nocallback nofree nosync nounwind willreturn memory(none) }
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
5 changes: 3 additions & 2 deletions llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ define void @no_free_vgprs_at_agpr_to_agpr_copy(float %v0, float %v1) #0 {
}

; Check that we do make use of v32 if there are no AGPRs present in the function
define amdgpu_kernel void @no_agpr_no_reserve(ptr addrspace(1) %arg) #0 {
define amdgpu_kernel void @no_agpr_no_reserve(ptr addrspace(1) %arg) #5 {
; GFX908-LABEL: no_agpr_no_reserve:
; GFX908: ; %bb.0:
; GFX908-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
Expand Down Expand Up @@ -1144,5 +1144,6 @@ declare i32 @llvm.amdgcn.workitem.id.x() #2
attributes #0 = { "amdgpu-waves-per-eu"="6,6" }
attributes #1 = { convergent nounwind readnone willreturn }
attributes #2 = { nounwind readnone willreturn }
attributes #3 = { "amdgpu-waves-per-eu"="7,7" }
attributes #3 = { "amdgpu-waves-per-eu"="7,7" "amdgpu-no-agpr" }
attributes #4 = { "amdgpu-waves-per-eu"="6,6" "amdgpu-flat-work-group-size"="1024,1024" }
attributes #5 = { "amdgpu-waves-per-eu"="6,6" "amdgpu-no-agpr" }
16 changes: 14 additions & 2 deletions llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll
Original file line number Diff line number Diff line change
Expand Up @@ -94,9 +94,20 @@ bb3:
ret void
}

; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_nonentry:
; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_nonentry_noagpr:
; GFX908: v_mfma_f32_32x32x1{{.*}} a[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9:]+}}, a[{{[0-9:]+}}]
; GFX90A: v_mfma_f32_32x32x1{{.*}} v[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9:]+}}, v[{{[0-9:]+}}]
; Function without a kernel calling convention that uses attribute group #0,
; which includes "amdgpu-no-agpr". It loads a <32 x float> from %arg, passes it
; as the third operand of the MFMA intrinsic, and stores the result back.
define void @test_mfma_f32_32x32x1f32_nonentry_noagpr(ptr addrspace(1) %arg) #0 {
bb:
%in.1 = load <32 x float>, ptr addrspace(1) %arg
%mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %in.1, i32 0, i32 0, i32 0)
store <32 x float> %mai.1, ptr addrspace(1) %arg
ret void
}

; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_nonentry_with_agpr:
; GCN: v_mfma_f32_32x32x1{{.*}} a[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9:]+}}, a[{{[0-9:]+}}]
define void @test_mfma_f32_32x32x1f32_nonentry(ptr addrspace(1) %arg) #0 {
define void @test_mfma_f32_32x32x1f32_nonentry_with_agpr(ptr addrspace(1) %arg) #3 {
bb:
%in.1 = load <32 x float>, ptr addrspace(1) %arg
%mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %in.1, i32 0, i32 0, i32 0)
Expand All @@ -109,3 +120,4 @@ declare void @foo()
attributes #0 = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="2" "amdgpu-no-agpr" }
attributes #1 = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="2" }
attributes #2 = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-no-agpr" }
attributes #3 = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="2" }
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/spill-regpressure-less.mir
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
ret void
}

attributes #0 = { "amdgpu-waves-per-eu"="8,8" }
attributes #0 = { "amdgpu-waves-per-eu"="8,8" "amdgpu-no-agpr" }
...

---
Expand Down
Loading