Skip to content

Commit 12e4b8a

Browse files
committed
AMDGPU: Stop introducing v_accvgpr_write_b32 for reg-to-reg copy
This was trying to hack around the intermediate VGPR requirement for copying to AGPRs on gfx908. We should still use a copy for all reg-to-reg cases. This should matter less these days, as we reserve a VGPR to handle it when required (and no end-to-end tests need updating). This was also an obstacle to handling this fold for input registers that are larger than 32 bits.
1 parent 3fe0c48 commit 12e4b8a

File tree

2 files changed

+11
-12
lines changed

2 files changed

+11
-12
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1573,9 +1573,8 @@ bool SIFoldOperandsImpl::foldCopyToAGPRRegSequence(MachineInstr *CopyMI) const {
15731573
Vgpr = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
15741574
BuildMI(MBB, CopyMI, DL, TII->get(AMDGPU::COPY), Vgpr).add(*Def);
15751575
}
1576-
auto Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass);
1577-
BuildMI(MBB, CopyMI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), Tmp)
1578-
.addReg(Vgpr);
1576+
Register Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass);
1577+
BuildMI(MBB, CopyMI, DL, TII->get(AMDGPU::COPY), Tmp).addReg(Vgpr);
15791578
B.addReg(Tmp);
15801579
}
15811580

llvm/test/CodeGen/AMDGPU/si-fold-operands-agpr-copy-reg-sequence.mir

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -206,11 +206,11 @@ body: |
206206
; CHECK-LABEL: name: s_mov_b32_999_splat_sgpr_128_copy_vgpr_copy_agpr
207207
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 999
208208
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
209-
; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec
210-
; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_1:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec
211-
; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_2:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec
212-
; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_3:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec
213-
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_128 = REG_SEQUENCE [[V_ACCVGPR_WRITE_B32_e64_]], %subreg.sub0, [[V_ACCVGPR_WRITE_B32_e64_1]], %subreg.sub1, [[V_ACCVGPR_WRITE_B32_e64_2]], %subreg.sub2, [[V_ACCVGPR_WRITE_B32_e64_3]], %subreg.sub3
209+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr_32 = COPY [[COPY]]
210+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr_32 = COPY [[COPY]]
211+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:agpr_32 = COPY [[COPY]]
212+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:agpr_32 = COPY [[COPY]]
213+
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
214214
; CHECK-NEXT: $agpr0_agpr1_agpr2_agpr3 = COPY [[REG_SEQUENCE]]
215215
; CHECK-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3
216216
%0:sgpr_32 = S_MOV_B32 999
@@ -232,10 +232,10 @@ body: |
232232
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 1
233233
; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 1, implicit $exec
234234
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
235-
; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_1:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec
236-
; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_2:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec
237-
; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_3:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 1, implicit $exec
238-
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_128 = REG_SEQUENCE [[V_ACCVGPR_WRITE_B32_e64_]], %subreg.sub0, [[V_ACCVGPR_WRITE_B32_e64_1]], %subreg.sub1, [[V_ACCVGPR_WRITE_B32_e64_2]], %subreg.sub2, [[V_ACCVGPR_WRITE_B32_e64_3]], %subreg.sub3
235+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr_32 = COPY [[COPY]]
236+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr_32 = COPY [[COPY]]
237+
; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_1:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 1, implicit $exec
238+
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_128 = REG_SEQUENCE [[V_ACCVGPR_WRITE_B32_e64_]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[V_ACCVGPR_WRITE_B32_e64_1]], %subreg.sub3
239239
; CHECK-NEXT: $agpr0_agpr1_agpr2_agpr3 = COPY [[REG_SEQUENCE]]
240240
; CHECK-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3
241241
%0:sgpr_32 = S_MOV_B32 999

0 commit comments

Comments
 (0)