Skip to content

AMDGPU: Correct legal literal operand logic for multiple uses #127594

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 14 additions & 2 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5918,11 +5918,17 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
if (!MO)
MO = &MI.getOperand(OpIdx);

const MachineOperand *UsedLiteral = nullptr;

int ConstantBusLimit = ST.getConstantBusLimit(MI.getOpcode());
int LiteralLimit = !isVOP3(MI) || ST.hasVOP3Literal() ? 1 : 0;
if (isVALU(MI) && usesConstantBus(MRI, *MO, OpInfo)) {
if (!MO->isReg() && !isInlineConstant(*MO, OpInfo) && !LiteralLimit--)
return false;
if (!MO->isReg() && !isInlineConstant(*MO, OpInfo)) {
if (!LiteralLimit--)
return false;

UsedLiteral = MO;
}

SmallDenseSet<RegSubRegPair> SGPRsUsed;
if (MO->isReg())
Expand All @@ -5943,6 +5949,12 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
}
} else if (AMDGPU::isSISrcOperand(InstDesc, i) &&
!isInlineConstant(Op, InstDesc.operands()[i])) {
// The same literal may be used multiple times.
if (!UsedLiteral)
UsedLiteral = &Op;
else if (UsedLiteral->isIdenticalTo(Op))
continue;

if (!LiteralLimit--)
return false;
if (--ConstantBusLimit <= 0)
Expand Down
24 changes: 8 additions & 16 deletions llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir
Original file line number Diff line number Diff line change
Expand Up @@ -2162,8 +2162,7 @@ body: |
; GFX11-NEXT: $sgpr5 = frame-setup COPY $sgpr34
; GFX11-NEXT: $sgpr34 = frame-setup COPY $sgpr32
; GFX11-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
; GFX11-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec
; GFX11-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 $sgpr33, 12352, 0, implicit $exec
; GFX11-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
; GFX11-NEXT: $sgpr34 = frame-destroy COPY $sgpr5
; GFX11-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
Expand All @@ -2178,8 +2177,7 @@ body: |
; GFX12-NEXT: $sgpr5 = frame-setup COPY $sgpr34
; GFX12-NEXT: $sgpr34 = frame-setup COPY $sgpr32
; GFX12-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24576, implicit-def dead $scc
; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
; GFX12-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 4160, killed $vgpr1, 0, implicit $exec
; GFX12-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 $sgpr33, 4160, 0, implicit $exec
; GFX12-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
; GFX12-NEXT: $sgpr34 = frame-destroy COPY $sgpr5
; GFX12-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
Expand Down Expand Up @@ -2315,8 +2313,7 @@ body: |
; GFX11-NEXT: $sgpr5 = frame-setup COPY $sgpr34
; GFX11-NEXT: $sgpr34 = frame-setup COPY $sgpr32
; GFX11-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
; GFX11-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec
; GFX11-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 $sgpr33, 12352, 0, implicit $exec
; GFX11-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec
; GFX11-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
; GFX11-NEXT: $sgpr34 = frame-destroy COPY $sgpr5
Expand All @@ -2332,8 +2329,7 @@ body: |
; GFX12-NEXT: $sgpr5 = frame-setup COPY $sgpr34
; GFX12-NEXT: $sgpr34 = frame-setup COPY $sgpr32
; GFX12-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24576, implicit-def dead $scc
; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
; GFX12-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 4160, killed $vgpr1, 0, implicit $exec
; GFX12-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 $sgpr33, 4160, 0, implicit $exec
; GFX12-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec
; GFX12-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
; GFX12-NEXT: $sgpr34 = frame-destroy COPY $sgpr5
Expand Down Expand Up @@ -2469,8 +2465,7 @@ body: |
; GFX11-NEXT: $sgpr5 = frame-setup COPY $sgpr34
; GFX11-NEXT: $sgpr34 = frame-setup COPY $sgpr32
; GFX11-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
; GFX11-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec
; GFX11-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $sgpr33, 12352, 0, implicit $exec
; GFX11-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
; GFX11-NEXT: $sgpr34 = frame-destroy COPY $sgpr5
; GFX11-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
Expand All @@ -2485,8 +2480,7 @@ body: |
; GFX12-NEXT: $sgpr5 = frame-setup COPY $sgpr34
; GFX12-NEXT: $sgpr34 = frame-setup COPY $sgpr32
; GFX12-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24576, implicit-def dead $scc
; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
; GFX12-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 4160, killed $vgpr1, 0, implicit $exec
; GFX12-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $sgpr33, 4160, 0, implicit $exec
; GFX12-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
; GFX12-NEXT: $sgpr34 = frame-destroy COPY $sgpr5
; GFX12-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
Expand Down Expand Up @@ -2622,8 +2616,7 @@ body: |
; GFX11-NEXT: $sgpr5 = frame-setup COPY $sgpr34
; GFX11-NEXT: $sgpr34 = frame-setup COPY $sgpr32
; GFX11-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
; GFX11-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec
; GFX11-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr33, 12352, 0, implicit $exec
; GFX11-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec
; GFX11-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
; GFX11-NEXT: $sgpr34 = frame-destroy COPY $sgpr5
Expand All @@ -2639,8 +2632,7 @@ body: |
; GFX12-NEXT: $sgpr5 = frame-setup COPY $sgpr34
; GFX12-NEXT: $sgpr34 = frame-setup COPY $sgpr32
; GFX12-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24576, implicit-def dead $scc
; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
; GFX12-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 4160, killed $vgpr1, 0, implicit $exec
; GFX12-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr33, 4160, 0, implicit $exec
; GFX12-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec
; GFX12-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
; GFX12-NEXT: $sgpr34 = frame-destroy COPY $sgpr5
Expand Down
66 changes: 66 additions & 0 deletions llvm/test/CodeGen/AMDGPU/fold-literal-multiple-gfx10.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -run-pass=si-fold-operands -o - %s | FileCheck %s

# The same literal may be used multiple times in different operands,
# as long as it is the same value.

---
# All three source operands of V_DIV_SCALE_F32_e64 read %2, which is a copy of
# the literal 1178657792 materialized by S_MOV_B32. The checks expect every one
# of those uses to be replaced by that same literal value.
name: fold_multiple_same_literal_use_0
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0

; CHECK-LABEL: name: fold_multiple_same_literal_use_0
; CHECK: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[V_DIV_SCALE_F32_e64_:%[0-9]+]]:vgpr_32, [[V_DIV_SCALE_F32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_DIV_SCALE_F32_e64 0, 1178657792, 0, 1178657792, 0, 1178657792, 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_DIV_SCALE_F32_e64_]]
%0:vgpr_32 = COPY $vgpr0
%1:sreg_32 = S_MOV_B32 1178657792
%2:vgpr_32 = COPY %1
%3:vgpr_32, %4:sreg_32_xm0_xexec = V_DIV_SCALE_F32_e64 0, %2, 0, %2, 0, %2, 0, 0, implicit $mode, implicit $exec
S_ENDPGM 0, implicit %3
...

---
# The first two source operands of V_DIV_SCALE_F32_e64 already hold the literal
# 1178657792; the third reads %2, a copy of the same value. The checks expect
# the third operand to be replaced by the literal as well, since it is identical
# to the one already in use.
name: fold_multiple_same_literal_use_1
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0

; CHECK-LABEL: name: fold_multiple_same_literal_use_1
; CHECK: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[V_DIV_SCALE_F32_e64_:%[0-9]+]]:vgpr_32, [[V_DIV_SCALE_F32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_DIV_SCALE_F32_e64 0, 1178657792, 0, 1178657792, 0, 1178657792, 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_DIV_SCALE_F32_e64_]]
%0:vgpr_32 = COPY $vgpr0
%1:sreg_32 = S_MOV_B32 1178657792
%2:vgpr_32 = COPY %1
%3:vgpr_32, %4:sreg_32_xm0_xexec = V_DIV_SCALE_F32_e64 0, 1178657792, 0, 1178657792, 0, %2, 0, 0, implicit $mode, implicit $exec
S_ENDPGM 0, implicit %3
...

---
# Negative case: the first two source operands hold the literal 1178657792, but
# %2 is a copy of a different value (1178657793). The checks expect the third
# operand to stay a register, defined by a V_MOV_B32_e32 of 1178657793, rather
# than becoming a second, distinct literal on the instruction.
name: no_fold_multiple_same_literal_different_value
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0

; CHECK-LABEL: name: no_fold_multiple_same_literal_different_value
; CHECK: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1178657793, implicit $exec
; CHECK-NEXT: [[V_DIV_SCALE_F32_e64_:%[0-9]+]]:vgpr_32, [[V_DIV_SCALE_F32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_DIV_SCALE_F32_e64 0, 1178657792, 0, 1178657792, 0, [[V_MOV_B32_e32_]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_DIV_SCALE_F32_e64_]]
%0:vgpr_32 = COPY $vgpr0
%1:sreg_32 = S_MOV_B32 1178657793
%2:vgpr_32 = COPY %1
%3:vgpr_32, %4:sreg_32_xm0_xexec = V_DIV_SCALE_F32_e64 0, 1178657792, 0, 1178657792, 0, %2, 0, 0, implicit $mode, implicit $exec
S_ENDPGM 0, implicit %3
...
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/AMDGPU/fold-vgpr-copy.mir
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,7 @@ body: |

# GCN-LABEL: name: fma_sgpr_sgpr_use
# GCN: %0:sgpr_32 = IMPLICIT_DEF
# GCN-NEXT: %2:vgpr_32 = V_MOV_B32_e32 1234567, implicit $exec
# GCN-NEXT: %3:vgpr_32 = V_FMAC_F32_e64 0, %0, 0, 1234567, 0, %2, 0, 0, implicit $mode, implicit $exec
# GCN: %3:vgpr_32 = V_FMA_F32_e64 0, %0, 0, 1234567, 0, 1234567, 0, 0, implicit $mode, implicit $exec
---
name: fma_sgpr_sgpr_use
body: |
Expand Down