Skip to content

Commit ff7f2d1

Browse files
committed
AMDGPU: Correct legal literal operand logic for multiple uses
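The same literal value can be used by several operands of one instruction, not just by a single operand. We were not tracking which literal had already been used, so each literal operand was counted against the literal limit separately, even when it repeated an identical value. Track the literal that is already in use and skip any further operand that is identical to it. This helps avoid regressions in a future patch.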
1 parent c325d4b commit ff7f2d1

File tree

4 files changed

+23
-22
lines changed

4 files changed

+23
-22
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 13 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5918,11 +5918,16 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
59185918
if (!MO)
59195919
MO = &MI.getOperand(OpIdx);
59205920

5921+
const MachineOperand *UsedLiteral = nullptr;
5922+
59215923
int ConstantBusLimit = ST.getConstantBusLimit(MI.getOpcode());
59225924
int LiteralLimit = !isVOP3(MI) || ST.hasVOP3Literal() ? 1 : 0;
59235925
if (isVALU(MI) && usesConstantBus(MRI, *MO, OpInfo)) {
5924-
if (!MO->isReg() && !isInlineConstant(*MO, OpInfo) && !LiteralLimit--)
5925-
return false;
5926+
if (!MO->isReg() && !isInlineConstant(*MO, OpInfo)) {
5927+
UsedLiteral = MO;
5928+
if (!LiteralLimit--)
5929+
return false;
5930+
}
59265931

59275932
SmallDenseSet<RegSubRegPair> SGPRsUsed;
59285933
if (MO->isReg())
@@ -5943,6 +5948,12 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
59435948
}
59445949
} else if (AMDGPU::isSISrcOperand(InstDesc, i) &&
59455950
!isInlineConstant(Op, InstDesc.operands()[i])) {
5951+
// The same literal may be used multiple times.
5952+
if (!UsedLiteral)
5953+
UsedLiteral = &Op;
5954+
else if (UsedLiteral->isIdenticalTo(Op))
5955+
continue;
5956+
59465957
if (!LiteralLimit--)
59475958
return false;
59485959
if (--ConstantBusLimit <= 0)

llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir

Lines changed: 8 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -2162,8 +2162,7 @@ body: |
21622162
; GFX11-NEXT: $sgpr5 = frame-setup COPY $sgpr34
21632163
; GFX11-NEXT: $sgpr34 = frame-setup COPY $sgpr32
21642164
; GFX11-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
2165-
; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
2166-
; GFX11-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec
2165+
; GFX11-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 $sgpr33, 12352, 0, implicit $exec
21672166
; GFX11-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
21682167
; GFX11-NEXT: $sgpr34 = frame-destroy COPY $sgpr5
21692168
; GFX11-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
@@ -2178,8 +2177,7 @@ body: |
21782177
; GFX12-NEXT: $sgpr5 = frame-setup COPY $sgpr34
21792178
; GFX12-NEXT: $sgpr34 = frame-setup COPY $sgpr32
21802179
; GFX12-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24576, implicit-def dead $scc
2181-
; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
2182-
; GFX12-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 4160, killed $vgpr1, 0, implicit $exec
2180+
; GFX12-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 $sgpr33, 4160, 0, implicit $exec
21832181
; GFX12-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
21842182
; GFX12-NEXT: $sgpr34 = frame-destroy COPY $sgpr5
21852183
; GFX12-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
@@ -2315,8 +2313,7 @@ body: |
23152313
; GFX11-NEXT: $sgpr5 = frame-setup COPY $sgpr34
23162314
; GFX11-NEXT: $sgpr34 = frame-setup COPY $sgpr32
23172315
; GFX11-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
2318-
; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
2319-
; GFX11-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec
2316+
; GFX11-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 $sgpr33, 12352, 0, implicit $exec
23202317
; GFX11-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec
23212318
; GFX11-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
23222319
; GFX11-NEXT: $sgpr34 = frame-destroy COPY $sgpr5
@@ -2332,8 +2329,7 @@ body: |
23322329
; GFX12-NEXT: $sgpr5 = frame-setup COPY $sgpr34
23332330
; GFX12-NEXT: $sgpr34 = frame-setup COPY $sgpr32
23342331
; GFX12-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24576, implicit-def dead $scc
2335-
; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
2336-
; GFX12-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 4160, killed $vgpr1, 0, implicit $exec
2332+
; GFX12-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 $sgpr33, 4160, 0, implicit $exec
23372333
; GFX12-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec
23382334
; GFX12-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
23392335
; GFX12-NEXT: $sgpr34 = frame-destroy COPY $sgpr5
@@ -2469,8 +2465,7 @@ body: |
24692465
; GFX11-NEXT: $sgpr5 = frame-setup COPY $sgpr34
24702466
; GFX11-NEXT: $sgpr34 = frame-setup COPY $sgpr32
24712467
; GFX11-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
2472-
; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
2473-
; GFX11-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec
2468+
; GFX11-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $sgpr33, 12352, 0, implicit $exec
24742469
; GFX11-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
24752470
; GFX11-NEXT: $sgpr34 = frame-destroy COPY $sgpr5
24762471
; GFX11-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
@@ -2485,8 +2480,7 @@ body: |
24852480
; GFX12-NEXT: $sgpr5 = frame-setup COPY $sgpr34
24862481
; GFX12-NEXT: $sgpr34 = frame-setup COPY $sgpr32
24872482
; GFX12-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24576, implicit-def dead $scc
2488-
; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
2489-
; GFX12-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 4160, killed $vgpr1, 0, implicit $exec
2483+
; GFX12-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $sgpr33, 4160, 0, implicit $exec
24902484
; GFX12-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
24912485
; GFX12-NEXT: $sgpr34 = frame-destroy COPY $sgpr5
24922486
; GFX12-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
@@ -2622,8 +2616,7 @@ body: |
26222616
; GFX11-NEXT: $sgpr5 = frame-setup COPY $sgpr34
26232617
; GFX11-NEXT: $sgpr34 = frame-setup COPY $sgpr32
26242618
; GFX11-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
2625-
; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
2626-
; GFX11-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec
2619+
; GFX11-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr33, 12352, 0, implicit $exec
26272620
; GFX11-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec
26282621
; GFX11-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
26292622
; GFX11-NEXT: $sgpr34 = frame-destroy COPY $sgpr5
@@ -2639,8 +2632,7 @@ body: |
26392632
; GFX12-NEXT: $sgpr5 = frame-setup COPY $sgpr34
26402633
; GFX12-NEXT: $sgpr34 = frame-setup COPY $sgpr32
26412634
; GFX12-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24576, implicit-def dead $scc
2642-
; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
2643-
; GFX12-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 4160, killed $vgpr1, 0, implicit $exec
2635+
; GFX12-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr33, 4160, 0, implicit $exec
26442636
; GFX12-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec
26452637
; GFX12-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
26462638
; GFX12-NEXT: $sgpr34 = frame-destroy COPY $sgpr5

llvm/test/CodeGen/AMDGPU/fold-literal-multiple-gfx10.mir

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -35,8 +35,7 @@ body: |
3535
; CHECK: liveins: $vgpr0
3636
; CHECK-NEXT: {{ $}}
3737
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
38-
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1178657792, implicit $exec
39-
; CHECK-NEXT: [[V_DIV_SCALE_F32_e64_:%[0-9]+]]:vgpr_32, [[V_DIV_SCALE_F32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_DIV_SCALE_F32_e64 0, 1178657792, 0, 1178657792, 0, [[V_MOV_B32_e32_]], 0, 0, implicit $mode, implicit $exec
38+
; CHECK-NEXT: [[V_DIV_SCALE_F32_e64_:%[0-9]+]]:vgpr_32, [[V_DIV_SCALE_F32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_DIV_SCALE_F32_e64 0, 1178657792, 0, 1178657792, 0, 1178657792, 0, 0, implicit $mode, implicit $exec
4039
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_DIV_SCALE_F32_e64_]]
4140
%0:vgpr_32 = COPY $vgpr0
4241
%1:sreg_32 = S_MOV_B32 1178657792

llvm/test/CodeGen/AMDGPU/fold-vgpr-copy.mir

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -55,8 +55,7 @@ body: |
5555

5656
# GCN-LABEL: name: fma_sgpr_sgpr_use
5757
# GCN: %0:sgpr_32 = IMPLICIT_DEF
58-
# GCN-NEXT: %2:vgpr_32 = V_MOV_B32_e32 1234567, implicit $exec
59-
# GCN-NEXT: %3:vgpr_32 = V_FMAC_F32_e64 0, %0, 0, 1234567, 0, %2, 0, 0, implicit $mode, implicit $exec
58+
# GCN: %3:vgpr_32 = V_FMA_F32_e64 0, %0, 0, 1234567, 0, 1234567, 0, 0, implicit $mode, implicit $exec
6059
---
6160
name: fma_sgpr_sgpr_use
6261
body: |

0 commit comments

Comments
 (0)