Skip to content

Commit 7dcb9c0

Browse files
committed
InlineSpiller: Consider copy bundles when looking for snippet copies
This was looking for full copies produced by SplitKit, but SplitKit introduces copy bundles if not all lanes are live. The scan for uses needs to look at bundles, not individual instructions. This is a prerequisite to avoiding some redundant spills due to subregisters which will help avoid an allocation failure in a future patch.
1 parent 29ce367 commit 7dcb9c0

16 files changed

+451
-253
lines changed

llvm/include/llvm/CodeGen/MachineInstrBundle.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,13 @@ VirtRegInfo AnalyzeVirtRegInBundle(
241241
MachineInstr &MI, Register Reg,
242242
SmallVectorImpl<std::pair<MachineInstr *, unsigned>> *Ops = nullptr);
243243

244+
/// Return a pair of lane masks (reads, writes) indicating which lanes this
245+
/// instruction uses with Reg.
///
/// Every operand of the bundle containing \p MI that refers to \p Reg is
/// examined. The first element of the pair accumulates lanes that are read,
/// the second accumulates lanes that are written. A subregister def that is
/// not marked undef also counts the lanes outside that subregister as read.
246+
std::pair<LaneBitmask, LaneBitmask>
247+
AnalyzeVirtRegLanesInBundle(const MachineInstr &MI, Register Reg,
248+
const MachineRegisterInfo &MRI,
249+
const TargetRegisterInfo &TRI);
250+
244251
/// Information about how a physical register Reg is used by a set of
245252
/// operands.
246253
struct PhysRegInfo {

llvm/lib/CodeGen/InlineSpiller.cpp

Lines changed: 69 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,7 @@ class InlineSpiller : public Spiller {
175175

176176
// All COPY instructions to/from snippets.
177177
// They are ignored since both operands refer to the same stack slot.
178+
// For bundled copies, this will only include the first header copy.
178179
SmallPtrSet<MachineInstr*, 8> SnippetCopies;
179180

180181
// Values that failed to remat at some point.
@@ -255,15 +256,58 @@ Spiller *llvm::createInlineSpiller(MachineFunctionPass &Pass,
255256
// This minimizes register pressure and maximizes the store-to-load distance for
256257
// spill slots which can be important in tight loops.
257258

258-
/// isFullCopyOf - If MI is a COPY to or from Reg, return the other register,
259-
/// otherwise return 0.
260-
static Register isFullCopyOf(const MachineInstr &MI, Register Reg) {
261-
if (!MI.isFullCopy())
259+
/// If MI is a COPY to or from Reg, return the other register, otherwise return
260+
/// an invalid (default-constructed) Register.
///
/// A COPY whose destination and source operands carry different subregister
/// indices is rejected as well. MI must not be part of a bundle (asserted);
/// bundled copies are handled by isCopyOfBundle.
261+
static Register isCopyOf(const MachineInstr &MI, Register Reg) {
262+
assert(!MI.isBundled());
263+
if (!MI.isCopy())
262264
return Register();
263-
if (MI.getOperand(0).getReg() == Reg)
264-
return MI.getOperand(1).getReg();
265-
if (MI.getOperand(1).getReg() == Reg)
266-
return MI.getOperand(0).getReg();
265+
266+
const MachineOperand &DstOp = MI.getOperand(0);
267+
const MachineOperand &SrcOp = MI.getOperand(1);
268+
269+
// TODO: Probably only worth allowing subreg copies with undef dests.
// Only copies that use the same subregister index on both sides are accepted
// (this includes the case where neither operand has a subregister index).
270+
if (DstOp.getSubReg() != SrcOp.getSubReg())
271+
return Register();
// Report whichever side of the copy is not Reg; the destination is tested
// first.
272+
if (DstOp.getReg() == Reg)
273+
return SrcOp.getReg();
274+
if (SrcOp.getReg() == Reg)
275+
return DstOp.getReg();
// The copy does not involve Reg at all.
276+
return Register();
277+
}
278+
279+
/// Check for a copy bundle as formed by SplitKit.
///
/// An instruction that is not bundled is simply forwarded to isCopyOf. For a
/// bundle, \p FirstMI must be the first instruction of the bundle (asserted).
/// The bundle is rejected with an invalid Register as soon as a visited
/// instruction is not a COPY, or when the copies involving \p Reg pair it
/// with more than one distinct other register.
///
/// NOTE(review): on the bundled path every exit returns an invalid Register,
/// including the fall-through after the loop, so the SnipReg gathered by the
/// loop is never handed back to the caller. Confirm whether returning SnipReg
/// was intended.
279+
static Register isCopyOfBundle(const MachineInstr &FirstMI, Register Reg) {
281+
if (!FirstMI.isBundled())
282+
return isCopyOf(FirstMI, Reg);
283+
284+
assert(!FirstMI.isBundledWithPred() && FirstMI.isBundledWithSucc() &&
285+
"expected to see first instruction in bundle");
286+
// The single register that Reg is copied to/from across the bundle; stays
// invalid until the first copy involving Reg is seen.
287+
Register SnipReg;
288+
MachineBasicBlock::const_instr_iterator I = FirstMI.getIterator();
// Only instructions that still have a bundled successor are visited, so the
// final instruction of the bundle is not inspected by this loop.
// NOTE(review): confirm that skipping the last bundled instruction is
// intentional.
289+
while (I->isBundledWithSucc()) {
290+
const MachineInstr &MI = *I;
291+
if (!MI.isCopy())
292+
return Register();
293+
294+
const MachineOperand &DstOp = MI.getOperand(0);
295+
const MachineOperand &SrcOp = MI.getOperand(1);
// Copies that do not mention Reg on either side are skipped without
// affecting SnipReg. Subregister indices are not compared here.
296+
if (DstOp.getReg() == Reg) {
297+
if (!SnipReg)
298+
SnipReg = SrcOp.getReg();
299+
else if (SnipReg != SrcOp.getReg())
300+
return Register();
301+
} else if (SrcOp.getReg() == Reg) {
302+
if (!SnipReg)
303+
SnipReg = DstOp.getReg();
304+
else if (SnipReg != DstOp.getReg())
305+
return Register();
306+
}
307+
308+
++I;
309+
}
310+
// NOTE(review): SnipReg is discarded here; see the note in the header comment.
267311
return Register();
268312
}
269313

@@ -307,14 +351,14 @@ bool InlineSpiller::isSnippet(const LiveInterval &SnipLI) {
307351
MachineInstr *UseMI = nullptr;
308352

309353
// Check that all uses satisfy our criteria.
310-
for (MachineRegisterInfo::reg_instr_nodbg_iterator
311-
RI = MRI.reg_instr_nodbg_begin(SnipLI.reg()),
312-
E = MRI.reg_instr_nodbg_end();
354+
for (MachineRegisterInfo::reg_bundle_nodbg_iterator
355+
RI = MRI.reg_bundle_nodbg_begin(SnipLI.reg()),
356+
E = MRI.reg_bundle_nodbg_end();
313357
RI != E;) {
314358
MachineInstr &MI = *RI++;
315359

316360
// Allow copies to/from Reg.
317-
if (isFullCopyOf(MI, Reg))
361+
if (isCopyOfBundle(MI, Reg))
318362
continue;
319363

320364
// Allow stack slot loads.
@@ -351,9 +395,8 @@ void InlineSpiller::collectRegsToSpill() {
351395
if (Original == Reg)
352396
return;
353397

354-
for (MachineInstr &MI :
355-
llvm::make_early_inc_range(MRI.reg_instructions(Reg))) {
356-
Register SnipReg = isFullCopyOf(MI, Reg);
398+
for (MachineInstr &MI : llvm::make_early_inc_range(MRI.reg_bundles(Reg))) {
399+
Register SnipReg = isCopyOfBundle(MI, Reg);
357400
if (!isSibling(SnipReg))
358401
continue;
359402
LiveInterval &SnipLI = LIS.getInterval(SnipReg);
@@ -475,21 +518,22 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) {
475518

476519
// Find all spills and copies of VNI.
477520
for (MachineInstr &MI :
478-
llvm::make_early_inc_range(MRI.use_nodbg_instructions(Reg))) {
521+
llvm::make_early_inc_range(MRI.use_nodbg_bundles(Reg))) {
479522
if (!MI.isCopy() && !MI.mayStore())
480523
continue;
481524
SlotIndex Idx = LIS.getInstructionIndex(MI);
482525
if (LI->getVNInfoAt(Idx) != VNI)
483526
continue;
484527

485528
// Follow sibling copies down the dominator tree.
486-
if (Register DstReg = isFullCopyOf(MI, Reg)) {
529+
if (Register DstReg = isCopyOfBundle(MI, Reg)) {
487530
if (isSibling(DstReg)) {
488-
LiveInterval &DstLI = LIS.getInterval(DstReg);
489-
VNInfo *DstVNI = DstLI.getVNInfoAt(Idx.getRegSlot());
490-
assert(DstVNI && "Missing defined value");
491-
assert(DstVNI->def == Idx.getRegSlot() && "Wrong copy def slot");
492-
WorkList.push_back(std::make_pair(&DstLI, DstVNI));
531+
LiveInterval &DstLI = LIS.getInterval(DstReg);
532+
VNInfo *DstVNI = DstLI.getVNInfoAt(Idx.getRegSlot());
533+
assert(DstVNI && "Missing defined value");
534+
assert(DstVNI->def == Idx.getRegSlot() && "Wrong copy def slot");
535+
536+
WorkList.push_back(std::make_pair(&DstLI, DstVNI));
493537
}
494538
continue;
495539
}
@@ -1111,7 +1155,7 @@ void InlineSpiller::spillAroundUses(Register Reg) {
11111155
Idx = VNI->def;
11121156

11131157
// Check for a sibling copy.
1114-
Register SibReg = isFullCopyOf(MI, Reg);
1158+
Register SibReg = isCopyOfBundle(MI, Reg);
11151159
if (SibReg && isSibling(SibReg)) {
11161160
// This may actually be a copy between snippets.
11171161
if (isRegToSpill(SibReg)) {
@@ -1202,8 +1246,8 @@ void InlineSpiller::spillAll() {
12021246
llvm::make_early_inc_range(MRI.reg_instructions(Reg))) {
12031247
assert(SnippetCopies.count(&MI) && "Remaining use wasn't a snippet copy");
12041248
// FIXME: Do this with a LiveRangeEdit callback.
1205-
LIS.RemoveMachineInstrFromMaps(MI);
1206-
MI.eraseFromParent();
1249+
LIS.getSlotIndexes()->removeSingleMachineInstrFromMaps(MI);
1250+
MI.eraseFromBundle();
12071251
}
12081252
}
12091253

llvm/lib/CodeGen/LiveRangeEdit.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,8 +286,12 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) {
286286

287287
// Never delete a bundled instruction.
288288
if (MI->isBundled()) {
289+
// TODO: Handle deleting copy bundles
290+
LLVM_DEBUG(dbgs() << "Won't delete dead bundled inst: " << Idx << '\t'
291+
<< *MI);
289292
return;
290293
}
294+
291295
// Never delete inline asm.
292296
if (MI->isInlineAsm()) {
293297
LLVM_DEBUG(dbgs() << "Won't delete: " << Idx << '\t' << *MI);

llvm/lib/CodeGen/MachineInstrBundle.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,34 @@ VirtRegInfo llvm::AnalyzeVirtRegInBundle(
307307
return RI;
308308
}
309309

310+
// Compute which lanes of Reg are read and written by the operands of the
// bundle containing MI. Returns {UseMask, DefMask}: UseMask collects lanes
// that are read, DefMask collects lanes that are written.
std::pair<LaneBitmask, LaneBitmask>
311+
llvm::AnalyzeVirtRegLanesInBundle(const MachineInstr &MI, Register Reg,
312+
const MachineRegisterInfo &MRI,
313+
const TargetRegisterInfo &TRI) {
314+
315+
LaneBitmask UseMask, DefMask;
316+
// Visit every operand in the bundle and keep only register operands that
// name Reg.
317+
for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
318+
const MachineOperand &MO = *O;
319+
if (!MO.isReg() || MO.getReg() != Reg)
320+
continue;
321+
322+
unsigned SubReg = MO.getSubReg();
// A non-undef use of the whole register reads every lane the virtual
// register can have.
323+
if (SubReg == 0 && MO.isUse() && !MO.isUndef())
324+
UseMask |= MRI.getMaxLaneMaskForVReg(Reg);
325+
326+
LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(SubReg);
// A def always writes the lanes of its subregister index. Unless it is
// marked undef, it is also treated as reading all remaining lanes.
// NOTE(review): ~SubRegMask is not intersected with the register's maximum
// lane mask here, so UseMask may carry bits outside it; confirm that
// callers mask the result as needed.
327+
if (MO.isDef()) {
328+
if (!MO.isUndef())
329+
UseMask |= ~SubRegMask;
330+
DefMask |= SubRegMask;
// A non-undef use reads exactly the lanes of its subregister index.
331+
} else if (!MO.isUndef())
332+
UseMask |= SubRegMask;
333+
}
334+
335+
return {UseMask, DefMask};
336+
}
337+
310338
PhysRegInfo llvm::AnalyzePhysRegInBundle(const MachineInstr &MI, Register Reg,
311339
const TargetRegisterInfo *TRI) {
312340
bool AllDefsDead = true;

llvm/test/CodeGen/AMDGPU/greedy-instruction-split-subrange.mir

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,7 @@ body: |
2626
; CHECK-NEXT: undef %9.sub1:vreg_64 = COPY [[GLOBAL_LOAD_DWORDX2_SADDR]].sub1
2727
; CHECK-NEXT: S_NOP 0, implicit %9.sub1
2828
; CHECK-NEXT: [[SI_SPILL_V64_RESTORE:%[0-9]+]]:vreg_64 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
29-
; CHECK-NEXT: undef %11.sub0:vreg_64 = COPY [[SI_SPILL_V64_RESTORE]].sub0
30-
; CHECK-NEXT: S_NOP 0, implicit %11.sub0
29+
; CHECK-NEXT: S_NOP 0, implicit [[SI_SPILL_V64_RESTORE]].sub0
3130
; CHECK-NEXT: undef %7.sub1:vreg_64 = COPY [[GLOBAL_LOAD_DWORDX2_SADDR2]].sub1
3231
; CHECK-NEXT: S_NOP 0, implicit %7.sub1
3332
; CHECK-NEXT: S_ENDPGM 0

llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir

Lines changed: 34 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -68,26 +68,24 @@ body: |
6868
; CHECK-NEXT: liveins: $sgpr15, $sgpr16, $sgpr33, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr100_sgpr101
6969
; CHECK-NEXT: {{ $}}
7070
; CHECK-NEXT: renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
71-
; CHECK-NEXT: renamable $sgpr52 = COPY renamable $sgpr68
72-
; CHECK-NEXT: renamable $sgpr53 = COPY renamable $sgpr68
73-
; CHECK-NEXT: renamable $sgpr54 = COPY renamable $sgpr68
74-
; CHECK-NEXT: renamable $sgpr55 = COPY renamable $sgpr68
75-
; CHECK-NEXT: renamable $sgpr56 = COPY renamable $sgpr68
76-
; CHECK-NEXT: renamable $sgpr57 = COPY renamable $sgpr68
77-
; CHECK-NEXT: renamable $sgpr58 = COPY renamable $sgpr68
78-
; CHECK-NEXT: renamable $sgpr59 = COPY renamable $sgpr68
79-
; CHECK-NEXT: renamable $sgpr60 = COPY renamable $sgpr68
80-
; CHECK-NEXT: renamable $sgpr61 = COPY renamable $sgpr68
81-
; CHECK-NEXT: renamable $sgpr62 = COPY renamable $sgpr68
82-
; CHECK-NEXT: renamable $sgpr63 = COPY renamable $sgpr68
83-
; CHECK-NEXT: renamable $sgpr64 = COPY renamable $sgpr68
84-
; CHECK-NEXT: renamable $sgpr65 = COPY renamable $sgpr68
85-
; CHECK-NEXT: renamable $sgpr66 = COPY renamable $sgpr68
86-
; CHECK-NEXT: renamable $sgpr67 = COPY killed renamable $sgpr68
71+
; CHECK-NEXT: renamable $sgpr36 = COPY renamable $sgpr68
72+
; CHECK-NEXT: renamable $sgpr37 = COPY renamable $sgpr68
73+
; CHECK-NEXT: renamable $sgpr38 = COPY renamable $sgpr68
74+
; CHECK-NEXT: renamable $sgpr39 = COPY renamable $sgpr68
75+
; CHECK-NEXT: renamable $sgpr40 = COPY renamable $sgpr68
76+
; CHECK-NEXT: renamable $sgpr41 = COPY renamable $sgpr68
77+
; CHECK-NEXT: renamable $sgpr42 = COPY renamable $sgpr68
78+
; CHECK-NEXT: renamable $sgpr43 = COPY renamable $sgpr68
79+
; CHECK-NEXT: renamable $sgpr44 = COPY renamable $sgpr68
80+
; CHECK-NEXT: renamable $sgpr45 = COPY renamable $sgpr68
81+
; CHECK-NEXT: renamable $sgpr46 = COPY renamable $sgpr68
82+
; CHECK-NEXT: renamable $sgpr47 = COPY renamable $sgpr68
83+
; CHECK-NEXT: renamable $sgpr48 = COPY renamable $sgpr68
84+
; CHECK-NEXT: renamable $sgpr49 = COPY renamable $sgpr68
85+
; CHECK-NEXT: renamable $sgpr50 = COPY renamable $sgpr68
86+
; CHECK-NEXT: renamable $sgpr51 = COPY killed renamable $sgpr68
8787
; CHECK-NEXT: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
88-
; CHECK-NEXT: renamable $sgpr68 = COPY killed renamable $sgpr84
89-
; CHECK-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = COPY killed renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67
90-
; CHECK-NEXT: renamable $sgpr52 = COPY renamable $sgpr68
88+
; CHECK-NEXT: renamable $sgpr52 = COPY killed renamable $sgpr84
9189
; CHECK-NEXT: renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
9290
; CHECK-NEXT: renamable $sgpr53 = COPY killed renamable $sgpr72
9391
; CHECK-NEXT: renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
@@ -158,15 +156,15 @@ body: |
158156
; CHECK-NEXT: successors: %bb.7(0x80000000)
159157
; CHECK-NEXT: liveins: $sgpr15, $sgpr16, $sgpr33, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr88_sgpr89, $sgpr100_sgpr101
160158
; CHECK-NEXT: {{ $}}
161-
; CHECK-NEXT: dead %27:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr22_sgpr23, implicit $exec
159+
; CHECK-NEXT: dead [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr22_sgpr23, implicit $exec
162160
; CHECK-NEXT: {{ $}}
163161
; CHECK-NEXT: bb.7:
164162
; CHECK-NEXT: successors: %bb.8(0x80000000)
165163
; CHECK-NEXT: liveins: $sgpr15, $sgpr16, $sgpr33, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr88_sgpr89, $sgpr100_sgpr101
166164
; CHECK-NEXT: {{ $}}
167165
; CHECK-NEXT: renamable $sgpr90_sgpr91 = nofpexcept V_CMP_NLT_F64_e64 0, undef $sgpr4_sgpr5, 0, undef %29:vreg_64_align2, 0, implicit $mode, implicit $exec
168166
; CHECK-NEXT: renamable $sgpr92_sgpr93 = nofpexcept V_CMP_NLT_F64_e64 0, 4607182418800017408, 0, undef %29:vreg_64_align2, 0, implicit $mode, implicit $exec
169-
; CHECK-NEXT: dead %30:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V32 [[COPY1]], undef $sgpr33, 11, implicit-def $m0, implicit $m0, implicit $exec
167+
; CHECK-NEXT: dead [[V_INDIRECT_REG_READ_GPR_IDX_B32_V32_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V32 [[COPY1]], undef $sgpr33, 11, implicit-def $m0, implicit $m0, implicit $exec
170168
; CHECK-NEXT: {{ $}}
171169
; CHECK-NEXT: bb.8:
172170
; CHECK-NEXT: successors: %bb.10(0x40000000), %bb.9(0x40000000)
@@ -182,40 +180,40 @@ body: |
182180
; CHECK-NEXT: renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
183181
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY killed renamable $sgpr68_sgpr69, implicit $exec
184182
; CHECK-NEXT: GLOBAL_STORE_DWORDX2_SADDR undef %18:vgpr_32, [[COPY2]], undef renamable $sgpr4_sgpr5, 0, 0, implicit $exec :: (store (s64), addrspace 1)
185-
; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr12_sgpr13, implicit $exec
186-
; CHECK-NEXT: dead renamable $sgpr4_sgpr5 = V_CMP_NE_U32_e64 1, [[V_CNDMASK_B32_e64_]], implicit $exec
183+
; CHECK-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr12_sgpr13, implicit $exec
184+
; CHECK-NEXT: dead renamable $sgpr4_sgpr5 = V_CMP_NE_U32_e64 1, [[V_CNDMASK_B32_e64_1]], implicit $exec
187185
; CHECK-NEXT: renamable $sgpr64 = S_ADD_U32 renamable $sgpr8, 32, implicit-def dead $scc
188186
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
189187
; CHECK-NEXT: $sgpr4_sgpr5 = COPY renamable $sgpr34_sgpr35
190-
; CHECK-NEXT: renamable $sgpr52_sgpr53 = COPY killed renamable $sgpr6_sgpr7
191-
; CHECK-NEXT: $sgpr6_sgpr7 = COPY renamable $sgpr52_sgpr53
188+
; CHECK-NEXT: renamable $sgpr42_sgpr43 = COPY killed renamable $sgpr6_sgpr7
189+
; CHECK-NEXT: $sgpr6_sgpr7 = COPY renamable $sgpr42_sgpr43
192190
; CHECK-NEXT: renamable $sgpr38_sgpr39 = COPY killed renamable $sgpr10_sgpr11
193191
; CHECK-NEXT: $sgpr10_sgpr11 = COPY renamable $sgpr38_sgpr39
194-
; CHECK-NEXT: renamable $sgpr42_sgpr43 = COPY killed renamable $sgpr12_sgpr13
192+
; CHECK-NEXT: renamable $sgpr44_sgpr45 = COPY killed renamable $sgpr12_sgpr13
195193
; CHECK-NEXT: $sgpr12 = COPY renamable $sgpr33
196194
; CHECK-NEXT: $sgpr13 = COPY renamable $sgpr15
197195
; CHECK-NEXT: renamable $sgpr36 = COPY killed renamable $sgpr16
198196
; CHECK-NEXT: renamable $sgpr37 = COPY killed renamable $sgpr15
199197
; CHECK-NEXT: renamable $sgpr40 = COPY killed renamable $sgpr8
200-
; CHECK-NEXT: renamable $sgpr44_sgpr45 = COPY killed renamable $sgpr18_sgpr19
201-
; CHECK-NEXT: renamable $sgpr46_sgpr47 = COPY killed renamable $sgpr20_sgpr21
202-
; CHECK-NEXT: renamable $sgpr48_sgpr49 = COPY killed renamable $sgpr22_sgpr23
203-
; CHECK-NEXT: renamable $sgpr50_sgpr51 = COPY killed renamable $sgpr24_sgpr25
198+
; CHECK-NEXT: renamable $sgpr46_sgpr47 = COPY killed renamable $sgpr18_sgpr19
199+
; CHECK-NEXT: renamable $sgpr48_sgpr49 = COPY killed renamable $sgpr20_sgpr21
200+
; CHECK-NEXT: renamable $sgpr50_sgpr51 = COPY killed renamable $sgpr22_sgpr23
201+
; CHECK-NEXT: renamable $sgpr52_sgpr53 = COPY killed renamable $sgpr24_sgpr25
204202
; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL undef renamable $sgpr4_sgpr5, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13
205203
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
206204
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
207205
; CHECK-NEXT: $sgpr8_sgpr9 = COPY renamable $sgpr64_sgpr65
208206
; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL undef renamable $sgpr4_sgpr5, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr8_sgpr9
209-
; CHECK-NEXT: renamable $sgpr24_sgpr25 = COPY killed renamable $sgpr50_sgpr51
210-
; CHECK-NEXT: renamable $sgpr22_sgpr23 = COPY killed renamable $sgpr48_sgpr49
211-
; CHECK-NEXT: renamable $sgpr20_sgpr21 = COPY killed renamable $sgpr46_sgpr47
212-
; CHECK-NEXT: renamable $sgpr18_sgpr19 = COPY killed renamable $sgpr44_sgpr45
213-
; CHECK-NEXT: renamable $sgpr12_sgpr13 = COPY killed renamable $sgpr42_sgpr43
207+
; CHECK-NEXT: renamable $sgpr24_sgpr25 = COPY killed renamable $sgpr52_sgpr53
208+
; CHECK-NEXT: renamable $sgpr22_sgpr23 = COPY killed renamable $sgpr50_sgpr51
209+
; CHECK-NEXT: renamable $sgpr20_sgpr21 = COPY killed renamable $sgpr48_sgpr49
210+
; CHECK-NEXT: renamable $sgpr18_sgpr19 = COPY killed renamable $sgpr46_sgpr47
211+
; CHECK-NEXT: renamable $sgpr12_sgpr13 = COPY killed renamable $sgpr44_sgpr45
212+
; CHECK-NEXT: renamable $sgpr6_sgpr7 = COPY killed renamable $sgpr42_sgpr43
214213
; CHECK-NEXT: renamable $sgpr8 = COPY killed renamable $sgpr40
215214
; CHECK-NEXT: renamable $sgpr10_sgpr11 = COPY killed renamable $sgpr38_sgpr39
216215
; CHECK-NEXT: renamable $sgpr15 = COPY killed renamable $sgpr37
217216
; CHECK-NEXT: renamable $sgpr16 = COPY killed renamable $sgpr36
218-
; CHECK-NEXT: renamable $sgpr6_sgpr7 = COPY killed renamable $sgpr52_sgpr53
219217
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
220218
; CHECK-NEXT: $exec = S_MOV_B64_term renamable $sgpr92_sgpr93
221219
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.10, implicit $exec

0 commit comments

Comments
 (0)