Skip to content

Commit 3b83edb

Browse files
committed
Re-apply "[VirtRegRewriter] Avoid clobbering registers when expanding copy bundles"
This is r334750 (which was reverted in r334754) with a fix for an uninitialized variable that was caught by msan. Original commit message: > If a copy bundle happens to involve overlapping registers, we can end > up with emitting the copies in an order that ends up clobbering some > of the subregisters. Since instructions in the copy bundle > semantically happen at the same time, this is incorrect and we need to > make sure we order the copies such that this doesn't happen. llvm-svn: 334756
1 parent 8ec242f commit 3b83edb

File tree

3 files changed

+142
-7
lines changed

3 files changed

+142
-7
lines changed

llvm/lib/CodeGen/VirtRegMap.cpp

Lines changed: 46 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -406,23 +406,62 @@ void VirtRegRewriter::expandCopyBundle(MachineInstr &MI) const {
406406
return;
407407

408408
if (MI.isBundledWithPred() && !MI.isBundledWithSucc()) {
409+
SmallVector<MachineInstr *, 2> MIs({&MI});
410+
409411
// Only do this when the complete bundle is made out of COPYs.
410412
MachineBasicBlock &MBB = *MI.getParent();
411413
for (MachineBasicBlock::reverse_instr_iterator I =
412414
std::next(MI.getReverseIterator()), E = MBB.instr_rend();
413415
I != E && I->isBundledWithSucc(); ++I) {
414416
if (!I->isCopy())
415417
return;
418+
MIs.push_back(&*I);
419+
}
420+
MachineInstr *FirstMI = MIs.back();
421+
422+
auto anyRegsAlias = [](const MachineInstr *Dst,
423+
ArrayRef<MachineInstr *> Srcs,
424+
const TargetRegisterInfo *TRI) {
425+
for (const MachineInstr *Src : Srcs)
426+
if (Src != Dst)
427+
if (TRI->regsOverlap(Dst->getOperand(0).getReg(),
428+
Src->getOperand(1).getReg()))
429+
return true;
430+
return false;
431+
};
432+
433+
// If any of the destination registers in the bundle of copies alias any of
434+
// the source registers, try to schedule the instructions to avoid any
435+
// clobbering.
436+
for (int E = MIs.size(), PrevE = E; E > 1; PrevE = E) {
437+
for (int I = E; I--; )
438+
if (!anyRegsAlias(MIs[I], makeArrayRef(MIs).take_front(E), TRI)) {
439+
if (I + 1 != E)
440+
std::swap(MIs[I], MIs[E - 1]);
441+
--E;
442+
}
443+
if (PrevE == E) {
444+
MF->getFunction().getContext().emitError(
445+
"register rewriting failed: cycle in copy bundle");
446+
break;
447+
}
416448
}
417449

418-
for (MachineBasicBlock::reverse_instr_iterator I = MI.getReverseIterator();
419-
I->isBundledWithPred(); ) {
420-
MachineInstr &MI = *I;
421-
++I;
450+
MachineInstr *BundleStart = FirstMI;
451+
for (MachineInstr *BundledMI : llvm::reverse(MIs)) {
452+
// If instruction is in the middle of the bundle, move it before the
453+
// bundle starts, otherwise, just unbundle it. When we get to the last
454+
// instruction, the bundle will have been completely undone.
455+
if (BundledMI != BundleStart) {
456+
BundledMI->removeFromBundle();
457+
MBB.insert(FirstMI, BundledMI);
458+
} else if (BundledMI->isBundledWithSucc()) {
459+
BundledMI->unbundleFromSucc();
460+
BundleStart = &*std::next(BundledMI->getIterator());
461+
}
422462

423-
MI.unbundleFromPred();
424-
if (Indexes)
425-
Indexes->insertMachineInstrInMaps(MI);
463+
if (Indexes && BundledMI != FirstMI)
464+
Indexes->insertMachineInstrInMaps(*BundledMI);
426465
}
427466
}
428467
}
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# RUN: not llc -mtriple=aarch64-apple-ios -run-pass=greedy -run-pass=virtregrewriter %s -o /dev/null 2>&1 | FileCheck %s
2+
3+
# Check we don't infinitely loop on cycles in copy bundles.
4+
# CHECK: error: register rewriting failed: cycle in copy bundle
5+
6+
---
7+
name: func0
8+
body: |
9+
bb.0:
10+
$x0 = IMPLICIT_DEF
11+
$q0_q1_q2_q3 = IMPLICIT_DEF
12+
$q1_q2_q3 = COPY $q0_q1_q2 {
13+
$q2_q3_q4 = COPY $q1_q2_q3
14+
}
15+
ST4i64 $q1_q2_q3_q4, 0, $x0
16+
...
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple=aarch64-apple-ios -run-pass=greedy -run-pass=virtregrewriter %s -o - | FileCheck %s
3+
---
4+
name: func0
5+
body: |
6+
bb.0:
7+
; Make sure we don't clobber q3 when we expand the bundle
8+
; CHECK-LABEL: name: func0
9+
; CHECK: $x0 = IMPLICIT_DEF
10+
; CHECK: $q0_q1_q2_q3 = IMPLICIT_DEF
11+
; CHECK: $q4 = COPY $q3
12+
; CHECK: $q1_q2_q3 = COPY $q0_q1_q2
13+
; CHECK: ST4i64 $q1_q2_q3_q4, 0, $x0
14+
$x0 = IMPLICIT_DEF
15+
$q0_q1_q2_q3 = IMPLICIT_DEF
16+
$q1_q2_q3 = COPY $q0_q1_q2 {
17+
$q4 = COPY $q3
18+
}
19+
ST4i64 $q1_q2_q3_q4, 0, $x0
20+
21+
...
22+
---
23+
name: func1
24+
body: |
25+
bb.0:
26+
; If it was already ordered, make sure we don't break it
27+
; CHECK-LABEL: name: func1
28+
; CHECK: $x0 = IMPLICIT_DEF
29+
; CHECK: $q0_q1_q2_q3 = IMPLICIT_DEF
30+
; CHECK: $q4 = COPY $q3
31+
; CHECK: $q1_q2_q3 = COPY $q0_q1_q2
32+
; CHECK: ST4i64 $q1_q2_q3_q4, 0, $x0
33+
$x0 = IMPLICIT_DEF
34+
$q0_q1_q2_q3 = IMPLICIT_DEF
35+
$q4 = COPY $q3 {
36+
$q1_q2_q3 = COPY $q0_q1_q2
37+
}
38+
ST4i64 $q1_q2_q3_q4, 0, $x0
39+
40+
...
41+
---
42+
name: func2
43+
body: |
44+
bb.0:
45+
; A bit less realistic, but check that we handle multiple nodes
46+
; CHECK-LABEL: name: func2
47+
; CHECK: $x0 = IMPLICIT_DEF
48+
; CHECK: $q0_q1_q2_q3 = IMPLICIT_DEF
49+
; CHECK: $q3 = COPY $q2
50+
; CHECK: $q4 = COPY $q1
51+
; CHECK: $q1_q2 = COPY $q0_q1
52+
; CHECK: ST4i64 $q1_q2_q3_q4, 0, $x0
53+
$x0 = IMPLICIT_DEF
54+
$q0_q1_q2_q3 = IMPLICIT_DEF
55+
$q1_q2 = COPY $q0_q1 {
56+
$q3 = COPY $q2
57+
$q4 = COPY $q1
58+
}
59+
ST4i64 $q1_q2_q3_q4, 0, $x0
60+
61+
...
62+
---
63+
name: func3
64+
body: |
65+
bb.0:
66+
; If there was nothing wrong, don't change the order for no reason
67+
; CHECK-LABEL: name: func3
68+
; CHECK: $x0 = IMPLICIT_DEF
69+
; CHECK: $q1_q2_q3_q4 = IMPLICIT_DEF
70+
; CHECK: $q0_q1 = COPY $q1_q2
71+
; CHECK: $q2_q3 = COPY $q3_q4
72+
; CHECK: ST4i64 $q0_q1_q2_q3, 0, $x0
73+
$x0 = IMPLICIT_DEF
74+
$q1_q2_q3_q4 = IMPLICIT_DEF
75+
$q0_q1 = COPY $q1_q2 {
76+
$q2_q3 = COPY $q3_q4
77+
}
78+
ST4i64 $q0_q1_q2_q3, 0, $x0
79+
80+
...

0 commit comments

Comments
 (0)