Skip to content

[PHIElimination] Reuse existing COPY in predecessor basic block (Take Two) #146806

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions llvm/lib/CodeGen/PHIElimination.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "llvm/CodeGen/PHIElimination.h"
#include "PHIEliminationUtils.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/LoopInfo.h"
Expand Down Expand Up @@ -541,6 +542,7 @@ void PHIEliminationImpl::LowerPHINode(MachineBasicBlock &MBB,
// Now loop over all of the incoming arguments, changing them to copy into the
// IncomingReg register in the corresponding predecessor basic block.
SmallPtrSet<MachineBasicBlock *, 8> MBBsInsertedInto;
SmallVector<MachineInstr *, 8> InsertedCopies;
for (int i = NumSrcs - 1; i >= 0; --i) {
Register SrcReg = MPhi->getOperand(i * 2 + 1).getReg();
unsigned SrcSubReg = MPhi->getOperand(i * 2 + 1).getSubReg();
Expand Down Expand Up @@ -607,6 +609,7 @@ void PHIEliminationImpl::LowerPHINode(MachineBasicBlock &MBB,
NewSrcInstr = TII->createPHISourceCopy(opBlock, InsertPos, nullptr,
SrcReg, SrcSubReg, IncomingReg);
}
InsertedCopies.emplace_back(NewSrcInstr);
}

// We only need to update the LiveVariables kill of SrcReg if this was the
Expand Down Expand Up @@ -730,6 +733,38 @@ void PHIEliminationImpl::LowerPHINode(MachineBasicBlock &MBB,
}
}

// Remove redundant COPY instruction chains, which were potentially added by
// the code above. This can prevent future passes from complicating the CFG
// and causing a suboptimal block layout.
for (MachineInstr *NewCopy : InsertedCopies) {
if (NewCopy->isImplicitDef())
continue;
Register IncomingReg = NewCopy->getOperand(0).getReg();
if (!IncomingReg.isVirtual())
continue;
Register SrcReg = NewCopy->getOperand(1).getReg();
if (!MRI->hasOneNonDBGUse(SrcReg))
continue;
MachineInstr *DefMI = MRI->getUniqueVRegDef(SrcReg);
if (!DefMI || !DefMI->isCopy() ||
DefMI->getParent() != NewCopy->getParent())
continue;
auto InstrRange =
make_range(std::next(DefMI->getIterator()), NewCopy->getIterator());
if (any_of(InstrRange, [&](const MachineInstr &MI) {
return MI.readsVirtualRegister(IncomingReg);
}))
continue;
const TargetRegisterClass *SrcRC = MRI->getRegClass(SrcReg);
const TargetRegisterClass *IncomingRC = MRI->getRegClass(IncomingReg);
if (!IncomingRC->hasSuperClassEq(SrcRC))
continue;
MRI->replaceRegWith(SrcReg, IncomingReg);
NewCopy->removeFromParent();
if (LV)
LV->getVarInfo(SrcReg).AliveBlocks.clear();
}

// Really delete the PHI instruction now, if it is not in the LoweredPHIs map.
if (EliminateNow) {
if (LIS)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -118,8 +118,8 @@ define dso_local void @store_atomic_i64_aligned_seq_cst(i64 %value, ptr %ptr) {
define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr) {
; -O0-LABEL: store_atomic_i128_aligned_unordered:
; -O0: bl __aarch64_cas16_relax
; -O0: subs x10, x10, x11
; -O0: ccmp x8, x9, #0, eq
; -O0: subs x9, x0, x9
; -O0: ccmp x1, x8, #0, eq
;
; -O1-LABEL: store_atomic_i128_aligned_unordered:
; -O1: ldxp xzr, x8, [x2]
Expand All @@ -131,8 +131,8 @@ define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr
define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr) {
; -O0-LABEL: store_atomic_i128_aligned_monotonic:
; -O0: bl __aarch64_cas16_relax
; -O0: subs x10, x10, x11
; -O0: ccmp x8, x9, #0, eq
; -O0: subs x9, x0, x9
; -O0: ccmp x1, x8, #0, eq
;
; -O1-LABEL: store_atomic_i128_aligned_monotonic:
; -O1: ldxp xzr, x8, [x2]
Expand All @@ -144,8 +144,8 @@ define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr
define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr) {
; -O0-LABEL: store_atomic_i128_aligned_release:
; -O0: bl __aarch64_cas16_rel
; -O0: subs x10, x10, x11
; -O0: ccmp x8, x9, #0, eq
; -O0: subs x9, x0, x9
; -O0: ccmp x1, x8, #0, eq
;
; -O1-LABEL: store_atomic_i128_aligned_release:
; -O1: ldxp xzr, x8, [x2]
Expand All @@ -157,8 +157,8 @@ define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr)
define dso_local void @store_atomic_i128_aligned_seq_cst(i128 %value, ptr %ptr) {
; -O0-LABEL: store_atomic_i128_aligned_seq_cst:
; -O0: bl __aarch64_cas16_acq_rel
; -O0: subs x10, x10, x11
; -O0: ccmp x8, x9, #0, eq
; -O0: subs x9, x0, x9
; -O0: ccmp x1, x8, #0, eq
;
; -O1-LABEL: store_atomic_i128_aligned_seq_cst:
; -O1: ldaxp xzr, x8, [x2]
Expand Down
48 changes: 24 additions & 24 deletions llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-rcpc.ll
Original file line number Diff line number Diff line change
Expand Up @@ -117,13 +117,13 @@ define dso_local void @store_atomic_i64_aligned_seq_cst(i64 %value, ptr %ptr) {

define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr) {
; -O0-LABEL: store_atomic_i128_aligned_unordered:
; -O0: ldxp x10, x12, [x9]
; -O0: ldxp x8, x10, [x13]
; -O0: cmp x8, x9
; -O0: cmp x10, x11
; -O0: cmp x12, x13
; -O0: stxp w8, x14, x15, [x9]
; -O0: stxp w8, x10, x12, [x9]
; -O0: subs x12, x12, x13
; -O0: ccmp x10, x11, #0, eq
; -O0: stxp w12, x14, x15, [x13]
; -O0: stxp w12, x8, x10, [x13]
; -O0: subs x10, x10, x11
; -O0: ccmp x8, x9, #0, eq
;
; -O1-LABEL: store_atomic_i128_aligned_unordered:
; -O1: ldxp xzr, x8, [x2]
Expand All @@ -134,13 +134,13 @@ define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr

define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr) {
; -O0-LABEL: store_atomic_i128_aligned_monotonic:
; -O0: ldxp x10, x12, [x9]
; -O0: ldxp x8, x10, [x13]
; -O0: cmp x8, x9
; -O0: cmp x10, x11
; -O0: cmp x12, x13
; -O0: stxp w8, x14, x15, [x9]
; -O0: stxp w8, x10, x12, [x9]
; -O0: subs x12, x12, x13
; -O0: ccmp x10, x11, #0, eq
; -O0: stxp w12, x14, x15, [x13]
; -O0: stxp w12, x8, x10, [x13]
; -O0: subs x10, x10, x11
; -O0: ccmp x8, x9, #0, eq
;
; -O1-LABEL: store_atomic_i128_aligned_monotonic:
; -O1: ldxp xzr, x8, [x2]
Expand All @@ -151,13 +151,13 @@ define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr

define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr) {
; -O0-LABEL: store_atomic_i128_aligned_release:
; -O0: ldxp x10, x12, [x9]
; -O0: ldxp x8, x10, [x13]
; -O0: cmp x8, x9
; -O0: cmp x10, x11
; -O0: cmp x12, x13
; -O0: stlxp w8, x14, x15, [x9]
; -O0: stlxp w8, x10, x12, [x9]
; -O0: subs x12, x12, x13
; -O0: ccmp x10, x11, #0, eq
; -O0: stlxp w12, x14, x15, [x13]
; -O0: stlxp w12, x8, x10, [x13]
; -O0: subs x10, x10, x11
; -O0: ccmp x8, x9, #0, eq
;
; -O1-LABEL: store_atomic_i128_aligned_release:
; -O1: ldxp xzr, x8, [x2]
Expand All @@ -168,13 +168,13 @@ define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr)

define dso_local void @store_atomic_i128_aligned_seq_cst(i128 %value, ptr %ptr) {
; -O0-LABEL: store_atomic_i128_aligned_seq_cst:
; -O0: ldaxp x10, x12, [x9]
; -O0: ldaxp x8, x10, [x13]
; -O0: cmp x8, x9
; -O0: cmp x10, x11
; -O0: cmp x12, x13
; -O0: stlxp w8, x14, x15, [x9]
; -O0: stlxp w8, x10, x12, [x9]
; -O0: subs x12, x12, x13
; -O0: ccmp x10, x11, #0, eq
; -O0: stlxp w12, x14, x15, [x13]
; -O0: stlxp w12, x8, x10, [x13]
; -O0: subs x10, x10, x11
; -O0: ccmp x8, x9, #0, eq
;
; -O1-LABEL: store_atomic_i128_aligned_seq_cst:
; -O1: ldaxp xzr, x8, [x2]
Expand Down
48 changes: 24 additions & 24 deletions llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-v8a.ll
Original file line number Diff line number Diff line change
Expand Up @@ -117,13 +117,13 @@ define dso_local void @store_atomic_i64_aligned_seq_cst(i64 %value, ptr %ptr) {

define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr) {
; -O0-LABEL: store_atomic_i128_aligned_unordered:
; -O0: ldxp x10, x12, [x9]
; -O0: ldxp x8, x10, [x13]
; -O0: cmp x8, x9
; -O0: cmp x10, x11
; -O0: cmp x12, x13
; -O0: stxp w8, x14, x15, [x9]
; -O0: stxp w8, x10, x12, [x9]
; -O0: subs x12, x12, x13
; -O0: ccmp x10, x11, #0, eq
; -O0: stxp w12, x14, x15, [x13]
; -O0: stxp w12, x8, x10, [x13]
; -O0: subs x10, x10, x11
; -O0: ccmp x8, x9, #0, eq
;
; -O1-LABEL: store_atomic_i128_aligned_unordered:
; -O1: ldxp xzr, x8, [x2]
Expand All @@ -134,13 +134,13 @@ define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr

define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr) {
; -O0-LABEL: store_atomic_i128_aligned_monotonic:
; -O0: ldxp x10, x12, [x9]
; -O0: ldxp x8, x10, [x13]
; -O0: cmp x8, x9
; -O0: cmp x10, x11
; -O0: cmp x12, x13
; -O0: stxp w8, x14, x15, [x9]
; -O0: stxp w8, x10, x12, [x9]
; -O0: subs x12, x12, x13
; -O0: ccmp x10, x11, #0, eq
; -O0: stxp w12, x14, x15, [x13]
; -O0: stxp w12, x8, x10, [x13]
; -O0: subs x10, x10, x11
; -O0: ccmp x8, x9, #0, eq
;
; -O1-LABEL: store_atomic_i128_aligned_monotonic:
; -O1: ldxp xzr, x8, [x2]
Expand All @@ -151,13 +151,13 @@ define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr

define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr) {
; -O0-LABEL: store_atomic_i128_aligned_release:
; -O0: ldxp x10, x12, [x9]
; -O0: ldxp x8, x10, [x13]
; -O0: cmp x8, x9
; -O0: cmp x10, x11
; -O0: cmp x12, x13
; -O0: stlxp w8, x14, x15, [x9]
; -O0: stlxp w8, x10, x12, [x9]
; -O0: subs x12, x12, x13
; -O0: ccmp x10, x11, #0, eq
; -O0: stlxp w12, x14, x15, [x13]
; -O0: stlxp w12, x8, x10, [x13]
; -O0: subs x10, x10, x11
; -O0: ccmp x8, x9, #0, eq
;
; -O1-LABEL: store_atomic_i128_aligned_release:
; -O1: ldxp xzr, x8, [x2]
Expand All @@ -168,13 +168,13 @@ define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr)

define dso_local void @store_atomic_i128_aligned_seq_cst(i128 %value, ptr %ptr) {
; -O0-LABEL: store_atomic_i128_aligned_seq_cst:
; -O0: ldaxp x10, x12, [x9]
; -O0: ldaxp x8, x10, [x13]
; -O0: cmp x8, x9
; -O0: cmp x10, x11
; -O0: cmp x12, x13
; -O0: stlxp w8, x14, x15, [x9]
; -O0: stlxp w8, x10, x12, [x9]
; -O0: subs x12, x12, x13
; -O0: ccmp x10, x11, #0, eq
; -O0: stlxp w12, x14, x15, [x13]
; -O0: stlxp w12, x8, x10, [x13]
; -O0: subs x10, x10, x11
; -O0: ccmp x8, x9, #0, eq
;
; -O1-LABEL: store_atomic_i128_aligned_seq_cst:
; -O1: ldaxp xzr, x8, [x2]
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AArch64/PHIElimination-debugloc.mir
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ body: |
bb.1:
%x:gpr32 = COPY $wzr
; Test that the debug location is not copied into bb1!
; CHECK: %3:gpr32 = COPY killed %x{{$}}
; CHECK: %3:gpr32 = COPY $wzr
; CHECK-LABEL: bb.2:
bb.2:
%y:gpr32 = PHI %x:gpr32, %bb.1, undef %undef:gpr32, %bb.0, debug-location !14
Expand Down
Loading