From 146882242fb5a69b1a4114dbb9f280c9da89f6cb Mon Sep 17 00:00:00 2001
From: Amara Emerson
Date: Mon, 17 Jun 2019 23:20:29 +0000
Subject: [PATCH] [GlobalISel][Localizer] Rewrite localizer to run in 2 phases,
 inter & intra block.

Inter-block localization is the same as what currently happens, except now it
only runs on the entry block, because that's where the problematic constants
with long live ranges come from.

The second phase is a new intra-block localization phase which attempts to
re-sink the already localized instructions further, right before one of the
multiple uses.

One additional change is to also localize G_GLOBAL_VALUE, as they're constants
too. However, on some targets like arm64 it takes multiple instructions to
materialize the value, so some additional heuristics with a TTI hook have been
introduced to attempt to prevent code size regressions when localizing these.

Overall, these changes improve CTMark code size on arm64 by 1.2%.

Full code size results:

Program                                        baseline      new     diff
------------------------------------------------------------------------------
 test-suite...-typeset/consumer-typeset.test    1249984  1217216  -2.6%
 test-suite...:: CTMark/ClamAV/clamscan.test    1264928  1232152  -2.6%
 test-suite :: CTMark/SPASS/SPASS.test          1394092  1361316  -2.4%
 test-suite...Mark/mafft/pairlocalalign.test     731320   714928  -2.2%
 test-suite :: CTMark/lencod/lencod.test        1340592  1324200  -1.2%
 test-suite :: CTMark/kimwitu++/kc.test         3853512  3820420  -0.9%
 test-suite :: CTMark/Bullet/bullet.test        3406036  3389652  -0.5%
 test-suite...ark/tramp3d-v4/tramp3d-v4.test    8017000  8016992  -0.0%
 test-suite...TMark/7zip/7zip-benchmark.test    2856588  2856588   0.0%
 test-suite...:: CTMark/sqlite3/sqlite3.test     765704   765704   0.0%
 Geomean difference                                               -1.2%

Differential Revision: https://reviews.llvm.org/D63303

llvm-svn: 363632
---
 .../llvm/Analysis/TargetTransformInfo.h       |  11 +
 .../llvm/Analysis/TargetTransformInfoImpl.h   |   4 +
 .../llvm/CodeGen/GlobalISel/Localizer.h       |  12 +-
 llvm/lib/Analysis/TargetTransformInfo.cpp     |   4 +
 llvm/lib/CodeGen/GlobalISel/Localizer.cpp     | 219 +++++++++++++-----
 .../AArch64/AArch64TargetTransformInfo.h      |   4 +
 .../AArch64/GlobalISel/localizer-arm64-tti.ll |  62 +++++
 .../CodeGen/AArch64/GlobalISel/localizer.mir  |  87 +++++++
 8 files changed, 341 insertions(+), 62 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll

diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index f53b17df0128c..c1c92b7887514 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1053,6 +1053,11 @@ class TargetTransformInfo {
   /// \returns True if the target wants to expand the given reduction intrinsic
   /// into a shuffle sequence.
   bool shouldExpandReduction(const IntrinsicInst *II) const;
+
+  /// \returns the size cost of rematerializing a GlobalValue address relative
+  /// to a stack reload.
+  unsigned getGISelRematGlobalCost() const;
+
   /// @}
 
 private:
@@ -1269,6 +1274,7 @@ class TargetTransformInfo::Concept {
   virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                                      ReductionFlags) const = 0;
   virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
+  virtual unsigned getGISelRematGlobalCost() const = 0;
   virtual int getInstructionLatency(const Instruction *I) = 0;
 };
 
@@ -1701,6 +1707,11 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
   bool shouldExpandReduction(const IntrinsicInst *II) const override {
     return Impl.shouldExpandReduction(II);
   }
+
+  unsigned getGISelRematGlobalCost() const override {
+    return Impl.getGISelRematGlobalCost();
+  }
+
   int getInstructionLatency(const Instruction *I) override {
     return Impl.getInstructionLatency(I);
   }
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index f8b36ec43a32f..62e9d0f1925be 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -572,6 +572,10 @@ class TargetTransformInfoImplBase {
     return true;
   }
 
+  unsigned getGISelRematGlobalCost() const {
+    return 1;
+  }
+
 protected:
   // Obtain the minimum required size to hold the value (without the sign)
   // In case of a vector it returns the min required size for one element.
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h b/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h
index cfc7c3567c5d5..8ab1e55195c6e 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h
@@ -27,6 +27,7 @@ namespace llvm {
 
 // Forward declarations.
 class MachineRegisterInfo;
+class TargetTransformInfo;
 
 /// This pass implements the localization mechanism described at the
 /// top of this file. One specificity of the implementation is that
@@ -43,9 +44,11 @@ class Localizer : public MachineFunctionPass {
   /// MRI contains all the register class/bank information that this
   /// pass uses and updates.
   MachineRegisterInfo *MRI;
+  /// TTI used for getting remat costs for instructions.
+  TargetTransformInfo *TTI;
 
   /// Check whether or not \p MI needs to be moved close to its uses.
-  static bool shouldLocalize(const MachineInstr &MI);
+  bool shouldLocalize(const MachineInstr &MI);
 
   /// Check if \p MOUse is used in the same basic block as \p Def.
   /// If the use is in the same block, we say it is local.
@@ -57,6 +60,13 @@ class Localizer : public MachineFunctionPass {
   /// Initialize the field members using \p MF.
   void init(MachineFunction &MF);
 
+  /// Do inter-block localization from the entry block.
+  bool localizeInterBlock(MachineFunction &MF,
+                          SmallPtrSetImpl<MachineInstr *> &LocalizedInstrs);
+
+  /// Do intra-block localization of already localized instructions.
+  bool localizeIntraBlock(SmallPtrSetImpl<MachineInstr *> &LocalizedInstrs);
+
 public:
   Localizer();
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index a55c1be1a09ae..83840aa7fbb61 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -724,6 +724,10 @@ bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const {
   return TTIImpl->shouldExpandReduction(II);
 }
 
+unsigned TargetTransformInfo::getGISelRematGlobalCost() const {
+  return TTIImpl->getGISelRematGlobalCost();
+}
+
 int TargetTransformInfo::getInstructionLatency(const Instruction *I) const {
   return TTIImpl->getInstructionLatency(I);
 }
diff --git a/llvm/lib/CodeGen/GlobalISel/Localizer.cpp b/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
index e61cddf114a01..9b99ec12b82ab 100644
--- a/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
@@ -10,6 +10,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/GlobalISel/Localizer.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -20,17 +21,55 @@ using namespace llvm;
 
 char Localizer::ID = 0;
-INITIALIZE_PASS(Localizer, DEBUG_TYPE,
-                "Move/duplicate certain instructions close to their use", false,
-                false)
+INITIALIZE_PASS_BEGIN(Localizer, DEBUG_TYPE,
+                      "Move/duplicate certain instructions close to their use",
+                      false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_END(Localizer, DEBUG_TYPE,
+                    "Move/duplicate certain instructions close to their use",
+                    false, false)
 
 Localizer::Localizer() : MachineFunctionPass(ID) {
   initializeLocalizerPass(*PassRegistry::getPassRegistry());
 }
 
-void Localizer::init(MachineFunction &MF) { MRI = &MF.getRegInfo(); }
+void Localizer::init(MachineFunction &MF) {
+  MRI = &MF.getRegInfo();
+  TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(MF.getFunction());
+}
 
 bool Localizer::shouldLocalize(const MachineInstr &MI) {
+  // Assuming a spill and reload of a value has a cost of 1 instruction each,
+  // this helper function computes the maximum number of uses we should consider
+  // for remat. E.g. on arm64 global addresses take 2 insts to materialize. We
+  // break even in terms of code size when the original MI has 2 users vs
+  // choosing to potentially spill. Any more than 2 users and we have a net code
+  // size increase. This doesn't take into account register pressure though.
+  auto maxUses = [](unsigned RematCost) {
+    // A cost of 1 means remats are basically free.
+    if (RematCost == 1)
+      return UINT_MAX;
+    if (RematCost == 2)
+      return 2U;
+
+    // Remat is too expensive, only sink if there's one user.
+    if (RematCost > 2)
+      return 1U;
+    llvm_unreachable("Unexpected remat cost");
+  };
+
+  // Helper to walk through uses and terminate if we've reached a limit. Saves
+  // us spending time traversing uses if all we want to know is if it's >= min.
+  auto isUsesAtMost = [&](unsigned Reg, unsigned MaxUses) {
+    unsigned NumUses = 0;
+    auto UI = MRI->use_instr_nodbg_begin(Reg), UE = MRI->use_instr_nodbg_end();
+    for (; UI != UE && NumUses < MaxUses; ++UI) {
+      NumUses++;
+    }
+    // If we haven't reached the end yet, then there are more than MaxUses users.
+    return UI == UE;
+  };
+
   switch (MI.getOpcode()) {
   default:
     return false;
@@ -40,10 +79,20 @@ bool Localizer::shouldLocalize(const MachineInstr &MI) {
   case TargetOpcode::G_FCONSTANT:
   case TargetOpcode::G_FRAME_INDEX:
     return true;
+  case TargetOpcode::G_GLOBAL_VALUE: {
+    unsigned RematCost = TTI->getGISelRematGlobalCost();
+    unsigned Reg = MI.getOperand(0).getReg();
+    unsigned MaxUses = maxUses(RematCost);
+    if (MaxUses == UINT_MAX)
+      return true; // Remats are "free" so always localize.
+    bool B = isUsesAtMost(Reg, MaxUses);
+    return B;
+  }
   }
 }
 
 void Localizer::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<TargetTransformInfoWrapperPass>();
   getSelectionDAGFallbackAnalysisUsage(AU);
   MachineFunctionPass::getAnalysisUsage(AU);
 }
@@ -57,6 +106,106 @@ bool Localizer::isLocalUse(MachineOperand &MOUse, const MachineInstr &Def,
   return InsertMBB == Def.getParent();
 }
 
+bool Localizer::localizeInterBlock(
+    MachineFunction &MF, SmallPtrSetImpl<MachineInstr *> &LocalizedInstrs) {
+  bool Changed = false;
+  DenseMap<std::pair<MachineBasicBlock *, unsigned>, unsigned> MBBWithLocalDef;
+
+  // Since the IRTranslator only emits constants into the entry block, and the
+  // rest of the GISel pipeline generally emits constants close to their users,
+  // we only localize instructions in the entry block here. This might change if
+  // we start doing CSE across blocks.
+  auto &MBB = MF.front();
+  for (MachineInstr &MI : MBB) {
+    if (LocalizedInstrs.count(&MI) || !shouldLocalize(MI))
+      continue;
+    LLVM_DEBUG(dbgs() << "Should localize: " << MI);
+    assert(MI.getDesc().getNumDefs() == 1 &&
+           "More than one definition not supported yet");
+    unsigned Reg = MI.getOperand(0).getReg();
+    // Check if all the users of MI are local.
+    // We are going to invalidate the list of use operands, so we
+    // can't use a range iterator.
+    for (auto MOIt = MRI->use_begin(Reg), MOItEnd = MRI->use_end();
+         MOIt != MOItEnd;) {
+      MachineOperand &MOUse = *MOIt++;
+      // Check if the use is already local.
+      MachineBasicBlock *InsertMBB;
+      LLVM_DEBUG(MachineInstr &MIUse = *MOUse.getParent();
+                 dbgs() << "Checking use: " << MIUse
+                        << " #Opd: " << MIUse.getOperandNo(&MOUse) << '\n');
+      if (isLocalUse(MOUse, MI, InsertMBB))
+        continue;
+      LLVM_DEBUG(dbgs() << "Fixing non-local use\n");
+      Changed = true;
+      auto MBBAndReg = std::make_pair(InsertMBB, Reg);
+      auto NewVRegIt = MBBWithLocalDef.find(MBBAndReg);
+      if (NewVRegIt == MBBWithLocalDef.end()) {
+        // Create the localized instruction.
+        MachineInstr *LocalizedMI = MF.CloneMachineInstr(&MI);
+        LocalizedInstrs.insert(LocalizedMI);
+        MachineInstr &UseMI = *MOUse.getParent();
+        if (MRI->hasOneUse(Reg) && !UseMI.isPHI())
+          InsertMBB->insert(InsertMBB->SkipPHIsAndLabels(UseMI), LocalizedMI);
+        else
+          InsertMBB->insert(InsertMBB->SkipPHIsAndLabels(InsertMBB->begin()),
+                            LocalizedMI);
+
+        // Set a new register for the definition.
+        unsigned NewReg = MRI->createGenericVirtualRegister(MRI->getType(Reg));
+        MRI->setRegClassOrRegBank(NewReg, MRI->getRegClassOrRegBank(Reg));
+        LocalizedMI->getOperand(0).setReg(NewReg);
+        NewVRegIt =
+            MBBWithLocalDef.insert(std::make_pair(MBBAndReg, NewReg)).first;
+        LLVM_DEBUG(dbgs() << "Inserted: " << *LocalizedMI);
+      }
+      LLVM_DEBUG(dbgs() << "Update use with: " << printReg(NewVRegIt->second)
+                        << '\n');
+      // Update the user reg.
+      MOUse.setReg(NewVRegIt->second);
+    }
+  }
+  return Changed;
+}
+
+bool Localizer::localizeIntraBlock(
+    SmallPtrSetImpl<MachineInstr *> &LocalizedInstrs) {
+  bool Changed = false;
+
+  // For each already-localized instruction which has multiple users, scan the
+  // block top-down from its current position until we hit one of those users.
+
+  // FIXME: Consider doing inst duplication if live ranges are very long due to
+  // many users, but this case may be better served by regalloc improvements.
+
+  for (MachineInstr *MI : LocalizedInstrs) {
+    unsigned Reg = MI->getOperand(0).getReg();
+    MachineBasicBlock &MBB = *MI->getParent();
+    // If the instruction has a single use, we would have already moved it
+    // right before its user in localizeInterBlock().
+    if (MRI->hasOneUse(Reg))
+      continue;
+
+    // All of the user MIs of this reg.
+    SmallPtrSet<MachineInstr *, 32> Users;
+    for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Reg))
+      Users.insert(&UseMI);
+
+    MachineBasicBlock::iterator II(MI);
+    ++II;
+    while (II != MBB.end() && !Users.count(&*II))
+      ++II;
+
+    LLVM_DEBUG(dbgs() << "Intra-block: moving " << *MI << " before " << *&*II
+                      << "\n");
+    assert(II != MBB.end() && "Didn't find the user in the MBB");
+    MI->removeFromParent();
+    MBB.insert(II, MI);
+    Changed = true;
+  }
+  return Changed;
+}
+
 bool Localizer::runOnMachineFunction(MachineFunction &MF) {
   // If the ISel pipeline failed, do not bother running that pass.
   if (MF.getProperties().hasProperty(
@@ -67,62 +216,10 @@ bool Localizer::runOnMachineFunction(MachineFunction &MF) {
 
   init(MF);
 
-  bool Changed = false;
-  // Keep track of the instructions we localized.
-  // We won't need to process them if we see them later in the CFG.
-  SmallPtrSet<MachineInstr *, 16> LocalizedInstrs;
-  DenseMap<std::pair<MachineBasicBlock *, unsigned>, unsigned> MBBWithLocalDef;
-  // TODO: Do bottom up traversal.
-  for (MachineBasicBlock &MBB : MF) {
-    for (MachineInstr &MI : MBB) {
-      if (LocalizedInstrs.count(&MI) || !shouldLocalize(MI))
-        continue;
-      LLVM_DEBUG(dbgs() << "Should localize: " << MI);
-      assert(MI.getDesc().getNumDefs() == 1 &&
-             "More than one definition not supported yet");
-      unsigned Reg = MI.getOperand(0).getReg();
-      // Check if all the users of MI are local.
-      // We are going to invalidation the list of use operands, so we
-      // can't use range iterator.
-      for (auto MOIt = MRI->use_begin(Reg), MOItEnd = MRI->use_end();
-           MOIt != MOItEnd;) {
-        MachineOperand &MOUse = *MOIt++;
-        // Check if the use is already local.
-        MachineBasicBlock *InsertMBB;
-        LLVM_DEBUG(MachineInstr &MIUse = *MOUse.getParent();
-                   dbgs() << "Checking use: " << MIUse
-                          << " #Opd: " << MIUse.getOperandNo(&MOUse) << '\n');
-        if (isLocalUse(MOUse, MI, InsertMBB))
-          continue;
-        LLVM_DEBUG(dbgs() << "Fixing non-local use\n");
-        Changed = true;
-        auto MBBAndReg = std::make_pair(InsertMBB, Reg);
-        auto NewVRegIt = MBBWithLocalDef.find(MBBAndReg);
-        if (NewVRegIt == MBBWithLocalDef.end()) {
-          // Create the localized instruction.
-          MachineInstr *LocalizedMI = MF.CloneMachineInstr(&MI);
-          LocalizedInstrs.insert(LocalizedMI);
-          // Don't try to be smart for the insertion point.
-          // There is no guarantee that the first seen use is the first
-          // use in the block.
-          InsertMBB->insert(InsertMBB->SkipPHIsAndLabels(InsertMBB->begin()),
-                            LocalizedMI);
+  // Keep track of the instructions we localized. We'll do a second pass of
+  // intra-block localization to further reduce live ranges.
+  SmallPtrSet<MachineInstr *, 16> LocalizedInstrs;
 
-          // Set a new register for the definition.
-          unsigned NewReg =
-              MRI->createGenericVirtualRegister(MRI->getType(Reg));
-          MRI->setRegClassOrRegBank(NewReg, MRI->getRegClassOrRegBank(Reg));
-          LocalizedMI->getOperand(0).setReg(NewReg);
-          NewVRegIt =
-              MBBWithLocalDef.insert(std::make_pair(MBBAndReg, NewReg)).first;
-          LLVM_DEBUG(dbgs() << "Inserted: " << *LocalizedMI);
-        }
-        LLVM_DEBUG(dbgs() << "Update use with: " << printReg(NewVRegIt->second)
-                          << '\n');
-        // Update the user reg.
-        MOUse.setReg(NewVRegIt->second);
-      }
-    }
-  }
-  return Changed;
+  bool Changed = localizeInterBlock(MF, LocalizedInstrs);
+  return Changed |= localizeIntraBlock(LocalizedInstrs);
 }
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 67c3707ec5c43..10c15a139b4ce 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -165,6 +165,10 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
     return false;
   }
 
+  unsigned getGISelRematGlobalCost() const {
+    return 2;
+  }
+
   bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                              TTI::ReductionFlags Flags) const;
 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll b/llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll
new file mode 100644
index 0000000000000..a6c6326fcddae
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll
@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -o - -verify-machineinstrs -O0 -global-isel -stop-after=localizer %s | FileCheck %s
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-ios5.0.0"
+
+@var1 = common global i32 0, align 4
+@var2 = common global i32 0, align 4
+@var3 = common global i32 0, align 4
+@var4 = common global i32 0, align 4
+
+; This is an ll test instead of MIR because -run-pass doesn't seem to support
+; initializing the target TTI, which we need for this test.
+
+; Some of the instructions in the entry block are dead after this pass, so they
+; don't strictly need to be checked for.
+
+define i32 @foo() {
+  ; CHECK-LABEL: name: foo
+  ; CHECK: bb.1.entry:
+  ; CHECK:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
+  ; CHECK:   [[GV:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var1
+  ; CHECK:   [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 1
+  ; CHECK:   [[C1:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 2
+  ; CHECK:   [[GV1:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var2
+  ; CHECK:   [[C2:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 3
+  ; CHECK:   [[GV2:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var3
+  ; CHECK:   [[C3:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0
+  ; CHECK:   [[LOAD:%[0-9]+]]:gpr(s32) = G_LOAD [[GV]](p0) :: (load 4 from @var1)
+  ; CHECK:   [[ICMP:%[0-9]+]]:gpr(s32) = G_ICMP intpred(eq), [[LOAD]](s32), [[C]]
+  ; CHECK:   [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[ICMP]](s32)
+  ; CHECK:   G_BRCOND [[TRUNC]](s1), %bb.2
+  ; CHECK:   G_BR %bb.3
+  ; CHECK: bb.2.if.then:
+  ; CHECK:   successors: %bb.3(0x80000000)
+  ; CHECK:   [[GV3:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var2
+  ; CHECK:   [[C4:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 2
+  ; CHECK:   G_STORE [[C4]](s32), [[GV3]](p0) :: (store 4 into @var2)
+  ; CHECK:   [[C5:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 3
+  ; CHECK:   G_STORE [[C5]](s32), [[GV]](p0) :: (store 4 into @var1)
+  ; CHECK:   [[GV4:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var3
+  ; CHECK:   G_STORE [[C4]](s32), [[GV4]](p0) :: (store 4 into @var3)
+  ; CHECK:   G_STORE [[C5]](s32), [[GV]](p0) :: (store 4 into @var1)
+  ; CHECK: bb.3.if.end:
+  ; CHECK:   [[C6:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0
+  ; CHECK:   $w0 = COPY [[C6]](s32)
+  ; CHECK:   RET_ReallyLR implicit $w0
+entry:
+  %0 = load i32, i32* @var1, align 4
+  %cmp = icmp eq i32 %0, 1
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  store i32 2, i32* @var2, align 4
+  store i32 3, i32* @var1, align 4
+  store i32 2, i32* @var3, align 4
+  store i32 3, i32* @var1, align 4
+  br label %if.end
+
+if.end:
+  ret i32 0
+}
+
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/localizer.mir b/llvm/test/CodeGen/AArch64/GlobalISel/localizer.mir
index e4648a868f222..dad2240aa84c6 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/localizer.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/localizer.mir
@@ -15,6 +15,29 @@
   define void @float_non_local_phi_use_followed_by_use_fi() { ret void }
   define void @non_local_phi() { ret void }
   define void @non_local_label() { ret void }
+
+  @var1 = common global i32 0, align 4
+  @var2 = common global i32 0, align 4
+  @var3 = common global i32 0, align 4
+  @var4 = common global i32 0, align 4
+
+  define i32 @intrablock_with_globalvalue() {
+  entry:
+    %0 = load i32, i32* @var1, align 4
+    %cmp = icmp eq i32 %0, 1
+    br i1 %cmp, label %if.then, label %if.end
+
+  if.then:
+    store i32 2, i32* @var2, align 4
+    store i32 3, i32* @var1, align 4
+    store i32 2, i32* @var3, align 4
+    store i32 3, i32* @var1, align 4
+    br label %if.end
+
+  if.end:
+    ret i32 0
+  }
+
 
 ...
 ---
@@ -301,3 +324,67 @@ body: |
     %2:fpr(s32) = G_FADD %0, %1
     G_BR %bb.1
 ...
+---
+name: intrablock_with_globalvalue
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: intrablock_with_globalvalue
+  ; CHECK: bb.0.entry:
+  ; CHECK:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK:   [[GV:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var1
+  ; CHECK:   [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 1
+  ; CHECK:   [[C1:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 2
+  ; CHECK:   [[GV1:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var2
+  ; CHECK:   [[C2:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 3
+  ; CHECK:   [[GV2:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var3
+  ; CHECK:   [[C3:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0
+  ; CHECK:   [[LOAD:%[0-9]+]]:gpr(s32) = G_LOAD [[GV]](p0) :: (load 4 from @var1)
+  ; CHECK:   [[ICMP:%[0-9]+]]:gpr(s32) = G_ICMP intpred(eq), [[LOAD]](s32), [[C]]
+  ; CHECK:   [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[ICMP]](s32)
+  ; CHECK:   G_BRCOND [[TRUNC]](s1), %bb.1
+  ; CHECK:   G_BR %bb.2
+  ; CHECK: bb.1.if.then:
+  ; CHECK:   successors: %bb.2(0x80000000)
+  ; CHECK:   [[GV3:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var2
+  ; CHECK:   [[C4:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 2
+  ; CHECK:   G_STORE [[C4]](s32), [[GV3]](p0) :: (store 4 into @var2)
+  ; CHECK:   [[GV4:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var1
+  ; CHECK:   [[C5:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 3
+  ; CHECK:   G_STORE [[C5]](s32), [[GV4]](p0) :: (store 4 into @var1)
+  ; CHECK:   [[GV5:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var3
+  ; CHECK:   G_STORE [[C4]](s32), [[GV5]](p0) :: (store 4 into @var3)
+  ; CHECK:   G_STORE [[C5]](s32), [[GV4]](p0) :: (store 4 into @var1)
+  ; CHECK: bb.2.if.end:
+  ; CHECK:   [[C6:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0
+  ; CHECK:   $w0 = COPY [[C6]](s32)
+  ; CHECK:   RET_ReallyLR implicit $w0
+
+  ; Some of these instructions are dead. We're checking that the other instructions are
+  ; sunk immediately before their first user in the if.then block or as close as possible.
+  bb.1.entry:
+    %1:gpr(p0) = G_GLOBAL_VALUE @var1
+    %2:gpr(s32) = G_CONSTANT i32 1
+    %4:gpr(s32) = G_CONSTANT i32 2
+    %5:gpr(p0) = G_GLOBAL_VALUE @var2
+    %6:gpr(s32) = G_CONSTANT i32 3
+    %7:gpr(p0) = G_GLOBAL_VALUE @var3
+    %8:gpr(s32) = G_CONSTANT i32 0
+    %0:gpr(s32) = G_LOAD %1(p0) :: (load 4 from @var1)
+    %9:gpr(s32) = G_ICMP intpred(eq), %0(s32), %2
+    %3:gpr(s1) = G_TRUNC %9(s32)
+    G_BRCOND %3(s1), %bb.2
+    G_BR %bb.3
+
+  bb.2.if.then:
+    G_STORE %4(s32), %5(p0) :: (store 4 into @var2)
+    G_STORE %6(s32), %1(p0) :: (store 4 into @var1)
+    G_STORE %4(s32), %7(p0) :: (store 4 into @var3)
+    G_STORE %6(s32), %1(p0) :: (store 4 into @var1)
+
+  bb.3.if.end:
+    $w0 = COPY %8(s32)
+    RET_ReallyLR implicit $w0
+
+...