Skip to content

Commit f677ea5

Browse files
committed
Remove only the unnecessary instructions instead of rebuilding the load sequence
1 parent 38ab6d8 commit f677ea5

File tree

3 files changed

+35
-61
lines changed

3 files changed

+35
-61
lines changed

llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp

Lines changed: 24 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,7 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
207207
MachineBasicBlock::iterator
208208
doFoldSymmetryConstantLoad(MachineInstr &MI,
209209
SmallVectorImpl<MachineBasicBlock::iterator> &MIs,
210-
int SuccIndex, bool hasORR, int Accumulated);
210+
int UpperLoadIdx, int Accumulated);
211211

212212
bool optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt);
213213

@@ -2260,7 +2260,7 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
22602260
return E;
22612261
}
22622262

2263-
static bool isSymmetric(MachineInstr &MI, Register BaseReg) {
2263+
static bool isSymmetricLoadCandidate(MachineInstr &MI, Register BaseReg) {
22642264
auto MatchBaseReg = [&](unsigned Count) {
22652265
for (unsigned I = 0; I < Count; I++) {
22662266
auto OpI = MI.getOperand(I);
@@ -2292,56 +2292,28 @@ static bool isSymmetric(MachineInstr &MI, Register BaseReg) {
22922292

22932293
MachineBasicBlock::iterator AArch64LoadStoreOpt::doFoldSymmetryConstantLoad(
22942294
MachineInstr &MI, SmallVectorImpl<MachineBasicBlock::iterator> &MIs,
2295-
int SuccIndex, bool hasORR, int Accumulated) {
2295+
int UpperLoadIdx, int Accumulated) {
22962296
MachineBasicBlock::iterator I = MI.getIterator();
22972297
MachineBasicBlock::iterator E = I->getParent()->end();
22982298
MachineBasicBlock::iterator NextI = next_nodbg(I, E);
2299-
MachineBasicBlock::iterator FirstMovI;
23002299
MachineBasicBlock *MBB = MI.getParent();
2301-
uint64_t Mask = 0xFFFFUL;
2302-
Register DstRegW;
23032300

2304-
if (hasORR) {
2301+
if (!UpperLoadIdx) {
2302+
// ORR ensures that previous instructions load lower 32-bit constants.
2303+
// Remove ORR only.
23052304
(*MIs.begin())->eraseFromParent();
23062305
} else {
2306+
// Remove the MOVs that load the upper 32 bits, because we know these
2307+
// instructions are part of a symmetric constant.
23072308
int Index = 0;
2308-
for (auto MI = MIs.begin(), E = MIs.end(); MI != E; ++MI, Index++) {
2309-
if (Index == SuccIndex - 1) {
2310-
FirstMovI = *MI;
2311-
break;
2312-
}
2309+
for (auto MI = MIs.begin(); Index < UpperLoadIdx; ++MI, Index++) {
23132310
(*MI)->eraseFromParent();
23142311
}
2315-
DstRegW =
2316-
TRI->getSubReg(FirstMovI->getOperand(0).getReg(), AArch64::sub_32);
2317-
2318-
int Lower = Accumulated & Mask;
2319-
if (Lower) {
2320-
BuildMI(*MBB, FirstMovI, FirstMovI->getDebugLoc(),
2321-
TII->get(AArch64::MOVZWi), DstRegW)
2322-
.addImm(Lower)
2323-
.addImm(0);
2324-
Lower = (Accumulated >> 16) & Mask;
2325-
if (Lower) {
2326-
BuildMI(*MBB, FirstMovI, FirstMovI->getDebugLoc(),
2327-
TII->get(AArch64::MOVKWi), DstRegW)
2328-
.addUse(DstRegW)
2329-
.addImm(Lower)
2330-
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 16));
2331-
}
2332-
} else {
2333-
Lower = Accumulated >> 16 & Mask;
2334-
BuildMI(*MBB, FirstMovI, FirstMovI->getDebugLoc(),
2335-
TII->get(AArch64::MOVZWi), DstRegW)
2336-
.addImm(Lower)
2337-
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 16));
2338-
}
2339-
FirstMovI->eraseFromParent();
23402312
}
23412313

23422314
Register BaseReg = getLdStRegOp(MI).getReg();
23432315
const MachineOperand MO = AArch64InstrInfo::getLdStBaseOp(MI);
2344-
DstRegW = TRI->getSubReg(BaseReg, AArch64::sub_32);
2316+
Register DstRegW = TRI->getSubReg(BaseReg, AArch64::sub_32);
23452317
unsigned DstRegState = getRegState(MI.getOperand(0));
23462318
BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(AArch64::STPWi))
23472319
.addReg(DstRegW, DstRegState)
@@ -2351,7 +2323,6 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::doFoldSymmetryConstantLoad(
23512323
.setMemRefs(MI.memoperands())
23522324
.setMIFlags(MI.getFlags());
23532325
I->eraseFromParent();
2354-
23552326
return NextI;
23562327
}
23572328

@@ -2367,19 +2338,18 @@ bool AArch64LoadStoreOpt::tryFoldSymmetryConstantLoad(
23672338
return false;
23682339

23692340
Register BaseReg = getLdStRegOp(MI).getReg();
2370-
unsigned Count = 0, SuccIndex = 0;
2371-
bool hasORR = false;
2341+
unsigned Count = 0, UpperLoadIdx = 0;
2342+
uint64_t Accumulated = 0, Mask = 0xFFFFUL;
2343+
bool hasORR = false, Found = false;
23722344
SmallVector<MachineBasicBlock::iterator> MIs;
23732345
ModifiedRegUnits.clear();
23742346
UsedRegUnits.clear();
2375-
2376-
uint64_t Accumulated = 0, Mask = 0xFFFFUL;
23772347
do {
23782348
MBBI = prev_nodbg(MBBI, B);
23792349
MachineInstr &MI = *MBBI;
23802350
if (!MI.isTransient())
23812351
++Count;
2382-
if (!isSymmetric(MI, BaseReg)) {
2352+
if (!isSymmetricLoadCandidate(MI, BaseReg)) {
23832353
LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
23842354
TRI);
23852355
if (!ModifiedRegUnits.available(BaseReg) ||
@@ -2402,19 +2372,23 @@ bool AArch64LoadStoreOpt::tryFoldSymmetryConstantLoad(
24022372

24032373
uint64_t IValue = Value.getImm();
24042374
uint64_t IShift = Shift.getImm();
2405-
Accumulated -= (Accumulated & (Mask << IShift));
2406-
Accumulated += (IValue << IShift);
2375+
uint64_t Adder = IValue << IShift;
24072376
MIs.push_back(MBBI);
2377+
if (Adder >> 32)
2378+
UpperLoadIdx = MIs.size();
2379+
2380+
Accumulated -= Accumulated & (Mask << IShift);
2381+
Accumulated += Adder;
24082382
if (Accumulated != 0 &&
24092383
(((Accumulated >> 32) == (Accumulated & 0xffffffffULL)) ||
2410-
(hasORR && Accumulated >> 32 == 0))) {
2411-
SuccIndex = MIs.size();
2384+
(hasORR && (Accumulated >> 32 == 0)))) {
2385+
Found = true;
24122386
break;
24132387
}
24142388
} while (MBBI != B && Count < Limit);
24152389

2416-
if (SuccIndex) {
2417-
I = doFoldSymmetryConstantLoad(MI, MIs, SuccIndex, hasORR, Accumulated);
2390+
if (Found) {
2391+
I = doFoldSymmetryConstantLoad(MI, MIs, UpperLoadIdx, Accumulated);
24182392
return true;
24192393
}
24202394

llvm/test/CodeGen/AArch64/movimm-expand-ldst.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -97,8 +97,8 @@ define i64 @testuu0xf555f555f555f555() {
9797
define void @test_store_0x1234567812345678(ptr %x) {
9898
; CHECK-LABEL: test_store_0x1234567812345678:
9999
; CHECK: // %bb.0:
100-
; CHECK-NEXT: mov w8, #22136 // =0x5678
101-
; CHECK-NEXT: movk w8, #4660, lsl #16
100+
; CHECK-NEXT: mov x8, #22136 // =0x5678
101+
; CHECK-NEXT: movk x8, #4660, lsl #16
102102
; CHECK-NEXT: stp w8, w8, [x0]
103103
; CHECK-NEXT: ret
104104
store i64 u0x1234567812345678, ptr %x
@@ -108,8 +108,8 @@ define void @test_store_0x1234567812345678(ptr %x) {
108108
define void @test_store_0xff3456ffff3456ff(ptr %x) {
109109
; CHECK-LABEL: test_store_0xff3456ffff3456ff:
110110
; CHECK: // %bb.0:
111-
; CHECK-NEXT: mov w8, #22271 // =0x56ff
112-
; CHECK-NEXT: movk w8, #65332, lsl #16
111+
; CHECK-NEXT: mov x8, #22271 // =0x56ff
112+
; CHECK-NEXT: movk x8, #65332, lsl #16
113113
; CHECK-NEXT: stp w8, w8, [x0]
114114
; CHECK-NEXT: ret
115115
store i64 u0xff3456ffff3456ff, ptr %x
@@ -163,7 +163,7 @@ define void @test_store_0x0000555555555555(ptr %x) {
163163
define void @test_store_0x0000555500005555(ptr %x) {
164164
; CHECK-LABEL: test_store_0x0000555500005555:
165165
; CHECK: // %bb.0:
166-
; CHECK-NEXT: mov w8, #21845 // =0x5555
166+
; CHECK-NEXT: mov x8, #21845 // =0x5555
167167
; CHECK-NEXT: stp w8, w8, [x0]
168168
; CHECK-NEXT: ret
169169
store i64 u0x0000555500005555, ptr %x
@@ -173,7 +173,7 @@ define void @test_store_0x0000555500005555(ptr %x) {
173173
define void @test_store_0x5555000055550000(ptr %x) {
174174
; CHECK-LABEL: test_store_0x5555000055550000:
175175
; CHECK: // %bb.0:
176-
; CHECK-NEXT: mov w8, #1431633920 // =0x55550000
176+
; CHECK-NEXT: mov x8, #1431633920 // =0x55550000
177177
; CHECK-NEXT: stp w8, w8, [x0]
178178
; CHECK-NEXT: ret
179179
store i64 u0x5555000055550000, ptr %x

llvm/test/CodeGen/AArch64/movimm-expand-ldst.mir

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,8 @@ body: |
4242
; CHECK-LABEL: name: test_fold_repeating_constant_store
4343
; CHECK: liveins: $x0
4444
; CHECK-NEXT: {{ $}}
45-
; CHECK-NEXT: $w8 = MOVZWi 49370, 0
46-
; CHECK-NEXT: $w8 = MOVKWi $w8, 320, 16
45+
; CHECK-NEXT: renamable $x8 = MOVZXi 49370, 0
46+
; CHECK-NEXT: renamable $x8 = MOVKXi $x8, 320, 16
4747
; CHECK-NEXT: STPWi killed renamable $w8, killed renamable $w8, killed renamable $x0, 0
4848
; CHECK-NEXT: RET undef $lr
4949
renamable $x8 = MOVi64imm 90284035103834330
@@ -59,8 +59,8 @@ body: |
5959
; CHECK-LABEL: name: test_fold_repeating_constant_store_neg
6060
; CHECK: liveins: $x0
6161
; CHECK-NEXT: {{ $}}
62-
; CHECK-NEXT: $w8 = MOVZWi 320, 0
63-
; CHECK-NEXT: $w8 = MOVKWi $w8, 49370, 16
62+
; CHECK-NEXT: renamable $x8 = MOVZXi 320, 0
63+
; CHECK-NEXT: renamable $x8 = MOVKXi $x8, 49370, 16
6464
; CHECK-NEXT: STPWi killed renamable $w8, killed renamable $w8, killed renamable $x0, 0
6565
; CHECK-NEXT: RET undef $lr
6666
renamable $x8 = MOVi64imm -4550323095879417536
@@ -76,7 +76,7 @@ body: |
7676
; CHECK-LABEL: name: test_fold_repeating_constant_store_16bit_unit
7777
; CHECK: liveins: $x0
7878
; CHECK-NEXT: {{ $}}
79-
; CHECK-NEXT: $w8 = MOVZWi 21845, 16
79+
; CHECK-NEXT: renamable $x8 = MOVZXi 21845, 16
8080
; CHECK-NEXT: STPWi killed renamable $w8, killed renamable $w8, killed renamable $x0, 0
8181
; CHECK-NEXT: RET undef $lr
8282
renamable $x8 = MOVZXi 21845, 16

0 commit comments

Comments
 (0)