@@ -207,7 +207,7 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
   MachineBasicBlock::iterator
   doFoldSymmetryConstantLoad(MachineInstr &MI,
                              SmallVectorImpl<MachineBasicBlock::iterator> &MIs,
-                             int SuccIndex, bool hasORR, int Accumulated);
+                             int UpperLoadIdx, int Accumulated);

   bool optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt);

@@ -2260,7 +2260,7 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
   return E;
 }

-static bool isSymmetric(MachineInstr &MI, Register BaseReg) {
+static bool isSymmetricLoadCandidate(MachineInstr &MI, Register BaseReg) {
   auto MatchBaseReg = [&](unsigned Count) {
     for (unsigned I = 0; I < Count; I++) {
       auto OpI = MI.getOperand(I);
@@ -2292,56 +2292,28 @@ static bool isSymmetric(MachineInstr &MI, Register BaseReg) {

 MachineBasicBlock::iterator AArch64LoadStoreOpt::doFoldSymmetryConstantLoad(
     MachineInstr &MI, SmallVectorImpl<MachineBasicBlock::iterator> &MIs,
-    int SuccIndex, bool hasORR, int Accumulated) {
+    int UpperLoadIdx, int Accumulated) {
   MachineBasicBlock::iterator I = MI.getIterator();
   MachineBasicBlock::iterator E = I->getParent()->end();
   MachineBasicBlock::iterator NextI = next_nodbg(I, E);
-  MachineBasicBlock::iterator FirstMovI;
   MachineBasicBlock *MBB = MI.getParent();
-  uint64_t Mask = 0xFFFFUL;
-  Register DstRegW;

-  if (hasORR) {
+  if (!UpperLoadIdx) {
+    // ORR ensures that previous instructions load lower 32-bit constants.
+    // Remove ORR only.
     (*MIs.begin())->eraseFromParent();
   } else {
+    // We need to remove the MOVs for the upper 32 bits because we know these
+    // instructions are part of the symmetric constant.
     int Index = 0;
-    for (auto MI = MIs.begin(), E = MIs.end(); MI != E; ++MI, Index++) {
-      if (Index == SuccIndex - 1) {
-        FirstMovI = *MI;
-        break;
-      }
+    for (auto MI = MIs.begin(); Index < UpperLoadIdx; ++MI, Index++) {
       (*MI)->eraseFromParent();
     }
-    DstRegW =
-        TRI->getSubReg(FirstMovI->getOperand(0).getReg(), AArch64::sub_32);
-
-    int Lower = Accumulated & Mask;
-    if (Lower) {
-      BuildMI(*MBB, FirstMovI, FirstMovI->getDebugLoc(),
-              TII->get(AArch64::MOVZWi), DstRegW)
-          .addImm(Lower)
-          .addImm(0);
-      Lower = (Accumulated >> 16) & Mask;
-      if (Lower) {
-        BuildMI(*MBB, FirstMovI, FirstMovI->getDebugLoc(),
-                TII->get(AArch64::MOVKWi), DstRegW)
-            .addUse(DstRegW)
-            .addImm(Lower)
-            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 16));
-      }
-    } else {
-      Lower = Accumulated >> 16 & Mask;
-      BuildMI(*MBB, FirstMovI, FirstMovI->getDebugLoc(),
-              TII->get(AArch64::MOVZWi), DstRegW)
-          .addImm(Lower)
-          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 16));
-    }
-    FirstMovI->eraseFromParent();
   }

   Register BaseReg = getLdStRegOp(MI).getReg();
   const MachineOperand MO = AArch64InstrInfo::getLdStBaseOp(MI);
-  DstRegW = TRI->getSubReg(BaseReg, AArch64::sub_32);
+  Register DstRegW = TRI->getSubReg(BaseReg, AArch64::sub_32);
   unsigned DstRegState = getRegState(MI.getOperand(0));
   BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(AArch64::STPWi))
       .addReg(DstRegW, DstRegState)
@@ -2351,7 +2323,6 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::doFoldSymmetryConstantLoad(
       .setMemRefs(MI.memoperands())
       .setMIFlags(MI.getFlags());
   I->eraseFromParent();
-
   return NextI;
 }

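As a side note (not part of the commit), the STPWi rewrite above is sound because a 64-bit constant whose upper and lower 32-bit halves are equal writes exactly the same bytes whether it is stored once from an X register or twice from its W sub-register. A minimal standalone sketch, assuming a little-endian layout as on AArch64 and using purely illustrative names:

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  // "Symmetric" constant: upper 32 bits equal the lower 32 bits.
  uint64_t Accumulated = 0x1234567812345678ULL;
  assert((Accumulated >> 32) == (Accumulated & 0xffffffffULL));

  unsigned char One64[8], Two32[8];
  std::memcpy(One64, &Accumulated, 8); // single 64-bit store (STRXui)

  uint32_t Half = static_cast<uint32_t>(Accumulated);
  std::memcpy(Two32, &Half, 4);     // first 32-bit store
  std::memcpy(Two32 + 4, &Half, 4); // second 32-bit store (together: STPWi)

  assert(std::memcmp(One64, Two32, 8) == 0); // identical memory contents
  return 0;
}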
@@ -2367,19 +2338,18 @@ bool AArch64LoadStoreOpt::tryFoldSymmetryConstantLoad(
     return false;

   Register BaseReg = getLdStRegOp(MI).getReg();
-  unsigned Count = 0, SuccIndex = 0;
-  bool hasORR = false;
+  unsigned Count = 0, UpperLoadIdx = 0;
+  uint64_t Accumulated = 0, Mask = 0xFFFFUL;
+  bool hasORR = false, Found = false;
   SmallVector<MachineBasicBlock::iterator> MIs;
   ModifiedRegUnits.clear();
   UsedRegUnits.clear();
-
-  uint64_t Accumulated = 0, Mask = 0xFFFFUL;
   do {
     MBBI = prev_nodbg(MBBI, B);
     MachineInstr &MI = *MBBI;
     if (!MI.isTransient())
       ++Count;
-    if (!isSymmetric(MI, BaseReg)) {
+    if (!isSymmetricLoadCandidate(MI, BaseReg)) {
       LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
                                         TRI);
       if (!ModifiedRegUnits.available(BaseReg) ||
@@ -2402,19 +2372,23 @@ bool AArch64LoadStoreOpt::tryFoldSymmetryConstantLoad(

     uint64_t IValue = Value.getImm();
     uint64_t IShift = Shift.getImm();
-    Accumulated -= (Accumulated & (Mask << IShift));
-    Accumulated += (IValue << IShift);
+    uint64_t Adder = IValue << IShift;
     MIs.push_back(MBBI);
+    if (Adder >> 32)
+      UpperLoadIdx = MIs.size();
+
+    Accumulated -= Accumulated & (Mask << IShift);
+    Accumulated += Adder;
     if (Accumulated != 0 &&
         (((Accumulated >> 32) == (Accumulated & 0xffffffffULL)) ||
-         (hasORR && Accumulated >> 32 == 0))) {
-      SuccIndex = MIs.size();
+         (hasORR && (Accumulated >> 32 == 0)))) {
+      Found = true;
       break;
     }
   } while (MBBI != B && Count < Limit);

-  if (SuccIndex) {
-    I = doFoldSymmetryConstantLoad(MI, MIs, SuccIndex, hasORR, Accumulated);
+  if (Found) {
+    I = doFoldSymmetryConstantLoad(MI, MIs, UpperLoadIdx, Accumulated);
     return true;
   }

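For readers following the accumulation logic in the do/while loop above: each MOVZ/MOVK operand pair (imm16, shift) overwrites one 16-bit lane of Accumulated, UpperLoadIdx records the most recent instruction seen during the backward walk that loads into the upper 32 bits, and Found is set once the two 32-bit halves agree. Below is a small self-contained sketch of that bookkeeping; the variable names mirror the patch, but the input sequence and setting are illustrative only:

#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

int main() {
  // (imm16, shift) pairs as they would be seen while walking backwards over
  // a MOVZ/MOVK sequence that materializes 0x1234567812345678.
  std::vector<std::pair<uint64_t, uint64_t>> Imms = {
      {0x1234, 48}, {0x5678, 32}, {0x1234, 16}, {0x5678, 0}};

  uint64_t Accumulated = 0, Mask = 0xFFFFULL;
  unsigned UpperLoadIdx = 0, Count = 0;
  bool Found = false;

  for (auto [IValue, IShift] : Imms) {
    uint64_t Adder = IValue << IShift;
    ++Count;
    if (Adder >> 32)
      UpperLoadIdx = Count; // this instruction loads part of the upper half

    Accumulated -= Accumulated & (Mask << IShift); // clear this 16-bit lane
    Accumulated += Adder;
    if (Accumulated != 0 &&
        (Accumulated >> 32) == (Accumulated & 0xffffffffULL)) {
      Found = true; // halves match: the constant is symmetric
      break;
    }
  }
  std::printf("Found=%d UpperLoadIdx=%u Accumulated=0x%llx\n", Found,
              UpperLoadIdx, (unsigned long long)Accumulated);
  return 0;
}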