Skip to content

Commit 7f7f4fe

Browse files
authored
Revert "[AArch64] Optimize when storing symmetry constants" (#105474)
Reverts #93717, which introduced a stack-use-after-return, detected by buildbot: https://lab.llvm.org/buildbot/#/builders/24/builds/1003
1 parent 86f2ec0 commit 7f7f4fe

File tree

3 files changed

+0
-486
lines changed

3 files changed

+0
-486
lines changed

llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp

Lines changed: 0 additions & 178 deletions
Original file line numberDiff line numberDiff line change
@@ -226,14 +226,6 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
226226
// Find and merge an index ldr/st instruction into a base ld/st instruction.
227227
bool tryToMergeIndexLdSt(MachineBasicBlock::iterator &MBBI, int Scale);
228228

229-
// Finds and collapses loads of symmetric constant value.
230-
bool tryFoldSymmetryConstantLoad(MachineBasicBlock::iterator &I,
231-
unsigned Limit);
232-
MachineBasicBlock::iterator
233-
doFoldSymmetryConstantLoad(MachineInstr &MI,
234-
SmallVectorImpl<MachineBasicBlock::iterator> &MIs,
235-
int UpperLoadIdx, int Accumulated);
236-
237229
bool optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt);
238230

239231
bool runOnMachineFunction(MachineFunction &Fn) override;
@@ -2451,155 +2443,6 @@ AArch64LoadStoreOpt::findMatchingConstOffsetBackward(
24512443
return E;
24522444
}
24532445

2454-
static bool isSymmetricLoadCandidate(MachineInstr &MI, Register BaseReg) {
2455-
auto MatchBaseReg = [&](unsigned Count) {
2456-
for (unsigned I = 0; I < Count; I++) {
2457-
auto OpI = MI.getOperand(I);
2458-
if (OpI.isReg() && OpI.getReg() != BaseReg)
2459-
return false;
2460-
}
2461-
return true;
2462-
};
2463-
2464-
unsigned Opc = MI.getOpcode();
2465-
switch (Opc) {
2466-
default:
2467-
return false;
2468-
case AArch64::MOVZXi:
2469-
return MatchBaseReg(1);
2470-
case AArch64::MOVKXi:
2471-
return MatchBaseReg(2);
2472-
case AArch64::ORRXrs:
2473-
MachineOperand &Imm = MI.getOperand(3);
2474-
// The fourth operand of ORR must be 32, which means a
2475-
// 32-bit symmetric constant load.
2476-
// ex) renamable $x8 = ORRXrs $x8, $x8, 32
2477-
if (MatchBaseReg(3) && Imm.isImm() && Imm.getImm() == 32)
2478-
return true;
2479-
}
2480-
2481-
return false;
2482-
}
2483-
2484-
MachineBasicBlock::iterator AArch64LoadStoreOpt::doFoldSymmetryConstantLoad(
2485-
MachineInstr &MI, SmallVectorImpl<MachineBasicBlock::iterator> &MIs,
2486-
int UpperLoadIdx, int Accumulated) {
2487-
MachineBasicBlock::iterator I = MI.getIterator();
2488-
MachineBasicBlock::iterator E = I->getParent()->end();
2489-
MachineBasicBlock::iterator NextI = next_nodbg(I, E);
2490-
MachineBasicBlock *MBB = MI.getParent();
2491-
2492-
if (!UpperLoadIdx) {
2493-
// ORR ensures that previous instructions load lower 32-bit constants.
2494-
// Remove ORR only.
2495-
(*MIs.begin())->eraseFromParent();
2496-
} else {
2497-
// We need to remove the MOVs for the upper 32 bits because we know these instrs
2498-
// are part of a symmetric constant.
2499-
int Index = 0;
2500-
for (auto MI = MIs.begin(); Index < UpperLoadIdx; ++MI, Index++) {
2501-
(*MI)->eraseFromParent();
2502-
}
2503-
}
2504-
2505-
Register BaseReg = getLdStRegOp(MI).getReg();
2506-
const MachineOperand MO = AArch64InstrInfo::getLdStBaseOp(MI);
2507-
Register DstRegW = TRI->getSubReg(BaseReg, AArch64::sub_32);
2508-
unsigned DstRegState = getRegState(MI.getOperand(0));
2509-
int Offset = AArch64InstrInfo::getLdStOffsetOp(MI).getImm();
2510-
BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(AArch64::STPWi))
2511-
.addReg(DstRegW, DstRegState)
2512-
.addReg(DstRegW, DstRegState)
2513-
.addReg(MO.getReg(), getRegState(MO))
2514-
.addImm(Offset * 2)
2515-
.setMemRefs(MI.memoperands())
2516-
.setMIFlags(MI.getFlags());
2517-
I->eraseFromParent();
2518-
return NextI;
2519-
}
2520-
2521-
bool AArch64LoadStoreOpt::tryFoldSymmetryConstantLoad(
2522-
MachineBasicBlock::iterator &I, unsigned Limit) {
2523-
MachineInstr &MI = *I;
2524-
if (MI.getOpcode() != AArch64::STRXui)
2525-
return false;
2526-
2527-
MachineBasicBlock::iterator MBBI = I;
2528-
MachineBasicBlock::iterator B = I->getParent()->begin();
2529-
if (MBBI == B)
2530-
return false;
2531-
2532-
TypeSize Scale(0U, false), Width(0U, false);
2533-
int64_t MinOffset, MaxOffset;
2534-
if (!AArch64InstrInfo::getMemOpInfo(AArch64::STPWi, Scale, Width, MinOffset,
2535-
MaxOffset))
2536-
return false;
2537-
2538-
// We replace the STRX instruction, which stores 64 bits, with the STPW
2539-
// instruction, which stores two consecutive 32 bits. Therefore, we compare
2540-
// the offset, multiplied by two, against the offset range.
2541-
int Offset = AArch64InstrInfo::getLdStOffsetOp(MI).getImm();
2542-
if (Offset * 2 < MinOffset || Offset * 2 > MaxOffset)
2543-
return false;
2544-
2545-
Register BaseReg = getLdStRegOp(MI).getReg();
2546-
unsigned Count = 0, UpperLoadIdx = 0;
2547-
uint64_t Accumulated = 0, Mask = 0xFFFFUL;
2548-
bool hasORR = false, Found = false;
2549-
SmallVector<MachineBasicBlock::iterator> MIs;
2550-
ModifiedRegUnits.clear();
2551-
UsedRegUnits.clear();
2552-
do {
2553-
MBBI = prev_nodbg(MBBI, B);
2554-
MachineInstr &MI = *MBBI;
2555-
if (!MI.isTransient())
2556-
++Count;
2557-
if (!isSymmetricLoadCandidate(MI, BaseReg)) {
2558-
LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
2559-
TRI);
2560-
if (!ModifiedRegUnits.available(BaseReg) ||
2561-
!UsedRegUnits.available(BaseReg))
2562-
return false;
2563-
continue;
2564-
}
2565-
2566-
unsigned Opc = MI.getOpcode();
2567-
if (Opc == AArch64::ORRXrs) {
2568-
hasORR = true;
2569-
MIs.push_back(MBBI);
2570-
continue;
2571-
}
2572-
unsigned ValueOrder = Opc == AArch64::MOVZXi ? 1 : 2;
2573-
MachineOperand Value = MI.getOperand(ValueOrder);
2574-
MachineOperand Shift = MI.getOperand(ValueOrder + 1);
2575-
if (!Value.isImm() || !Shift.isImm())
2576-
return false;
2577-
2578-
uint64_t IValue = Value.getImm();
2579-
uint64_t IShift = Shift.getImm();
2580-
uint64_t Adder = IValue << IShift;
2581-
MIs.push_back(MBBI);
2582-
if (Adder >> 32)
2583-
UpperLoadIdx = MIs.size();
2584-
2585-
Accumulated -= Accumulated & (Mask << IShift);
2586-
Accumulated += Adder;
2587-
if (Accumulated != 0 &&
2588-
(((Accumulated >> 32) == (Accumulated & 0xffffffffULL)) ||
2589-
(hasORR && (Accumulated >> 32 == 0)))) {
2590-
Found = true;
2591-
break;
2592-
}
2593-
} while (MBBI != B && Count < Limit);
2594-
2595-
if (Found) {
2596-
I = doFoldSymmetryConstantLoad(MI, MIs, UpperLoadIdx, Accumulated);
2597-
return true;
2598-
}
2599-
2600-
return false;
2601-
}
2602-
26032446
bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
26042447
MachineBasicBlock::iterator &MBBI) {
26052448
MachineInstr &MI = *MBBI;
@@ -2910,27 +2753,6 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
29102753
++MBBI;
29112754
}
29122755

2913-
// We have an opportunity to optimize the `STRXui` instruction, which stores
2914-
// the same 32-bit value twice. The `STPWi` instruction allows
2915-
// us to materialize the 32-bit value only once.
2916-
// Considering :
2917-
// renamable $x8 = MOVZXi 49370, 0
2918-
// renamable $x8 = MOVKXi $x8, 320, 16
2919-
// renamable $x8 = ORRXrs $x8, $x8, 32
2920-
// STRXui killed renamable $x8, killed renamable $x0, 0
2921-
// Transform :
2922-
// $w8 = MOVZWi 49370, 0
2923-
// $w8 = MOVKWi $w8, 320, 16
2924-
// STPWi killed renamable $w8, killed renamable $w8, killed renamable $x0, 0
2925-
for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
2926-
MBBI != E;) {
2927-
if (isMergeableLdStUpdate(*MBBI) &&
2928-
tryFoldSymmetryConstantLoad(MBBI, UpdateLimit))
2929-
Modified = true;
2930-
else
2931-
++MBBI;
2932-
}
2933-
29342756
return Modified;
29352757
}
29362758

llvm/test/CodeGen/AArch64/movimm-expand-ldst.ll

Lines changed: 0 additions & 180 deletions
Original file line numberDiff line numberDiff line change
@@ -93,183 +93,3 @@ define i64 @testuu0xf555f555f555f555() {
9393
; CHECK-NEXT: ret
9494
ret i64 u0xf555f555f555f555
9595
}
96-
97-
define void @test_store_0x1234567812345678(ptr %x) {
98-
; CHECK-LABEL: test_store_0x1234567812345678:
99-
; CHECK: // %bb.0:
100-
; CHECK-NEXT: mov x8, #22136 // =0x5678
101-
; CHECK-NEXT: movk x8, #4660, lsl #16
102-
; CHECK-NEXT: stp w8, w8, [x0]
103-
; CHECK-NEXT: ret
104-
store i64 u0x1234567812345678, ptr %x
105-
ret void
106-
}
107-
108-
define void @test_store_0xff3456ffff3456ff(ptr %x) {
109-
; CHECK-LABEL: test_store_0xff3456ffff3456ff:
110-
; CHECK: // %bb.0:
111-
; CHECK-NEXT: mov x8, #22271 // =0x56ff
112-
; CHECK-NEXT: movk x8, #65332, lsl #16
113-
; CHECK-NEXT: stp w8, w8, [x0]
114-
; CHECK-NEXT: ret
115-
store i64 u0xff3456ffff3456ff, ptr %x
116-
ret void
117-
}
118-
119-
define void @test_store_0x00345600345600(ptr %x) {
120-
; CHECK-LABEL: test_store_0x00345600345600:
121-
; CHECK: // %bb.0:
122-
; CHECK-NEXT: mov x8, #22016 // =0x5600
123-
; CHECK-NEXT: movk x8, #52, lsl #16
124-
; CHECK-NEXT: movk x8, #13398, lsl #32
125-
; CHECK-NEXT: str x8, [x0]
126-
; CHECK-NEXT: ret
127-
store i64 u0x00345600345600, ptr %x
128-
ret void
129-
}
130-
131-
define void @test_store_0x5555555555555555(ptr %x) {
132-
; CHECK-LABEL: test_store_0x5555555555555555:
133-
; CHECK: // %bb.0:
134-
; CHECK-NEXT: mov x8, #6148914691236517205 // =0x5555555555555555
135-
; CHECK-NEXT: str x8, [x0]
136-
; CHECK-NEXT: ret
137-
store i64 u0x5555555555555555, ptr %x
138-
ret void
139-
}
140-
141-
define void @test_store_0x5055555550555555(ptr %x) {
142-
; CHECK-LABEL: test_store_0x5055555550555555:
143-
; CHECK: // %bb.0:
144-
; CHECK-NEXT: mov x8, #6148914691236517205 // =0x5555555555555555
145-
; CHECK-NEXT: and x8, x8, #0xf0fffffff0ffffff
146-
; CHECK-NEXT: str x8, [x0]
147-
; CHECK-NEXT: ret
148-
store i64 u0x5055555550555555, ptr %x
149-
ret void
150-
}
151-
152-
define void @test_store_0x0000555555555555(ptr %x) {
153-
; CHECK-LABEL: test_store_0x0000555555555555:
154-
; CHECK: // %bb.0:
155-
; CHECK-NEXT: mov x8, #6148914691236517205 // =0x5555555555555555
156-
; CHECK-NEXT: movk x8, #0, lsl #48
157-
; CHECK-NEXT: str x8, [x0]
158-
; CHECK-NEXT: ret
159-
store i64 u0x0000555555555555, ptr %x
160-
ret void
161-
}
162-
163-
define void @test_store_0x0000555500005555(ptr %x) {
164-
; CHECK-LABEL: test_store_0x0000555500005555:
165-
; CHECK: // %bb.0:
166-
; CHECK-NEXT: mov x8, #21845 // =0x5555
167-
; CHECK-NEXT: stp w8, w8, [x0]
168-
; CHECK-NEXT: ret
169-
store i64 u0x0000555500005555, ptr %x
170-
ret void
171-
}
172-
173-
define void @test_store_0x5555000055550000(ptr %x) {
174-
; CHECK-LABEL: test_store_0x5555000055550000:
175-
; CHECK: // %bb.0:
176-
; CHECK-NEXT: mov x8, #1431633920 // =0x55550000
177-
; CHECK-NEXT: stp w8, w8, [x0]
178-
; CHECK-NEXT: ret
179-
store i64 u0x5555000055550000, ptr %x
180-
ret void
181-
}
182-
183-
define void @test_store_u0xffff5555ffff5555(ptr %x) {
184-
; CHECK-LABEL: test_store_u0xffff5555ffff5555:
185-
; CHECK: // %bb.0:
186-
; CHECK-NEXT: mov x8, #-43691 // =0xffffffffffff5555
187-
; CHECK-NEXT: movk x8, #21845, lsl #32
188-
; CHECK-NEXT: str x8, [x0]
189-
; CHECK-NEXT: ret
190-
store i64 u0xffff5555ffff5555, ptr %x
191-
ret void
192-
}
193-
194-
define void @test_store_0x8888ffff8888ffff(ptr %x) {
195-
; CHECK-LABEL: test_store_0x8888ffff8888ffff:
196-
; CHECK: // %bb.0:
197-
; CHECK-NEXT: mov x8, #-2004287489 // =0xffffffff8888ffff
198-
; CHECK-NEXT: movk x8, #34952, lsl #48
199-
; CHECK-NEXT: str x8, [x0]
200-
; CHECK-NEXT: ret
201-
store i64 u0x8888ffff8888ffff, ptr %x
202-
ret void
203-
}
204-
205-
define void @test_store_uu0xfffff555f555f555(ptr %x) {
206-
; CHECK-LABEL: test_store_uu0xfffff555f555f555:
207-
; CHECK: // %bb.0:
208-
; CHECK-NEXT: mov x8, #-2731 // =0xfffffffffffff555
209-
; CHECK-NEXT: movk x8, #62805, lsl #16
210-
; CHECK-NEXT: movk x8, #62805, lsl #32
211-
; CHECK-NEXT: str x8, [x0]
212-
; CHECK-NEXT: ret
213-
store i64 u0xfffff555f555f555, ptr %x
214-
ret void
215-
}
216-
217-
define void @test_store_uu0xf555f555f555f555(ptr %x) {
218-
; CHECK-LABEL: test_store_uu0xf555f555f555f555:
219-
; CHECK: // %bb.0:
220-
; CHECK-NEXT: mov x8, #6148914691236517205 // =0x5555555555555555
221-
; CHECK-NEXT: orr x8, x8, #0xe001e001e001e001
222-
; CHECK-NEXT: str x8, [x0]
223-
; CHECK-NEXT: ret
224-
store i64 u0xf555f555f555f555, ptr %x
225-
ret void
226-
}
227-
228-
define void @test_store_0x1234567812345678_offset_range(ptr %x) {
229-
; CHECK-LABEL: test_store_0x1234567812345678_offset_range:
230-
; CHECK: // %bb.0:
231-
; CHECK-NEXT: mov x8, #22136 // =0x5678
232-
; CHECK-NEXT: movk x8, #4660, lsl #16
233-
; CHECK-NEXT: stp w8, w8, [x0, #32]
234-
; CHECK-NEXT: ret
235-
%g = getelementptr i64, ptr %x, i64 4
236-
store i64 u0x1234567812345678, ptr %g
237-
ret void
238-
}
239-
240-
define void @test_store_0x1234567812345678_offset_min(ptr %x) {
241-
; CHECK-LABEL: test_store_0x1234567812345678_offset_min:
242-
; CHECK: // %bb.0:
243-
; CHECK-NEXT: mov x8, #22136 // =0x5678
244-
; CHECK-NEXT: movk x8, #4660, lsl #16
245-
; CHECK-NEXT: stp w8, w8, [x0]
246-
; CHECK-NEXT: ret
247-
%g = getelementptr i8, ptr %x, i32 0
248-
store i64 u0x1234567812345678, ptr %g
249-
ret void
250-
}
251-
252-
define void @test_store_0x1234567812345678_offset_max(ptr %x) {
253-
; CHECK-LABEL: test_store_0x1234567812345678_offset_max:
254-
; CHECK: // %bb.0:
255-
; CHECK-NEXT: mov x8, #22136 // =0x5678
256-
; CHECK-NEXT: movk x8, #4660, lsl #16
257-
; CHECK-NEXT: stp w8, w8, [x0, #248]
258-
; CHECK-NEXT: ret
259-
%g = getelementptr i8, ptr %x, i32 248
260-
store i64 u0x1234567812345678, ptr %g
261-
ret void
262-
}
263-
264-
define void @test_store_0x1234567812345678_offset_max_over(ptr %x) {
265-
; CHECK-LABEL: test_store_0x1234567812345678_offset_max_over:
266-
; CHECK: // %bb.0:
267-
; CHECK-NEXT: mov x8, #22136 // =0x5678
268-
; CHECK-NEXT: movk x8, #4660, lsl #16
269-
; CHECK-NEXT: orr x8, x8, x8, lsl #32
270-
; CHECK-NEXT: stur x8, [x0, #249]
271-
; CHECK-NEXT: ret
272-
%g = getelementptr i8, ptr %x, i32 249
273-
store i64 u0x1234567812345678, ptr %g
274-
ret void
275-
}

0 commit comments

Comments
 (0)