Skip to content

Commit f568763

Browse files
committed
[AArch64] Fold more load.x into load.i with large offset
The list of load.x opcodes follows canFoldIntoAddrMode from D152828. This also adds support for LDRSroX, which canFoldIntoAddrMode missed.
1 parent 32878c2 commit f568763

File tree

3 files changed

+97
-54
lines changed

3 files changed

+97
-54
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4094,7 +4094,20 @@ AArch64InstrInfo::getLdStAmountOp(const MachineInstr &MI) {
40944094
switch (MI.getOpcode()) {
40954095
default:
40964096
llvm_unreachable("Unexpected opcode");
4097+
case AArch64::LDRBroX:
40974098
case AArch64::LDRBBroX:
4099+
case AArch64::LDRSBXroX:
4100+
case AArch64::LDRSBWroX:
4101+
case AArch64::LDRHroX:
4102+
case AArch64::LDRHHroX:
4103+
case AArch64::LDRSHXroX:
4104+
case AArch64::LDRSHWroX:
4105+
case AArch64::LDRWroX:
4106+
case AArch64::LDRSroX:
4107+
case AArch64::LDRSWroX:
4108+
case AArch64::LDRDroX:
4109+
case AArch64::LDRXroX:
4110+
case AArch64::LDRQroX:
40984111
return MI.getOperand(4);
40994112
}
41004113
}

llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp

Lines changed: 50 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,7 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
180180

181181
// Scan the instruction list to find a register assigned with a const
182182
// value that can be combined with the current instruction (a load or store)
183-
// using base addressing with writeback. Scan forwards.
183+
// using base addressing with writeback. Scan backwards.
184184
MachineBasicBlock::iterator
185185
findMatchingConstOffsetBackward(MachineBasicBlock::iterator I, unsigned Limit,
186186
unsigned &Offset);
@@ -221,7 +221,7 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
221221
// Find and merge base register updates before or after a ld/st instruction.
222222
bool tryToMergeLdStUpdate(MachineBasicBlock::iterator &MBBI);
223223

224-
// Find and merge a index ldr/st instructions into a base ld/st instruction.
224+
// Find and merge an index ldr/st instruction into a base ld/st instruction.
225225
bool tryToMergeIndexLdSt(MachineBasicBlock::iterator &MBBI, int Scale);
226226

227227
bool optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt);
@@ -511,8 +511,34 @@ static unsigned getBaseAddressOpcode(unsigned Opc) {
511511
switch (Opc) {
512512
default:
513513
llvm_unreachable("Opcode has no base address equivalent!");
514+
case AArch64::LDRBroX:
515+
return AArch64::LDRBui;
514516
case AArch64::LDRBBroX:
515517
return AArch64::LDRBBui;
518+
case AArch64::LDRSBXroX:
519+
return AArch64::LDRSBXui;
520+
case AArch64::LDRSBWroX:
521+
return AArch64::LDRSBWui;
522+
case AArch64::LDRHroX:
523+
return AArch64::LDRHui;
524+
case AArch64::LDRHHroX:
525+
return AArch64::LDRHHui;
526+
case AArch64::LDRSHXroX:
527+
return AArch64::LDRSHXui;
528+
case AArch64::LDRSHWroX:
529+
return AArch64::LDRSHWui;
530+
case AArch64::LDRWroX:
531+
return AArch64::LDRWui;
532+
case AArch64::LDRSroX:
533+
return AArch64::LDRSui;
534+
case AArch64::LDRSWroX:
535+
return AArch64::LDRSWui;
536+
case AArch64::LDRDroX:
537+
return AArch64::LDRDui;
538+
case AArch64::LDRXroX:
539+
return AArch64::LDRXui;
540+
case AArch64::LDRQroX:
541+
return AArch64::LDRQui;
516542
}
517543
}
518544

@@ -764,10 +790,31 @@ static bool isMergeableIndexLdSt(MachineInstr &MI, int &Scale) {
764790
default:
765791
return false;
766792
// Scaled instructions.
767-
// TODO: Add more index address loads/stores.
793+
// TODO: Add more index address stores.
794+
case AArch64::LDRBroX:
768795
case AArch64::LDRBBroX:
796+
case AArch64::LDRSBXroX:
797+
case AArch64::LDRSBWroX:
769798
Scale = 1;
770799
return true;
800+
case AArch64::LDRHroX:
801+
case AArch64::LDRHHroX:
802+
case AArch64::LDRSHXroX:
803+
case AArch64::LDRSHWroX:
804+
Scale = 2;
805+
return true;
806+
case AArch64::LDRWroX:
807+
case AArch64::LDRSroX:
808+
case AArch64::LDRSWroX:
809+
Scale = 4;
810+
return true;
811+
case AArch64::LDRDroX:
812+
case AArch64::LDRXroX:
813+
Scale = 8;
814+
return true;
815+
case AArch64::LDRQroX:
816+
Scale = 16;
817+
return true;
771818
}
772819
}
773820

llvm/test/CodeGen/AArch64/arm64-addrmode.ll

Lines changed: 34 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -239,9 +239,8 @@ define i32 @LdOffset_i8_zext32(ptr %a) {
239239
define i32 @LdOffset_i8_sext32(ptr %a) {
240240
; CHECK-LABEL: LdOffset_i8_sext32:
241241
; CHECK: // %bb.0:
242-
; CHECK-NEXT: mov w8, #56952 // =0xde78
243-
; CHECK-NEXT: movk w8, #15, lsl #16
244-
; CHECK-NEXT: ldrsb w0, [x0, x8]
242+
; CHECK-NEXT: add x8, x0, #253, lsl #12 // =1036288
243+
; CHECK-NEXT: ldrsb w0, [x8, #3704]
245244
; CHECK-NEXT: ret
246245
%arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
247246
%val = load i8, ptr %arrayidx, align 1
@@ -266,9 +265,8 @@ define i64 @LdOffset_i8_zext64(ptr %a) {
266265
define i64 @LdOffset_i8_sext64(ptr %a) {
267266
; CHECK-LABEL: LdOffset_i8_sext64:
268267
; CHECK: // %bb.0:
269-
; CHECK-NEXT: mov w8, #56952 // =0xde78
270-
; CHECK-NEXT: movk w8, #15, lsl #16
271-
; CHECK-NEXT: ldrsb x0, [x0, x8]
268+
; CHECK-NEXT: add x8, x0, #253, lsl #12 // =1036288
269+
; CHECK-NEXT: ldrsb x0, [x8, #3704]
272270
; CHECK-NEXT: ret
273271
%arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
274272
%val = load i8, ptr %arrayidx, align 1
@@ -280,9 +278,8 @@ define i64 @LdOffset_i8_sext64(ptr %a) {
280278
define i16 @LdOffset_i16(ptr %a) {
281279
; CHECK-LABEL: LdOffset_i16:
282280
; CHECK: // %bb.0:
283-
; CHECK-NEXT: mov w8, #48368 // =0xbcf0
284-
; CHECK-NEXT: movk w8, #31, lsl #16
285-
; CHECK-NEXT: ldrh w0, [x0, x8]
281+
; CHECK-NEXT: add x8, x0, #506, lsl #12 // =2072576
282+
; CHECK-NEXT: ldrh w0, [x8, #7408]
286283
; CHECK-NEXT: ret
287284
%arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
288285
%val = load i16, ptr %arrayidx, align 2
@@ -293,9 +290,8 @@ define i16 @LdOffset_i16(ptr %a) {
293290
define i32 @LdOffset_i16_zext32(ptr %a) {
294291
; CHECK-LABEL: LdOffset_i16_zext32:
295292
; CHECK: // %bb.0:
296-
; CHECK-NEXT: mov w8, #48368 // =0xbcf0
297-
; CHECK-NEXT: movk w8, #31, lsl #16
298-
; CHECK-NEXT: ldrh w0, [x0, x8]
293+
; CHECK-NEXT: add x8, x0, #506, lsl #12 // =2072576
294+
; CHECK-NEXT: ldrh w0, [x8, #7408]
299295
; CHECK-NEXT: ret
300296
%arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
301297
%val = load i16, ptr %arrayidx, align 2
@@ -307,9 +303,8 @@ define i32 @LdOffset_i16_zext32(ptr %a) {
307303
define i32 @LdOffset_i16_sext32(ptr %a) {
308304
; CHECK-LABEL: LdOffset_i16_sext32:
309305
; CHECK: // %bb.0:
310-
; CHECK-NEXT: mov w8, #48368 // =0xbcf0
311-
; CHECK-NEXT: movk w8, #31, lsl #16
312-
; CHECK-NEXT: ldrsh w0, [x0, x8]
306+
; CHECK-NEXT: add x8, x0, #506, lsl #12 // =2072576
307+
; CHECK-NEXT: ldrsh w0, [x8, #7408]
313308
; CHECK-NEXT: ret
314309
%arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
315310
%val = load i16, ptr %arrayidx, align 2
@@ -321,9 +316,8 @@ define i32 @LdOffset_i16_sext32(ptr %a) {
321316
define i64 @LdOffset_i16_zext64(ptr %a) {
322317
; CHECK-LABEL: LdOffset_i16_zext64:
323318
; CHECK: // %bb.0:
324-
; CHECK-NEXT: mov w8, #48368 // =0xbcf0
325-
; CHECK-NEXT: movk w8, #31, lsl #16
326-
; CHECK-NEXT: ldrh w0, [x0, x8]
319+
; CHECK-NEXT: add x8, x0, #506, lsl #12 // =2072576
320+
; CHECK-NEXT: ldrh w0, [x8, #7408]
327321
; CHECK-NEXT: ret
328322
%arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
329323
%val = load i16, ptr %arrayidx, align 2
@@ -335,9 +329,8 @@ define i64 @LdOffset_i16_zext64(ptr %a) {
335329
define i64 @LdOffset_i16_sext64(ptr %a) {
336330
; CHECK-LABEL: LdOffset_i16_sext64:
337331
; CHECK: // %bb.0:
338-
; CHECK-NEXT: mov w8, #48368 // =0xbcf0
339-
; CHECK-NEXT: movk w8, #31, lsl #16
340-
; CHECK-NEXT: ldrsh x0, [x0, x8]
332+
; CHECK-NEXT: add x8, x0, #506, lsl #12 // =2072576
333+
; CHECK-NEXT: ldrsh x0, [x8, #7408]
341334
; CHECK-NEXT: ret
342335
%arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
343336
%val = load i16, ptr %arrayidx, align 2
@@ -349,9 +342,8 @@ define i64 @LdOffset_i16_sext64(ptr %a) {
349342
define i32 @LdOffset_i32(ptr %a) {
350343
; CHECK-LABEL: LdOffset_i32:
351344
; CHECK: // %bb.0:
352-
; CHECK-NEXT: mov w8, #31200 // =0x79e0
353-
; CHECK-NEXT: movk w8, #63, lsl #16
354-
; CHECK-NEXT: ldr w0, [x0, x8]
345+
; CHECK-NEXT: add x8, x0, #1012, lsl #12 // =4145152
346+
; CHECK-NEXT: ldr w0, [x8, #14816]
355347
; CHECK-NEXT: ret
356348
%arrayidx = getelementptr inbounds i32, ptr %a, i64 1039992
357349
%val = load i32, ptr %arrayidx, align 4
@@ -362,9 +354,8 @@ define i32 @LdOffset_i32(ptr %a) {
362354
define i64 @LdOffset_i32_zext64(ptr %a) {
363355
; CHECK-LABEL: LdOffset_i32_zext64:
364356
; CHECK: // %bb.0:
365-
; CHECK-NEXT: mov w8, #31200 // =0x79e0
366-
; CHECK-NEXT: movk w8, #63, lsl #16
367-
; CHECK-NEXT: ldr w0, [x0, x8]
357+
; CHECK-NEXT: add x8, x0, #1012, lsl #12 // =4145152
358+
; CHECK-NEXT: ldr w0, [x8, #14816]
368359
; CHECK-NEXT: ret
369360
%arrayidx = getelementptr inbounds i32, ptr %a, i64 1039992
370361
%val = load i32, ptr %arrayidx, align 2
@@ -376,9 +367,8 @@ define i64 @LdOffset_i32_zext64(ptr %a) {
376367
define i64 @LdOffset_i32_sext64(ptr %a) {
377368
; CHECK-LABEL: LdOffset_i32_sext64:
378369
; CHECK: // %bb.0:
379-
; CHECK-NEXT: mov w8, #31200 // =0x79e0
380-
; CHECK-NEXT: movk w8, #63, lsl #16
381-
; CHECK-NEXT: ldrsw x0, [x0, x8]
370+
; CHECK-NEXT: add x8, x0, #1012, lsl #12 // =4145152
371+
; CHECK-NEXT: ldrsw x0, [x8, #14816]
382372
; CHECK-NEXT: ret
383373
%arrayidx = getelementptr inbounds i32, ptr %a, i64 1039992
384374
%val = load i32, ptr %arrayidx, align 2
@@ -390,9 +380,8 @@ define i64 @LdOffset_i32_sext64(ptr %a) {
390380
define i64 @LdOffset_i64(ptr %a) {
391381
; CHECK-LABEL: LdOffset_i64:
392382
; CHECK: // %bb.0:
393-
; CHECK-NEXT: mov w8, #62400 // =0xf3c0
394-
; CHECK-NEXT: movk w8, #126, lsl #16
395-
; CHECK-NEXT: ldr x0, [x0, x8]
383+
; CHECK-NEXT: add x8, x0, #2024, lsl #12 // =8290304
384+
; CHECK-NEXT: ldr x0, [x8, #29632]
396385
; CHECK-NEXT: ret
397386
%arrayidx = getelementptr inbounds i64, ptr %a, i64 1039992
398387
%val = load i64, ptr %arrayidx, align 4
@@ -403,9 +392,8 @@ define i64 @LdOffset_i64(ptr %a) {
403392
define <2 x i32> @LdOffset_v2i32(ptr %a) {
404393
; CHECK-LABEL: LdOffset_v2i32:
405394
; CHECK: // %bb.0:
406-
; CHECK-NEXT: mov w8, #62400 // =0xf3c0
407-
; CHECK-NEXT: movk w8, #126, lsl #16
408-
; CHECK-NEXT: ldr d0, [x0, x8]
395+
; CHECK-NEXT: add x8, x0, #2024, lsl #12 // =8290304
396+
; CHECK-NEXT: ldr d0, [x8, #29632]
409397
; CHECK-NEXT: ret
410398
%arrayidx = getelementptr inbounds <2 x i32>, ptr %a, i64 1039992
411399
%val = load <2 x i32>, ptr %arrayidx, align 4
@@ -416,9 +404,8 @@ define <2 x i32> @LdOffset_v2i32(ptr %a) {
416404
define <2 x i64> @LdOffset_v2i64(ptr %a) {
417405
; CHECK-LABEL: LdOffset_v2i64:
418406
; CHECK: // %bb.0:
419-
; CHECK-NEXT: mov w8, #59264 // =0xe780
420-
; CHECK-NEXT: movk w8, #253, lsl #16
421-
; CHECK-NEXT: ldr q0, [x0, x8]
407+
; CHECK-NEXT: add x8, x0, #4048, lsl #12 // =16580608
408+
; CHECK-NEXT: ldr q0, [x8, #59264]
422409
; CHECK-NEXT: ret
423410
%arrayidx = getelementptr inbounds <2 x i64>, ptr %a, i64 1039992
424411
%val = load <2 x i64>, ptr %arrayidx, align 4
@@ -429,9 +416,8 @@ define <2 x i64> @LdOffset_v2i64(ptr %a) {
429416
define double @LdOffset_i8_f64(ptr %a) {
430417
; CHECK-LABEL: LdOffset_i8_f64:
431418
; CHECK: // %bb.0:
432-
; CHECK-NEXT: mov w8, #56952 // =0xde78
433-
; CHECK-NEXT: movk w8, #15, lsl #16
434-
; CHECK-NEXT: ldrsb w8, [x0, x8]
419+
; CHECK-NEXT: add x8, x0, #253, lsl #12 // =1036288
420+
; CHECK-NEXT: ldrsb w8, [x8, #3704]
435421
; CHECK-NEXT: scvtf d0, w8
436422
; CHECK-NEXT: ret
437423
%arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
@@ -444,9 +430,8 @@ define double @LdOffset_i8_f64(ptr %a) {
444430
define double @LdOffset_i16_f64(ptr %a) {
445431
; CHECK-LABEL: LdOffset_i16_f64:
446432
; CHECK: // %bb.0:
447-
; CHECK-NEXT: mov w8, #48368 // =0xbcf0
448-
; CHECK-NEXT: movk w8, #31, lsl #16
449-
; CHECK-NEXT: ldrsh w8, [x0, x8]
433+
; CHECK-NEXT: add x8, x0, #506, lsl #12 // =2072576
434+
; CHECK-NEXT: ldrsh w8, [x8, #7408]
450435
; CHECK-NEXT: scvtf d0, w8
451436
; CHECK-NEXT: ret
452437
%arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
@@ -459,9 +444,8 @@ define double @LdOffset_i16_f64(ptr %a) {
459444
define double @LdOffset_i32_f64(ptr %a) {
460445
; CHECK-LABEL: LdOffset_i32_f64:
461446
; CHECK: // %bb.0:
462-
; CHECK-NEXT: mov w8, #31200 // =0x79e0
463-
; CHECK-NEXT: movk w8, #63, lsl #16
464-
; CHECK-NEXT: ldr s0, [x0, x8]
447+
; CHECK-NEXT: add x8, x0, #1012, lsl #12 // =4145152
448+
; CHECK-NEXT: ldr s0, [x8, #14816]
465449
; CHECK-NEXT: ucvtf d0, d0
466450
; CHECK-NEXT: ret
467451
%arrayidx = getelementptr inbounds i32, ptr %a, i64 1039992
@@ -474,9 +458,8 @@ define double @LdOffset_i32_f64(ptr %a) {
474458
define double @LdOffset_i64_f64(ptr %a) {
475459
; CHECK-LABEL: LdOffset_i64_f64:
476460
; CHECK: // %bb.0:
477-
; CHECK-NEXT: mov w8, #62400 // =0xf3c0
478-
; CHECK-NEXT: movk w8, #126, lsl #16
479-
; CHECK-NEXT: ldr d0, [x0, x8]
461+
; CHECK-NEXT: add x8, x0, #2024, lsl #12 // =8290304
462+
; CHECK-NEXT: ldr d0, [x8, #29632]
480463
; CHECK-NEXT: scvtf d0, d0
481464
; CHECK-NEXT: ret
482465
%arrayidx = getelementptr inbounds i64, ptr %a, i64 1039992

0 commit comments

Comments
 (0)