Skip to content

Commit a4b2f5d

Browse files
committed
[AArch64][Falkor] Fix correctness bug in falkor prefetcher fix pass and correct some opcode tag computations.
Summary: This addresses a correctness bug for LD[1234]*_POST opcodes that have the prefetcher fix applied to them: the base register was not being written back from the temp after being incremented, so it would appear to never be incremented. Also, fix some opcode tag computations based on some updated HW details to get better tag avoidance and thus better prefetcher performance.

Reviewers: mcrosier

Subscribers: aemerson, rengolin, javed.absar, kristof.beyls

Differential Revision: https://reviews.llvm.org/D38256

llvm-svn: 314251
1 parent b7e4c94 commit a4b2f5d

File tree

2 files changed

+318
-60
lines changed

2 files changed

+318
-60
lines changed

llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp

Lines changed: 52 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -240,27 +240,27 @@ static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) {
240240
default:
241241
return None;
242242

243+
case AArch64::LD1i64:
244+
case AArch64::LD2i64:
245+
DestRegIdx = 0;
246+
BaseRegIdx = 3;
247+
OffsetIdx = -1;
248+
IsPrePost = false;
249+
break;
250+
243251
case AArch64::LD1i8:
244252
case AArch64::LD1i16:
245253
case AArch64::LD1i32:
246-
case AArch64::LD1i64:
247254
case AArch64::LD2i8:
248255
case AArch64::LD2i16:
249256
case AArch64::LD2i32:
250-
case AArch64::LD2i64:
251257
case AArch64::LD3i8:
252258
case AArch64::LD3i16:
253259
case AArch64::LD3i32:
260+
case AArch64::LD3i64:
254261
case AArch64::LD4i8:
255262
case AArch64::LD4i16:
256263
case AArch64::LD4i32:
257-
DestRegIdx = 0;
258-
BaseRegIdx = 3;
259-
OffsetIdx = -1;
260-
IsPrePost = false;
261-
break;
262-
263-
case AArch64::LD3i64:
264264
case AArch64::LD4i64:
265265
DestRegIdx = -1;
266266
BaseRegIdx = 3;
@@ -284,23 +284,16 @@ static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) {
284284
case AArch64::LD1Rv4s:
285285
case AArch64::LD1Rv8h:
286286
case AArch64::LD1Rv16b:
287-
case AArch64::LD1Twov1d:
288-
case AArch64::LD1Twov2s:
289-
case AArch64::LD1Twov4h:
290-
case AArch64::LD1Twov8b:
291-
case AArch64::LD2Twov2s:
292-
case AArch64::LD2Twov4s:
293-
case AArch64::LD2Twov8b:
294-
case AArch64::LD2Rv1d:
295-
case AArch64::LD2Rv2s:
296-
case AArch64::LD2Rv4s:
297-
case AArch64::LD2Rv8b:
298287
DestRegIdx = 0;
299288
BaseRegIdx = 1;
300289
OffsetIdx = -1;
301290
IsPrePost = false;
302291
break;
303292

293+
case AArch64::LD1Twov1d:
294+
case AArch64::LD1Twov2s:
295+
case AArch64::LD1Twov4h:
296+
case AArch64::LD1Twov8b:
304297
case AArch64::LD1Twov2d:
305298
case AArch64::LD1Twov4s:
306299
case AArch64::LD1Twov8h:
@@ -321,10 +314,17 @@ static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) {
321314
case AArch64::LD1Fourv4s:
322315
case AArch64::LD1Fourv8h:
323316
case AArch64::LD1Fourv16b:
317+
case AArch64::LD2Twov2s:
318+
case AArch64::LD2Twov4s:
319+
case AArch64::LD2Twov8b:
324320
case AArch64::LD2Twov2d:
325321
case AArch64::LD2Twov4h:
326322
case AArch64::LD2Twov8h:
327323
case AArch64::LD2Twov16b:
324+
case AArch64::LD2Rv1d:
325+
case AArch64::LD2Rv2s:
326+
case AArch64::LD2Rv4s:
327+
case AArch64::LD2Rv8b:
328328
case AArch64::LD2Rv2d:
329329
case AArch64::LD2Rv4h:
330330
case AArch64::LD2Rv8h:
@@ -365,32 +365,32 @@ static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) {
365365
IsPrePost = false;
366366
break;
367367

368+
case AArch64::LD1i64_POST:
369+
case AArch64::LD2i64_POST:
370+
DestRegIdx = 1;
371+
BaseRegIdx = 4;
372+
OffsetIdx = 5;
373+
IsPrePost = true;
374+
break;
375+
368376
case AArch64::LD1i8_POST:
369377
case AArch64::LD1i16_POST:
370378
case AArch64::LD1i32_POST:
371-
case AArch64::LD1i64_POST:
372379
case AArch64::LD2i8_POST:
373380
case AArch64::LD2i16_POST:
374381
case AArch64::LD2i32_POST:
375-
case AArch64::LD2i64_POST:
376382
case AArch64::LD3i8_POST:
377383
case AArch64::LD3i16_POST:
378384
case AArch64::LD3i32_POST:
385+
case AArch64::LD3i64_POST:
379386
case AArch64::LD4i8_POST:
380387
case AArch64::LD4i16_POST:
381388
case AArch64::LD4i32_POST:
382-
DestRegIdx = 1;
383-
BaseRegIdx = 4;
384-
OffsetIdx = 5;
385-
IsPrePost = false;
386-
break;
387-
388-
case AArch64::LD3i64_POST:
389389
case AArch64::LD4i64_POST:
390390
DestRegIdx = -1;
391391
BaseRegIdx = 4;
392392
OffsetIdx = 5;
393-
IsPrePost = false;
393+
IsPrePost = true;
394394
break;
395395

396396
case AArch64::LD1Onev1d_POST:
@@ -409,23 +409,16 @@ static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) {
409409
case AArch64::LD1Rv4s_POST:
410410
case AArch64::LD1Rv8h_POST:
411411
case AArch64::LD1Rv16b_POST:
412-
case AArch64::LD1Twov1d_POST:
413-
case AArch64::LD1Twov2s_POST:
414-
case AArch64::LD1Twov4h_POST:
415-
case AArch64::LD1Twov8b_POST:
416-
case AArch64::LD2Twov2s_POST:
417-
case AArch64::LD2Twov4s_POST:
418-
case AArch64::LD2Twov8b_POST:
419-
case AArch64::LD2Rv1d_POST:
420-
case AArch64::LD2Rv2s_POST:
421-
case AArch64::LD2Rv4s_POST:
422-
case AArch64::LD2Rv8b_POST:
423412
DestRegIdx = 1;
424413
BaseRegIdx = 2;
425414
OffsetIdx = 3;
426-
IsPrePost = false;
415+
IsPrePost = true;
427416
break;
428417

418+
case AArch64::LD1Twov1d_POST:
419+
case AArch64::LD1Twov2s_POST:
420+
case AArch64::LD1Twov4h_POST:
421+
case AArch64::LD1Twov8b_POST:
429422
case AArch64::LD1Twov2d_POST:
430423
case AArch64::LD1Twov4s_POST:
431424
case AArch64::LD1Twov8h_POST:
@@ -446,10 +439,17 @@ static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) {
446439
case AArch64::LD1Fourv4s_POST:
447440
case AArch64::LD1Fourv8h_POST:
448441
case AArch64::LD1Fourv16b_POST:
442+
case AArch64::LD2Twov2s_POST:
443+
case AArch64::LD2Twov4s_POST:
444+
case AArch64::LD2Twov8b_POST:
449445
case AArch64::LD2Twov2d_POST:
450446
case AArch64::LD2Twov4h_POST:
451447
case AArch64::LD2Twov8h_POST:
452448
case AArch64::LD2Twov16b_POST:
449+
case AArch64::LD2Rv1d_POST:
450+
case AArch64::LD2Rv2s_POST:
451+
case AArch64::LD2Rv4s_POST:
452+
case AArch64::LD2Rv8b_POST:
453453
case AArch64::LD2Rv2d_POST:
454454
case AArch64::LD2Rv4h_POST:
455455
case AArch64::LD2Rv8h_POST:
@@ -487,7 +487,7 @@ static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) {
487487
DestRegIdx = -1;
488488
BaseRegIdx = 2;
489489
OffsetIdx = 3;
490-
IsPrePost = false;
490+
IsPrePost = true;
491491
break;
492492

493493
case AArch64::LDRBBroW:
@@ -592,16 +592,19 @@ static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) {
592592
IsPrePost = true;
593593
break;
594594

595+
case AArch64::LDNPDi:
596+
case AArch64::LDNPQi:
597+
case AArch64::LDNPSi:
595598
case AArch64::LDPQi:
599+
case AArch64::LDPDi:
600+
case AArch64::LDPSi:
596601
DestRegIdx = -1;
597602
BaseRegIdx = 2;
598603
OffsetIdx = 3;
599604
IsPrePost = false;
600605
break;
601606

602-
case AArch64::LDPDi:
603607
case AArch64::LDPSWi:
604-
case AArch64::LDPSi:
605608
case AArch64::LDPWi:
606609
case AArch64::LDPXi:
607610
DestRegIdx = 0;
@@ -612,18 +615,18 @@ static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) {
612615

613616
case AArch64::LDPQpost:
614617
case AArch64::LDPQpre:
618+
case AArch64::LDPDpost:
619+
case AArch64::LDPDpre:
620+
case AArch64::LDPSpost:
621+
case AArch64::LDPSpre:
615622
DestRegIdx = -1;
616623
BaseRegIdx = 3;
617624
OffsetIdx = 4;
618625
IsPrePost = true;
619626
break;
620627

621-
case AArch64::LDPDpost:
622-
case AArch64::LDPDpre:
623628
case AArch64::LDPSWpost:
624629
case AArch64::LDPSWpre:
625-
case AArch64::LDPSpost:
626-
case AArch64::LDPSpre:
627630
case AArch64::LDPWpost:
628631
case AArch64::LDPWpre:
629632
case AArch64::LDPXpost:

0 commit comments

Comments
 (0)