@@ -426,28 +426,54 @@ static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
426
426
/// ORs into \p RUs the register units of every physical register (other than
/// register number 0) whose bit is clear in \p Mask. \p Mask is read as
/// (NumRegs + 31) / 32 32-bit words, with register N at bit N % 32 of word
/// N / 32.
static void applyBitsNotInRegMaskToRegUnitsMask (const TargetRegisterInfo &TRI,
427
427
BitVector &RUs,
428
428
const uint32_t *Mask) {
429
- BitVector ClobberedRUs (TRI.getNumRegUnits (), true );
429
+ // FIXME: This intentionally works in reverse due to some issues with the
430
+ // Register Units infrastructure.
431
+ //
432
+ // This is used to apply callee-saved-register masks to the clobbered regunits
433
+ // mask.
434
+ //
435
+ // The right way to approach this is to start with a BitVector full of ones,
436
+ // then reset all the bits of the regunits of each register that is set in the
437
+ // mask (registers preserved), then OR the resulting bits with the Clobbers
438
+ // mask. This correctly prioritizes the saved registers, so if a RU is shared
439
+ // between a register that is preserved and one that is NOT preserved, that
440
+ // RU will not be set in the output vector (the clobbers).
441
+ //
442
+ // What we have to do for now is the opposite: we have to assume that the
443
+ // regunits of all registers that are NOT preserved are clobbered, even if
444
+ // those regunits are preserved by another register. So if a RU is shared
445
+ // as described above, that RU will be set.
446
+ //
447
+ // This is to work around an issue which appears in AArch64, but isn't
448
+ // exclusive to that target: AArch64's Qn registers (128 bits) have Dn
449
+ // subregisters (lower 64 bits). A few Dn registers are preserved by some
450
+ // calling conventions, but Qn and Dn share exactly the same reg units.
451
+ //
452
+ // If we do this the right way, Qn will be marked as NOT clobbered even though
453
+ // its upper 64 bits are NOT preserved. The conservative approach handles this
454
+ // correctly at the cost of some missed optimizations on other targets.
455
+ //
456
+ // This is caused by how RegUnits are handled within TableGen. Ideally, Qn
457
+ // should have an extra RegUnit to model the "unknown" bits not covered by the
458
+ // subregs.
459
+ BitVector RUsFromRegsNotInMask (TRI.getNumRegUnits ());
430
460
const unsigned NumRegs = TRI.getNumRegs ();
431
461
const unsigned MaskWords = (NumRegs + 31 ) / 32 ; // Round up to whole 32-bit words.
432
462
for (unsigned K = 0 ; K < MaskWords; ++K) {
433
463
const uint32_t Word = Mask[K];
434
- if (!Word)
435
- continue ;
436
-
437
464
for (unsigned Bit = 0 ; Bit < 32 ; ++Bit) {
438
465
const unsigned PhysReg = (K * 32 ) + Bit;
439
466
if (PhysReg == NumRegs)
440
467
break ;
441
468
442
- // Check if we have a valid PhysReg that is set in the mask.
443
- if ((Word >> Bit) & 1 ) {
469
+ if (PhysReg && !((Word >> Bit) & 1 )) { // Skip register number 0 and any register whose mask bit is set.
444
470
for (MCRegUnitIterator RUI (PhysReg, &TRI); RUI.isValid (); ++RUI)
445
- ClobberedRUs. reset (*RUI);
471
+ RUsFromRegsNotInMask. set (*RUI);
446
472
}
447
473
}
448
474
}
449
475
450
- RUs |= ClobberedRUs ;
476
+ RUs |= RUsFromRegsNotInMask ;
451
477
}
452
478
453
479
/// Examine the instruction for a potential LICM candidate. Also
0 commit comments