Skip to content

Commit

Permalink
Update handling of limited register during consecutive registers allo…
Browse files Browse the repository at this point in the history
…cation (#84588)

* Fix issue to handle candidates that don't have assignedInterval in spillcost/prevReg

* Do not let limitRegs reduce the number of candidates

If we find out that there are no candidates free/busy for refPositions
that need consecutive registers, have at least one range of registers
in the candidates such that allocation is possible.

* getConsecutiveCandidates update if freeCandidate=RBM_NONE

Intially, we were just returning RBM_NONE if we don't find any freeCandidates,
but instead should try if we can find out if there are any busy candidates
that we should try them out.

* Relax limitStressRegs for refpositions live at consecutive register position

If consecutive registers are being allocated, other refpositions that are live
at the same location might not have enough registers left to be assigned because
all registers are busy. As such, introduce a way to track if we are assigning
at the location of consecutive registers, and if yes, do not take jitstressregs
limit into account.

* Update minRegCount for registerAssignment

For consecutive register, also include the register count needed for "minimum
register requirement" when limiting the registers.

* Remove LsraLimitFPSetForConsecutive

With other conditions in place, no need to have LsraLimitFPSetForConsecutive.

* Added an assert

* misc changes

* jit format

* Use BitOperations::BitScanForward()

* review feedback
  • Loading branch information
kunalspathak authored Apr 11, 2023
1 parent a6a5dfb commit 7a57f6d
Show file tree
Hide file tree
Showing 4 changed files with 201 additions and 125 deletions.
188 changes: 133 additions & 55 deletions src/coreclr/jit/lsra.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -496,13 +496,6 @@ regMaskTP LinearScan::stressLimitRegs(RefPosition* refPosition, regMaskTP mask)
{
mask |= refPosition->registerAssignment;
}

#ifdef TARGET_ARM64
if ((refPosition != nullptr) && refPosition->isFirstRefPositionOfConsecutiveRegisters())
{
mask |= LsraLimitFPSetForConsecutive;
}
#endif
}

return mask;
Expand Down Expand Up @@ -662,7 +655,9 @@ LinearScan::LinearScan(Compiler* theCompiler)
firstColdLoc = MaxLocation;

#ifdef DEBUG
maxNodeLocation = 0;
maxNodeLocation = 0;
consecutiveRegistersLocation = 0;

activeRefPosition = nullptr;
currBuildNode = nullptr;

Expand Down Expand Up @@ -4901,6 +4896,24 @@ void LinearScan::allocateRegisters()
}
}
prevLocation = currentLocation;
#ifdef TARGET_ARM64

#ifdef DEBUG
if (hasConsecutiveRegister)
{
if (currentRefPosition.needsConsecutive)
{
// track all the refpositions around the location that is also
// allocating consecutive registers.
consecutiveRegistersLocation = currentLocation;
}
else if (consecutiveRegistersLocation < currentLocation)
{
consecutiveRegistersLocation = MinLocation;
}
}
#endif // DEBUG
#endif // TARGET_ARM64

// get previous refposition, then current refpos is the new previous
if (currentReferent != nullptr)
Expand Down Expand Up @@ -11683,49 +11696,53 @@ void LinearScan::RegisterSelection::try_SPILL_COST()
Interval* assignedInterval = spillCandidateRegRecord->assignedInterval;
RefPosition* recentRefPosition = assignedInterval != nullptr ? assignedInterval->recentRefPosition : nullptr;

// Can and should the interval in this register be spilled for this one,
// if we don't find a better alternative?
// Can and should the interval in this register be spilled for this one,
// if we don't find a better alternative?

weight_t currentSpillWeight = 0;
#ifdef TARGET_ARM64
if (assignedInterval == nullptr)
{
// Ideally we should not be seeing this candidate because it is not assigned to
// any interval. But based on that, we cannot determine if it is a good spill
// candidate or not. Skip processing it.
continue;
}

if ((recentRefPosition != nullptr) && linearScan->isRefPositionActive(recentRefPosition, thisLocation) &&
(recentRefPosition->needsConsecutive))
{
continue;
}
#endif // TARGET_ARM64

if ((linearScan->getNextIntervalRef(spillCandidateRegNum, regType) == thisLocation) &&
!assignedInterval->getNextRefPosition()->RegOptional())
{
continue;
}
if (!linearScan->isSpillCandidate(currentInterval, refPosition, spillCandidateRegRecord))
else if (assignedInterval != nullptr)
#endif
{
continue;
}
if ((linearScan->getNextIntervalRef(spillCandidateRegNum, regType) == thisLocation) &&
!assignedInterval->getNextRefPosition()->RegOptional())
{
continue;
}
if (!linearScan->isSpillCandidate(currentInterval, refPosition, spillCandidateRegRecord))
{
continue;
}

weight_t currentSpillWeight = 0;
if ((recentRefPosition != nullptr) &&
(recentRefPosition->RegOptional() && !(assignedInterval->isLocalVar && recentRefPosition->IsActualRef())))
{
// We do not "spillAfter" if previous (recent) refPosition was regOptional or if it
// is not an actual ref. In those cases, we will reload in future (next) refPosition.
// For such cases, consider the spill cost of next refposition.
// See notes in "spillInterval()".
RefPosition* reloadRefPosition = assignedInterval->getNextRefPosition();
if (reloadRefPosition != nullptr)
if ((recentRefPosition != nullptr) && (recentRefPosition->RegOptional() &&
!(assignedInterval->isLocalVar && recentRefPosition->IsActualRef())))
{
currentSpillWeight = linearScan->getWeight(reloadRefPosition);
// We do not "spillAfter" if previous (recent) refPosition was regOptional or if it
// is not an actual ref. In those cases, we will reload in future (next) refPosition.
// For such cases, consider the spill cost of next refposition.
// See notes in "spillInterval()".
RefPosition* reloadRefPosition = assignedInterval->getNextRefPosition();
if (reloadRefPosition != nullptr)
{
currentSpillWeight = linearScan->getWeight(reloadRefPosition);
}
}
}
#ifdef TARGET_ARM64
else
{
// Ideally we should not be seeing this candidate because it is not assigned to
// any interval. But it is possible for certain scenarios. One of them is that
// `refPosition` needs consecutive registers and we decided to pick a mix of free+busy
// registers. This candidate is part of that set and is free and hence is not assigned
// to any interval.
}
#endif // TARGET_ARM64

// Only consider spillCost if we were not able to calculate weight of reloadRefPosition.
if (currentSpillWeight == 0)
Expand Down Expand Up @@ -11875,7 +11892,16 @@ void LinearScan::RegisterSelection::try_PREV_REG_OPT()
#ifdef DEBUG
// The assigned should be non-null, and should have a recentRefPosition, however since
// this is a heuristic, we don't want a fatal error, so we just assert (not noway_assert).
if (!hasAssignedInterval)
if (!hasAssignedInterval
#ifdef TARGET_ARM64
// We could see a candidate that doesn't have assignedInterval because allocation is
// happening for `refPosition` that needs consecutive registers and we decided to pick
// a mix of free+busy registers. This candidate is part of that set and is free and hence
// is not assigned to any interval.

&& !refPosition->needsConsecutive
#endif
)
{
assert(!"Spill candidate has no assignedInterval recentRefPosition");
}
Expand Down Expand Up @@ -11988,6 +12014,10 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval,
*registerScore = NONE;
#endif

#ifdef TARGET_ARM64
assert(!needsConsecutiveRegisters || refPosition->needsConsecutive);
#endif

reset(currentInterval, refPosition);

// process data-structures
Expand Down Expand Up @@ -12036,7 +12066,19 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval,
}

#ifdef DEBUG
candidates = linearScan->stressLimitRegs(refPosition, candidates);
#ifdef TARGET_ARM64
if (!refPosition->needsConsecutive && (linearScan->consecutiveRegistersLocation == refPosition->nodeLocation))
{
// If a method has consecutive registers and we are assigning to refPositions that are not part
// of consecutive registers, but are live at same location, skip the limit stress for them, because
// there are high chances that many registers are busy for consecutive requirements and we don't
// have enough remaining for other refpositions (like operands).
}
else
#endif
{
candidates = linearScan->stressLimitRegs(refPosition, candidates);
}
#endif
assert(candidates != RBM_NONE);

Expand Down Expand Up @@ -12186,6 +12228,10 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval,
}
}

#ifdef DEBUG
regMaskTP inUseOrBusyRegsMask = RBM_NONE;
#endif

// Eliminate candidates that are in-use or busy.
if (!found)
{
Expand All @@ -12195,6 +12241,10 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval,
regMaskTP busyRegs = linearScan->regsBusyUntilKill | linearScan->regsInUseThisLocation;
candidates &= ~busyRegs;

#ifdef DEBUG
inUseOrBusyRegsMask |= busyRegs;
#endif

// Also eliminate as busy any register with a conflicting fixed reference at this or
// the next location.
// Note that this will eliminate the fixedReg, if any, but we'll add it back below.
Expand All @@ -12210,6 +12260,9 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval,
(refPosition->delayRegFree && (checkConflictLocation == (refPosition->nodeLocation + 1))))
{
candidates &= ~checkConflictBit;
#ifdef DEBUG
inUseOrBusyRegsMask |= checkConflictBit;
#endif
}
}
candidates |= fixedRegMask;
Expand All @@ -12226,12 +12279,10 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval,
prevRegBit = genRegMask(prevRegRec->regNum);
if ((prevRegRec->assignedInterval == currentInterval) && ((candidates & prevRegBit) != RBM_NONE))
{
#ifdef TARGET_ARM64
// If this is allocating for consecutive register, we need to make sure that
// we allocate register, whose consecutive registers are also free.
if (!needsConsecutiveRegisters)
#endif
{
// If this is allocating for consecutive register, we need to make sure that
// we allocate register, whose consecutive registers are also free.
candidates = prevRegBit;
found = true;
#ifdef DEBUG
Expand All @@ -12245,13 +12296,6 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval,
prevRegBit = RBM_NONE;
}

if (!found && (candidates == RBM_NONE))
{
assert(refPosition->RegOptional());
currentInterval->assignedReg = nullptr;
return RBM_NONE;
}

// TODO-Cleanup: Previously, the "reverseSelect" stress mode reversed the order of the heuristics.
// It needs to be re-engineered with this refactoring.
// In non-debug builds, this will simply get optimized away
Expand All @@ -12260,9 +12304,9 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval,
reverseSelect = linearScan->doReverseSelect();
#endif // DEBUG

#ifdef TARGET_ARM64
if (needsConsecutiveRegisters)
{
#ifdef TARGET_ARM64
regMaskTP busyConsecutiveCandidates = RBM_NONE;
if (refPosition->isFirstRefPositionOfConsecutiveRegisters())
{
Expand All @@ -12287,12 +12331,46 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval,

if ((freeCandidates == RBM_NONE) && (candidates == RBM_NONE))
{
noway_assert(!"Not sufficient consecutive registers available.");
#ifdef DEBUG
// Need to make sure that candidates has N consecutive registers to assign
if (linearScan->getStressLimitRegs() != LSRA_LIMIT_NONE)
{
// If the refPosition needs consecutive registers, then we want to make sure that
// the candidates have atleast one range of N registers that are consecutive, where N
// is the number of consecutive registers needed.
// Remove the `inUseOrBusyRegsMask` from the original candidates list and find one
// such range that is consecutive. Next, append that range to the `candidates`.
//
regMaskTP limitCandidatesForConsecutive = refPosition->registerAssignment & ~inUseOrBusyRegsMask;
regMaskTP overallLimitCandidates;
regMaskTP limitConsecutiveResult =
linearScan->filterConsecutiveCandidates(limitCandidatesForConsecutive, refPosition->regCount,
&overallLimitCandidates);
assert(limitConsecutiveResult != RBM_NONE);

unsigned startRegister = BitOperations::BitScanForward(limitConsecutiveResult);

regMaskTP registersNeededMask = (1ULL << refPosition->regCount) - 1;
candidates |= (registersNeededMask << startRegister);
}

if (candidates == RBM_NONE)
#endif // DEBUG
{
noway_assert(!"Not sufficient consecutive registers available.");
}
}
#endif // TARGET_ARM64
}
else
#endif // TARGET_ARM64
{
if (!found && (candidates == RBM_NONE))
{
assert(refPosition->RegOptional());
currentInterval->assignedReg = nullptr;
return RBM_NONE;
}

freeCandidates = linearScan->getFreeCandidates(candidates ARM_ARG(regType));
}

Expand Down
9 changes: 5 additions & 4 deletions src/coreclr/jit/lsra.h
Original file line number Diff line number Diff line change
Expand Up @@ -792,9 +792,6 @@ class LinearScan : public LinearScanInterface
#elif defined(TARGET_ARM64)
static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R19 | RBM_R20);
static const regMaskTP LsraLimitSmallFPSet = (RBM_V0 | RBM_V1 | RBM_V2 | RBM_V8 | RBM_V9);
// LsraLimitFPSetForConsecutive is used for stress mode and gives few extra registers to satisfy
// the requirements for allocating consecutive registers.
static const regMaskTP LsraLimitFPSetForConsecutive = (RBM_V3 | RBM_V5 | RBM_V7);
#elif defined(TARGET_X86)
static const regMaskTP LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EDI);
static const regMaskTP LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7);
Expand Down Expand Up @@ -2006,9 +2003,13 @@ class LinearScan : public LinearScanInterface
int BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCount);
#ifdef TARGET_ARM64
int BuildConsecutiveRegistersForUse(GenTree* treeNode, GenTree* rmwNode = nullptr);
#endif
#endif // TARGET_ARM64
#endif // FEATURE_HW_INTRINSICS

#ifdef DEBUG
LsraLocation consecutiveRegistersLocation;
#endif // DEBUG

int BuildPutArgStk(GenTreePutArgStk* argNode);
#if FEATURE_ARG_SPLIT
int BuildPutArgSplit(GenTreePutArgSplit* tree);
Expand Down
Loading

0 comments on commit 7a57f6d

Please sign in to comment.