Skip to content

Commit

Permalink
JIT: Allow strength reducing to GCD of IVs (#110222)
Browse files Browse the repository at this point in the history
This adds support for strength reduction to create a new primary IV whose
step is the GCD of the step values of several IVs found in the loop. When the
same index is used to access arrays with different element sizes we will often
see the IV being multiplied by different values; however, it is usually still
profitable to strength reduce to the GCD of the step values and then "recover"
each final IV by scaling.

Example:
```csharp
public static void Foo()
{
    string puzzle = "003020600900305001001806400008102900700000008006708200002609500800203009005010300";
    int[] board = new int[81];

    for (int i = 0; i < puzzle.Length; i++)
    {
        board[i] = puzzle[i] - '0';
    }
}
```

Codegen diff for loop:
```diff
        xor      ecx, ecx
+       mov      edx, 81

 G_M24659_IG03:
-       mov      edx, ecx
-       movzx    r8, word  ptr [rbx+2*rdx+0x10]
+       movzx    r8, word  ptr [rbx+rcx+0x10]
        add      r8d, -48
-       mov      dword ptr [rax+4*rdx+0x10], r8d
-       inc      ecx
-       cmp      ecx, 81
-       jl       SHORT G_M24659_IG03
-						;; size=24 bbWeight=3.96 PerfScore 19.80
+       mov      dword ptr [rax+2*rcx+0x10], r8d
+       add      rcx, 2
+       dec      edx
+       jne      SHORT G_M24659_IG03
+						;; size=23 bbWeight=3.96 PerfScore 18.81
```

A similar diff in ``System.Linq.Enumerable+EnumerableSorter`2[System.__Canon,System.Decimal]:ComputeKeys(System.__Canon[],int)``:
```diff
+       xor      edx, edx
 G_M57524_IG05:
-       mov      edx, r15d
-       mov      r8, gword ptr [rbx+8*rdx+0x10]
+       mov      r8, gword ptr [rbx+rdx+0x10]
        vmovups  xmm0, xmmword ptr [r8+0x20]
        vmovups  xmmword ptr [rsp+0x28], xmm0
-       shl      rdx, 4
        vmovups  xmm0, xmmword ptr [rsp+0x28]
-       vmovups  xmmword ptr [r14+rdx+0x10], xmm0
-       inc      r15d
-       cmp      r13d, r15d
-       jg       SHORT G_M57524_IG05
-						;; size=45 bbWeight=75.73 PerfScore 1079.10
+       vmovups  xmmword ptr [r14+2*rdx+0x10], xmm0
+       add      rdx, 8
+       dec      r13d
+       jne      SHORT G_M57524_IG05
+						;; size=39 bbWeight=75.73 PerfScore 1022.31
```

Fix #102068
Fix #105241
  • Loading branch information
jakobbotsch authored and pull[bot] committed Jan 8, 2025
1 parent eb4cb26 commit 2504912
Show file tree
Hide file tree
Showing 4 changed files with 239 additions and 11 deletions.
5 changes: 5 additions & 0 deletions src/coreclr/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -2140,6 +2140,11 @@ struct GenTree
gtFlags &= ~GTF_MUL_64RSLT;
}

// Returns true when this node is a GT_ADD, GT_MUL or GT_LSH that carries the
// GTF_ADDRMODE_NO_CSE flag.
// NOTE(review): going by the flag name, such nodes were marked as forming
// part of an address mode and are not to be CSE'd -- confirm at the flag's
// definition.
bool IsPartOfAddressMode()
{
    if ((gtFlags & GTF_ADDRMODE_NO_CSE) == 0)
    {
        return false;
    }

    return OperIs(GT_ADD, GT_MUL, GT_LSH);
}

void SetAllEffectsFlags(GenTree* source)
{
SetAllEffectsFlags(source->gtFlags & GTF_ALL_EFFECT);
Expand Down
241 changes: 232 additions & 9 deletions src/coreclr/jit/inductionvariableopts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -938,7 +938,7 @@ bool Compiler::optWidenPrimaryIV(FlowGraphNaturalLoop* loop,
GenTree* initVal;
if (initToConstant)
{
initVal = gtNewIconNode((int64_t)(uint32_t)startConstant, TYP_LONG);
initVal = gtNewLconNode((int64_t)(uint32_t)startConstant);
}
else
{
Expand Down Expand Up @@ -1376,6 +1376,16 @@ class StrengthReductionContext
void AdvanceCursors(ArrayStack<CursorInfo>* cursors, ArrayStack<CursorInfo>* nextCursors);
void ExpandStoredCursors(ArrayStack<CursorInfo>* cursors, ArrayStack<CursorInfo>* otherCursors);
bool CheckAdvancedCursors(ArrayStack<CursorInfo>* cursors, ScevAddRec** nextIV);
ScevAddRec* ComputeRephrasableIV(ScevAddRec* iv1,
bool allowRephrasingByScalingIV1,
ScevAddRec* iv2,
bool allowRephrasingByScalingIV2);
template <typename T>
ScevAddRec* ComputeRephrasableIVByScaling(ScevAddRec* iv1,
bool allowRephrasingByScalingIV1,
ScevAddRec* iv2,
bool allowRephrasingByScalingIV2);
GenTree* RephraseIV(ScevAddRec* iv, ScevAddRec* sourceIV, GenTree* sourceTree);
bool StaysWithinManagedObject(ArrayStack<CursorInfo>* cursors, ScevAddRec* addRec);
bool TryReplaceUsesWithNewPrimaryIV(ArrayStack<CursorInfo>* cursors, ScevAddRec* iv);
BasicBlock* FindUpdateInsertionPoint(ArrayStack<CursorInfo>* cursors, Statement** afterStmt);
Expand Down Expand Up @@ -1509,6 +1519,10 @@ bool StrengthReductionContext::TryStrengthReduce()
break;
}

JITDUMP(" Next IV is: ");
DBEXEC(VERBOSE, nextIV->Dump(m_comp));
JITDUMP("\n");

assert(nextIV != nullptr);

if (varTypeIsGC(nextIV->Type) && !StaysWithinManagedObject(nextCursors, nextIV))
Expand Down Expand Up @@ -1950,6 +1964,30 @@ void StrengthReductionContext::ExpandStoredCursors(ArrayStack<CursorInfo>* curso
}
}

//------------------------------------------------------------------------
// Gcd: Compute the greatest common divisor of two values.
//
// Parameters:
//   a - First value
//   b - Second value
//
// Returns:
//   Greatest common divisor. If one of the values is 0 the other value is
//   returned. When negative values are involved the result may itself be
//   negative (e.g. Gcd(4, -6) yields -2), so callers testing for "no common
//   factor" need to consider both 1 and -1.
//
// Remarks:
//   This is the Euclidean algorithm using the truncating % operator. For
//   signed types "min % -1" overflows, which is undefined behavior in C++
//   (and typically raises a divide fault on x86/x64). Since "b % -1" is
//   mathematically 0 for every b, a divisor of -1 is answered directly
//   without evaluating the remainder. The (a < 0) test keeps this special
//   case from ever triggering for unsigned T, where (T)-1 is the maximum
//   value.
//
template <typename T>
static T Gcd(T a, T b)
{
    while (a != 0)
    {
        if ((a < 0) && (a == (T)-1))
        {
            // b % -1 == 0 for all b, so the loop would end with b == a == -1.
            // Return that directly to avoid overflowing on b == min.
            return a;
        }

        T newA = b % a;
        T newB = a;
        a      = newA;
        b      = newB;
    }

    return b;
}

//------------------------------------------------------------------------
// CheckAdvancedCursors: Check whether the specified advanced cursors still
// represent a valid set of cursors to introduce a new primary IV for.
Expand All @@ -1963,22 +2001,38 @@ void StrengthReductionContext::ExpandStoredCursors(ArrayStack<CursorInfo>* curso
// True if all cursors still represent a common derived IV and would be
// replaceable by a new primary IV computing it.
//
// Remarks:
// This function may remove cursors from m_cursors1 and m_cursors2 if it
// decides to no longer consider some cursors for strength reduction.
//
bool StrengthReductionContext::CheckAdvancedCursors(ArrayStack<CursorInfo>* cursors, ScevAddRec** nextIV)
{
*nextIV = nullptr;
*nextIV = nullptr;
bool allowRephrasingNextIV = true;

for (int i = 0; i < cursors->Height(); i++)
{
CursorInfo& cursor = cursors->BottomRef(i);

if ((cursor.IV != nullptr) && ((*nextIV == nullptr) || Scev::Equals(cursor.IV, *nextIV)))
if (cursor.IV != nullptr)
{
*nextIV = cursor.IV;
continue;
bool allowRephrasingViaScaling = true;
#ifdef TARGET_ARM64
// On arm64 we break address modes if we have to scale, so disallow that.
allowRephrasingViaScaling = !cursor.Tree->IsPartOfAddressMode();
#endif

if (*nextIV == nullptr)
{
*nextIV = cursor.IV;
allowRephrasingNextIV = allowRephrasingViaScaling;
continue;
}

ScevAddRec* rephrasableAddRec =
ComputeRephrasableIV(cursor.IV, allowRephrasingViaScaling, *nextIV, allowRephrasingNextIV);
if (rephrasableAddRec != nullptr)
{
*nextIV = rephrasableAddRec;
allowRephrasingNextIV &= allowRephrasingViaScaling;
continue;
}
}

JITDUMP(" [%d] does not match; will not advance\n", i);
Expand All @@ -1988,6 +2042,174 @@ bool StrengthReductionContext::CheckAdvancedCursors(ArrayStack<CursorInfo>* curs
return *nextIV != nullptr;
}

//------------------------------------------------------------------------
// ComputeRephrasableIVByScaling:
//   Find an IV from which both "iv1" and "iv2" can be obtained by
//   multiplying with a constant. Intended for the case where the step values
//   of the two IVs differ.
//
// Type parameters:
//   T - Integer type in which the start/step constants are interpreted
//
// Parameters:
//   iv1                         - First IV
//   allowRephrasingByScalingIV1 - Whether "iv1" may be expressed as a scaled
//                                 version of the result. If false, the result
//                                 must have the same step as "iv1".
//   iv2                         - Second IV
//   allowRephrasingByScalingIV2 - Same as above, for "iv2"
//
// Returns:
//   "iv1" or "iv2" if its step equals the GCD of the two steps; otherwise a
//   new add-rec with the start of "iv1" and the GCD as its step. Returns
//   nullptr if either start is not the constant 0, if either step is not a
//   constant, if a required scaling was disallowed, or if the GCD is 1 or -1.
//
template <typename T>
ScevAddRec* StrengthReductionContext::ComputeRephrasableIVByScaling(ScevAddRec* iv1,
                                                                    bool        allowRephrasingByScalingIV1,
                                                                    ScevAddRec* iv2,
                                                                    bool        allowRephrasingByScalingIV2)
{
    // Scaling the new IV scales its start value as well, so the rewrite is
    // only valid when both IVs start at 0.
    int64_t startValue;
    if (!iv1->Start->GetConstantValue(m_comp, &startValue) || ((T)startValue != 0))
    {
        return nullptr;
    }

    if (!iv2->Start->GetConstantValue(m_comp, &startValue) || ((T)startValue != 0))
    {
        return nullptr;
    }

    int64_t rawStep1;
    int64_t rawStep2;
    if (!iv1->Step->GetConstantValue(m_comp, &rawStep1))
    {
        return nullptr;
    }

    if (!iv2->Step->GetConstantValue(m_comp, &rawStep2))
    {
        return nullptr;
    }

    T step1      = (T)rawStep1;
    T step2      = (T)rawStep2;
    T commonStep = Gcd(step1, step2);

    bool iv1HasCommonStep = (commonStep == step1);
    bool iv2HasCommonStep = (commonStep == step2);

    // An IV whose step differs from the common step would have to be scaled;
    // bail out if the caller forbade that for this IV.
    if (!iv1HasCommonStep && !allowRephrasingByScalingIV1)
    {
        return nullptr;
    }

    if (!iv2HasCommonStep && !allowRephrasingByScalingIV2)
    {
        return nullptr;
    }

    // Commonly one step value divides the other, in which case an existing
    // IV can be reused as is.
    if (iv1HasCommonStep)
    {
        return iv1;
    }

    if (iv2HasCommonStep)
    {
        return iv2;
    }

    // No common factor beyond the trivial one.
    if ((commonStep == 1) || (commonStep == -1))
    {
        return nullptr;
    }

    return m_scevContext.NewAddRec(iv1->Start, m_scevContext.NewConstant(iv1->Type, commonStep));
}

//------------------------------------------------------------------------
// ComputeRephrasableIV:
//   Find an IV in terms of which both "iv1" and "iv2" can be expressed.
//
// Parameters:
//   iv1                         - First IV
//   allowRephrasingByScalingIV1 - Whether "iv1" may be expressed by scaling
//                                 the returned IV
//   iv2                         - Second IV
//   allowRephrasingByScalingIV2 - Whether "iv2" may be expressed by scaling
//                                 the returned IV
//
// Returns:
//   nullptr if the start values differ. Otherwise "iv1" if the steps are
//   equal. Otherwise, for TYP_INT and TYP_LONG IVs, the result of
//   ComputeRephrasableIVByScaling; nullptr for any other type.
//
ScevAddRec* StrengthReductionContext::ComputeRephrasableIV(ScevAddRec* iv1,
                                                           bool        allowRephrasingByScalingIV1,
                                                           ScevAddRec* iv2,
                                                           bool        allowRephrasingByScalingIV2)
{
    if (!Scev::Equals(iv1->Start, iv2->Start))
    {
        return nullptr;
    }

    if (Scev::Equals(iv1->Step, iv2->Step))
    {
        return iv1;
    }

    // The steps differ. If they share a factor larger than 1 it is still
    // expected to pay off to rewrite both IVs in terms of an IV stepping by
    // that factor.
    switch (iv1->Type)
    {
        case TYP_INT:
            return ComputeRephrasableIVByScaling<int32_t>(iv1, allowRephrasingByScalingIV1, iv2,
                                                          allowRephrasingByScalingIV2);

        case TYP_LONG:
            return ComputeRephrasableIVByScaling<int64_t>(iv1, allowRephrasingByScalingIV1, iv2,
                                                          allowRephrasingByScalingIV2);

        default:
            return nullptr;
    }
}

//------------------------------------------------------------------------
// RephraseIV:
//   Build a tree that computes "iv" from a tree computing "sourceIV". The
//   source IV is required to have been produced by ComputeRephrasableIV, so
//   both IVs share a start value and the step of "iv" is a constant multiple
//   of the step of "sourceIV".
//
// Parameters:
//   iv         - IV to express in terms of the source IV
//   sourceIV   - Source IV
//   sourceTree - Tree computing the source IV
//
// Returns:
//   "sourceTree" itself when the steps are equal; otherwise a new GT_LSH
//   (power-of-two scale) or GT_MUL (any other scale) node applied to
//   "sourceTree".
//
GenTree* StrengthReductionContext::RephraseIV(ScevAddRec* iv, ScevAddRec* sourceIV, GenTree* sourceTree)
{
    assert(Scev::Equals(iv->Start, sourceIV->Start));

    if (Scev::Equals(iv->Step, sourceIV->Step))
    {
        return sourceTree;
    }

    int64_t targetStep = 0;
    int64_t baseStep   = 0;

    bool stepsAreConstant =
        iv->Step->GetConstantValue(m_comp, &targetStep) && sourceIV->Step->GetConstantValue(m_comp, &baseStep);
    if (!stepsAreConstant)
    {
        unreached();
    }

    assert(iv->Type == sourceIV->Type);

    switch (iv->Type)
    {
        case TYP_INT:
        {
            assert((int32_t)targetStep % (int32_t)baseStep == 0);
            int32_t factor = (int32_t)targetStep / (int32_t)baseStep;

            if (!isPow2(factor))
            {
                GenTree* factorNode = m_comp->gtNewIconNode(factor);
                return m_comp->gtNewOperNode(GT_MUL, TYP_INT, sourceTree, factorNode);
            }

            // Power-of-two scale: use a shift instead of a multiply.
            GenTree* shiftNode = m_comp->gtNewIconNode(BitOperations::Log2((uint32_t)factor));
            return m_comp->gtNewOperNode(GT_LSH, TYP_INT, sourceTree, shiftNode);
        }

        case TYP_LONG:
        {
            assert(targetStep % baseStep == 0);
            int64_t factor = targetStep / baseStep;

            if (!isPow2(factor))
            {
                GenTree* factorNode = m_comp->gtNewLconNode(factor);
                return m_comp->gtNewOperNode(GT_MUL, TYP_LONG, sourceTree, factorNode);
            }

            // Power-of-two scale: use a shift instead of a multiply.
            GenTree* shiftNode = m_comp->gtNewLconNode(BitOperations::Log2((uint64_t)factor));
            return m_comp->gtNewOperNode(GT_LSH, TYP_LONG, sourceTree, shiftNode);
        }

        default:
            break;
    }

    // Only TYP_INT and TYP_LONG IVs are handled above.
    unreached();
}

//------------------------------------------------------------------------
// StaysWithinManagedObject: Check whether the specified GC-pointer add-rec can
// be guaranteed to be inside the same managed object for the whole loop.
Expand Down Expand Up @@ -2211,6 +2433,7 @@ bool StrengthReductionContext::TryReplaceUsesWithNewPrimaryIV(ArrayStack<CursorI
{
CursorInfo& cursor = cursors->BottomRef(i);
GenTree* newUse = m_comp->gtNewLclVarNode(newPrimaryIV, iv->Type);
newUse = RephraseIV(cursor.IV, iv, newUse);

JITDUMP(" Replacing use [%06u] with [%06u]. Before:\n", Compiler::dspTreeID(cursor.Tree),
Compiler::dspTreeID(newUse));
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/morph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3402,7 +3402,7 @@ void Compiler::fgMoveOpsLeft(GenTree* tree)
}

// Check for GTF_ADDRMODE_NO_CSE flag on add/mul Binary Operators
if (((oper == GT_ADD) || (oper == GT_MUL)) && ((tree->gtFlags & GTF_ADDRMODE_NO_CSE) != 0))
if (tree->IsPartOfAddressMode())
{
return;
}
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/optcse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1825,7 +1825,7 @@ bool CSE_HeuristicCommon::CanConsiderTree(GenTree* tree, bool isReturn)
case GT_ADD: // Check for ADDRMODE flag on these Binary Operators
case GT_MUL:
case GT_LSH:
if ((tree->gtFlags & GTF_ADDRMODE_NO_CSE) != 0)
if (tree->IsPartOfAddressMode())
{
return false;
}
Expand Down

0 comments on commit 2504912

Please sign in to comment.