Skip to content

Commit a81f4bc

Browse files
authored
ARM64 - Optimizing a % b operations part 2 (#66407)
* Initial work for ARM64 mod optimization * Updated comment * Updated comment * Updated comment * Fixing build * Remove uneeded var * Use '%' morph logic for both x64/arm64 * Adding back in divisor check for x64 * Formatting * Update comments * Update comments * Fixing * Updated comment * Updated comment * Tweaking x64 transformation logic for the mod opt * Tweaking x64 transformation logic for the mod opt * Using IntCon * Fixed build * Minor tweak * Fixing x64 diffs * Removing flag set * Optimizing signed 'mod' for ARM64 when op2 is pow2 * Updated comments * Updated comments * Updated comments * Updated comments * Updated comments * Updated comments * Feedback * Merging and fixing test failure by lowering node rather than marking it as contained * Fixing build * Feedback * minor updates * minor updates * minor updates * Fixing tests * Fixing tests * Fixing tests * Formatting * Fixing tests * Using AsIntConCommonet()->IntegralValue() * Feedback * Fixing build * Fixing tests * Fixing tests * Fixing tests * Adding tests * Formatting * Renamed GT_CS_NEG_MI to GT_CSNEG_MI * Updated comments, fixed test * Formatting * Update codegenarm64.cpp * Do not use IsIntegralConstAbsPow2 for the check. Use IsIntegralConstPow2 * Update comment
1 parent 3b6a539 commit a81f4bc

File tree

11 files changed

+253
-10
lines changed

11 files changed

+253
-10
lines changed

src/coreclr/jit/codegen.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1381,6 +1381,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
13811381
void genCodeForMsub(GenTreeOp* tree);
13821382
void genCodeForBfiz(GenTreeOp* tree);
13831383
void genCodeForAddEx(GenTreeOp* tree);
1384+
void genCodeForCond(GenTreeOp* tree);
13841385
#endif // TARGET_ARM64
13851386

13861387
#if defined(FEATURE_EH_FUNCLETS)

src/coreclr/jit/codegenarm64.cpp

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3039,6 +3039,18 @@ void CodeGen::genCodeForNegNot(GenTree* tree)
30393039
regNumber targetReg = tree->GetRegNum();
30403040
instruction ins = genGetInsForOper(tree->OperGet(), targetType);
30413041

3042+
if ((tree->gtFlags & GTF_SET_FLAGS) != 0)
3043+
{
3044+
switch (tree->OperGet())
3045+
{
3046+
case GT_NEG:
3047+
ins = INS_negs;
3048+
break;
3049+
default:
3050+
noway_assert(!"Unexpected UnaryOp with GTF_SET_FLAGS set");
3051+
}
3052+
}
3053+
30423054
// The arithmetic node must be sitting in a register (since it's not contained)
30433055
assert(!tree->isContained());
30443056
// The dst can only be a register.
@@ -10190,4 +10202,40 @@ void CodeGen::genCodeForAddEx(GenTreeOp* tree)
1019010202
genProduceReg(tree);
1019110203
}
1019210204

10205+
//------------------------------------------------------------------------
10206+
// genCodeForCond: Generates the code sequence for a GenTree node that
10207+
// represents a conditional instruction.
10208+
//
10209+
// Arguments:
10210+
// tree - conditional op
10211+
//
10212+
void CodeGen::genCodeForCond(GenTreeOp* tree)
10213+
{
10214+
assert(tree->OperIs(GT_CSNEG_MI));
10215+
assert(!(tree->gtFlags & GTF_SET_FLAGS) && (tree->gtFlags & GTF_USE_FLAGS));
10216+
genConsumeOperands(tree);
10217+
10218+
instruction ins;
10219+
insCond cond;
10220+
switch (tree->OperGet())
10221+
{
10222+
case GT_CSNEG_MI:
10223+
{
10224+
ins = INS_csneg;
10225+
cond = INS_COND_MI;
10226+
break;
10227+
}
10228+
10229+
default:
10230+
unreached();
10231+
}
10232+
10233+
regNumber dstReg = tree->GetRegNum();
10234+
regNumber op1Reg = tree->gtGetOp1()->GetRegNum();
10235+
regNumber op2Reg = tree->gtGetOp2()->GetRegNum();
10236+
10237+
GetEmitter()->emitIns_R_R_R_COND(ins, emitActualTypeSize(tree), dstReg, op1Reg, op2Reg, cond);
10238+
genProduceReg(tree);
10239+
}
10240+
1019310241
#endif // TARGET_ARM64

src/coreclr/jit/codegenarmarch.cpp

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,10 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
329329
case GT_BFIZ:
330330
genCodeForBfiz(treeNode->AsOp());
331331
break;
332+
333+
case GT_CSNEG_MI:
334+
genCodeForCond(treeNode->AsOp());
335+
break;
332336
#endif // TARGET_ARM64
333337

334338
case GT_JMP:
@@ -550,14 +554,7 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
550554

551555
default:
552556
{
553-
#ifdef DEBUG
554-
char message[256];
555-
_snprintf_s(message, ArrLen(message), _TRUNCATE, "NYI: Unimplemented node type %s",
556-
GenTree::OpName(treeNode->OperGet()));
557-
NYIRAW(message);
558-
#else
559-
NYI("unimplemented node");
560-
#endif
557+
unreached();
561558
}
562559
break;
563560
}

src/coreclr/jit/gentree.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2075,6 +2075,8 @@ struct GenTree
20752075

20762076
inline bool IsIntegralConst() const;
20772077

2078+
inline bool IsIntegralConstPow2() const;
2079+
20782080
inline bool IsIntegralConstUnsignedPow2() const;
20792081

20802082
inline bool IsIntegralConstAbsPow2() const;
@@ -8454,6 +8456,24 @@ inline bool GenTree::IsIntegralConst() const
84548456
#endif // !TARGET_64BIT
84558457
}
84568458

8459+
//-------------------------------------------------------------------------
8460+
// IsIntegralConstPow2: Determines whether an integral constant is
8461+
// the power of 2.
8462+
//
8463+
// Return Value:
8464+
// Returns true if the GenTree's integral constant
8465+
// is the power of 2.
8466+
//
8467+
inline bool GenTree::IsIntegralConstPow2() const
8468+
{
8469+
if (IsIntegralConst())
8470+
{
8471+
return isPow2(AsIntConCommon()->IntegralValue());
8472+
}
8473+
8474+
return false;
8475+
}
8476+
84578477
//-------------------------------------------------------------------------
84588478
// IsIntegralConstUnsignedPow2: Determines whether the unsigned value of
84598479
// an integral constant is the power of 2.

src/coreclr/jit/gtlist.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,7 @@ GTNODE(MSUB , GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR) // Generat
227227
// enabling it for both armarch and xarch for floating-point MSUB "unsafe" math.
228228
GTNODE(ADDEX, GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR) // Add with sign/zero extension.
229229
GTNODE(BFIZ , GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR) // Bitfield Insert in Zero.
230+
GTNODE(CSNEG_MI , GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR) // Conditional select, negate, minus result
230231
#endif
231232

232233
//-----------------------------------------------------------------------------

src/coreclr/jit/lower.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5861,6 +5861,10 @@ GenTree* Lowering::LowerConstIntDivOrMod(GenTree* node)
58615861
assert(!"unreachable: integral GT_DIV/GT_MOD should get morphed into helper calls");
58625862
#endif // USE_HELPERS_FOR_INT_DIV
58635863
#if defined(TARGET_ARM64)
5864+
if (divMod->OperIs(GT_MOD) && divisor->IsIntegralConstPow2())
5865+
{
5866+
return LowerModPow2(node);
5867+
}
58645868
assert(node->OperGet() != GT_MOD);
58655869
#endif // TARGET_ARM64
58665870

src/coreclr/jit/lower.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -353,6 +353,7 @@ class Lowering final : public Phase
353353
#elif defined(TARGET_ARM64)
354354
bool IsValidConstForMovImm(GenTreeHWIntrinsic* node);
355355
void LowerHWIntrinsicFusedMultiplyAddScalar(GenTreeHWIntrinsic* node);
356+
GenTree* LowerModPow2(GenTree* node);
356357
#endif // !TARGET_XARCH && !TARGET_ARM64
357358

358359
union VectorConstant {

src/coreclr/jit/lowerarmarch.cpp

Lines changed: 90 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -610,6 +610,95 @@ void Lowering::LowerRotate(GenTree* tree)
610610
ContainCheckShiftRotate(tree->AsOp());
611611
}
612612

613+
#ifdef TARGET_ARM64
614+
//------------------------------------------------------------------------
615+
// LowerModPow2: Lower GT_MOD if the second operand is a constant power of 2.
616+
//
617+
// Arguments:
618+
// tree - the node to lower
619+
//
620+
// Return Value:
621+
// A new tree node if it changed.
622+
//
623+
// Notes:
624+
// {expr} % {cns}
625+
// Logically turns into:
626+
// let a = {expr}
627+
// if a > 0 then (a & ({cns} - 1)) else -(-a & ({cns} - 1))
628+
// which then turns into:
629+
// and reg1, reg0, #({cns} - 1)
630+
// negs reg0, reg0
631+
// and reg0, reg0, #({cns} - 1)
632+
// csneg reg0, reg1, reg0, mi
633+
// TODO: We could do this optimization in morph but we do not have
634+
// a conditional select op in HIR. At some point, we may
635+
// introduce such an op.
636+
GenTree* Lowering::LowerModPow2(GenTree* node)
637+
{
638+
assert(node->OperIs(GT_MOD));
639+
GenTree* mod = node;
640+
GenTree* dividend = mod->gtGetOp1();
641+
GenTree* divisor = mod->gtGetOp2();
642+
643+
assert(divisor->IsIntegralConstPow2());
644+
645+
const var_types type = mod->TypeGet();
646+
assert((type == TYP_INT) || (type == TYP_LONG));
647+
648+
LIR::Use use;
649+
if (!BlockRange().TryGetUse(node, &use))
650+
{
651+
return nullptr;
652+
}
653+
654+
ssize_t cnsValue = static_cast<ssize_t>(divisor->AsIntConCommon()->IntegralValue()) - 1;
655+
656+
BlockRange().Remove(divisor);
657+
658+
// We need to use the dividend node multiple times so its value needs to be
659+
// computed once and stored in a temp variable.
660+
LIR::Use opDividend(BlockRange(), &mod->AsOp()->gtOp1, mod);
661+
dividend = ReplaceWithLclVar(opDividend);
662+
663+
GenTree* dividend2 = comp->gtClone(dividend);
664+
BlockRange().InsertAfter(dividend, dividend2);
665+
666+
GenTreeIntCon* cns = comp->gtNewIconNode(cnsValue, type);
667+
BlockRange().InsertAfter(dividend2, cns);
668+
669+
GenTree* const trueExpr = comp->gtNewOperNode(GT_AND, type, dividend, cns);
670+
BlockRange().InsertAfter(cns, trueExpr);
671+
LowerNode(trueExpr);
672+
673+
GenTree* const neg = comp->gtNewOperNode(GT_NEG, type, dividend2);
674+
neg->gtFlags |= GTF_SET_FLAGS;
675+
BlockRange().InsertAfter(trueExpr, neg);
676+
677+
GenTreeIntCon* cns2 = comp->gtNewIconNode(cnsValue, type);
678+
BlockRange().InsertAfter(neg, cns2);
679+
680+
GenTree* const falseExpr = comp->gtNewOperNode(GT_AND, type, neg, cns2);
681+
BlockRange().InsertAfter(cns2, falseExpr);
682+
LowerNode(falseExpr);
683+
684+
GenTree* const cc = comp->gtNewOperNode(GT_CSNEG_MI, type, trueExpr, falseExpr);
685+
cc->gtFlags |= GTF_USE_FLAGS;
686+
687+
JITDUMP("Lower: optimize X MOD POW2");
688+
DISPNODE(mod);
689+
JITDUMP("to:\n");
690+
DISPNODE(cc);
691+
692+
BlockRange().InsertBefore(mod, cc);
693+
ContainCheckNode(cc);
694+
BlockRange().Remove(mod);
695+
696+
use.ReplaceWith(cc);
697+
698+
return cc->gtNext;
699+
}
700+
#endif
701+
613702
#ifdef FEATURE_HW_INTRINSICS
614703

615704
//----------------------------------------------------------------------------------------------
@@ -1723,7 +1812,7 @@ void Lowering::ContainCheckMul(GenTreeOp* node)
17231812
//
17241813
void Lowering::ContainCheckDivOrMod(GenTreeOp* node)
17251814
{
1726-
assert(node->OperIs(GT_DIV, GT_UDIV));
1815+
assert(node->OperIs(GT_DIV, GT_UDIV, GT_MOD));
17271816

17281817
// ARM doesn't have a div instruction with an immediate operand
17291818
}

src/coreclr/jit/morph.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11652,7 +11652,10 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac)
1165211652
}
1165311653
// ARM64 architecture manual suggests this transformation
1165411654
// for the mod operator.
11655-
else
11655+
// However, we do skip this optimization for ARM64 if the second operand
11656+
// is an integral constant power of 2 because there is an even better
11657+
// optimization in lowering that is specific for ARM64.
11658+
else if (!(tree->OperIs(GT_MOD) && op2->IsIntegralConstPow2()))
1165611659
#else
1165711660
// XARCH only applies this transformation if we know
1165811661
// that magic division will be used - which is determined
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
using System;
5+
using System.Runtime.CompilerServices;
6+
7+
class Program
8+
{
9+
[MethodImpl(MethodImplOptions.NoInlining)]
10+
static uint PerformMod_1(uint i)
11+
{
12+
return i % 8;
13+
}
14+
15+
[MethodImpl(MethodImplOptions.NoInlining)]
16+
static int PerformMod_2(int i)
17+
{
18+
return i % 16;
19+
}
20+
21+
[MethodImpl(MethodImplOptions.NoInlining)]
22+
static int PerformMod_3(int i, int j)
23+
{
24+
return i % j;
25+
}
26+
27+
[MethodImpl(MethodImplOptions.NoInlining)]
28+
static int MSUB(int a, int b, int c)
29+
{
30+
return a - b * c;
31+
}
32+
33+
static int Main(string[] args)
34+
{
35+
var result = 100;
36+
37+
if (PerformMod_1(23) != 7)
38+
{
39+
result = -1;
40+
Console.WriteLine("Failed Mod1!");
41+
}
42+
43+
if (PerformMod_2(-23) != -7)
44+
{
45+
result = -1;
46+
Console.WriteLine("Failed Mod2!");
47+
}
48+
49+
if (PerformMod_3(23, 8) != 7)
50+
{
51+
result = -1;
52+
Console.WriteLine("Failed Mod3!");
53+
}
54+
55+
if (MSUB(3, 7, 8) != -53)
56+
{
57+
result = -1;
58+
Console.WriteLine("Failed MSUB");
59+
}
60+
61+
return result;
62+
}
63+
}
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
2+
<PropertyGroup>
3+
<OutputType>Exe</OutputType>
4+
</PropertyGroup>
5+
<PropertyGroup>
6+
<DebugType />
7+
<Optimize>True</Optimize>
8+
<AllowUnsafeBlocks>True</AllowUnsafeBlocks>
9+
</PropertyGroup>
10+
<ItemGroup>
11+
<Compile Include="$(MSBuildProjectName).cs" />
12+
</ItemGroup>
13+
<ItemGroup>
14+
<ProjectReference Include="$(TestSourceDir)Common/CoreCLRTestLibrary/CoreCLRTestLibrary.csproj" />
15+
</ItemGroup>
16+
</Project>

0 commit comments

Comments
 (0)