Skip to content

Commit 8273939

Browse files
Implement GT_AND_NOT for ARM/ARM64 (#59881)
* Move late arithmetic to its own section in gtlist * Implement AND_NOT for AArch * Delete the unnecessary platform-specific methods They were a leftover from some previous work. * Mention the SIMD origins of AND_NOT
1 parent b344bb5 commit 8273939

File tree

9 files changed

+93
-28
lines changed

9 files changed

+93
-28
lines changed

src/coreclr/jit/codegenarm.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -313,8 +313,8 @@ void CodeGen::genCodeForBinary(GenTreeOp* treeNode)
313313
var_types targetType = treeNode->TypeGet();
314314
emitter* emit = GetEmitter();
315315

316-
assert(oper == GT_ADD || oper == GT_SUB || oper == GT_MUL || oper == GT_ADD_LO || oper == GT_ADD_HI ||
317-
oper == GT_SUB_LO || oper == GT_SUB_HI || oper == GT_OR || oper == GT_XOR || oper == GT_AND);
316+
assert(treeNode->OperIs(GT_ADD, GT_SUB, GT_MUL, GT_ADD_LO, GT_ADD_HI, GT_SUB_LO, GT_SUB_HI, GT_OR, GT_XOR, GT_AND,
317+
GT_AND_NOT));
318318

319319
GenTree* op1 = treeNode->gtGetOp1();
320320
GenTree* op2 = treeNode->gtGetOp2();
@@ -671,6 +671,9 @@ instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type)
671671
case GT_AND:
672672
ins = INS_AND;
673673
break;
674+
case GT_AND_NOT:
675+
ins = INS_bic;
676+
break;
674677
case GT_MUL:
675678
ins = INS_MUL;
676679
break;

src/coreclr/jit/codegenarm64.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1817,7 +1817,7 @@ void CodeGen::genCodeForMulHi(GenTreeOp* treeNode)
18171817
genProduceReg(treeNode);
18181818
}
18191819

1820-
// Generate code for ADD, SUB, MUL, DIV, UDIV, AND, OR and XOR
1820+
// Generate code for ADD, SUB, MUL, DIV, UDIV, AND, AND_NOT, OR and XOR
18211821
// This method is expected to have called genConsumeOperands() before calling it.
18221822
void CodeGen::genCodeForBinary(GenTreeOp* treeNode)
18231823
{
@@ -1826,8 +1826,7 @@ void CodeGen::genCodeForBinary(GenTreeOp* treeNode)
18261826
var_types targetType = treeNode->TypeGet();
18271827
emitter* emit = GetEmitter();
18281828

1829-
assert(oper == GT_ADD || oper == GT_SUB || oper == GT_MUL || oper == GT_DIV || oper == GT_UDIV || oper == GT_AND ||
1830-
oper == GT_OR || oper == GT_XOR);
1829+
assert(treeNode->OperIs(GT_ADD, GT_SUB, GT_MUL, GT_DIV, GT_UDIV, GT_AND, GT_AND_NOT, GT_OR, GT_XOR));
18311830

18321831
GenTree* op1 = treeNode->gtGetOp1();
18331832
GenTree* op2 = treeNode->gtGetOp2();
@@ -1846,6 +1845,9 @@ void CodeGen::genCodeForBinary(GenTreeOp* treeNode)
18461845
case GT_AND:
18471846
ins = INS_ands;
18481847
break;
1848+
case GT_AND_NOT:
1849+
ins = INS_bics;
1850+
break;
18491851
default:
18501852
noway_assert(!"Unexpected BinaryOp with GTF_SET_FLAGS set");
18511853
}
@@ -3119,6 +3121,9 @@ instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type)
31193121
case GT_AND:
31203122
ins = INS_and;
31213123
break;
3124+
case GT_AND_NOT:
3125+
ins = INS_bic;
3126+
break;
31223127
case GT_DIV:
31233128
ins = INS_sdiv;
31243129
break;

src/coreclr/jit/codegenarmarch.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,7 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
213213
case GT_OR:
214214
case GT_XOR:
215215
case GT_AND:
216+
case GT_AND_NOT:
216217
assert(varTypeIsIntegralOrI(treeNode));
217218

218219
FALLTHROUGH;

src/coreclr/jit/emitarm.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8095,7 +8095,7 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst,
80958095
if (dst->gtSetFlags())
80968096
{
80978097
assert((ins == INS_add) || (ins == INS_adc) || (ins == INS_sub) || (ins == INS_sbc) || (ins == INS_and) ||
8098-
(ins == INS_orr) || (ins == INS_eor) || (ins == INS_orn));
8098+
(ins == INS_orr) || (ins == INS_eor) || (ins == INS_orn) || (ins == INS_bic));
80998099
flags = INS_FLAGS_SET;
81008100
}
81018101

src/coreclr/jit/gtlist.h

Lines changed: 25 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -126,32 +126,12 @@ GTNODE(UMOD , GenTreeOp ,0,GTK_BINOP)
126126
GTNODE(OR , GenTreeOp ,1,(GTK_BINOP|GTK_LOGOP))
127127
GTNODE(XOR , GenTreeOp ,1,(GTK_BINOP|GTK_LOGOP))
128128
GTNODE(AND , GenTreeOp ,1,(GTK_BINOP|GTK_LOGOP))
129-
GTNODE(AND_NOT , GenTreeOp ,0,GTK_BINOP)
130129

131130
GTNODE(LSH , GenTreeOp ,0,GTK_BINOP)
132131
GTNODE(RSH , GenTreeOp ,0,GTK_BINOP)
133132
GTNODE(RSZ , GenTreeOp ,0,GTK_BINOP)
134133
GTNODE(ROL , GenTreeOp ,0,GTK_BINOP)
135134
GTNODE(ROR , GenTreeOp ,0,GTK_BINOP)
136-
GTNODE(INC_SATURATE , GenTreeOp ,0,GTK_UNOP) // saturating increment, used in division by a constant (LowerUnsignedDivOrMod)
137-
138-
// Returns high bits (top N bits of the 2N bit result of an NxN multiply)
139-
// GT_MULHI is used in division by a constant (LowerUnsignedDivOrMod). We turn
140-
// the div into a MULHI + some adjustments. In codegen, we only use the
141-
// results of the high register, and we drop the low results.
142-
GTNODE(MULHI , GenTreeOp ,1,GTK_BINOP)
143-
144-
// A mul that returns the 2N bit result of an NxN multiply. This op is used for
145-
// multiplies that take two ints and return a long result. For 32 bit targets,
146-
// all other multiplies with long results are morphed into helper calls.
147-
// It is similar to GT_MULHI, the difference being that GT_MULHI drops the lo
148-
// part of the result, whereas GT_MUL_LONG keeps both parts of the result.
149-
// MUL_LONG is also used on ARM64, where 64 bit multiplication is more expensive.
150-
#if !defined(TARGET_64BIT)
151-
GTNODE(MUL_LONG , GenTreeMultiRegOp ,1,GTK_BINOP)
152-
#elif defined(TARGET_ARM64)
153-
GTNODE(MUL_LONG , GenTreeOp ,1,GTK_BINOP)
154-
#endif
155135

156136
GTNODE(ASG , GenTreeOp ,0,(GTK_BINOP|GTK_NOTLIR))
157137
GTNODE(EQ , GenTreeOp ,0,(GTK_BINOP|GTK_RELOP))
@@ -220,6 +200,31 @@ GTNODE(SIMD , GenTreeSIMD ,0,(GTK_BINOP|GTK_EXOP)) // SIM
220200
GTNODE(HWINTRINSIC , GenTreeHWIntrinsic ,0,(GTK_BINOP|GTK_EXOP)) // hardware intrinsics
221201
#endif // FEATURE_HW_INTRINSICS
222202

203+
//-----------------------------------------------------------------------------
204+
// Backend-specific arithmetic nodes:
205+
//-----------------------------------------------------------------------------
206+
207+
GTNODE(INC_SATURATE , GenTreeOp ,0,GTK_UNOP) // saturating increment, used in division by a constant (LowerUnsignedDivOrMod)
208+
209+
// Returns high bits (top N bits of the 2N bit result of an NxN multiply)
210+
// GT_MULHI is used in division by a constant (LowerUnsignedDivOrMod). We turn
211+
// the div into a MULHI + some adjustments. In codegen, we only use the
212+
// results of the high register, and we drop the low results.
213+
GTNODE(MULHI , GenTreeOp ,1,GTK_BINOP)
214+
215+
// A mul that returns the 2N bit result of an NxN multiply. This op is used for
216+
// multiplies that take two ints and return a long result. For 32 bit targets,
217+
// all other multiplies with long results are morphed into helper calls.
218+
// It is similar to GT_MULHI, the difference being that GT_MULHI drops the lo
219+
// part of the result, whereas GT_MUL_LONG keeps both parts of the result.
220+
// MUL_LONG is also used on ARM64, where 64 bit multiplication is more expensive.
221+
#if !defined(TARGET_64BIT)
222+
GTNODE(MUL_LONG , GenTreeMultiRegOp ,1,GTK_BINOP)
223+
#elif defined(TARGET_ARM64)
224+
GTNODE(MUL_LONG , GenTreeOp ,1,GTK_BINOP)
225+
#endif
226+
// AndNot - emitted on ARM/ARM64 as the BIC instruction. Also used for creating AndNot HWINTRINSIC vector nodes in a cross-ISA manner.
227+
GTNODE(AND_NOT , GenTreeOp ,0,GTK_BINOP)
223228
//-----------------------------------------------------------------------------
224229
// LIR specific compare and conditional branch/set nodes:
225230
//-----------------------------------------------------------------------------

src/coreclr/jit/lower.cpp

Lines changed: 50 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -139,8 +139,7 @@ GenTree* Lowering::LowerNode(GenTree* node)
139139
case GT_AND:
140140
case GT_OR:
141141
case GT_XOR:
142-
ContainCheckBinary(node->AsOp());
143-
break;
142+
return LowerBinaryArithmetic(node->AsOp());
144143

145144
case GT_MUL:
146145
case GT_MULHI:
@@ -5104,6 +5103,55 @@ GenTree* Lowering::LowerAdd(GenTreeOp* node)
51045103
return nullptr;
51055104
}
51065105

5106+
//------------------------------------------------------------------------
5107+
// LowerBinaryArithmetic: lowers the given binary arithmetic node.
5108+
//
5109+
// Recognizes opportunities for using target-independent "combined" nodes
5110+
// (currently AND_NOT on ARMArch). Performs containment checks.
5111+
//
5112+
// Arguments:
5113+
// node - the arithmetic node to lower
5114+
//
5115+
// Returns:
5116+
// The next node to lower.
5117+
//
5118+
GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* node)
5119+
{
5120+
// TODO-CQ-XArch: support BMI1 "andn" in codegen and condition
5121+
// this logic on the support for the instruction set on XArch.
5122+
CLANG_FORMAT_COMMENT_ANCHOR;
5123+
5124+
#ifdef TARGET_ARMARCH
5125+
if (comp->opts.OptimizationEnabled() && node->OperIs(GT_AND))
5126+
{
5127+
GenTree* opNode = nullptr;
5128+
GenTree* notNode = nullptr;
5129+
if (node->gtGetOp1()->OperIs(GT_NOT))
5130+
{
5131+
notNode = node->gtGetOp1();
5132+
opNode = node->gtGetOp2();
5133+
}
5134+
else if (node->gtGetOp2()->OperIs(GT_NOT))
5135+
{
5136+
notNode = node->gtGetOp2();
5137+
opNode = node->gtGetOp1();
5138+
}
5139+
5140+
if (notNode != nullptr)
5141+
{
5142+
node->gtOp1 = opNode;
5143+
node->gtOp2 = notNode->AsUnOp()->gtGetOp1();
5144+
node->ChangeOper(GT_AND_NOT);
5145+
BlockRange().Remove(notNode);
5146+
}
5147+
}
5148+
#endif // TARGET_ARMARCH
5149+
5150+
ContainCheckBinary(node);
5151+
5152+
return node->gtNext;
5153+
}
5154+
51075155
//------------------------------------------------------------------------
51085156
// LowerUnsignedDivOrMod: Lowers a GT_UDIV/GT_UMOD node.
51095157
//

src/coreclr/jit/lower.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -297,6 +297,7 @@ class Lowering final : public Phase
297297
void LowerStoreIndir(GenTreeStoreInd* node);
298298
GenTree* LowerAdd(GenTreeOp* node);
299299
GenTree* LowerMul(GenTreeOp* mul);
300+
GenTree* LowerBinaryArithmetic(GenTreeOp* node);
300301
bool LowerUnsignedDivOrMod(GenTreeOp* divMod);
301302
GenTree* LowerConstIntDivOrMod(GenTree* node);
302303
GenTree* LowerSignedDivOrMod(GenTree* node);

src/coreclr/jit/lsraarm.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -368,6 +368,7 @@ int LinearScan::BuildNode(GenTree* tree)
368368
FALLTHROUGH;
369369

370370
case GT_AND:
371+
case GT_AND_NOT:
371372
case GT_OR:
372373
case GT_XOR:
373374
case GT_LSH:

src/coreclr/jit/lsraarm64.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,7 @@ int LinearScan::BuildNode(GenTree* tree)
268268
FALLTHROUGH;
269269

270270
case GT_AND:
271+
case GT_AND_NOT:
271272
case GT_OR:
272273
case GT_XOR:
273274
case GT_LSH:

0 commit comments

Comments
 (0)