Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.

Commit da6ed11

Browse files
authored
Handle addressing modes for HW intrinsics (#22944)
* Handle addressing modes for HW intrinsics. Also, eliminate some places where the code size estimates were over-estimating. Contribute to #19550. Fix #19521.
1 parent aa072b6 commit da6ed11

17 files changed

+650
-247
lines changed

src/jit/codegen.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ class CodeGen : public CodeGenInterface
9595
static bool genShouldRoundFP();
9696

9797
GenTreeIndir indirForm(var_types type, GenTree* base);
98+
GenTreeStoreInd storeIndirForm(var_types type, GenTree* base, GenTree* data);
9899

99100
GenTreeIntCon intForm(var_types type, ssize_t value);
100101

@@ -1040,6 +1041,9 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
10401041

10411042
void genConsumeRegs(GenTree* tree);
10421043
void genConsumeOperands(GenTreeOp* tree);
1044+
#ifdef FEATURE_HW_INTRINSICS
1045+
void genConsumeHWIntrinsicOperands(GenTreeHWIntrinsic* tree);
1046+
#endif // FEATURE_HW_INTRINSICS
10431047
void genEmitGSCookieCheck(bool pushReg);
10441048
void genSetRegToIcon(regNumber reg, ssize_t val, var_types type = TYP_INT, insFlags flags = INS_FLAGS_DONT_CARE);
10451049
void genCodeForShift(GenTree* tree);
@@ -1309,6 +1313,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
13091313

13101314
#if defined(_TARGET_XARCH_)
13111315
void inst_RV_RV_IV(instruction ins, emitAttr size, regNumber reg1, regNumber reg2, unsigned ival);
1316+
void inst_RV_TT_IV(instruction ins, emitAttr attr, regNumber reg1, GenTree* rmOp, int ival);
13121317
#endif
13131318

13141319
void inst_RV_RR(instruction ins, emitAttr size, regNumber reg1, regNumber reg2);

src/jit/codegencommon.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11316,6 +11316,17 @@ GenTreeIndir CodeGen::indirForm(var_types type, GenTree* base)
1131611316
return i;
1131711317
}
1131811318

11319+
//------------------------------------------------------------------------
11320+
// storeIndirForm: Make a temporary store indir we can feed to pattern matching routines
11321+
// in cases where we don't want to instantiate all the indirs that happen.
11322+
//
11323+
GenTreeStoreInd CodeGen::storeIndirForm(var_types type, GenTree* base, GenTree* data)
11324+
{
11325+
GenTreeStoreInd i(type, base, data);
11326+
i.gtRegNum = REG_NA;
11327+
return i;
11328+
}
11329+
1131911330
//------------------------------------------------------------------------
1132011331
// intForm: Make a temporary int we can feed to pattern matching routines
1132111332
// in cases where we don't want to instantiate.

src/jit/codegenlinear.cpp

Lines changed: 68 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1339,12 +1339,27 @@ void CodeGen::genConsumeRegs(GenTree* tree)
13391339
// Update the life of the lcl var.
13401340
genUpdateLife(tree);
13411341
}
1342-
#endif // _TARGET_XARCH_
1343-
else if (tree->OperIsInitVal())
1342+
#ifdef FEATURE_HW_INTRINSICS
1343+
else if (tree->OperIs(GT_HWIntrinsic))
13441344
{
1345-
genConsumeReg(tree->gtGetOp1());
1345+
// Only load/store HW intrinsics can be contained (and the address may also be contained).
1346+
HWIntrinsicCategory category = HWIntrinsicInfo::lookupCategory(tree->AsHWIntrinsic()->gtHWIntrinsicId);
1347+
assert((category == HW_Category_MemoryLoad) || (category == HW_Category_MemoryStore));
1348+
int numArgs = HWIntrinsicInfo::lookupNumArgs(tree->AsHWIntrinsic());
1349+
genConsumeAddress(tree->gtGetOp1());
1350+
if (category == HW_Category_MemoryStore)
1351+
{
1352+
assert((numArgs == 2) && !tree->gtGetOp2()->isContained());
1353+
genConsumeReg(tree->gtGetOp2());
1354+
}
1355+
else
1356+
{
1357+
assert(numArgs == 1);
1358+
}
13461359
}
1347-
else if (tree->OperIsHWIntrinsic())
1360+
#endif // FEATURE_HW_INTRINSICS
1361+
#endif // _TARGET_XARCH_
1362+
else if (tree->OperIsInitVal())
13481363
{
13491364
genConsumeReg(tree->gtGetOp1());
13501365
}
@@ -1374,11 +1389,6 @@ void CodeGen::genConsumeRegs(GenTree* tree)
13741389
// Return Value:
13751390
// None.
13761391
//
1377-
// Notes:
1378-
// Note that this logic is localized here because we must do the liveness update in
1379-
// the correct execution order. This is important because we may have two operands
1380-
// that involve the same lclVar, and if one is marked "lastUse" we must handle it
1381-
// after the first.
13821392

13831393
void CodeGen::genConsumeOperands(GenTreeOp* tree)
13841394
{
@@ -1395,6 +1405,55 @@ void CodeGen::genConsumeOperands(GenTreeOp* tree)
13951405
}
13961406
}
13971407

1408+
#ifdef FEATURE_HW_INTRINSICS
1409+
//------------------------------------------------------------------------
1410+
// genConsumeHWIntrinsicOperands: Do liveness update for the operands of a GT_HWIntrinsic node
1411+
//
1412+
// Arguments:
1413+
// node - the GenTreeHWIntrinsic node whose operands will have their liveness updated.
1414+
//
1415+
// Return Value:
1416+
// None.
1417+
//
1418+
1419+
void CodeGen::genConsumeHWIntrinsicOperands(GenTreeHWIntrinsic* node)
1420+
{
1421+
int numArgs = HWIntrinsicInfo::lookupNumArgs(node);
1422+
GenTree* op1 = node->gtGetOp1();
1423+
if (op1 == nullptr)
1424+
{
1425+
assert((numArgs == 0) && (node->gtGetOp2() == nullptr));
1426+
return;
1427+
}
1428+
if (op1->OperIs(GT_LIST))
1429+
{
1430+
int foundArgs = 0;
1431+
assert(node->gtGetOp2() == nullptr);
1432+
for (GenTreeArgList* list = op1->AsArgList(); list != nullptr; list = list->Rest())
1433+
{
1434+
GenTree* operand = list->Current();
1435+
genConsumeRegs(operand);
1436+
foundArgs++;
1437+
}
1438+
assert(foundArgs == numArgs);
1439+
}
1440+
else
1441+
{
1442+
genConsumeRegs(op1);
1443+
GenTree* op2 = node->gtGetOp2();
1444+
if (op2 != nullptr)
1445+
{
1446+
genConsumeRegs(op2);
1447+
assert(numArgs == 2);
1448+
}
1449+
else
1450+
{
1451+
assert(numArgs == 1);
1452+
}
1453+
}
1454+
}
1455+
#endif // FEATURE_HW_INTRINSICS
1456+
13981457
#if FEATURE_PUT_STRUCT_ARG_STK
13991458
//------------------------------------------------------------------------
14001459
// genConsumePutStructArgStk: Do liveness update for the operands of a PutArgStk node.

src/jit/emitxarch.cpp

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2849,6 +2849,12 @@ void emitter::emitInsLoadInd(instruction ins, emitAttr attr, regNumber dstReg, G
28492849
id->idReg1(dstReg);
28502850
emitHandleMemOp(mem, id, IF_RWR_ARD, ins);
28512851
UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins));
2852+
if (Is4ByteSSEInstruction(ins))
2853+
{
2854+
// The 4-Byte SSE instructions require an additional byte.
2855+
sz += 1;
2856+
}
2857+
28522858
id->idCodeSize(sz);
28532859
dispIns(id);
28542860
emitCurIGsize += sz;
@@ -4037,6 +4043,12 @@ void emitter::emitIns_R_A(instruction ins, emitAttr attr, regNumber reg1, GenTre
40374043
emitHandleMemOp(indir, id, IF_RRW_ARD, ins);
40384044

40394045
UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins));
4046+
if (Is4ByteSSEInstruction(ins))
4047+
{
4048+
// The 4-Byte SSE instructions require an additional byte.
4049+
sz += 1;
4050+
}
4051+
40404052
id->idCodeSize(sz);
40414053

40424054
dispIns(id);
@@ -4088,8 +4100,8 @@ void emitter::emitIns_R_AR_I(instruction ins, emitAttr attr, regNumber reg1, reg
40884100

40894101
if (Is4ByteSSEInstruction(ins))
40904102
{
4091-
// The 4-Byte SSE instructions require two additional bytes
4092-
sz += 2;
4103+
// The 4-Byte SSE instructions require an additional byte.
4104+
sz += 1;
40934105
}
40944106

40954107
id->idCodeSize(sz);
@@ -5165,8 +5177,8 @@ void emitter::emitIns_R_AR(instruction ins, emitAttr attr, regNumber ireg, regNu
51655177

51665178
if (Is4ByteSSEInstruction(ins))
51675179
{
5168-
// The 4-Byte SSE instructions require two additional bytes
5169-
sz += 2;
5180+
// The 4-Byte SSE instructions require an additional byte.
5181+
sz += 1;
51705182
}
51715183

51725184
id->idCodeSize(sz);
@@ -5640,7 +5652,7 @@ void emitter::emitIns_AX_R(instruction ins, emitAttr attr, regNumber ireg, regNu
56405652

56415653
#ifdef FEATURE_HW_INTRINSICS
56425654
//------------------------------------------------------------------------
5643-
// emitIns_SIMD_R_R_I: emits the code for a SIMD instruction that takes a register operand, an immediate operand
5655+
// emitIns_SIMD_R_R_I: emits the code for an instruction that takes a register operand, an immediate operand
56445656
// and that returns a value in a register
56455657
//
56465658
// Arguments:
@@ -5650,6 +5662,13 @@ void emitter::emitIns_AX_R(instruction ins, emitAttr attr, regNumber ireg, regNu
56505662
// op1Reg -- The register of the first operand
56515663
// ival -- The immediate value
56525664
//
5665+
// Notes:
5666+
// This will handle the required register copy if 'op1Reg' and 'targetReg' are not the same, and
5667+
// the 3-operand format is not available.
5668+
// This is not really SIMD-specific, but is currently only used in that context, as that's
5669+
// where we frequently need to handle the case of generating 3-operand or 2-operand forms
5670+
// depending on what target ISA is supported.
5671+
//
56535672
void emitter::emitIns_SIMD_R_R_I(instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, int ival)
56545673
{
56555674
if (UseVEXEncoding() || IsDstSrcImmAvxInstruction(ins))
@@ -5704,20 +5723,22 @@ void emitter::emitIns_SIMD_R_R_A(
57045723
// targetReg -- The target register
57055724
// op1Reg -- The register of the first operand
57065725
// base -- The base register used for the memory address
5726+
// offset -- The memory offset
57075727
//
5708-
void emitter::emitIns_SIMD_R_R_AR(instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber base)
5728+
void emitter::emitIns_SIMD_R_R_AR(
5729+
instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber base, int offset)
57095730
{
57105731
if (UseVEXEncoding())
57115732
{
5712-
emitIns_R_R_AR(ins, attr, targetReg, op1Reg, base, 0);
5733+
emitIns_R_R_AR(ins, attr, targetReg, op1Reg, base, offset);
57135734
}
57145735
else
57155736
{
57165737
if (op1Reg != targetReg)
57175738
{
57185739
emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
57195740
}
5720-
emitIns_R_AR(ins, attr, targetReg, base, 0);
5741+
emitIns_R_AR(ins, attr, targetReg, base, offset);
57215742
}
57225743
}
57235744

src/jit/emitxarch.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -449,7 +449,8 @@ void emitIns_AX_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg,
449449
void emitIns_SIMD_R_R_I(instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, int ival);
450450

451451
void emitIns_SIMD_R_R_A(instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, GenTreeIndir* indir);
452-
void emitIns_SIMD_R_R_AR(instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber base);
452+
void emitIns_SIMD_R_R_AR(
453+
instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber base, int offset);
453454
void emitIns_SIMD_R_R_C(
454455
instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, CORINFO_FIELD_HANDLE fldHnd, int offs);
455456
void emitIns_SIMD_R_R_R(instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg);

src/jit/gentree.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3510,6 +3510,26 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree)
35103510
costSz = 2 * 2;
35113511
break;
35123512

3513+
#if defined(FEATURE_HW_INTRINSICS) && defined(_TARGET_XARCH_)
3514+
case GT_HWIntrinsic:
3515+
{
3516+
if (tree->AsHWIntrinsic()->OperIsMemoryLoadOrStore())
3517+
{
3518+
costEx = IND_COST_EX;
3519+
costSz = 2;
3520+
// See if we can form a complex addressing mode.
3521+
3522+
GenTree* addr = op1->gtEffectiveVal();
3523+
3524+
if (addr->OperIs(GT_ADD) && gtMarkAddrMode(addr, &costEx, &costSz, tree->TypeGet()))
3525+
{
3526+
goto DONE;
3527+
}
3528+
}
3529+
}
3530+
break;
3531+
#endif // FEATURE_HW_INTRINSICS && _TARGET_XARCH_
3532+
35133533
case GT_BLK:
35143534
case GT_IND:
35153535

0 commit comments

Comments
 (0)