Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.

Commit 407cc7d

Browse files
committed
[WIP] Handle addressing modes for HW intrinsics
Contribute to #19550 Fix #19521
1 parent 557eac6 commit 407cc7d

16 files changed

+516
-226
lines changed

src/jit/codegen.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ class CodeGen : public CodeGenInterface
9595
static bool genShouldRoundFP();
9696

9797
GenTreeIndir indirForm(var_types type, GenTree* base);
98+
GenTreeStoreInd storeIndirForm(var_types type, GenTree* base, GenTree* data);
9899

99100
GenTreeIntCon intForm(var_types type, ssize_t value);
100101

@@ -1042,6 +1043,9 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
10421043

10431044
void genConsumeRegs(GenTree* tree);
10441045
void genConsumeOperands(GenTreeOp* tree);
1046+
#ifdef FEATURE_HW_INTRINSICS
1047+
void genConsumeHWIntrinsicOperands(GenTreeHWIntrinsic* tree);
1048+
#endif // FEATURE_HW_INTRINSICS
10451049
void genEmitGSCookieCheck(bool pushReg);
10461050
void genSetRegToIcon(regNumber reg, ssize_t val, var_types type = TYP_INT, insFlags flags = INS_FLAGS_DONT_CARE);
10471051
void genCodeForShift(GenTree* tree);

src/jit/codegencommon.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11373,6 +11373,17 @@ GenTreeIndir CodeGen::indirForm(var_types type, GenTree* base)
1137311373
return i;
1137411374
}
1137511375

11376+
//------------------------------------------------------------------------
11377+
// storeIndirForm: Make a temporary store indir we can feed to pattern matching routines
11378+
// in cases where we don't want to instantiate all the indirs that happen.
11379+
//
// Arguments:
//    type, base, data - forwarded unchanged to the GenTreeStoreInd constructor.
//
// Return Value:
//    The GenTreeStoreInd built as a local and returned by value, with its
//    gtRegNum set to REG_NA.
//
11380+
GenTreeStoreInd CodeGen::storeIndirForm(var_types type, GenTree* base, GenTree* data)
11381+
{
11382+
GenTreeStoreInd i(type, base, data);
11383+
// The temporary node has no register assigned.
i.gtRegNum = REG_NA;
11384+
return i;
11385+
}
11386+
1137611387
//------------------------------------------------------------------------
1137711388
// intForm: Make a temporary int we can feed to pattern matching routines
1137811389
// in cases where we don't want to instantiate.

src/jit/codegenlinear.cpp

Lines changed: 58 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1333,6 +1333,25 @@ void CodeGen::genConsumeRegs(GenTree* tree)
13331333
// Update the life of the lcl var.
13341334
genUpdateLife(tree);
13351335
}
1336+
#ifdef FEATURE_HW_INTRINSICS
1337+
else if (tree->OperIs(GT_HWIntrinsic))
1338+
{
1339+
// Only load/store HW intrinsics can be contained (and the address may also be contained).
1340+
HWIntrinsicCategory category = HWIntrinsicInfo::lookupCategory(tree->AsHWIntrinsic()->gtHWIntrinsicId);
1341+
assert((category == HW_Category_MemoryLoad) || (category == HW_Category_MemoryStore));
1342+
int numArgs = HWIntrinsicInfo::lookupNumArgs(tree->AsHWIntrinsic());
1343+
genConsumeAddress(tree->gtGetOp1());
1344+
if (category == HW_Category_MemoryStore)
1345+
{
1346+
assert((numArgs == 2) && !tree->gtGetOp2()->isContained());
1347+
genConsumeReg(tree->gtGetOp2());
1348+
}
1349+
else
1350+
{
1351+
assert(numArgs == 1);
1352+
}
1353+
}
1354+
#endif // FEATURE_HW_INTRINSICS
13361355
#endif // _TARGET_XARCH_
13371356
else if (tree->OperIsInitVal())
13381357
{
@@ -1368,11 +1387,6 @@ void CodeGen::genConsumeRegs(GenTree* tree)
13681387
// Return Value:
13691388
// None.
13701389
//
1371-
// Notes:
1372-
// Note that this logic is localized here because we must do the liveness update in
1373-
// the correct execution order. This is important because we may have two operands
1374-
// that involve the same lclVar, and if one is marked "lastUse" we must handle it
1375-
// after the first.
13761390

13771391
void CodeGen::genConsumeOperands(GenTreeOp* tree)
13781392
{
@@ -1389,6 +1403,45 @@ void CodeGen::genConsumeOperands(GenTreeOp* tree)
13891403
}
13901404
}
13911405

1406+
#ifdef FEATURE_HW_INTRINSICS
1407+
//------------------------------------------------------------------------
1408+
// genConsumeHWIntrinsicOperands: Do liveness update for the operands of a GT_HWIntrinsic node
1409+
//
1410+
// Arguments:
1411+
// node - the GenTreeHWIntrinsic node whose operands will have their liveness updated.
1412+
//
1413+
// Return Value:
1414+
// None.
1415+
//
// Notes:
//    A null op1 means the node has no operands (op2 is asserted to be null too).
//    If op1 is a GT_LIST, every list entry is passed to genConsumeRegs in list order
//    and op2 is not examined; otherwise op1 is consumed first, followed by op2
//    when it is non-null.
//
1416+
1417+
void CodeGen::genConsumeHWIntrinsicOperands(GenTreeHWIntrinsic* node)
1418+
{
1419+
GenTree* op1 = node->gtGetOp1();
1420+
// No operands at all: nothing to consume.
if (op1 == nullptr)
1421+
{
1422+
assert(node->gtGetOp2() == nullptr);
1423+
return;
1424+
}
1425+
// Operands supplied as an argument list: walk the list and consume each entry.
if (op1->OperIs(GT_LIST))
1426+
{
1427+
for (GenTreeArgList* list = op1->AsArgList(); list != nullptr; list = list->Rest())
1428+
{
1429+
GenTree* operand = list->Current();
1430+
genConsumeRegs(operand);
1431+
}
1432+
}
1433+
// Unary or binary form: op1 first, then op2 if there is one.
else
1434+
{
1435+
genConsumeRegs(op1);
1436+
GenTree* op2 = node->gtGetOp2();
1437+
if (op2 != nullptr)
1438+
{
1439+
genConsumeRegs(op2);
1440+
}
1441+
}
1442+
}
1443+
#endif // FEATURE_HW_INTRINSICS
1444+
13921445
#if FEATURE_PUT_STRUCT_ARG_STK
13931446
//------------------------------------------------------------------------
13941447
// genConsumePutStructArgStk: Do liveness update for the operands of a PutArgStk node.

src/jit/emitxarch.cpp

Lines changed: 118 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4088,6 +4088,119 @@ void emitter::emitIns_R_A_I(instruction ins, emitAttr attr, regNumber reg1, GenT
40884088
emitCurIGsize += sz;
40894089
}
40904090

4091+
//------------------------------------------------------------------------
4092+
// emitIns_R_RM_I: emits the code for an instruction that takes 3 operands:
4093+
// a register operand, an operand that may be memory or a register, and an immediate,
4094+
// and that returns a value in a register
4095+
//
4096+
// Arguments:
4097+
// ins -- The instruction being emitted
4098+
// attr -- The emit attribute
4099+
// reg1 -- The first operand, a register
4100+
// rmOp -- The second operand, which may be a memory node or a node producing a register
4101+
// ival -- The immediate operand
4102+
//
// Notes:
//    The emit helper is selected from the shape of rmOp:
//      - not contained and not used from a spill temp -> emitIns_R_R_I (register form)
//      - contained HW intrinsic                       -> emitIns_R_AR_I (op1's register as base)
//      - contained indir of GT_CLS_VAR_ADDR           -> emitIns_R_C_I
//      - contained indir of any other non-local base  -> emitIns_R_A_I
//      - spill temp, indir of GT_LCL_VAR_ADDR,
//        GT_LCL_FLD or GT_LCL_VAR                     -> emitIns_R_S_I (varNum/offset form)
//
4103+
void emitter::emitIns_R_RM_I(instruction ins, emitAttr attr, regNumber reg1, GenTree* rmOp, int ival)
4104+
{
4105+
noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), reg1));
4106+
assert(IsSSEOrAVXInstruction(ins));
4107+
4108+
// rmOp is not read from its own register: pick one of the memory-operand forms.
if (rmOp->isContained() || rmOp->isUsedFromSpillTemp())
4109+
{
4110+
// varNum/offset start out as sentinels; the branches that fall through to
// emitIns_R_S_I below must overwrite them (checked by the asserts before that call).
TempDsc* tmpDsc = nullptr;
4111+
unsigned varNum = BAD_VAR_NUM;
4112+
unsigned offset = (unsigned)-1;
4113+
4114+
if (rmOp->isUsedFromSpillTemp())
4115+
{
4116+
assert(rmOp->IsRegOptional());
4117+
4118+
// Address the operand through its spill temp, at offset 0.
tmpDsc = codeGen->getSpillTempDsc(rmOp);
4119+
varNum = tmpDsc->tdTempNum();
4120+
offset = 0;
4121+
4122+
codeGen->regSet.tmpRlsTemp(tmpDsc);
4123+
}
4124+
else if (rmOp->OperIsHWIntrinsic())
4125+
{
4126+
// Contained HW intrinsic: op1's register is used as the base with a zero displacement.
// NOTE(review): only op1's register is consulted here; confirm that an intrinsic whose
// address operand is itself contained (an addressing mode) cannot reach this path.
emitIns_R_AR_I(ins, attr, reg1, rmOp->gtGetOp1()->gtRegNum, 0, ival);
4127+
return;
4128+
}
4129+
else if (rmOp->isIndir())
4130+
{
4131+
GenTreeIndir* memIndir = rmOp->AsIndir();
4132+
GenTree* memBase = memIndir->gtOp1;
4133+
4134+
// Dispatch on the kind of node that forms the indirection's address.
switch (memBase->OperGet())
4135+
{
4136+
case GT_LCL_VAR_ADDR:
4137+
{
4138+
// Address of a local: falls through to the varNum/offset form below.
varNum = memBase->AsLclVarCommon()->GetLclNum();
4139+
offset = 0;
4140+
4141+
// Ensure that all the GenTreeIndir values are set to their defaults.
4142+
assert(!memIndir->HasIndex());
4143+
assert(memIndir->Scale() == 1);
4144+
assert(memIndir->Offset() == 0);
4145+
4146+
break;
4147+
}
4148+
4149+
case GT_CLS_VAR_ADDR:
4150+
{
4151+
// Class variable address: emitted directly from the field handle, offset 0.
emitIns_R_C_I(ins, attr, reg1, memBase->gtClsVar.gtClsVarHnd, 0, ival);
4152+
return;
4153+
}
4154+
4155+
default:
4156+
{
4157+
// Any other address: hand the whole indir to the address-mode emitter.
emitIns_R_A_I(ins, attr, reg1, memIndir, ival);
4158+
return;
4159+
}
4160+
}
4161+
}
4162+
else
4163+
{
4164+
// Contained node that is neither a HW intrinsic nor an indir: only local
// fields and local variables are expected here.
switch (rmOp->OperGet())
4165+
{
4166+
case GT_LCL_FLD:
4167+
{
4168+
GenTreeLclFld* lclField = rmOp->AsLclFld();
4169+
4170+
varNum = lclField->GetLclNum();
4171+
offset = lclField->gtLclFld.gtLclOffs;
4172+
break;
4173+
}
4174+
4175+
case GT_LCL_VAR:
4176+
{
4177+
assert(rmOp->IsRegOptional() || !emitComp->lvaGetDesc(rmOp->gtLclVar.gtLclNum)->lvIsRegCandidate());
4178+
varNum = rmOp->AsLclVar()->GetLclNum();
4179+
offset = 0;
4180+
break;
4181+
}
4182+
4183+
default:
4184+
unreached();
4185+
break;
4186+
}
4187+
}
4188+
4189+
// Ensure we got a good varNum and offset.
4190+
// We also need to check for `tmpDsc != nullptr` since spill temp numbers
4191+
// are negative and start with -1, which also happens to be BAD_VAR_NUM.
4192+
assert((varNum != BAD_VAR_NUM) || (tmpDsc != nullptr));
4193+
assert(offset != (unsigned)-1);
4194+
4195+
// Every branch that did not return above is emitted in the varNum/offset form.
emitIns_R_S_I(ins, attr, reg1, varNum, offset, ival);
4196+
}
4197+
else
4198+
{
4199+
// rmOp is used from its own register: plain register-register-immediate form.
regNumber rmOpReg = rmOp->gtRegNum;
4200+
emitIns_R_R_I(ins, attr, reg1, rmOpReg, ival);
4201+
}
4202+
}
4203+
40914204
void emitter::emitIns_R_AR_I(instruction ins, emitAttr attr, regNumber reg1, regNumber base, int offs, int ival)
40924205
{
40934206
noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), reg1));
@@ -5722,20 +5835,22 @@ void emitter::emitIns_SIMD_R_R_A(
57225835
// targetReg -- The target register
57235836
// op1Reg -- The register of the first operand
57245837
// base -- The base register used for the memory address
5838+
// offset -- The memory offset
57255839
//
5726-
void emitter::emitIns_SIMD_R_R_AR(instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber base)
5840+
void emitter::emitIns_SIMD_R_R_AR(
5841+
instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber base, int offset)
57275842
{
57285843
if (UseVEXEncoding())
57295844
{
5730-
emitIns_R_R_AR(ins, attr, targetReg, op1Reg, base, 0);
5845+
emitIns_R_R_AR(ins, attr, targetReg, op1Reg, base, offset);
57315846
}
57325847
else
57335848
{
57345849
if (op1Reg != targetReg)
57355850
{
57365851
emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
57375852
}
5738-
emitIns_R_AR(ins, attr, targetReg, base, 0);
5853+
emitIns_R_AR(ins, attr, targetReg, base, offset);
57395854
}
57405855
}
57415856

src/jit/emitxarch.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -333,6 +333,8 @@ void emitIns_R_A(instruction ins, emitAttr attr, regNumber reg1, GenTreeIndir* i
333333

334334
void emitIns_R_A_I(instruction ins, emitAttr attr, regNumber reg1, GenTreeIndir* indir, int ival);
335335

336+
void emitIns_R_RM_I(instruction ins, emitAttr attr, regNumber reg1, GenTree* rmOp, int ival);
337+
336338
void emitIns_R_AR_I(instruction ins, emitAttr attr, regNumber reg1, regNumber base, int offs, int ival);
337339

338340
void emitIns_R_C_I(instruction ins, emitAttr attr, regNumber reg1, CORINFO_FIELD_HANDLE fldHnd, int offs, int ival);
@@ -449,7 +451,8 @@ void emitIns_AX_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg,
449451
void emitIns_SIMD_R_R_I(instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, int ival);
450452

451453
void emitIns_SIMD_R_R_A(instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, GenTreeIndir* indir);
452-
void emitIns_SIMD_R_R_AR(instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber base);
454+
void emitIns_SIMD_R_R_AR(
455+
instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber base, int offset);
453456
void emitIns_SIMD_R_R_C(
454457
instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, CORINFO_FIELD_HANDLE fldHnd, int offs);
455458
void emitIns_SIMD_R_R_R(instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg);

src/jit/gentree.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3510,6 +3510,28 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree)
35103510
costSz = 2 * 2;
35113511
break;
35123512

3513+
#if defined(FEATURE_HW_INTRINSICS) && defined(_TARGET_XARCH_)
3514+
case GT_HWIntrinsic:
3515+
{
3516+
HWIntrinsicCategory category =
3517+
HWIntrinsicInfo::lookupCategory(tree->AsHWIntrinsic()->gtHWIntrinsicId);
3518+
if ((category == HW_Category_MemoryLoad) || (category == HW_Category_MemoryStore))
3519+
{
3520+
costEx = IND_COST_EX;
3521+
costSz = 2;
3522+
// See if we can form a complex addressing mode.
3523+
3524+
GenTree* addr = op1->gtEffectiveVal();
3525+
3526+
if (addr->OperIs(GT_ADD) && gtMarkAddrMode(addr, &costEx, &costSz, tree->TypeGet()))
3527+
{
3528+
goto DONE;
3529+
}
3530+
}
3531+
}
3532+
break;
3533+
#endif // FEATURE_HW_INTRINSICS && _TARGET_XARCH_
3534+
35133535
case GT_BLK:
35143536
case GT_IND:
35153537

0 commit comments

Comments
 (0)