Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.

Table-driven Intel hardware intrinsic #15749

Merged
merged 2 commits into from
Jan 19, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 10 additions & 5 deletions src/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -3042,11 +3042,11 @@ class Compiler
NamedIntrinsic lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method);

#if FEATURE_HW_INTRINSICS
InstructionSet lookupHWIntrinsicISA(const char* className);
NamedIntrinsic lookupHWIntrinsic(const char* methodName, InstructionSet isa);
InstructionSet isaOfHWIntrinsic(NamedIntrinsic intrinsic);
bool isIntrinsicAnIsSupportedPropertyGetter(NamedIntrinsic intrinsic);
bool isFullyImplmentedISAClass(InstructionSet isa);
static InstructionSet lookupHWIntrinsicISA(const char* className);
static NamedIntrinsic lookupHWIntrinsic(const char* methodName, InstructionSet isa);
static InstructionSet isaOfHWIntrinsic(NamedIntrinsic intrinsic);
static bool isIntrinsicAnIsSupportedPropertyGetter(NamedIntrinsic intrinsic);
static bool isFullyImplmentedISAClass(InstructionSet isa);
#ifdef _TARGET_XARCH_
GenTree* impUnsupportedHWIntrinsic(unsigned helper,
CORINFO_METHOD_HANDLE method,
Expand Down Expand Up @@ -3119,7 +3119,12 @@ class Compiler
bool compSupportsHWIntrinsic(InstructionSet isa);
bool isScalarISA(InstructionSet isa);
static int ivalOfHWIntrinsic(NamedIntrinsic intrinsic);
static int numArgsOfHWIntrinsic(NamedIntrinsic intrinsic);
static instruction insOfHWIntrinsic(NamedIntrinsic intrinsic, var_types type);
static HWIntrinsicCategory categoryOfHWIntrinsic(NamedIntrinsic intrinsic);
static HWIntrinsicFlag flagsOfHWIntrinsic(NamedIntrinsic intrinsic);
GenTree* getArgForHWIntrinsic(var_types argType, CORINFO_CLASS_HANDLE argClass);
GenTreeArgList* buildArgList(CORINFO_SIG_INFO* sig);
#endif // _TARGET_XARCH_
#endif // FEATURE_HW_INTRINSICS
GenTreePtr impArrayAccessIntrinsic(CORINFO_CLASS_HANDLE clsHnd,
Expand Down
14 changes: 14 additions & 0 deletions src/jit/emit.h
Original file line number Diff line number Diff line change
Expand Up @@ -938,6 +938,7 @@ class emitter
struct
{
regNumber _idReg3 : REGNUM_BITS;
regNumber _idReg4 : REGNUM_BITS;
};
#endif // defined(_TARGET_XARCH_)

Expand Down Expand Up @@ -1119,6 +1120,19 @@ class emitter
idAddr()->_idReg3 = reg;
assert(reg == idAddr()->_idReg3);
}
regNumber idReg4() const
{
assert(!idIsTiny());
assert(!idIsSmallDsc());
return idAddr()->_idReg4;
}
void idReg4(regNumber reg)
{
assert(!idIsTiny());
assert(!idIsSmallDsc());
idAddr()->_idReg4 = reg;
assert(reg == idAddr()->_idReg4);
}
#endif // defined(_TARGET_XARCH_)
#ifdef _TARGET_ARMARCH_
insOpts idInsOpt() const
Expand Down
2 changes: 2 additions & 0 deletions src/jit/emitfmtsxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,8 @@ IF_DEF(RRW_RRW_CNS, IS_R1_RW|IS_R2_RW, SCNS) // r/w reg , r/w r

IF_DEF(RWR_RRD_RRD, IS_R1_WR|IS_R2_RD|IS_R3_RD, NONE) // write reg , read reg2 , read reg3
IF_DEF(RWR_RRD_RRD_CNS, IS_R1_WR|IS_R2_RD|IS_R3_RD, SCNS) // write reg , read reg2 , read reg3, const

IF_DEF(RWR_RRD_RRD_RRD, IS_R1_WR|IS_R2_RD|IS_R3_RD|IS_R4_RD, NONE) // write reg , read reg2 , read reg3 , read reg4
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

NOTE: There are several others of these we will end up needing. I added a few in my CORINFO_INTRINSIC_Round PR (#14736), but that has yet to be merged.

//----------------------------------------------------------------------------
// The following formats are used for direct addresses (e.g. static data members)
//----------------------------------------------------------------------------
Expand Down
164 changes: 123 additions & 41 deletions src/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ bool emitter::IsDstDstSrcAVXInstruction(instruction ins)
case INS_pminub:
case INS_pminud:
case INS_pminuw:
case INS_pmuldq:
case INS_pmulld:
case INS_pmullw:
case INS_pmuludq:
Expand Down Expand Up @@ -4227,6 +4228,45 @@ void emitter::emitIns_R_R_S_I(
emitCurIGsize += sz;
}

#ifdef DEBUG
static bool isAvxBlendv(instruction ins)
{
return ins == INS_vblendvps || ins == INS_vblendvpd || ins == INS_vpblendvb;
}

static bool isSse41Blendv(instruction ins)
{
return ins == INS_blendvps || ins == INS_blendvpd || ins == INS_pblendvb;
}
#endif

void emitter::emitIns_R_R_R_R(
instruction ins, emitAttr attr, regNumber targetReg, regNumber reg1, regNumber reg2, regNumber reg3)
{
assert(isAvxBlendv(ins));
assert(UseVEXEncoding());
// Currently vex prefix only use three bytes mode.
// size = vex + opcode + ModR/M + 1-byte-cns(Reg) = 3 + 1 + 1 + 1 = 6
// TODO-XArch-CQ: We should create function which can calculate all kinds of AVX instructions size in future
UNATIVE_OFFSET sz = 6;

// AVX/AVX2 supports 4-reg format for vblendvps/vblendvpd/vpblendvb,
// which encodes the fourth register into imm8[7:4]
int ival = (reg3 - XMMBASE) << 4; // convert reg3 to ival
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we compute the ival later, when we actually emit the instruction bytes?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think computing the ival here is good to reuse underlying code.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems to differ from most of the other emit logic I can find. emitIns looks to generally build the id table and emitOutput looks to generally transform that into the required format for a given instruction.


instrDesc* id = emitNewInstrCns(attr, ival);
id->idIns(ins);
id->idInsFmt(IF_RWR_RRD_RRD_RRD);
id->idReg1(targetReg);
id->idReg2(reg1);
id->idReg3(reg2);
id->idReg4(reg3);

id->idCodeSize(sz);
dispIns(id);
emitCurIGsize += sz;
}

/*****************************************************************************
*
* Add an instruction with a register + static member operands.
Expand Down Expand Up @@ -5166,158 +5206,191 @@ void emitter::emitIns_AX_R(instruction ins, emitAttr attr, regNumber ireg, regNu
}

#if FEATURE_HW_INTRINSICS
void emitter::emitIns_SIMD_R_R(instruction ins, regNumber reg, regNumber reg1, var_types simdtype)
void emitter::emitIns_SIMD_R_R_A(instruction ins, emitAttr attr, regNumber reg, regNumber reg1, GenTreeIndir* indir)
{
emitIns_R_R(ins, emitTypeSize(simdtype), reg, reg1);
if (UseVEXEncoding())
{
emitIns_R_R_A(ins, attr, reg, reg1, indir, IF_RWR_RRD_ARD);
}
else
{
if (reg1 != reg)
{
emitIns_R_R(INS_movaps, attr, reg, reg1);
}
emitIns_R_A(ins, attr, reg, indir, IF_RRW_ARD);
}
}

void emitter::emitIns_SIMD_R_R_A(
instruction ins, regNumber reg, regNumber reg1, GenTreeIndir* indir, var_types simdtype)
void emitter::emitIns_SIMD_R_R_AR(instruction ins, emitAttr attr, regNumber reg, regNumber reg1, regNumber base)
{
if (UseVEXEncoding())
{
emitIns_R_R_A(ins, emitTypeSize(simdtype), reg, reg1, indir, IF_RWR_RRD_ARD);
emitIns_R_R_AR(ins, attr, reg, reg1, base, 0);
}
else
{
if (reg1 != reg)
{
emitIns_R_R(INS_movaps, emitTypeSize(simdtype), reg, reg1);
emitIns_R_R(INS_movaps, attr, reg, reg1);
}
emitIns_R_A(ins, emitTypeSize(simdtype), reg, indir, IF_RRW_ARD);
emitIns_R_AR(ins, attr, reg, base, 0);
}
}

void emitter::emitIns_SIMD_R_R_AR(instruction ins, regNumber reg, regNumber reg1, regNumber base, var_types simdtype)
void emitter::emitIns_SIMD_R_R_C(
instruction ins, emitAttr attr, regNumber reg, regNumber reg1, CORINFO_FIELD_HANDLE fldHnd, int offs)
{
if (UseVEXEncoding())
{
emitIns_R_R_AR(ins, emitTypeSize(simdtype), reg, reg1, base, 0);
emitIns_R_R_C(ins, attr, reg, reg1, fldHnd, offs);
}
else
{
if (reg1 != reg)
{
emitIns_R_R(INS_movaps, emitTypeSize(simdtype), reg, reg1);
emitIns_R_R(INS_movaps, attr, reg, reg1);
}
emitIns_R_AR(ins, emitTypeSize(simdtype), reg, base, 0);
emitIns_R_C(ins, attr, reg, fldHnd, offs);
}
}

void emitter::emitIns_SIMD_R_R_C(
instruction ins, regNumber reg, regNumber reg1, CORINFO_FIELD_HANDLE fldHnd, int offs, var_types simdtype)
void emitter::emitIns_SIMD_R_R_R(instruction ins, emitAttr attr, regNumber reg, regNumber reg1, regNumber reg2)
{
if (UseVEXEncoding())
{
emitIns_R_R_C(ins, emitTypeSize(simdtype), reg, reg1, fldHnd, offs);
emitIns_R_R_R(ins, attr, reg, reg1, reg2);
}
else
{
if (reg1 != reg)
{
emitIns_R_R(INS_movaps, emitTypeSize(simdtype), reg, reg1);
emitIns_R_R(INS_movaps, attr, reg, reg1);
}
emitIns_R_C(ins, emitTypeSize(simdtype), reg, fldHnd, offs);
emitIns_R_R(ins, attr, reg, reg2);
}
}

void emitter::emitIns_SIMD_R_R_R(instruction ins, regNumber reg, regNumber reg1, regNumber reg2, var_types simdtype)
void emitter::emitIns_SIMD_R_R_R_R(
instruction ins, emitAttr attr, regNumber reg, regNumber reg1, regNumber reg2, regNumber reg3)
{
assert(isAvxBlendv(ins) || isSse41Blendv(ins));
if (UseVEXEncoding())
{
emitIns_R_R_R(ins, emitTypeSize(simdtype), reg, reg1, reg2);
// convert SSE encoding of SSE4.1 instructions to VEX encoding
switch (ins)
{
case INS_blendvps:
ins = INS_vblendvps;
break;
case INS_blendvpd:
ins = INS_vblendvpd;
break;
case INS_pblendvb:
ins = INS_vpblendvb;
break;
default:
break;
}
emitIns_R_R_R_R(ins, attr, reg, reg1, reg2, reg3);
}
else
{
assert(isSse41Blendv(ins));
// SSE4.1 blendv* hardcode the mask vector (op3) in XMM0
if (reg3 != REG_XMM0)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we have a bug tracking this? We should probably have a TODO-XArch-CQ with a link to it.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As I understand based on above discussion and practice, this is not a bug, the current code can assign XMM0 to reg3 for simple cases. But in some complex use cases, when reg3 cannot get XMM0, LSRA won't insert the "copy" and codgen is responsible to do it.
BTW, the current CQ looks good to me, a few more zero-latency movaps xmm, xmm does not impact the performance too much. But there may be some register pressure issues that we need more workloads and tests to investigate.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it is by convention that when a specific register is required, the code generator is responsible for checking whether that is what was assigned, and otherwise doing the copy. It has been debated whether all these checks are more or less efficient than if the register allocator inserted copies. But for now, this is how it's done. It's usually done in codegen (rather than the emitter), I believe, but for this case this seems like the right place.

{
emitIns_R_R(INS_movaps, attr, REG_XMM0, reg3);
}
if (reg1 != reg)
{
emitIns_R_R(INS_movaps, emitTypeSize(simdtype), reg, reg1);
emitIns_R_R(INS_movaps, attr, reg, reg1);
}
emitIns_R_R(ins, emitTypeSize(simdtype), reg, reg2);
emitIns_R_R(ins, attr, reg, reg2);
}
}

void emitter::emitIns_SIMD_R_R_S(instruction ins, regNumber reg, regNumber reg1, int varx, int offs, var_types simdtype)
void emitter::emitIns_SIMD_R_R_S(instruction ins, emitAttr attr, regNumber reg, regNumber reg1, int varx, int offs)
{
if (UseVEXEncoding())
{
emitIns_R_R_S(ins, emitTypeSize(simdtype), reg, reg1, varx, offs);
emitIns_R_R_S(ins, attr, reg, reg1, varx, offs);
}
else
{
if (reg1 != reg)
{
emitIns_R_R(INS_movaps, emitTypeSize(simdtype), reg, reg1);
emitIns_R_R(INS_movaps, attr, reg, reg1);
}
emitIns_R_S(ins, emitTypeSize(simdtype), reg, varx, offs);
emitIns_R_S(ins, attr, reg, varx, offs);
}
}

void emitter::emitIns_SIMD_R_R_A_I(
instruction ins, regNumber reg, regNumber reg1, GenTreeIndir* indir, int ival, var_types simdtype)
instruction ins, emitAttr attr, regNumber reg, regNumber reg1, GenTreeIndir* indir, int ival)
{
if (UseVEXEncoding())
{
emitIns_R_R_A_I(ins, emitTypeSize(simdtype), reg, reg1, indir, ival, IF_RWR_RRD_ARD_CNS);
emitIns_R_R_A_I(ins, attr, reg, reg1, indir, ival, IF_RWR_RRD_ARD_CNS);
}
else
{
if (reg1 != reg)
{
emitIns_R_R(INS_movaps, emitTypeSize(simdtype), reg, reg1);
emitIns_R_R(INS_movaps, attr, reg, reg1);
}
emitIns_R_A_I(ins, emitTypeSize(simdtype), reg, indir, ival);
emitIns_R_A_I(ins, attr, reg, indir, ival);
}
}

void emitter::emitIns_SIMD_R_R_C_I(
instruction ins, regNumber reg, regNumber reg1, CORINFO_FIELD_HANDLE fldHnd, int offs, int ival, var_types simdtype)
instruction ins, emitAttr attr, regNumber reg, regNumber reg1, CORINFO_FIELD_HANDLE fldHnd, int offs, int ival)
{
if (UseVEXEncoding())
{
emitIns_R_R_C_I(ins, emitTypeSize(simdtype), reg, reg1, fldHnd, offs, ival);
emitIns_R_R_C_I(ins, attr, reg, reg1, fldHnd, offs, ival);
}
else
{
if (reg1 != reg)
{
emitIns_R_R(INS_movaps, emitTypeSize(simdtype), reg, reg1);
emitIns_R_R(INS_movaps, attr, reg, reg1);
}
emitIns_R_C_I(ins, emitTypeSize(simdtype), reg, fldHnd, offs, ival);
emitIns_R_C_I(ins, attr, reg, fldHnd, offs, ival);
}
}

void emitter::emitIns_SIMD_R_R_R_I(
instruction ins, regNumber reg, regNumber reg1, regNumber reg2, int ival, var_types simdtype)
instruction ins, emitAttr attr, regNumber reg, regNumber reg1, regNumber reg2, int ival)
{
if (UseVEXEncoding())
{
emitIns_R_R_R_I(ins, emitTypeSize(simdtype), reg, reg1, reg2, ival);
emitIns_R_R_R_I(ins, attr, reg, reg1, reg2, ival);
}
else
{
if (reg1 != reg)
{
emitIns_R_R(INS_movaps, emitTypeSize(simdtype), reg, reg1);
emitIns_R_R(INS_movaps, attr, reg, reg1);
}
emitIns_R_R_I(ins, emitTypeSize(simdtype), reg, reg2, ival);
emitIns_R_R_I(ins, attr, reg, reg2, ival);
}
}

void emitter::emitIns_SIMD_R_R_S_I(
instruction ins, regNumber reg, regNumber reg1, int varx, int offs, int ival, var_types simdtype)
instruction ins, emitAttr attr, regNumber reg, regNumber reg1, int varx, int offs, int ival)
{
if (UseVEXEncoding())
{
emitIns_R_R_S_I(ins, emitTypeSize(simdtype), reg, reg1, varx, offs, ival);
emitIns_R_R_S_I(ins, attr, reg, reg1, varx, offs, ival);
}
else
{
if (reg1 != reg)
{
emitIns_R_R(INS_movaps, emitTypeSize(simdtype), reg, reg1);
emitIns_R_R(INS_movaps, attr, reg, reg1);
}
emitIns_R_S_I(ins, emitTypeSize(simdtype), reg, varx, offs, ival);
emitIns_R_S_I(ins, attr, reg, varx, offs, ival);
}
}
#endif
Expand Down Expand Up @@ -7653,6 +7726,14 @@ void emitter::emitDispIns(
val = emitGetInsSC(id);
goto PRINT_CONSTANT;
break;
case IF_RWR_RRD_RRD_RRD:
assert(IsAVXOnlyInstruction(ins));
assert(UseVEXEncoding());
printf("%s, ", emitRegName(id->idReg1(), attr));
printf("%s, ", emitRegName(id->idReg2(), attr));
printf("%s, ", emitRegName(id->idReg3(), attr));
printf("%s", emitRegName(id->idReg4(), attr));
break;
case IF_RRW_RRW_CNS:
printf("%s,", emitRegName(id->idReg1(), attr));
printf(" %s", emitRegName(id->idReg2(), attr));
Expand Down Expand Up @@ -10304,7 +10385,7 @@ BYTE* emitter::emitOutputRRR(BYTE* dst, instrDesc* id)

instruction ins = id->idIns();
assert(IsAVXInstruction(ins));
assert(IsThreeOperandAVXInstruction(ins));
assert(IsThreeOperandAVXInstruction(ins) || isAvxBlendv(ins));
regNumber targetReg = id->idReg1();
regNumber src1 = id->idReg2();
regNumber src2 = id->idReg3();
Expand Down Expand Up @@ -11570,6 +11651,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
sz = emitSizeOfInsDsc(id);
break;
case IF_RWR_RRD_RRD_CNS:
case IF_RWR_RRD_RRD_RRD:

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it OK to treat IF_RWR_RRD_RRD_CNS and IF_RWR_RRD_RRD_RRD as same? Constant is a part of instruction encoding and does not need a register and my worry is that this may impact emitter at later stages. I have experienced this with IF_RRW_CNS and IF_RRW_RRW_CNS.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we should treat these as same. There is other handling and/or encoding checks that this can potentially impact.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it OK to treat IF_RWR_RRD_RRD_CNS and IF_RWR_RRD_RRD_RRD as same?

At least, for JITDump, we cannot.

dst = emitOutputRRR(dst, id);
sz = emitSizeOfInsDsc(id);
dst += emitOutputByte(dst, emitGetInsSC(id));
Expand Down
Loading