-
Notifications
You must be signed in to change notification settings - Fork 2.6k
Table-driven Intel hardware intrinsic #15749
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -146,6 +146,7 @@ bool emitter::IsDstDstSrcAVXInstruction(instruction ins) | |
case INS_pminub: | ||
case INS_pminud: | ||
case INS_pminuw: | ||
case INS_pmuldq: | ||
case INS_pmulld: | ||
case INS_pmullw: | ||
case INS_pmuludq: | ||
|
@@ -4227,6 +4228,45 @@ void emitter::emitIns_R_R_S_I( | |
emitCurIGsize += sz; | ||
} | ||
|
||
#ifdef DEBUG | ||
static bool isAvxBlendv(instruction ins) | ||
{ | ||
return ins == INS_vblendvps || ins == INS_vblendvpd || ins == INS_vpblendvb; | ||
} | ||
|
||
static bool isSse41Blendv(instruction ins) | ||
{ | ||
return ins == INS_blendvps || ins == INS_blendvpd || ins == INS_pblendvb; | ||
} | ||
#endif | ||
|
||
void emitter::emitIns_R_R_R_R( | ||
instruction ins, emitAttr attr, regNumber targetReg, regNumber reg1, regNumber reg2, regNumber reg3) | ||
{ | ||
assert(isAvxBlendv(ins)); | ||
assert(UseVEXEncoding()); | ||
// Currently vex prefix only use three bytes mode. | ||
// size = vex + opcode + ModR/M + 1-byte-cns(Reg) = 3 + 1 + 1 + 1 = 6 | ||
// TODO-XArch-CQ: We should create function which can calculate all kinds of AVX instructions size in future | ||
UNATIVE_OFFSET sz = 6; | ||
|
||
// AVX/AVX2 supports 4-reg format for vblendvps/vblendvpd/vpblendvb, | ||
// which encodes the fourth register into imm8[7:4] | ||
int ival = (reg3 - XMMBASE) << 4; // convert reg3 to ival | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we compute the ival later, when we actually emit the instruction bytes? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think computing the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It seems to differ from most of the other emit logic I can find. |
||
|
||
instrDesc* id = emitNewInstrCns(attr, ival); | ||
id->idIns(ins); | ||
id->idInsFmt(IF_RWR_RRD_RRD_RRD); | ||
id->idReg1(targetReg); | ||
id->idReg2(reg1); | ||
id->idReg3(reg2); | ||
id->idReg4(reg3); | ||
|
||
id->idCodeSize(sz); | ||
dispIns(id); | ||
emitCurIGsize += sz; | ||
} | ||
|
||
/***************************************************************************** | ||
* | ||
* Add an instruction with a register + static member operands. | ||
|
@@ -5166,158 +5206,191 @@ void emitter::emitIns_AX_R(instruction ins, emitAttr attr, regNumber ireg, regNu | |
} | ||
|
||
#if FEATURE_HW_INTRINSICS | ||
void emitter::emitIns_SIMD_R_R(instruction ins, regNumber reg, regNumber reg1, var_types simdtype) | ||
void emitter::emitIns_SIMD_R_R_A(instruction ins, emitAttr attr, regNumber reg, regNumber reg1, GenTreeIndir* indir) | ||
{ | ||
emitIns_R_R(ins, emitTypeSize(simdtype), reg, reg1); | ||
if (UseVEXEncoding()) | ||
{ | ||
emitIns_R_R_A(ins, attr, reg, reg1, indir, IF_RWR_RRD_ARD); | ||
} | ||
else | ||
{ | ||
if (reg1 != reg) | ||
{ | ||
emitIns_R_R(INS_movaps, attr, reg, reg1); | ||
} | ||
emitIns_R_A(ins, attr, reg, indir, IF_RRW_ARD); | ||
} | ||
} | ||
|
||
void emitter::emitIns_SIMD_R_R_A( | ||
instruction ins, regNumber reg, regNumber reg1, GenTreeIndir* indir, var_types simdtype) | ||
void emitter::emitIns_SIMD_R_R_AR(instruction ins, emitAttr attr, regNumber reg, regNumber reg1, regNumber base) | ||
{ | ||
if (UseVEXEncoding()) | ||
{ | ||
emitIns_R_R_A(ins, emitTypeSize(simdtype), reg, reg1, indir, IF_RWR_RRD_ARD); | ||
emitIns_R_R_AR(ins, attr, reg, reg1, base, 0); | ||
} | ||
else | ||
{ | ||
if (reg1 != reg) | ||
{ | ||
emitIns_R_R(INS_movaps, emitTypeSize(simdtype), reg, reg1); | ||
emitIns_R_R(INS_movaps, attr, reg, reg1); | ||
} | ||
emitIns_R_A(ins, emitTypeSize(simdtype), reg, indir, IF_RRW_ARD); | ||
emitIns_R_AR(ins, attr, reg, base, 0); | ||
} | ||
} | ||
|
||
void emitter::emitIns_SIMD_R_R_AR(instruction ins, regNumber reg, regNumber reg1, regNumber base, var_types simdtype) | ||
void emitter::emitIns_SIMD_R_R_C( | ||
instruction ins, emitAttr attr, regNumber reg, regNumber reg1, CORINFO_FIELD_HANDLE fldHnd, int offs) | ||
{ | ||
if (UseVEXEncoding()) | ||
{ | ||
emitIns_R_R_AR(ins, emitTypeSize(simdtype), reg, reg1, base, 0); | ||
emitIns_R_R_C(ins, attr, reg, reg1, fldHnd, offs); | ||
} | ||
else | ||
{ | ||
if (reg1 != reg) | ||
{ | ||
emitIns_R_R(INS_movaps, emitTypeSize(simdtype), reg, reg1); | ||
emitIns_R_R(INS_movaps, attr, reg, reg1); | ||
} | ||
emitIns_R_AR(ins, emitTypeSize(simdtype), reg, base, 0); | ||
emitIns_R_C(ins, attr, reg, fldHnd, offs); | ||
} | ||
} | ||
|
||
void emitter::emitIns_SIMD_R_R_C( | ||
instruction ins, regNumber reg, regNumber reg1, CORINFO_FIELD_HANDLE fldHnd, int offs, var_types simdtype) | ||
void emitter::emitIns_SIMD_R_R_R(instruction ins, emitAttr attr, regNumber reg, regNumber reg1, regNumber reg2) | ||
{ | ||
if (UseVEXEncoding()) | ||
{ | ||
emitIns_R_R_C(ins, emitTypeSize(simdtype), reg, reg1, fldHnd, offs); | ||
emitIns_R_R_R(ins, attr, reg, reg1, reg2); | ||
} | ||
else | ||
{ | ||
if (reg1 != reg) | ||
{ | ||
emitIns_R_R(INS_movaps, emitTypeSize(simdtype), reg, reg1); | ||
emitIns_R_R(INS_movaps, attr, reg, reg1); | ||
} | ||
emitIns_R_C(ins, emitTypeSize(simdtype), reg, fldHnd, offs); | ||
emitIns_R_R(ins, attr, reg, reg2); | ||
} | ||
} | ||
|
||
void emitter::emitIns_SIMD_R_R_R(instruction ins, regNumber reg, regNumber reg1, regNumber reg2, var_types simdtype) | ||
void emitter::emitIns_SIMD_R_R_R_R( | ||
instruction ins, emitAttr attr, regNumber reg, regNumber reg1, regNumber reg2, regNumber reg3) | ||
{ | ||
assert(isAvxBlendv(ins) || isSse41Blendv(ins)); | ||
if (UseVEXEncoding()) | ||
{ | ||
emitIns_R_R_R(ins, emitTypeSize(simdtype), reg, reg1, reg2); | ||
// convert SSE encoding of SSE4.1 instructions to VEX encoding | ||
switch (ins) | ||
{ | ||
case INS_blendvps: | ||
ins = INS_vblendvps; | ||
break; | ||
case INS_blendvpd: | ||
ins = INS_vblendvpd; | ||
break; | ||
case INS_pblendvb: | ||
ins = INS_vpblendvb; | ||
break; | ||
default: | ||
break; | ||
} | ||
emitIns_R_R_R_R(ins, attr, reg, reg1, reg2, reg3); | ||
} | ||
else | ||
{ | ||
assert(isSse41Blendv(ins)); | ||
// SSE4.1 blendv* hardcode the mask vector (op3) in XMM0 | ||
if (reg3 != REG_XMM0) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we have a bug tracking this? We should probably have a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As I understand based on above discussion and practice, this is not a bug, the current code can assign XMM0 to reg3 for simple cases. But in some complex use cases, when reg3 cannot get XMM0, LSRA won't insert the "copy" and codgen is responsible to do it. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, it is by convention that when a specific register is required, the code generator is responsible for checking whether that is what was assigned, and otherwise doing the copy. It has been debated whether all these checks are more or less efficient than if the register allocator inserted copies. But for now, this is how it's done. It's usually done in codegen (rather than the emitter), I believe, but for this case this seems like the right place. |
||
{ | ||
emitIns_R_R(INS_movaps, attr, REG_XMM0, reg3); | ||
} | ||
if (reg1 != reg) | ||
{ | ||
emitIns_R_R(INS_movaps, emitTypeSize(simdtype), reg, reg1); | ||
emitIns_R_R(INS_movaps, attr, reg, reg1); | ||
} | ||
emitIns_R_R(ins, emitTypeSize(simdtype), reg, reg2); | ||
emitIns_R_R(ins, attr, reg, reg2); | ||
} | ||
} | ||
|
||
void emitter::emitIns_SIMD_R_R_S(instruction ins, regNumber reg, regNumber reg1, int varx, int offs, var_types simdtype) | ||
void emitter::emitIns_SIMD_R_R_S(instruction ins, emitAttr attr, regNumber reg, regNumber reg1, int varx, int offs) | ||
{ | ||
if (UseVEXEncoding()) | ||
{ | ||
emitIns_R_R_S(ins, emitTypeSize(simdtype), reg, reg1, varx, offs); | ||
emitIns_R_R_S(ins, attr, reg, reg1, varx, offs); | ||
} | ||
else | ||
{ | ||
if (reg1 != reg) | ||
{ | ||
emitIns_R_R(INS_movaps, emitTypeSize(simdtype), reg, reg1); | ||
emitIns_R_R(INS_movaps, attr, reg, reg1); | ||
} | ||
emitIns_R_S(ins, emitTypeSize(simdtype), reg, varx, offs); | ||
emitIns_R_S(ins, attr, reg, varx, offs); | ||
} | ||
} | ||
|
||
void emitter::emitIns_SIMD_R_R_A_I( | ||
instruction ins, regNumber reg, regNumber reg1, GenTreeIndir* indir, int ival, var_types simdtype) | ||
instruction ins, emitAttr attr, regNumber reg, regNumber reg1, GenTreeIndir* indir, int ival) | ||
{ | ||
if (UseVEXEncoding()) | ||
{ | ||
emitIns_R_R_A_I(ins, emitTypeSize(simdtype), reg, reg1, indir, ival, IF_RWR_RRD_ARD_CNS); | ||
emitIns_R_R_A_I(ins, attr, reg, reg1, indir, ival, IF_RWR_RRD_ARD_CNS); | ||
} | ||
else | ||
{ | ||
if (reg1 != reg) | ||
{ | ||
emitIns_R_R(INS_movaps, emitTypeSize(simdtype), reg, reg1); | ||
emitIns_R_R(INS_movaps, attr, reg, reg1); | ||
} | ||
emitIns_R_A_I(ins, emitTypeSize(simdtype), reg, indir, ival); | ||
emitIns_R_A_I(ins, attr, reg, indir, ival); | ||
} | ||
} | ||
|
||
void emitter::emitIns_SIMD_R_R_C_I( | ||
instruction ins, regNumber reg, regNumber reg1, CORINFO_FIELD_HANDLE fldHnd, int offs, int ival, var_types simdtype) | ||
instruction ins, emitAttr attr, regNumber reg, regNumber reg1, CORINFO_FIELD_HANDLE fldHnd, int offs, int ival) | ||
{ | ||
if (UseVEXEncoding()) | ||
{ | ||
emitIns_R_R_C_I(ins, emitTypeSize(simdtype), reg, reg1, fldHnd, offs, ival); | ||
emitIns_R_R_C_I(ins, attr, reg, reg1, fldHnd, offs, ival); | ||
} | ||
else | ||
{ | ||
if (reg1 != reg) | ||
{ | ||
emitIns_R_R(INS_movaps, emitTypeSize(simdtype), reg, reg1); | ||
emitIns_R_R(INS_movaps, attr, reg, reg1); | ||
} | ||
emitIns_R_C_I(ins, emitTypeSize(simdtype), reg, fldHnd, offs, ival); | ||
emitIns_R_C_I(ins, attr, reg, fldHnd, offs, ival); | ||
} | ||
} | ||
|
||
void emitter::emitIns_SIMD_R_R_R_I( | ||
instruction ins, regNumber reg, regNumber reg1, regNumber reg2, int ival, var_types simdtype) | ||
instruction ins, emitAttr attr, regNumber reg, regNumber reg1, regNumber reg2, int ival) | ||
{ | ||
if (UseVEXEncoding()) | ||
{ | ||
emitIns_R_R_R_I(ins, emitTypeSize(simdtype), reg, reg1, reg2, ival); | ||
emitIns_R_R_R_I(ins, attr, reg, reg1, reg2, ival); | ||
} | ||
else | ||
{ | ||
if (reg1 != reg) | ||
{ | ||
emitIns_R_R(INS_movaps, emitTypeSize(simdtype), reg, reg1); | ||
emitIns_R_R(INS_movaps, attr, reg, reg1); | ||
} | ||
emitIns_R_R_I(ins, emitTypeSize(simdtype), reg, reg2, ival); | ||
emitIns_R_R_I(ins, attr, reg, reg2, ival); | ||
} | ||
} | ||
|
||
void emitter::emitIns_SIMD_R_R_S_I( | ||
instruction ins, regNumber reg, regNumber reg1, int varx, int offs, int ival, var_types simdtype) | ||
instruction ins, emitAttr attr, regNumber reg, regNumber reg1, int varx, int offs, int ival) | ||
{ | ||
if (UseVEXEncoding()) | ||
{ | ||
emitIns_R_R_S_I(ins, emitTypeSize(simdtype), reg, reg1, varx, offs, ival); | ||
emitIns_R_R_S_I(ins, attr, reg, reg1, varx, offs, ival); | ||
} | ||
else | ||
{ | ||
if (reg1 != reg) | ||
{ | ||
emitIns_R_R(INS_movaps, emitTypeSize(simdtype), reg, reg1); | ||
emitIns_R_R(INS_movaps, attr, reg, reg1); | ||
} | ||
emitIns_R_S_I(ins, emitTypeSize(simdtype), reg, varx, offs, ival); | ||
emitIns_R_S_I(ins, attr, reg, varx, offs, ival); | ||
} | ||
} | ||
#endif | ||
|
@@ -7653,6 +7726,14 @@ void emitter::emitDispIns( | |
val = emitGetInsSC(id); | ||
goto PRINT_CONSTANT; | ||
break; | ||
case IF_RWR_RRD_RRD_RRD: | ||
assert(IsAVXOnlyInstruction(ins)); | ||
assert(UseVEXEncoding()); | ||
printf("%s, ", emitRegName(id->idReg1(), attr)); | ||
printf("%s, ", emitRegName(id->idReg2(), attr)); | ||
printf("%s, ", emitRegName(id->idReg3(), attr)); | ||
printf("%s", emitRegName(id->idReg4(), attr)); | ||
break; | ||
case IF_RRW_RRW_CNS: | ||
printf("%s,", emitRegName(id->idReg1(), attr)); | ||
printf(" %s", emitRegName(id->idReg2(), attr)); | ||
|
@@ -10304,7 +10385,7 @@ BYTE* emitter::emitOutputRRR(BYTE* dst, instrDesc* id) | |
|
||
instruction ins = id->idIns(); | ||
assert(IsAVXInstruction(ins)); | ||
assert(IsThreeOperandAVXInstruction(ins)); | ||
assert(IsThreeOperandAVXInstruction(ins) || isAvxBlendv(ins)); | ||
regNumber targetReg = id->idReg1(); | ||
regNumber src1 = id->idReg2(); | ||
regNumber src2 = id->idReg3(); | ||
|
@@ -11570,6 +11651,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) | |
sz = emitSizeOfInsDsc(id); | ||
break; | ||
case IF_RWR_RRD_RRD_CNS: | ||
case IF_RWR_RRD_RRD_RRD: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is it OK to treat There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think we should treat these as same. There is other handling and/or encoding checks that this can potentially impact. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
At least, for JITDump, we cannot. |
||
dst = emitOutputRRR(dst, id); | ||
sz = emitSizeOfInsDsc(id); | ||
dst += emitOutputByte(dst, emitGetInsSC(id)); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
NOTE: There are several others of these we will end up needing. I added a few in my
CORINFO_INTRINSIC_Round
PR (#14736), but that has yet to be merged.