Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/coreclr/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -1536,7 +1536,7 @@ class CodeGen final : public CodeGenInterface
}
};

OperandDesc genOperandDesc(GenTree* op);
OperandDesc genOperandDesc(instruction ins, GenTree* op);

void inst_TT(instruction ins, emitAttr size, GenTree* op1);
void inst_RV_TT(instruction ins, emitAttr size, regNumber op1Reg, GenTree* op2);
Expand Down
17 changes: 6 additions & 11 deletions src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -437,8 +437,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, simd_t
}
else
{
CORINFO_FIELD_HANDLE hnd = emit->emitSimd8Const(val8);
emit->emitIns_R_C(ins_Load(targetType), attr, targetReg, hnd, 0);
emit->emitSimdConstCompressedLoad(val, attr, targetReg);
}
break;
}
Expand All @@ -465,10 +464,9 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, simd_t
}
else
{
simd16_t val16 = {};
simd_t val16 = {};
memcpy(&val16, &val12, sizeof(val12));
CORINFO_FIELD_HANDLE hnd = emit->emitSimd16Const(val16);
emit->emitIns_R_C(ins_Load(targetType), attr, targetReg, hnd, 0);
emit->emitSimdConstCompressedLoad(val, EA_16BYTE, targetReg);
}
break;
}
Expand All @@ -495,8 +493,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, simd_t
}
else
{
CORINFO_FIELD_HANDLE hnd = emit->emitSimd16Const(val16);
emit->emitIns_R_C(ins_Load(targetType), attr, targetReg, hnd, 0);
emit->emitSimdConstCompressedLoad(val, attr, targetReg);
}
break;
}
Expand All @@ -523,8 +520,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, simd_t
}
else
{
CORINFO_FIELD_HANDLE hnd = emit->emitSimd32Const(val32);
emit->emitIns_R_C(ins_Load(targetType), attr, targetReg, hnd, 0);
emit->emitSimdConstCompressedLoad(val, attr, targetReg);
}
break;
}
Expand All @@ -549,8 +545,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, simd_t
}
else
{
CORINFO_FIELD_HANDLE hnd = emit->emitSimd64Const(val64);
emit->emitIns_R_C(ins_Load(targetType), attr, targetReg, hnd, 0);
emit->emitSimdConstCompressedLoad(val, attr, targetReg);
}
break;
}
Expand Down
133 changes: 120 additions & 13 deletions src/coreclr/jit/emit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8189,35 +8189,142 @@ CORINFO_FIELD_HANDLE emitter::emitSimd16Const(simd16_t constValue)
return emitComp->eeFindJitDataOffs(cnum);
}

#if defined(TARGET_XARCH)
CORINFO_FIELD_HANDLE emitter::emitSimd32Const(simd32_t constValue)
#ifdef TARGET_XARCH
//------------------------------------------------------------------------
// emitSimdConst: Create a simd data section constant.
//
// Arguments:
// constValue - constant value
// attr - The EA_SIZE for the constant type
//
// Return Value:
// A field handle representing the data offset to access the constant.
//
// Note:
// Access to inline data is 'abstracted' by a special type of static member
// (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference
// to constant data, not a real static field.
//
CORINFO_FIELD_HANDLE emitter::emitSimdConst(simd_t* constValue, emitAttr attr)
{
unsigned cnsSize = 32;
unsigned cnsAlign = cnsSize;
unsigned cnsSize = EA_SIZE(attr);
unsigned cnsAlign = cnsSize;
var_types dataType = (cnsSize >= 8) ? emitComp->getSIMDTypeForSize(cnsSize) : TYP_FLOAT;

#ifdef TARGET_XARCH
if (emitComp->compCodeOpt() == Compiler::SMALL_CODE)
{
cnsAlign = dataSection::MIN_DATA_ALIGN;
}
#endif // TARGET_XARCH

UNATIVE_OFFSET cnum = emitDataConst(&constValue, cnsSize, cnsAlign, TYP_SIMD32);
UNATIVE_OFFSET cnum = emitDataConst(constValue, cnsSize, cnsAlign, dataType);
return emitComp->eeFindJitDataOffs(cnum);
}

CORINFO_FIELD_HANDLE emitter::emitSimd64Const(simd64_t constValue)
//------------------------------------------------------------------------
// emitSimdConstCompressedLoad: Create a simd data section constant,
// compressing it if possible, and emit an appropriate instruction
// to load or broadcast the constant to a register.
//
// Arguments:
// constValue - constant value
// attr - The EA_SIZE for the constant type
// targetReg - The target register
//
void emitter::emitSimdConstCompressedLoad(simd_t* constValue, emitAttr attr, regNumber targetReg)
{
unsigned cnsSize = 64;
unsigned cnsAlign = cnsSize;
assert(EA_SIZE(attr) >= 8 && EA_SIZE(attr) <= 64);

if (emitComp->compCodeOpt() == Compiler::SMALL_CODE)
unsigned cnsSize = EA_SIZE(attr);
unsigned dataSize = cnsSize;
instruction ins = (cnsSize == 8) ? INS_movsd_simd : INS_movups;

// Most constant vectors tend to have repeated values, so we will first check to see if
// we can replace a full vector load with a smaller broadcast.

if ((dataSize == 64) && (constValue->v256[1] == constValue->v256[0]))
{
cnsAlign = dataSection::MIN_DATA_ALIGN;
assert(emitComp->IsBaselineVector512IsaSupportedDebugOnly());
dataSize = 32;
ins = INS_vbroadcastf32x8;
}

UNATIVE_OFFSET cnum = emitDataConst(&constValue, cnsSize, cnsAlign, TYP_SIMD64);
return emitComp->eeFindJitDataOffs(cnum);
}
if ((dataSize == 32) && (constValue->v128[1] == constValue->v128[0]))
{
assert(emitComp->IsBaselineVector256IsaSupportedDebugOnly());
dataSize = 16;
ins = INS_vbroadcastf128;
}

if ((dataSize == 16) && (constValue->u64[1] == constValue->u64[0]))
{
if (((cnsSize == 16) && emitComp->compOpportunisticallyDependsOn(InstructionSet_SSE3)) ||
emitComp->compOpportunisticallyDependsOn(InstructionSet_AVX))
{
dataSize = 8;
ins = (cnsSize == 16) ? INS_movddup : INS_vbroadcastsd;
}
}

// `vbroadcastss` fills the full SIMD register, so we can't do this last step if the
// original constant was smaller than a full reg (e.g. TYP_SIMD8)

if ((dataSize == 8) && (cnsSize >= 16) && (constValue->u32[1] == constValue->u32[0]))
{
if (emitComp->compOpportunisticallyDependsOn(InstructionSet_AVX))
{
dataSize = 4;
ins = INS_vbroadcastss;
}
}

if (dataSize < cnsSize)
{
// We found a broadcast match, so emit the broadcast instruction and return.
// Here we use the original emitAttr for the instruction, because we need to
// produce a register of the original constant's size, filled with the pattern.

CORINFO_FIELD_HANDLE hnd = emitSimdConst(constValue, EA_ATTR(dataSize));
emitIns_R_C(ins, attr, targetReg, hnd, 0);
return;
}

// Otherwise, if the upper lanes and/or elements of the constant are zero, we can use a
// smaller load, because all scalar and vector memory load instructions zero the uppers.

simd32_t zeroValue = {};

if ((dataSize == 64) && (constValue->v256[1] == zeroValue))
{
dataSize = 32;
}

if ((dataSize == 32) && (constValue->v128[1] == zeroValue.v128[0]))
{
dataSize = 16;
}

if ((dataSize == 16) && (constValue->u64[1] == 0))
{
dataSize = 8;
ins = INS_movsd_simd;
}

if ((dataSize == 8) && (constValue->u32[1] == 0))
{
dataSize = 4;
ins = INS_movss;
}

// Here we set the emitAttr to the size of the actual load. It will zero extend
// up to the native SIMD register size.

attr = EA_ATTR(dataSize);

CORINFO_FIELD_HANDLE hnd = emitSimdConst(constValue, attr);
emitIns_R_C(ins, attr, targetReg, hnd, 0);
}
#endif // TARGET_XARCH

#if defined(FEATURE_MASKED_HW_INTRINSICS)
Expand Down
5 changes: 2 additions & 3 deletions src/coreclr/jit/emit.h
Original file line number Diff line number Diff line change
Expand Up @@ -2639,10 +2639,9 @@ class emitter
CORINFO_FIELD_HANDLE emitSimd8Const(simd8_t constValue);
CORINFO_FIELD_HANDLE emitSimd16Const(simd16_t constValue);
#if defined(TARGET_XARCH)
CORINFO_FIELD_HANDLE emitSimd32Const(simd32_t constValue);
CORINFO_FIELD_HANDLE emitSimd64Const(simd64_t constValue);
CORINFO_FIELD_HANDLE emitSimdConst(simd_t* constValue, emitAttr attr);
void emitSimdConstCompressedLoad(simd_t* constValue, emitAttr attr, regNumber targetReg);
#endif // TARGET_XARCH

#if defined(FEATURE_MASKED_HW_INTRINSICS)
CORINFO_FIELD_HANDLE emitSimdMaskConst(simdmask_t constValue);
#endif // FEATURE_MASKED_HW_INTRINSICS
Expand Down
6 changes: 3 additions & 3 deletions src/coreclr/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7331,6 +7331,7 @@ bool emitter::IsMovInstruction(instruction ins)
case INS_vmovdqu8:
case INS_vmovdqu16:
case INS_vmovdqu64:
case INS_movq:
case INS_movsd_simd:
case INS_movss:
case INS_movsx:
Expand All @@ -7350,7 +7351,6 @@ bool emitter::IsMovInstruction(instruction ins)
}

#if defined(TARGET_AMD64)
case INS_movq:
case INS_movsxd:
{
return true;
Expand Down Expand Up @@ -7501,14 +7501,14 @@ bool emitter::HasSideEffect(instruction ins, emitAttr size)
break;
}

#if defined(TARGET_AMD64)
case INS_movq:
{
// Clears the upper bits
hasSideEffect = true;
break;
}

#if defined(TARGET_AMD64)
case INS_movsxd:
{
// Sign-extends the source
Expand Down Expand Up @@ -7781,13 +7781,13 @@ void emitter::emitIns_Mov(instruction ins, emitAttr attr, regNumber dstReg, regN
break;
}

#if defined(TARGET_AMD64)
case INS_movq:
{
assert(isFloatReg(dstReg) && isFloatReg(srcReg));
break;
}

#if defined(TARGET_AMD64)
case INS_movsxd:
{
assert(isGeneralRegister(dstReg) && isGeneralRegister(srcReg));
Expand Down
3 changes: 0 additions & 3 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28277,9 +28277,6 @@ bool GenTreeHWIntrinsic::OperIsMemoryLoad(GenTree** pAddr) const
case NI_AVX2_ConvertToVector256Int16:
case NI_AVX2_ConvertToVector256Int32:
case NI_AVX2_ConvertToVector256Int64:
case NI_AVX2_BroadcastVector128ToVector256:
case NI_AVX512F_BroadcastVector128ToVector512:
case NI_AVX512F_BroadcastVector256ToVector512:
if (GetAuxiliaryJitType() == CORINFO_TYPE_PTR)
{
addr = Op(1);
Expand Down
3 changes: 0 additions & 3 deletions src/coreclr/jit/hwintrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2078,9 +2078,6 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
case NI_AVX2_ConvertToVector256Int16:
case NI_AVX2_ConvertToVector256Int32:
case NI_AVX2_ConvertToVector256Int64:
case NI_AVX2_BroadcastVector128ToVector256:
case NI_AVX512F_BroadcastVector128ToVector512:
case NI_AVX512F_BroadcastVector256ToVector512:
{
// These intrinsics have both pointer and vector overloads
// We want to be able to differentiate between them so let's
Expand Down
22 changes: 12 additions & 10 deletions src/coreclr/jit/hwintrinsiccodegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1076,7 +1076,7 @@ void CodeGen::genHWIntrinsic_R_RM(
instOptions = AddEmbBroadcastMode(instOptions);
}

OperandDesc rmOpDesc = genOperandDesc(rmOp);
OperandDesc rmOpDesc = genOperandDesc(ins, rmOp);

if (((instOptions & INS_OPTS_EVEX_b_MASK) != 0) && (rmOpDesc.GetKind() == OperandKind::Reg))
{
Expand Down Expand Up @@ -1361,7 +1361,7 @@ void CodeGen::genHWIntrinsic_R_R_RM_R(GenTreeHWIntrinsic* node, instruction ins,
instOptions = AddEmbBroadcastMode(instOptions);
}

OperandDesc op2Desc = genOperandDesc(op2);
OperandDesc op2Desc = genOperandDesc(ins, op2);

if (op2Desc.IsContained())
{
Expand Down Expand Up @@ -1431,7 +1431,7 @@ void CodeGen::genHWIntrinsic_R_R_R_RM(instruction ins,
instOptions = AddEmbBroadcastMode(instOptions);
}

OperandDesc op3Desc = genOperandDesc(op3);
OperandDesc op3Desc = genOperandDesc(ins, op3);

if (((instOptions & INS_OPTS_EVEX_b_MASK) != 0) && (op3Desc.GetKind() == OperandKind::Reg))
{
Expand Down Expand Up @@ -1547,7 +1547,7 @@ void CodeGen::genHWIntrinsic_R_R_R_RM_I(
instOptions = AddEmbBroadcastMode(instOptions);
}

OperandDesc op3Desc = genOperandDesc(op3);
OperandDesc op3Desc = genOperandDesc(ins, op3);

switch (op3Desc.GetKind())
{
Expand Down Expand Up @@ -1898,11 +1898,15 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions)
op1 = loPart;
}

ins = INS_movq;
baseAttr = EA_8BYTE;
}
#endif // TARGET_X86

if (op1->isUsedFromMemory() && (baseAttr == EA_8BYTE))
{
ins = INS_movq;
}

genHWIntrinsic_R_RM(node, ins, baseAttr, targetReg, op1, instOptions);
}
else
Expand Down Expand Up @@ -1952,7 +1956,7 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions)
else
{
// `movq xmm xmm` zeroes the upper 64 bits.
genHWIntrinsic_R_RM(node, INS_movq, attr, targetReg, op1, instOptions);
emit->emitIns_Mov(INS_movq, attr, targetReg, op1Reg, /* canSkip */ false);
}
break;
}
Expand Down Expand Up @@ -2281,10 +2285,8 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions)
{
minValueInt.i32[i] = INT_MIN;
}
CORINFO_FIELD_HANDLE minValueFld = typeSize == EA_16BYTE ? emit->emitSimd16Const(minValueInt.v128[0])
: emit->emitSimd32Const(minValueInt.v256[0]);
CORINFO_FIELD_HANDLE negOneFld = typeSize == EA_16BYTE ? emit->emitSimd16Const(negOneIntVec.v128[0])
: emit->emitSimd32Const(negOneIntVec.v256[0]);
CORINFO_FIELD_HANDLE minValueFld = emit->emitSimdConst(&minValueInt, typeSize);
CORINFO_FIELD_HANDLE negOneFld = emit->emitSimdConst(&negOneIntVec, typeSize);

// div-by-zero check
emit->emitIns_SIMD_R_R_R(INS_xorpd, typeSize, tmpReg1, tmpReg1, tmpReg1, instOptions);
Expand Down
Loading
Loading