Skip to content

Commit 0d60428

Browse files
authored
ARM64-SVE: gathervector extends (#103370)
* ARM64-SVE: gathervector extends
* Comment out 32bit address APIs
* Replace triple quotes with doubles
* restore commented API
* Set HW_Category_MemoryLoad for all gatherloads
* Fix cast checking for all load types
* Remove GT_CAST changes
1 parent f8f4509 commit 0d60428

File tree

10 files changed

+1519
-48
lines changed

10 files changed

+1519
-48
lines changed

src/coreclr/jit/gentree.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27511,6 +27511,27 @@ bool GenTreeHWIntrinsic::OperIsMemoryLoad(GenTree** pAddr) const
2751127511
case NI_Sve_PrefetchInt64:
2751227512
addr = Op(2);
2751327513
break;
27514+
27515+
case NI_Sve_GatherVector:
27516+
case NI_Sve_GatherVectorByteZeroExtend:
27517+
case NI_Sve_GatherVectorInt16SignExtend:
27518+
case NI_Sve_GatherVectorInt16WithByteOffsetsSignExtend:
27519+
case NI_Sve_GatherVectorInt32SignExtend:
27520+
case NI_Sve_GatherVectorInt32WithByteOffsetsSignExtend:
27521+
case NI_Sve_GatherVectorSByteSignExtend:
27522+
case NI_Sve_GatherVectorUInt16WithByteOffsetsZeroExtend:
27523+
case NI_Sve_GatherVectorUInt16ZeroExtend:
27524+
case NI_Sve_GatherVectorUInt32WithByteOffsetsZeroExtend:
27525+
case NI_Sve_GatherVectorUInt32ZeroExtend:
27526+
addr = Op(2);
27527+
if (varTypeIsSIMD(addr->gtType))
27528+
{
27529+
// The address is a vector of addresses.
27530+
// Return true, but do not set pAddr.
27531+
return true;
27532+
}
27533+
break;
27534+
2751427535
#endif // TARGET_ARM64
2751527536

2751627537
default:

src/coreclr/jit/hwintrinsic.cpp

Lines changed: 27 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1601,19 +1601,36 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
16011601
: gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, op3, intrinsic, simdBaseJitType,
16021602
simdSize);
16031603

1604-
#ifdef TARGET_XARCH
1605-
if ((intrinsic == NI_AVX2_GatherVector128) || (intrinsic == NI_AVX2_GatherVector256))
1604+
switch (intrinsic)
16061605
{
1607-
assert(varTypeIsSIMD(op2->TypeGet()));
1608-
retNode->AsHWIntrinsic()->SetAuxiliaryJitType(getBaseJitTypeOfSIMDType(sigReader.op2ClsHnd));
1609-
}
1606+
#if defined(TARGET_XARCH)
1607+
case NI_AVX2_GatherVector128:
1608+
case NI_AVX2_GatherVector256:
1609+
assert(varTypeIsSIMD(op2->TypeGet()));
1610+
retNode->AsHWIntrinsic()->SetAuxiliaryJitType(getBaseJitTypeOfSIMDType(sigReader.op2ClsHnd));
1611+
break;
1612+
16101613
#elif defined(TARGET_ARM64)
1611-
if (intrinsic == NI_Sve_GatherVector)
1612-
{
1613-
assert(varTypeIsSIMD(op3->TypeGet()));
1614-
retNode->AsHWIntrinsic()->SetAuxiliaryJitType(getBaseJitTypeOfSIMDType(sigReader.op3ClsHnd));
1615-
}
1614+
case NI_Sve_GatherVector:
1615+
case NI_Sve_GatherVectorByteZeroExtend:
1616+
case NI_Sve_GatherVectorInt16SignExtend:
1617+
case NI_Sve_GatherVectorInt16WithByteOffsetsSignExtend:
1618+
case NI_Sve_GatherVectorInt32SignExtend:
1619+
case NI_Sve_GatherVectorInt32WithByteOffsetsSignExtend:
1620+
case NI_Sve_GatherVectorSByteSignExtend:
1621+
case NI_Sve_GatherVectorUInt16WithByteOffsetsZeroExtend:
1622+
case NI_Sve_GatherVectorUInt16ZeroExtend:
1623+
case NI_Sve_GatherVectorUInt32WithByteOffsetsZeroExtend:
1624+
case NI_Sve_GatherVectorUInt32ZeroExtend:
1625+
assert(varTypeIsSIMD(op3->TypeGet()));
1626+
retNode->AsHWIntrinsic()->SetAuxiliaryJitType(getBaseJitTypeOfSIMDType(sigReader.op3ClsHnd));
1627+
break;
16161628
#endif
1629+
1630+
default:
1631+
break;
1632+
}
1633+
16171634
break;
16181635
}
16191636

src/coreclr/jit/hwintrinsiccodegenarm64.cpp

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1874,33 +1874,46 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
18741874
}
18751875

18761876
case NI_Sve_GatherVector:
1877+
case NI_Sve_GatherVectorByteZeroExtend:
1878+
case NI_Sve_GatherVectorInt16SignExtend:
1879+
case NI_Sve_GatherVectorInt16WithByteOffsetsSignExtend:
1880+
case NI_Sve_GatherVectorInt32SignExtend:
1881+
case NI_Sve_GatherVectorInt32WithByteOffsetsSignExtend:
1882+
case NI_Sve_GatherVectorSByteSignExtend:
1883+
case NI_Sve_GatherVectorUInt16WithByteOffsetsZeroExtend:
1884+
case NI_Sve_GatherVectorUInt16ZeroExtend:
1885+
case NI_Sve_GatherVectorUInt32WithByteOffsetsZeroExtend:
1886+
case NI_Sve_GatherVectorUInt32ZeroExtend:
18771887
{
18781888
if (!varTypeIsSIMD(intrin.op2->gtType))
18791889
{
1880-
// GatherVector(Vector<T> mask, T* address, Vector<T2> indices)
1890+
// GatherVector...(Vector<T> mask, T* address, Vector<T2> indices)
18811891

18821892
assert(intrin.numOperands == 3);
18831893
emitAttr baseSize = emitActualTypeSize(intrin.baseType);
18841894

18851895
if (baseSize == EA_8BYTE)
18861896
{
1887-
// Index is multiplied by 8
1888-
GetEmitter()->emitIns_R_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, op3Reg, opt,
1889-
INS_SCALABLE_OPTS_LSL_N);
1897+
// Index is scaled (LSL_N), except for byte loads (ld1b/ld1sb), which use no scaling option.
1898+
insScalableOpts sopt = (ins == INS_sve_ld1b || ins == INS_sve_ld1sb) ? INS_SCALABLE_OPTS_NONE
1899+
: INS_SCALABLE_OPTS_LSL_N;
1900+
GetEmitter()->emitIns_R_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, op3Reg, opt, sopt);
18901901
}
18911902
else
18921903
{
1893-
// Index is sign or zero extended to 64bits, then multiplied by 4
1904+
// Index is sign- or zero-extended to 64 bits, then scaled (MOD_N); byte loads (ld1b/ld1sb) are not scaled.
18941905
assert(baseSize == EA_4BYTE);
18951906
opt = varTypeIsUnsigned(node->GetAuxiliaryType()) ? INS_OPTS_SCALABLE_S_UXTW
18961907
: INS_OPTS_SCALABLE_S_SXTW;
1897-
GetEmitter()->emitIns_R_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, op3Reg, opt,
1898-
INS_SCALABLE_OPTS_MOD_N);
1908+
1909+
insScalableOpts sopt = (ins == INS_sve_ld1b || ins == INS_sve_ld1sb) ? INS_SCALABLE_OPTS_NONE
1910+
: INS_SCALABLE_OPTS_MOD_N;
1911+
GetEmitter()->emitIns_R_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, op3Reg, opt, sopt);
18991912
}
19001913
}
19011914
else
19021915
{
1903-
// GatherVector(Vector<T> mask, Vector<T2> addresses)
1916+
// GatherVector...(Vector<T> mask, Vector<T2> addresses)
19041917

19051918
assert(intrin.numOperands == 2);
19061919
GetEmitter()->emitIns_R_R_R_I(ins, emitSize, targetReg, op1Reg, op2Reg, 0, opt);

src/coreclr/jit/hwintrinsiclistarm64sve.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,17 @@ HARDWARE_INTRINSIC(Sve, FusedMultiplyAddNegated,
7575
HARDWARE_INTRINSIC(Sve, FusedMultiplySubtract, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmls, INS_sve_fmls}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen)
7676
HARDWARE_INTRINSIC(Sve, FusedMultiplySubtractBySelectedScalar, -1, 4, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmls, INS_sve_fmls}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_FmaIntrinsic|HW_Flag_LowVectorOperation)
7777
HARDWARE_INTRINSIC(Sve, FusedMultiplySubtractNegated, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fnmls, INS_sve_fnmls}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen)
78-
HARDWARE_INTRINSIC(Sve, GatherVector, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1d, INS_sve_ld1d, INS_sve_ld1w, INS_sve_ld1d}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
78+
HARDWARE_INTRINSIC(Sve, GatherVector, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1d, INS_sve_ld1d, INS_sve_ld1w, INS_sve_ld1d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
79+
HARDWARE_INTRINSIC(Sve, GatherVectorByteZeroExtend, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1b, INS_sve_ld1b, INS_sve_ld1b, INS_sve_ld1b, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
80+
HARDWARE_INTRINSIC(Sve, GatherVectorInt16SignExtend, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sh, INS_sve_ld1sh, INS_sve_ld1sh, INS_sve_ld1sh, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
81+
HARDWARE_INTRINSIC(Sve, GatherVectorInt16WithByteOffsetsSignExtend, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sh, INS_sve_ld1sh, INS_sve_ld1sh, INS_sve_ld1sh, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
82+
HARDWARE_INTRINSIC(Sve, GatherVectorInt32SignExtend, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sw, INS_sve_ld1sw, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
83+
HARDWARE_INTRINSIC(Sve, GatherVectorInt32WithByteOffsetsSignExtend, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sw, INS_sve_ld1sw, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
84+
HARDWARE_INTRINSIC(Sve, GatherVectorSByteSignExtend, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sb, INS_sve_ld1sb, INS_sve_ld1sb, INS_sve_ld1sb, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
85+
HARDWARE_INTRINSIC(Sve, GatherVectorUInt16WithByteOffsetsZeroExtend, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1h, INS_sve_ld1h, INS_sve_ld1h, INS_sve_ld1h, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
86+
HARDWARE_INTRINSIC(Sve, GatherVectorUInt16ZeroExtend, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1h, INS_sve_ld1h, INS_sve_ld1h, INS_sve_ld1h, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
87+
HARDWARE_INTRINSIC(Sve, GatherVectorUInt32WithByteOffsetsZeroExtend, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1w, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
88+
HARDWARE_INTRINSIC(Sve, GatherVectorUInt32ZeroExtend, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1w, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
7989
HARDWARE_INTRINSIC(Sve, GetActiveElementCount, -1, 2, true, {INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_ExplicitMaskedOperation)
8090
HARDWARE_INTRINSIC(Sve, LeadingSignCount, -1, -1, false, {INS_sve_cls, INS_invalid, INS_sve_cls, INS_invalid, INS_sve_cls, INS_invalid, INS_sve_cls, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
8191
HARDWARE_INTRINSIC(Sve, LeadingZeroCount, -1, -1, false, {INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)

0 commit comments

Comments
 (0)