Skip to content

Commit 621dc68

Browse files
Implement StoreSelectedScalar128x2 for Arm64
1 parent 9f4884c commit 621dc68

File tree

11 files changed

+621
-5
lines changed

11 files changed

+621
-5
lines changed

src/coreclr/jit/emitarm64.cpp

Lines changed: 14 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5590,10 +5590,23 @@ void emitter::emitIns_R_R_I(
55905590
isLdSt = true;
55915591
break;
55925592

5593+
case INS_st2:
5594+
assert(isVectorRegister(reg1));
5595+
assert(isGeneralRegisterOrSP(reg2));
5596+
assert(insOptsNone(opt));
5597+
5598+
reg2 = encodingSPtoZR(reg2);
5599+
elemsize = size;
5600+
assert(isValidVectorElemsize(elemsize));
5601+
assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
5602+
5603+
// Load/Store single structure base register
5604+
fmt = IF_LS_2F;
5605+
break;
5606+
55935607
case INS_ld2:
55945608
case INS_ld3:
55955609
case INS_ld4:
5596-
case INS_st2:
55975610
case INS_st3:
55985611
case INS_st4:
55995612
assert(opt != INS_OPTS_1D); // .1D format only permitted with LD1 & ST1

src/coreclr/jit/hwintrinsicarm64.cpp

Lines changed: 26 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -254,8 +254,10 @@ void HWIntrinsicInfo::lookupImmBounds(
254254
case NI_AdvSimd_InsertScalar:
255255
case NI_AdvSimd_LoadAndInsertScalar:
256256
case NI_AdvSimd_StoreSelectedScalar:
257+
case NI_AdvSimd_StoreSelectedScalar64x2:
257258
case NI_AdvSimd_Arm64_DuplicateSelectedScalarToVector128:
258259
case NI_AdvSimd_Arm64_InsertSelectedScalar:
260+
case NI_AdvSimd_Arm64_StoreSelectedScalar128x2:
259261
immUpperBound = Compiler::getSIMDVectorLength(simdSize, baseType) - 1;
260262
break;
261263

@@ -1738,16 +1740,29 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
17381740
break;
17391741
}
17401742

1743+
case NI_AdvSimd_StoreSelectedScalar64x2:
17411744
case NI_AdvSimd_StoreVector64x2:
1745+
case NI_AdvSimd_Arm64_StoreSelectedScalar128x2:
17421746
case NI_AdvSimd_Arm64_StoreVector128x2:
17431747
{
1744-
assert(sig->numArgs == 2);
17451748
assert(retType == TYP_VOID);
17461749

17471750
CORINFO_ARG_LIST_HANDLE arg1 = sig->args;
17481751
CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(arg1);
17491752
var_types argType = TYP_UNKNOWN;
17501753
CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE;
1754+
const bool isSingleStructStore = sig->numArgs == 3;
1755+
1756+
if (isSingleStructStore)
1757+
{
1758+
CORINFO_ARG_LIST_HANDLE arg3 = info.compCompHnd->getArgNext(arg2);
1759+
argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg3, &argClass)));
1760+
op3 = impPopStack().val;
1761+
}
1762+
else
1763+
{
1764+
assert(sig->numArgs == 2);
1765+
}
17511766

17521767
argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass)));
17531768
op2 = impPopStack().val;
@@ -1768,15 +1783,23 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
17681783

17691784
if (!op2->OperIs(GT_LCL_VAR))
17701785
{
1771-
unsigned tmp = lvaGrabTemp(true DEBUGARG("StoreVectorNx2 temp tree"));
1786+
unsigned tmp = lvaGrabTemp(true DEBUGARG( isSingleStructStore ? "StoreSelectedScalarNx2 temp tree" : "StoreVectorNx2 temp tree"));
17721787

17731788
impStoreTemp(tmp, op2, CHECK_SPILL_NONE);
17741789
op2 = gtNewLclvNode(tmp, argType);
17751790
}
17761791
op2 = gtConvertTableOpToFieldList(op2, fieldCount);
17771792

17781793
info.compNeedsConsecutiveRegisters = true;
1779-
retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize);
1794+
1795+
if (isSingleStructStore)
1796+
{
1797+
retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize);
1798+
}
1799+
else
1800+
{
1801+
retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize);
1802+
}
17801803
break;
17811804
}
17821805

src/coreclr/jit/hwintrinsiccodegenarm64.cpp

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -764,6 +764,26 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
764764
GetEmitter()->emitIns_R_R_R(ins, emitTypeSize(intrin.baseType), op2Reg, op3Reg, op1Reg);
765765
break;
766766

767+
case NI_AdvSimd_StoreSelectedScalar64x2:
768+
case NI_AdvSimd_Arm64_StoreSelectedScalar128x2:
769+
{
770+
assert(intrin.op2->OperIsFieldList());
771+
772+
GenTreeFieldList* fieldList = intrin.op2->AsFieldList();
773+
GenTree* firstField = fieldList->Uses().GetHead()->GetNode();
774+
op2Reg = firstField->GetRegNum();
775+
776+
HWIntrinsicImmOpHelper helper(this, intrin.op3, node);
777+
778+
for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd())
779+
{
780+
const int elementIndex = helper.ImmValue();
781+
782+
GetEmitter()->emitIns_R_R_I(ins, emitSize, op2Reg, op1Reg, elementIndex, opt);
783+
}
784+
break;
785+
}
786+
767787
case NI_AdvSimd_StoreVector64x2:
768788
case NI_AdvSimd_Arm64_StoreVector128x2:
769789
{

src/coreclr/jit/hwintrinsiclistarm64.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -473,6 +473,7 @@ HARDWARE_INTRINSIC(AdvSimd, SignExtendWideningUpper,
473473
HARDWARE_INTRINSIC(AdvSimd, SqrtScalar, 8, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fsqrt, INS_fsqrt}, HW_Category_SIMD, HW_Flag_SIMDScalar)
474474
HARDWARE_INTRINSIC(AdvSimd, Store, -1, 2, true, {INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoCodeGen)
475475
HARDWARE_INTRINSIC(AdvSimd, StoreSelectedScalar, -1, 3, true, {INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen)
476+
HARDWARE_INTRINSIC(AdvSimd, StoreSelectedScalar64x2, 8, 3, true, {INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_invalid, INS_invalid, INS_st2, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters|HW_Flag_SIMDScalar|HW_Flag_HasImmediateOperand)
476477
HARDWARE_INTRINSIC(AdvSimd, StoreVector64x2, 8, 2, true, {INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_invalid, INS_invalid, INS_st2, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
477478
HARDWARE_INTRINSIC(AdvSimd, Subtract, -1, 2, true, {INS_sub, INS_sub, INS_sub, INS_sub, INS_sub, INS_sub, INS_sub, INS_sub, INS_fsub, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag)
478479
HARDWARE_INTRINSIC(AdvSimd, SubtractHighNarrowingLower, 8, 2, true, {INS_subhn, INS_subhn, INS_subhn, INS_subhn, INS_subhn, INS_subhn, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag)
@@ -658,6 +659,7 @@ HARDWARE_INTRINSIC(AdvSimd_Arm64, StorePair,
658659
HARDWARE_INTRINSIC(AdvSimd_Arm64, StorePairScalar, 8, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_stp, INS_stp, INS_invalid, INS_invalid, INS_stp, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen)
659660
HARDWARE_INTRINSIC(AdvSimd_Arm64, StorePairScalarNonTemporal, 8, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_stnp, INS_stnp, INS_invalid, INS_invalid, INS_stnp, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen)
660661
HARDWARE_INTRINSIC(AdvSimd_Arm64, StorePairNonTemporal, -1, 3, true, {INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stp}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen)
662+
HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreSelectedScalar128x2, 16, 3, true, {INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters|HW_Flag_SIMDScalar|HW_Flag_HasImmediateOperand)
661663
HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreVector128x2, 16, 2, true, {INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
662664
HARDWARE_INTRINSIC(AdvSimd_Arm64, Subtract, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fsub}, HW_Category_SIMD, HW_Flag_NoFlag)
663665
HARDWARE_INTRINSIC(AdvSimd_Arm64, SubtractSaturateScalar, 8, 2, true, {INS_sqsub, INS_uqsub, INS_sqsub, INS_uqsub, INS_sqsub, INS_uqsub, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SIMDScalar)

src/coreclr/jit/lowerarmarch.cpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3025,6 +3025,8 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
30253025
case NI_AdvSimd_ExtractVector64:
30263026
case NI_AdvSimd_ExtractVector128:
30273027
case NI_AdvSimd_StoreSelectedScalar:
3028+
case NI_AdvSimd_StoreSelectedScalar64x2:
3029+
case NI_AdvSimd_Arm64_StoreSelectedScalar128x2:
30283030
assert(hasImmediateOperand);
30293031
assert(varTypeIsIntegral(intrin.op3));
30303032
if (intrin.op3->IsCnsIntOrI())

src/coreclr/jit/lsraarm64.cpp

Lines changed: 26 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1416,6 +1416,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
14161416
case NI_AdvSimd_ExtractVector64:
14171417
case NI_AdvSimd_ExtractVector128:
14181418
case NI_AdvSimd_StoreSelectedScalar:
1419+
case NI_AdvSimd_StoreSelectedScalar64x2:
1420+
case NI_AdvSimd_Arm64_StoreSelectedScalar128x2:
14191421
needBranchTargetReg = !intrin.op3->isContainedIntOrIImmed();
14201422
break;
14211423

@@ -1558,16 +1560,19 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
15581560
{
15591561
case NI_AdvSimd_VectorTableLookup:
15601562
case NI_AdvSimd_Arm64_VectorTableLookup:
1563+
{
15611564
assert(intrin.op2 != nullptr);
15621565
srcCount += BuildOperandUses(intrin.op2);
15631566
assert(dstCount == 1);
15641567
buildInternalRegisterUses();
15651568
BuildDef(intrinsicTree);
15661569
*pDstCount = 1;
15671570
break;
1571+
}
15681572

15691573
case NI_AdvSimd_VectorTableLookupExtension:
15701574
case NI_AdvSimd_Arm64_VectorTableLookupExtension:
1575+
{
15711576
assert(intrin.op2 != nullptr);
15721577
assert(intrin.op3 != nullptr);
15731578
assert(isRMW);
@@ -1578,14 +1583,35 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
15781583
BuildDef(intrinsicTree);
15791584
*pDstCount = 1;
15801585
break;
1586+
}
1587+
1588+
case NI_AdvSimd_StoreSelectedScalar64x2:
1589+
case NI_AdvSimd_Arm64_StoreSelectedScalar128x2:
1590+
{
1591+
assert(intrin.op1 != nullptr);
1592+
assert(intrin.op3 != nullptr);
1593+
srcCount += BuildConsecutiveRegistersForUse(intrin.op2);
1594+
if (!intrin.op3->isContainedIntOrIImmed())
1595+
{
1596+
srcCount += BuildOperandUses(intrin.op3);
1597+
}
1598+
assert(dstCount == 0);
1599+
buildInternalRegisterUses();
1600+
*pDstCount = 0;
1601+
break;
1602+
}
1603+
15811604
case NI_AdvSimd_StoreVector64x2:
15821605
case NI_AdvSimd_Arm64_StoreVector128x2:
1606+
{
15831607
assert(intrin.op1 != nullptr);
15841608
srcCount += BuildConsecutiveRegistersForUse(intrin.op2);
15851609
assert(dstCount == 0);
15861610
buildInternalRegisterUses();
15871611
*pDstCount = 0;
15881612
break;
1613+
}
1614+
15891615
case NI_AdvSimd_LoadVector64x2:
15901616
case NI_AdvSimd_LoadVector64x3:
15911617
case NI_AdvSimd_LoadVector64x4:

0 commit comments

Comments
 (0)