Skip to content

Commit f818517

Browse files
Add support for Sve.Store() (#102262)
* Add support for Sve.Store()
* Fix formatting issues
* Remove incorrect instructions from comment
* Rename Sve.Store() -> Sve.StoreAndZip()
* Refactor test templates
1 parent 6e43b6a commit f818517

File tree

13 files changed

+2221
-2
lines changed

13 files changed

+2221
-2
lines changed

src/coreclr/jit/gentree.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26706,7 +26706,14 @@ bool GenTreeHWIntrinsic::OperIsMemoryStore(GenTree** pAddr) const
2670626706
case NI_SSE2_MaskMove:
2670726707
addr = Op(3);
2670826708
break;
26709-
#endif // TARGET_XARCH
26709+
#elif defined(TARGET_ARM64)
26710+
case NI_Sve_StoreAndZip:
26711+
case NI_Sve_StoreAndZipx2:
26712+
case NI_Sve_StoreAndZipx3:
26713+
case NI_Sve_StoreAndZipx4:
26714+
addr = Op(2);
26715+
break;
26716+
#endif // TARGET_ARM64
2671026717

2671126718
default:
2671226719
addr = Op(1);

src/coreclr/jit/hwintrinsicarm64.cpp

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2426,6 +2426,59 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
24262426
break;
24272427
}
24282428

2429+
case NI_Sve_StoreAndZip:
2430+
{
2431+
assert(sig->numArgs == 3);
2432+
assert(retType == TYP_VOID);
2433+
2434+
CORINFO_ARG_LIST_HANDLE arg1 = sig->args;
2435+
CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(arg1);
2436+
CORINFO_ARG_LIST_HANDLE arg3 = info.compCompHnd->getArgNext(arg2);
2437+
var_types argType = TYP_UNKNOWN;
2438+
CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE;
2439+
argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg3, &argClass)));
2440+
op3 = impPopStack().val;
2441+
unsigned fieldCount = info.compCompHnd->getClassNumInstanceFields(argClass);
2442+
2443+
if (op3->TypeGet() == TYP_STRUCT)
2444+
{
2445+
info.compNeedsConsecutiveRegisters = true;
2446+
switch (fieldCount)
2447+
{
2448+
case 2:
2449+
intrinsic = NI_Sve_StoreAndZipx2;
2450+
break;
2451+
2452+
case 3:
2453+
intrinsic = NI_Sve_StoreAndZipx3;
2454+
break;
2455+
2456+
case 4:
2457+
intrinsic = NI_Sve_StoreAndZipx4;
2458+
break;
2459+
2460+
default:
2461+
// A bare string literal is a non-null pointer, so assert("...") is always true and can
// never fire. Negate the literal so an unexpected field count (anything other than 2, 3
// or 4) actually trips the assert in checked builds instead of silently continuing with
// the single-vector intrinsic id.
assert(!"unsupported");
2462+
}
2463+
2464+
if (!op3->OperIs(GT_LCL_VAR))
2465+
{
2466+
unsigned tmp = lvaGrabTemp(true DEBUGARG("SveStoreN"));
2467+
2468+
impStoreToTemp(tmp, op3, CHECK_SPILL_NONE);
2469+
op3 = gtNewLclvNode(tmp, argType);
2470+
}
2471+
op3 = gtConvertTableOpToFieldList(op3, fieldCount);
2472+
}
2473+
2474+
argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass)));
2475+
op2 = getArgForHWIntrinsic(argType, argClass);
2476+
argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass)));
2477+
op1 = getArgForHWIntrinsic(argType, argClass);
2478+
retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize);
2479+
break;
2480+
}
2481+
24292482
default:
24302483
{
24312484
return nullptr;

src/coreclr/jit/hwintrinsiccodegenarm64.cpp

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1348,6 +1348,67 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
13481348
}
13491349
break;
13501350

1351+
case NI_Sve_StoreAndZipx2:
1352+
case NI_Sve_StoreAndZipx3:
1353+
case NI_Sve_StoreAndZipx4:
1354+
{
1355+
assert(intrin.op3->OperIsFieldList());
1356+
GenTreeFieldList* fieldList = intrin.op3->AsFieldList();
1357+
GenTree* firstField = fieldList->Uses().GetHead()->GetNode();
1358+
op3Reg = firstField->GetRegNum();
1359+
1360+
#ifdef DEBUG
1361+
unsigned regCount = 0;
1362+
regNumber argReg = op3Reg;
1363+
for (GenTreeFieldList::Use& use : fieldList->Uses())
1364+
{
1365+
regCount++;
1366+
1367+
GenTree* argNode = use.GetNode();
1368+
assert(argReg == argNode->GetRegNum());
1369+
argReg = getNextSIMDRegWithWraparound(argReg);
1370+
}
1371+
1372+
switch (ins)
1373+
{
1374+
case INS_sve_st2b:
1375+
case INS_sve_st2d:
1376+
case INS_sve_st2h:
1377+
case INS_sve_st2w:
1378+
case INS_sve_st2q:
1379+
assert(regCount == 2);
1380+
break;
1381+
1382+
case INS_sve_st3b:
1383+
case INS_sve_st3d:
1384+
case INS_sve_st3h:
1385+
case INS_sve_st3w:
1386+
case INS_sve_st3q:
1387+
assert(regCount == 3);
1388+
break;
1389+
1390+
case INS_sve_st4b:
1391+
case INS_sve_st4d:
1392+
case INS_sve_st4h:
1393+
case INS_sve_st4w:
1394+
case INS_sve_st4q:
1395+
assert(regCount == 4);
1396+
break;
1397+
1398+
default:
1399+
unreached();
1400+
}
1401+
#endif
1402+
GetEmitter()->emitIns_R_R_R_I(ins, emitSize, op3Reg, op1Reg, op2Reg, 0, opt);
1403+
break;
1404+
}
1405+
1406+
case NI_Sve_StoreAndZip:
1407+
{
1408+
GetEmitter()->emitIns_R_R_R_I(ins, emitSize, op3Reg, op1Reg, op2Reg, 0, opt);
1409+
break;
1410+
}
1411+
13511412
case NI_Vector64_ToVector128:
13521413
GetEmitter()->emitIns_Mov(ins, emitSize, targetReg, op1Reg, /* canSkip */ false);
13531414
break;

src/coreclr/jit/hwintrinsiclistarm64sve.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ HARDWARE_INTRINSIC(Sve, SignExtend32,
114114
HARDWARE_INTRINSIC(Sve, SignExtend8, -1, -1, false, {INS_invalid, INS_invalid, INS_sve_sxtb, INS_invalid, INS_sve_sxtb, INS_invalid, INS_sve_sxtb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
115115
HARDWARE_INTRINSIC(Sve, SignExtendWideningLower, -1, 1, true, {INS_sve_sunpklo, INS_invalid, INS_sve_sunpklo, INS_invalid, INS_sve_sunpklo, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg)
116116
HARDWARE_INTRINSIC(Sve, SignExtendWideningUpper, -1, 1, true, {INS_sve_sunpkhi, INS_invalid, INS_sve_sunpkhi, INS_invalid, INS_sve_sunpkhi, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg)
117+
HARDWARE_INTRINSIC(Sve, StoreAndZip, -1, 3, true, {INS_sve_st1b, INS_sve_st1b, INS_sve_st1h, INS_sve_st1h, INS_sve_st1w, INS_sve_st1w, INS_sve_st1d, INS_sve_st1d, INS_sve_st1w, INS_sve_st1d}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_ExplicitMaskedOperation|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_LowMaskedOperation)
117118
HARDWARE_INTRINSIC(Sve, Subtract, -1, 2, true, {INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_fsub, INS_sve_fsub}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)
118119
HARDWARE_INTRINSIC(Sve, SubtractSaturate, -1, 2, true, {INS_sve_sqsub, INS_sve_uqsub, INS_sve_sqsub, INS_sve_uqsub, INS_sve_sqsub, INS_sve_uqsub, INS_sve_sqsub, INS_sve_uqsub, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)
119120
HARDWARE_INTRINSIC(Sve, UnzipEven, -1, 2, true, {INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen)
@@ -138,6 +139,9 @@ HARDWARE_INTRINSIC(Sve, ZipLow,
138139
HARDWARE_INTRINSIC(Sve, ConvertMaskToVector, -1, 1, true, {INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation)
139140
HARDWARE_INTRINSIC(Sve, ConvertVectorToMask, -1, 2, true, {INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask|HW_Flag_LowMaskedOperation)
140141
HARDWARE_INTRINSIC(Sve, CreateTrueMaskAll, -1, -1, false, {INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask)
142+
HARDWARE_INTRINSIC(Sve, StoreAndZipx2, -1, 3, true, {INS_sve_st2b, INS_sve_st2b, INS_sve_st2h, INS_sve_st2h, INS_sve_st2w, INS_sve_st2w, INS_sve_st2d, INS_sve_st2d, INS_sve_st2w, INS_sve_st2d}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_NeedsConsecutiveRegisters)
143+
HARDWARE_INTRINSIC(Sve, StoreAndZipx3, -1, 3, true, {INS_sve_st3b, INS_sve_st3b, INS_sve_st3h, INS_sve_st3h, INS_sve_st3w, INS_sve_st3w, INS_sve_st3d, INS_sve_st3d, INS_sve_st3w, INS_sve_st3d}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_NeedsConsecutiveRegisters)
144+
HARDWARE_INTRINSIC(Sve, StoreAndZipx4, -1, 3, true, {INS_sve_st4b, INS_sve_st4b, INS_sve_st4h, INS_sve_st4h, INS_sve_st4w, INS_sve_st4w, INS_sve_st4d, INS_sve_st4d, INS_sve_st4w, INS_sve_st4d}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_NeedsConsecutiveRegisters)
141145

142146

143147
#endif // FEATURE_HW_INTRINSIC

src/coreclr/jit/lsraarm64.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1758,6 +1758,20 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
17581758
break;
17591759
}
17601760

1761+
case NI_Sve_StoreAndZipx2:
1762+
case NI_Sve_StoreAndZipx3:
1763+
case NI_Sve_StoreAndZipx4:
1764+
{
1765+
assert(intrin.op2 != nullptr);
1766+
assert(intrin.op3 != nullptr);
1767+
srcCount += BuildAddrUses(intrin.op2);
1768+
srcCount += BuildConsecutiveRegistersForUse(intrin.op3);
1769+
assert(dstCount == 0);
1770+
buildInternalRegisterUses();
1771+
*pDstCount = 0;
1772+
break;
1773+
}
1774+
17611775
default:
17621776
noway_assert(!"Not a supported as multiple consecutive register intrinsic");
17631777
}
@@ -1894,6 +1908,10 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
18941908
(argNum == lowVectorOperandNum) ? lowVectorCandidates : RBM_NONE);
18951909
}
18961910
}
1911+
else if (intrin.id == NI_Sve_StoreAndZip)
1912+
{
1913+
srcCount += BuildAddrUses(intrin.op2);
1914+
}
18971915
else
18981916
{
18991917
regMaskTP candidates = lowVectorOperandNum == 2 ? lowVectorCandidates : RBM_NONE;

0 commit comments

Comments
 (0)