Skip to content

Commit ab013a3

Browse files
Arm AdvSimd: Rename StoreVectorMxN to Store (#103689)
* Arm AdvSimd: Rename StoreVectorMxN to Store
* Restore HW_Flag_InvalidNodeId flag for AdvSimd.Store node
* Rename StoreVectorMxN to Store for Mono
* Incorporate review comments and remove InvalidNodeId flag

---------

Co-authored-by: fanyang-mono <yangfan@microsoft.com>
1 parent 117cfcc commit ab013a3

File tree

12 files changed

+311
-354
lines changed

12 files changed

+311
-354
lines changed

src/coreclr/jit/hwintrinsicarm64.cpp

Lines changed: 51 additions & 56 deletions
Original file line number | Diff line number | Diff line change
@@ -1909,22 +1909,63 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
19091909
}
19101910

19111911
case NI_AdvSimd_Store:
1912+
case NI_AdvSimd_Arm64_Store:
19121913
{
1913-
assert(retType == TYP_VOID);
1914-
assert(sig->numArgs == 2);
1914+
CORINFO_ARG_LIST_HANDLE arg1 = sig->args;
1915+
CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(arg1);
1916+
var_types argType = TYP_UNKNOWN;
1917+
CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE;
1918+
argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass)));
1919+
op2 = impPopStack().val;
19151920

1916-
var_types simdType = getSIMDTypeForSize(simdSize);
1921+
if (op2->TypeGet() == TYP_STRUCT)
1922+
{
1923+
info.compNeedsConsecutiveRegisters = true;
1924+
unsigned fieldCount = info.compCompHnd->getClassNumInstanceFields(argClass);
19171925

1918-
op2 = impSIMDPopStack();
1919-
op1 = impPopStack().val;
1926+
if (!op2->OperIs(GT_LCL_VAR))
1927+
{
1928+
unsigned tmp = lvaGrabTemp(true DEBUGARG("StoreVectorN"));
19201929

1921-
if (op1->OperIs(GT_CAST) && op1->gtGetOp1()->TypeIs(TYP_BYREF))
1922-
{
1923-
// If what we have is a BYREF, that's what we really want, so throw away the cast.
1924-
op1 = op1->gtGetOp1();
1930+
impStoreToTemp(tmp, op2, CHECK_SPILL_NONE);
1931+
op2 = gtNewLclvNode(tmp, argType);
1932+
}
1933+
op2 = gtConvertTableOpToFieldList(op2, fieldCount);
1934+
argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass)));
1935+
op1 = getArgForHWIntrinsic(argType, argClass);
1936+
1937+
if (op1->OperIs(GT_CAST))
1938+
{
1939+
// Although the API specifies a pointer, if what we have is a BYREF, that's what
1940+
// we really want, so throw away the cast.
1941+
if (op1->gtGetOp1()->TypeGet() == TYP_BYREF)
1942+
{
1943+
op1 = op1->gtGetOp1();
1944+
}
1945+
}
1946+
1947+
retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize);
19251948
}
1949+
else
1950+
{
1951+
if (op2->TypeGet() == TYP_SIMD16)
1952+
{
1953+
// Update the simdSize explicitly as Vector128 variant of Store() is present in AdvSimd instead of
1954+
// AdvSimd.Arm64.
1955+
simdSize = 16;
1956+
}
1957+
1958+
var_types simdType = getSIMDTypeForSize(simdSize);
1959+
op1 = impPopStack().val;
1960+
1961+
if (op1->OperIs(GT_CAST) && op1->gtGetOp1()->TypeIs(TYP_BYREF))
1962+
{
1963+
// If what we have is a BYREF, that's what we really want, so throw away the cast.
1964+
op1 = op1->gtGetOp1();
1965+
}
19261966

1927-
retNode = gtNewSimdStoreNode(op1, op2, simdBaseJitType, simdSize);
1967+
retNode = gtNewSimdStoreNode(op1, op2, simdBaseJitType, simdSize);
1968+
}
19281969
break;
19291970
}
19301971

@@ -2080,52 +2121,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
20802121
break;
20812122
}
20822123

2083-
case NI_AdvSimd_StoreVector64x2:
2084-
case NI_AdvSimd_StoreVector64x3:
2085-
case NI_AdvSimd_StoreVector64x4:
2086-
case NI_AdvSimd_Arm64_StoreVector128x2:
2087-
case NI_AdvSimd_Arm64_StoreVector128x3:
2088-
case NI_AdvSimd_Arm64_StoreVector128x4:
2089-
{
2090-
assert(sig->numArgs == 2);
2091-
assert(retType == TYP_VOID);
2092-
2093-
CORINFO_ARG_LIST_HANDLE arg1 = sig->args;
2094-
CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(arg1);
2095-
var_types argType = TYP_UNKNOWN;
2096-
CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE;
2097-
2098-
argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass)));
2099-
op2 = impPopStack().val;
2100-
unsigned fieldCount = info.compCompHnd->getClassNumInstanceFields(argClass);
2101-
argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass)));
2102-
op1 = getArgForHWIntrinsic(argType, argClass);
2103-
2104-
assert(op2->TypeGet() == TYP_STRUCT);
2105-
if (op1->OperIs(GT_CAST))
2106-
{
2107-
// Although the API specifies a pointer, if what we have is a BYREF, that's what
2108-
// we really want, so throw away the cast.
2109-
if (op1->gtGetOp1()->TypeGet() == TYP_BYREF)
2110-
{
2111-
op1 = op1->gtGetOp1();
2112-
}
2113-
}
2114-
2115-
if (!op2->OperIs(GT_LCL_VAR))
2116-
{
2117-
unsigned tmp = lvaGrabTemp(true DEBUGARG("StoreVectorNx2 temp tree"));
2118-
2119-
impStoreToTemp(tmp, op2, CHECK_SPILL_NONE);
2120-
op2 = gtNewLclvNode(tmp, argType);
2121-
}
2122-
op2 = gtConvertTableOpToFieldList(op2, fieldCount);
2123-
2124-
info.compNeedsConsecutiveRegisters = true;
2125-
retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize);
2126-
break;
2127-
}
2128-
21292124
case NI_AdvSimd_StoreSelectedScalar:
21302125
case NI_AdvSimd_Arm64_StoreSelectedScalar:
21312126
{

src/coreclr/jit/hwintrinsiccodegenarm64.cpp

Lines changed: 6 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -1280,40 +1280,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
12801280
break;
12811281
}
12821282

1283-
case NI_AdvSimd_StoreVector64x2:
1284-
case NI_AdvSimd_StoreVector64x3:
1285-
case NI_AdvSimd_StoreVector64x4:
1286-
case NI_AdvSimd_Arm64_StoreVector128x2:
1287-
case NI_AdvSimd_Arm64_StoreVector128x3:
1288-
case NI_AdvSimd_Arm64_StoreVector128x4:
1289-
{
1290-
unsigned regCount = 0;
1291-
1292-
assert(intrin.op2->OperIsFieldList());
1293-
1294-
GenTreeFieldList* fieldList = intrin.op2->AsFieldList();
1295-
GenTree* firstField = fieldList->Uses().GetHead()->GetNode();
1296-
op2Reg = firstField->GetRegNum();
1297-
1298-
#ifdef DEBUG
1299-
regNumber argReg = op2Reg;
1300-
for (GenTreeFieldList::Use& use : fieldList->Uses())
1301-
{
1302-
regCount++;
1303-
1304-
GenTree* argNode = use.GetNode();
1305-
assert(argReg == argNode->GetRegNum());
1306-
argReg = getNextSIMDRegWithWraparound(argReg);
1307-
}
1308-
assert((ins == INS_st1_2regs && regCount == 2) || (ins == INS_st2 && regCount == 2) ||
1309-
(ins == INS_st1_3regs && regCount == 3) || (ins == INS_st3 && regCount == 3) ||
1310-
(ins == INS_st1_4regs && regCount == 4) || (ins == INS_st4 && regCount == 4));
1311-
#endif
1312-
1313-
GetEmitter()->emitIns_R_R(ins, emitSize, op2Reg, op1Reg, opt);
1314-
break;
1315-
}
1316-
1283+
case NI_AdvSimd_Store:
1284+
case NI_AdvSimd_Arm64_Store:
13171285
case NI_AdvSimd_StoreVectorAndZip:
13181286
case NI_AdvSimd_Arm64_StoreVectorAndZip:
13191287
{
@@ -1336,24 +1304,24 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
13361304
#endif
13371305
}
13381306

1307+
bool isSequentialStore = (intrin.id == NI_AdvSimd_Arm64_Store || intrin.id == NI_AdvSimd_Store);
13391308
switch (regCount)
13401309
{
13411310
case 2:
1342-
ins = INS_st2;
1311+
ins = isSequentialStore ? INS_st1_2regs : INS_st2;
13431312
break;
13441313

13451314
case 3:
1346-
ins = INS_st3;
1315+
ins = isSequentialStore ? INS_st1_3regs : INS_st3;
13471316
break;
13481317

13491318
case 4:
1350-
ins = INS_st4;
1319+
ins = isSequentialStore ? INS_st1_4regs : INS_st4;
13511320
break;
13521321

13531322
default:
13541323
unreached();
13551324
}
1356-
13571325
GetEmitter()->emitIns_R_R(ins, emitSize, op2Reg, op1Reg, opt);
13581326
break;
13591327
}

src/coreclr/jit/hwintrinsiclistarm64.h

Lines changed: 2 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -464,12 +464,9 @@ HARDWARE_INTRINSIC(AdvSimd, ShiftRightLogicalScalar,
464464
HARDWARE_INTRINSIC(AdvSimd, SignExtendWideningLower, 8, 1, true, {INS_sxtl, INS_invalid, INS_sxtl, INS_invalid, INS_sxtl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg)
465465
HARDWARE_INTRINSIC(AdvSimd, SignExtendWideningUpper, 16, 1, true, {INS_sxtl2, INS_invalid, INS_sxtl2, INS_invalid, INS_sxtl2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg)
466466
HARDWARE_INTRINSIC(AdvSimd, SqrtScalar, 8, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fsqrt, INS_fsqrt}, HW_Category_SIMD, HW_Flag_SIMDScalar)
467-
HARDWARE_INTRINSIC(AdvSimd, Store, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromSecondArg)
467+
HARDWARE_INTRINSIC(AdvSimd, Store, 8, 2, true, {INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_invalid, INS_invalid, INS_st1_2regs, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
468468
HARDWARE_INTRINSIC(AdvSimd, StoreSelectedScalar, 8, 3, true, {INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NeedsConsecutiveRegisters)
469469
HARDWARE_INTRINSIC(AdvSimd, StoreVectorAndZip, 8, 2, true, {INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
470-
HARDWARE_INTRINSIC(AdvSimd, StoreVector64x2, 8, 2, true, {INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_invalid, INS_invalid, INS_st1_2regs, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
471-
HARDWARE_INTRINSIC(AdvSimd, StoreVector64x3, 8, 2, true, {INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_invalid, INS_invalid, INS_st1_3regs, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
472-
HARDWARE_INTRINSIC(AdvSimd, StoreVector64x4, 8, 2, true, {INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_invalid, INS_invalid, INS_st1_4regs, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
473470
HARDWARE_INTRINSIC(AdvSimd, Subtract, -1, 2, true, {INS_sub, INS_sub, INS_sub, INS_sub, INS_sub, INS_sub, INS_sub, INS_sub, INS_fsub, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag)
474471
HARDWARE_INTRINSIC(AdvSimd, SubtractHighNarrowingLower, 8, 2, true, {INS_subhn, INS_subhn, INS_subhn, INS_subhn, INS_subhn, INS_subhn, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag)
475472
HARDWARE_INTRINSIC(AdvSimd, SubtractHighNarrowingUpper, 16, 3, true, {INS_subhn2, INS_subhn2, INS_subhn2, INS_subhn2, INS_subhn2, INS_subhn2, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_HasRMWSemantics)
@@ -663,9 +660,7 @@ HARDWARE_INTRINSIC(AdvSimd_Arm64, StorePairScalarNonTemporal,
663660
HARDWARE_INTRINSIC(AdvSimd_Arm64, StorePairNonTemporal, -1, 3, true, {INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stp}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen)
664661
HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreSelectedScalar, 16, 3, true, {INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NeedsConsecutiveRegisters)
665662
HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreVectorAndZip, 16, 2, true, {INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
666-
HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreVector128x2, 16, 2, true, {INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
667-
HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreVector128x3, 16, 2, true, {INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_st1_3regs}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
668-
HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreVector128x4, 16, 2, true, {INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_st1_4regs}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
663+
HARDWARE_INTRINSIC(AdvSimd_Arm64, Store, 16, 2, true, {INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
669664
HARDWARE_INTRINSIC(AdvSimd_Arm64, Subtract, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fsub}, HW_Category_SIMD, HW_Flag_NoFlag)
670665
HARDWARE_INTRINSIC(AdvSimd_Arm64, SubtractSaturateScalar, 8, 2, true, {INS_sqsub, INS_uqsub, INS_sqsub, INS_uqsub, INS_sqsub, INS_uqsub, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SIMDScalar)
671666
HARDWARE_INTRINSIC(AdvSimd_Arm64, TransposeEven, -1, 2, true, {INS_trn1, INS_trn1, INS_trn1, INS_trn1, INS_trn1, INS_trn1, INS_trn1, INS_trn1, INS_trn1, INS_trn1}, HW_Category_SIMD, HW_Flag_NoFlag)

src/coreclr/jit/lsraarm64.cpp

Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1725,14 +1725,10 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
17251725
break;
17261726
}
17271727

1728+
case NI_AdvSimd_Store:
1729+
case NI_AdvSimd_Arm64_Store:
17281730
case NI_AdvSimd_StoreVectorAndZip:
17291731
case NI_AdvSimd_Arm64_StoreVectorAndZip:
1730-
case NI_AdvSimd_StoreVector64x2:
1731-
case NI_AdvSimd_StoreVector64x3:
1732-
case NI_AdvSimd_StoreVector64x4:
1733-
case NI_AdvSimd_Arm64_StoreVector128x2:
1734-
case NI_AdvSimd_Arm64_StoreVector128x3:
1735-
case NI_AdvSimd_Arm64_StoreVector128x4:
17361732
{
17371733
assert(intrin.op1 != nullptr);
17381734
srcCount += BuildConsecutiveRegistersForUse(intrin.op2);

0 commit comments

Comments
 (0)