Skip to content

JIT: Encode SIMD base type in VN for all HW intrinsics #105869

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Aug 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -9531,10 +9531,6 @@ class Compiler
#endif // FEATURE_SIMD
}

#ifdef FEATURE_HW_INTRINSICS
static bool vnEncodesResultTypeForHWIntrinsic(NamedIntrinsic hwIntrinsicID);
#endif // FEATURE_HW_INTRINSICS

private:
// Returns true if the TYP_SIMD locals on stack are aligned at their
// preferred byte boundary specified by getSIMDTypeAlignment().
Expand Down
73 changes: 2 additions & 71 deletions src/coreclr/jit/hwintrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
static const HWIntrinsicInfo hwIntrinsicInfoArray[] = {
// clang-format off
#if defined(TARGET_XARCH)
#define HARDWARE_INTRINSIC(isa, name, size, numarg, extra, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \
#define HARDWARE_INTRINSIC(isa, name, size, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \
{ \
/* name */ #name, \
/* flags */ static_cast<HWIntrinsicFlag>(flag), \
Expand All @@ -22,7 +22,7 @@ static const HWIntrinsicInfo hwIntrinsicInfoArray[] = {
},
#include "hwintrinsiclistxarch.h"
#elif defined (TARGET_ARM64)
#define HARDWARE_INTRINSIC(isa, name, size, numarg, extra, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \
#define HARDWARE_INTRINSIC(isa, name, size, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \
{ \
/* name */ #name, \
/* flags */ static_cast<HWIntrinsicFlag>(flag), \
Expand Down Expand Up @@ -759,75 +759,6 @@ CorInfoType Compiler::getBaseJitTypeFromArgIfNeeded(NamedIntrinsic intrins
return simdBaseJitType;
}

//------------------------------------------------------------------------
// vnEncodesResultTypeForHWIntrinsic(NamedIntrinsic hwIntrinsicID):
//
// Arguments:
// hwIntrinsicID -- The id for the HW intrinsic
//
// Return Value:
// Returns true if this intrinsic requires value numbering to add an
// extra SimdType argument that encodes the resulting type.
// If we don't do this overloaded versions can return the same VN
// leading to incorrect CSE substitutions.
//
/* static */ bool Compiler::vnEncodesResultTypeForHWIntrinsic(NamedIntrinsic hwIntrinsicID)
{
// No extra type information is needed for scalar/special HW Intrinsic.
//
unsigned simdSize = 0;
if (HWIntrinsicInfo::tryLookupSimdSize(hwIntrinsicID, &simdSize) && (simdSize == 0))
{
return false;
}

int numArgs = HWIntrinsicInfo::lookupNumArgs(hwIntrinsicID);

// HW Intrinsic's with -1 for numArgs have a varying number of args, so we currently
// give them a unique value number, and don't add an extra argument.
//
if (numArgs == -1)
{
return false;
}

// We iterate over all of the different baseType's for this intrinsic in the HWIntrinsicInfo table
// We set diffInsCount to the number of instructions that can execute differently.
//
unsigned diffInsCount = 0;
#ifdef TARGET_XARCH
instruction lastIns = INS_invalid;
#endif
for (var_types baseType = TYP_BYTE; (baseType <= TYP_DOUBLE); baseType = (var_types)(baseType + 1))
{
instruction curIns = HWIntrinsicInfo::lookupIns(hwIntrinsicID, baseType);
if (curIns != INS_invalid)
{
#ifdef TARGET_XARCH
if (curIns != lastIns)
{
diffInsCount++;
// remember the last valid instruction that we saw
lastIns = curIns;
}
#elif defined(TARGET_ARM64)
// On ARM64 we use the same instruction and specify an insOpt arrangement
// so we always consider the instruction operation to be different
//
diffInsCount++;
#endif // TARGET
if (diffInsCount >= 2)
{
// We can early exit the loop now
break;
}
}
}

// If we see two (or more) different instructions we need the extra VNF_SimdType arg
return (diffInsCount >= 2);
}

struct HWIntrinsicIsaRange
{
NamedIntrinsic FirstId;
Expand Down
1,446 changes: 723 additions & 723 deletions src/coreclr/jit/hwintrinsiclistarm64.h

Large diffs are not rendered by default.

570 changes: 285 additions & 285 deletions src/coreclr/jit/hwintrinsiclistarm64sve.h

Large diffs are not rendered by default.

2,622 changes: 1,310 additions & 1,312 deletions src/coreclr/jit/hwintrinsiclistxarch.h

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions src/coreclr/jit/namedintrinsiclist.h
Original file line number Diff line number Diff line change
Expand Up @@ -152,11 +152,11 @@ enum NamedIntrinsic : unsigned short
#ifdef FEATURE_HW_INTRINSICS
NI_HW_INTRINSIC_START,
#if defined(TARGET_XARCH)
#define HARDWARE_INTRINSIC(isa, name, size, numarg, extra, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \
#define HARDWARE_INTRINSIC(isa, name, size, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \
NI_##isa##_##name,
#include "hwintrinsiclistxarch.h"
#elif defined(TARGET_ARM64)
#define HARDWARE_INTRINSIC(isa, name, size, numarg, extra, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \
#define HARDWARE_INTRINSIC(isa, name, size, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \
NI_##isa##_##name,
#include "hwintrinsiclistarm64.h"
#endif // !defined(TARGET_XARCH) && !defined(TARGET_ARM64)
Expand Down
143 changes: 36 additions & 107 deletions src/coreclr/jit/valuenum.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2947,7 +2947,8 @@ ValueNum ValueNumStore::VNForFunc(
assert(arg1VN == VNNormalValue(arg1VN));
assert(arg2VN == VNNormalValue(arg2VN));
assert((func == VNF_MapStore) || (arg3VN == VNNormalValue(arg3VN)));
assert(VNFuncArity(func) == 4);
// Some SIMD functions with variable number of arguments are defined with zero arity
assert((VNFuncArity(func) == 0) || (VNFuncArity(func) == 4));

// Have we already assigned a ValueNum for 'func'('arg0VN','arg1VN','arg2VN','arg3VN') ?
//
Expand Down Expand Up @@ -7823,8 +7824,10 @@ ValueNum EvaluateSimdCvtVectorToMask(ValueNumStore* vns, var_types simdType, var
return vns->VNForSimdMaskCon(result);
}

ValueNum ValueNumStore::EvalHWIntrinsicFunUnary(
GenTreeHWIntrinsic* tree, VNFunc func, ValueNum arg0VN, bool encodeResultType, ValueNum resultTypeVN)
ValueNum ValueNumStore::EvalHWIntrinsicFunUnary(GenTreeHWIntrinsic* tree,
VNFunc func,
ValueNum arg0VN,
ValueNum resultTypeVN)
{
var_types type = tree->TypeGet();
var_types baseType = tree->GetSimdBaseType();
Expand Down Expand Up @@ -8158,19 +8161,11 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunUnary(
}
}

if (encodeResultType)
{
return VNForFunc(type, func, arg0VN, resultTypeVN);
}
return VNForFunc(type, func, arg0VN);
return VNForFunc(type, func, arg0VN, resultTypeVN);
}

ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(GenTreeHWIntrinsic* tree,
VNFunc func,
ValueNum arg0VN,
ValueNum arg1VN,
bool encodeResultType,
ValueNum resultTypeVN)
ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(
GenTreeHWIntrinsic* tree, VNFunc func, ValueNum arg0VN, ValueNum arg1VN, ValueNum resultTypeVN)
{
var_types type = tree->TypeGet();
var_types baseType = tree->GetSimdBaseType();
Expand Down Expand Up @@ -8953,11 +8948,7 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(GenTreeHWIntrinsic* tree,
}
}

if (encodeResultType)
{
return VNForFunc(type, func, arg0VN, arg1VN, resultTypeVN);
}
return VNForFunc(type, func, arg0VN, arg1VN);
return VNForFunc(type, func, arg0VN, arg1VN, resultTypeVN);
}

ValueNum EvaluateSimdWithElementFloating(
Expand Down Expand Up @@ -9068,13 +9059,8 @@ ValueNum EvaluateSimdWithElementIntegral(
}
}

ValueNum ValueNumStore::EvalHWIntrinsicFunTernary(GenTreeHWIntrinsic* tree,
VNFunc func,
ValueNum arg0VN,
ValueNum arg1VN,
ValueNum arg2VN,
bool encodeResultType,
ValueNum resultTypeVN)
ValueNum ValueNumStore::EvalHWIntrinsicFunTernary(
GenTreeHWIntrinsic* tree, VNFunc func, ValueNum arg0VN, ValueNum arg1VN, ValueNum arg2VN, ValueNum resultTypeVN)
{
var_types type = tree->TypeGet();
var_types baseType = tree->GetSimdBaseType();
Expand Down Expand Up @@ -9185,14 +9171,7 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunTernary(GenTreeHWIntrinsic* tree,
}
}

if (encodeResultType)
{
return VNForFunc(type, func, arg0VN, arg1VN, arg2VN, resultTypeVN);
}
else
{
return VNForFunc(type, func, arg0VN, arg1VN, arg2VN);
}
return VNForFunc(type, func, arg0VN, arg1VN, arg2VN, resultTypeVN);
}

#endif // FEATURE_HW_INTRINSICS
Expand Down Expand Up @@ -10381,25 +10360,6 @@ void ValueNumStore::vnDumpZeroObj(Compiler* comp, VNFuncApp* zeroObj)

// Static fields, methods.

#define ValueNumFuncDef(vnf, arity, commute, knownNonNull, sharedStatic, extra) \
static_assert((arity) >= 0 || !(extra), "valuenumfuncs.h has EncodesExtraTypeArg==true and arity<0 for " #vnf);
#include "valuenumfuncs.h"

#ifdef FEATURE_HW_INTRINSICS

#define HARDWARE_INTRINSIC(isa, name, size, argCount, extra, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \
static_assert((size) != 0 || !(extra), \
"hwintrinsicslist<arch>.h has EncodesExtraTypeArg==true and size==0 for " #isa " " #name);
#if defined(TARGET_XARCH)
#include "hwintrinsiclistxarch.h"
#elif defined(TARGET_ARM64)
#include "hwintrinsiclistarm64.h"
#else
#error Unsupported platform
#endif

#endif // FEATURE_HW_INTRINSICS

/* static */ constexpr uint8_t ValueNumStore::GetOpAttribsForArity(genTreeOps oper, GenTreeOperKind kind)
{
return ((GenTree::StaticOperIs(oper, GT_SELECT) ? 3 : (((kind & GTK_UNOP) >> 1) | ((kind & GTK_BINOP) >> 1)))
Expand Down Expand Up @@ -10434,8 +10394,8 @@ const uint8_t ValueNumStore::s_vnfOpAttribs[VNF_COUNT] = {

0, // VNF_Boundary

#define ValueNumFuncDef(vnf, arity, commute, knownNonNull, sharedStatic, extra) \
GetOpAttribsForFunc((arity) + static_cast<int>(extra), commute, knownNonNull, sharedStatic),
#define ValueNumFuncDef(vnf, arity, commute, knownNonNull, sharedStatic) \
GetOpAttribsForFunc(arity, commute, knownNonNull, sharedStatic),
#include "valuenumfuncs.h"
};

Expand Down Expand Up @@ -10490,7 +10450,7 @@ void ValueNumStore::ValidateValueNumStoreStatics()

int vnfNum = VNF_Boundary + 1; // The macro definition below will update this after using it.

#define ValueNumFuncDef(vnf, arity, commute, knownNonNull, sharedStatic, extra) \
#define ValueNumFuncDef(vnf, arity, commute, knownNonNull, sharedStatic) \
if (commute) \
arr[vnfNum] |= VNFOA_Commutative; \
if (knownNonNull) \
Expand All @@ -10514,19 +10474,6 @@ void ValueNumStore::ValidateValueNumStoreStatics()
for (NamedIntrinsic id = (NamedIntrinsic)(NI_HW_INTRINSIC_START + 1); (id < NI_HW_INTRINSIC_END);
id = (NamedIntrinsic)(id + 1))
{
bool encodeResultType = Compiler::vnEncodesResultTypeForHWIntrinsic(id);

if (encodeResultType)
{
// These HW_Intrinsic's have an extra VNF_SimdType arg.
//
VNFunc func = VNFunc(VNF_HWI_FIRST + (id - NI_HW_INTRINSIC_START - 1));
unsigned oldArity = (arr[func] & VNFOA_ArityMask) >> VNFOA_ArityShift;
unsigned newArity = oldArity + 1;

ValueNumFuncSetArity(func, newArity);
}

if (HWIntrinsicInfo::IsCommutative(id))
{
VNFunc func = VNFunc(VNF_HWI_FIRST + (id - NI_HW_INTRINSIC_START - 1));
Expand All @@ -10553,7 +10500,7 @@ void ValueNumStore::ValidateValueNumStoreStatics()

#ifdef DEBUG
// Define the name array.
#define ValueNumFuncDef(vnf, arity, commute, knownNonNull, sharedStatic, extra) #vnf,
#define ValueNumFuncDef(vnf, arity, commute, knownNonNull, sharedStatic) #vnf,

const char* ValueNumStore::VNFuncNameArr[] = {
#include "valuenumfuncs.h"
Expand Down Expand Up @@ -13014,19 +12961,13 @@ void Compiler::fgValueNumberHWIntrinsic(GenTreeHWIntrinsic* tree)
}
else
{
VNFunc func = GetVNFuncForNode(tree);
ValueNumPair resultTypeVNPair = ValueNumPair();
bool encodeResultType = vnEncodesResultTypeForHWIntrinsic(intrinsicId);
VNFunc func = GetVNFuncForNode(tree);
ValueNum simdTypeVN = vnStore->VNForSimdType(tree->GetSimdSize(), tree->GetNormalizedSimdBaseJitType());
ValueNumPair resultTypeVNPair(simdTypeVN, simdTypeVN);

if (encodeResultType)
{
ValueNum simdTypeVN = vnStore->VNForSimdType(tree->GetSimdSize(), tree->GetNormalizedSimdBaseJitType());
resultTypeVNPair.SetBoth(simdTypeVN);

JITDUMP(" simdTypeVN is ");
JITDUMPEXEC(vnPrint(simdTypeVN, 1));
JITDUMP("\n");
}
JITDUMP(" simdTypeVN is ");
JITDUMPEXEC(vnPrint(simdTypeVN, 1));
JITDUMP("\n");

auto getOperandVNs = [this, addr](GenTree* operand, ValueNumPair* pNormVNPair, ValueNumPair* pExcVNPair) {
vnStore->VNPUnpackExc(operand->gtVNPair, pNormVNPair, pExcVNPair);
Expand All @@ -13049,22 +12990,12 @@ void Compiler::fgValueNumberHWIntrinsic(GenTreeHWIntrinsic* tree)
}
};

const bool isVariableNumArgs = HWIntrinsicInfo::lookupNumArgs(intrinsicId) == -1;

// There are some HWINTRINSICS operations that have zero args, i.e. NI_Vector128_Zero
if (opCount == 0)
{
if (encodeResultType)
{
// There are zero arg HWINTRINSICS operations that encode the result type, i.e. Vector128_AllBitSet
normalPair = vnStore->VNPairForFunc(tree->TypeGet(), func, resultTypeVNPair);
assert(vnStore->VNFuncArity(func) == 1);
}
else
{
normalPair = vnStore->VNPairForFunc(tree->TypeGet(), func);
assert(vnStore->VNFuncArity(func) == 0);
}
// There are zero arg HWINTRINSICS operations that encode the result type, i.e. Vector128_AllBitSet
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment is notably out of date, we produce GT_CNS_VEC for cases like AllBitsSet now. Off the top of my head, other zero arg intrinsics should also be getting converted to constant nodes and I'm not sure any cases actually exist that can hit this anymore.

Should be safe to leave in for .NET 9, but might be something we can cleanup early in .NET 10

normalPair = vnStore->VNPairForFunc(tree->TypeGet(), func, resultTypeVNPair);
assert(vnStore->VNFuncArity(func) == 1);
}
else // HWINTRINSIC unary or binary or ternary operator.
{
Expand All @@ -13074,11 +13005,10 @@ void Compiler::fgValueNumberHWIntrinsic(GenTreeHWIntrinsic* tree)

if (opCount == 1)
{
ValueNum normalLVN = vnStore->EvalHWIntrinsicFunUnary(tree, func, op1vnp.GetLiberal(), encodeResultType,
resultTypeVNPair.GetLiberal());
ValueNum normalCVN =
vnStore->EvalHWIntrinsicFunUnary(tree, func, op1vnp.GetConservative(), encodeResultType,
resultTypeVNPair.GetConservative());
ValueNum normalLVN =
vnStore->EvalHWIntrinsicFunUnary(tree, func, op1vnp.GetLiberal(), resultTypeVNPair.GetLiberal());
ValueNum normalCVN = vnStore->EvalHWIntrinsicFunUnary(tree, func, op1vnp.GetConservative(),
resultTypeVNPair.GetConservative());

normalPair = ValueNumPair(normalLVN, normalCVN);
excSetPair = op1Xvnp;
Expand All @@ -13093,10 +13023,10 @@ void Compiler::fgValueNumberHWIntrinsic(GenTreeHWIntrinsic* tree)
{
ValueNum normalLVN =
vnStore->EvalHWIntrinsicFunBinary(tree, func, op1vnp.GetLiberal(), op2vnp.GetLiberal(),
encodeResultType, resultTypeVNPair.GetLiberal());
ValueNum normalCVN = vnStore->EvalHWIntrinsicFunBinary(tree, func, op1vnp.GetConservative(),
op2vnp.GetConservative(), encodeResultType,
resultTypeVNPair.GetConservative());
resultTypeVNPair.GetLiberal());
ValueNum normalCVN =
vnStore->EvalHWIntrinsicFunBinary(tree, func, op1vnp.GetConservative(),
op2vnp.GetConservative(), resultTypeVNPair.GetConservative());

normalPair = ValueNumPair(normalLVN, normalCVN);
excSetPair = vnStore->VNPExcSetUnion(op1Xvnp, op2Xvnp);
Expand All @@ -13111,12 +13041,11 @@ void Compiler::fgValueNumberHWIntrinsic(GenTreeHWIntrinsic* tree)

ValueNum normalLVN =
vnStore->EvalHWIntrinsicFunTernary(tree, func, op1vnp.GetLiberal(), op2vnp.GetLiberal(),
op3vnp.GetLiberal(), encodeResultType,
resultTypeVNPair.GetLiberal());
op3vnp.GetLiberal(), resultTypeVNPair.GetLiberal());
ValueNum normalCVN =
vnStore->EvalHWIntrinsicFunTernary(tree, func, op1vnp.GetConservative(),
op2vnp.GetConservative(), op3vnp.GetConservative(),
encodeResultType, resultTypeVNPair.GetConservative());
resultTypeVNPair.GetConservative());

normalPair = ValueNumPair(normalLVN, normalCVN);

Expand Down
Loading
Loading