Skip to content

Commit c3843c5

Browse files
authored
Optimize ValueNumStore::GetVNFunc (#67395)
`ValueNumStore::GetVNFunc` is very hot, in fact so hot that it shows up right at the top in a profile of SPMI over libraries.pmi, next to `fgMorphSmpOp`. Instead of copying all args just return a pointer to the arguments which allows all the cases to be unified and makes the function small enough for MSVC to inline it. Most of the work here is to make sure we do not end up with undefined behavior. I also removed some of the duplication by using a templated struct for func apps instead of the old VNDefFuncNArg. Windows x64 SPMI over libraries.pmi: PIN before: 329991512865 PIN after: 326098430289 (-1.19%)
1 parent 321cec8 commit c3843c5

File tree

4 files changed

+184
-240
lines changed

4 files changed

+184
-240
lines changed

src/coreclr/jit/gentree.h

Lines changed: 3 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -720,17 +720,6 @@ inline GenTreeDebugFlags& operator &=(GenTreeDebugFlags& a, GenTreeDebugFlags b)
720720

721721
// clang-format on
722722

723-
constexpr bool OpersAreContiguous(genTreeOps firstOper, genTreeOps secondOper)
724-
{
725-
return (firstOper + 1) == secondOper;
726-
}
727-
728-
template <typename... Opers>
729-
constexpr bool OpersAreContiguous(genTreeOps firstOper, genTreeOps secondOper, Opers... otherOpers)
730-
{
731-
return OpersAreContiguous(firstOper, secondOper) && OpersAreContiguous(secondOper, otherOpers...);
732-
}
733-
734723
#ifndef HOST_64BIT
735724
#include <pshpack4.h>
736725
#endif
@@ -1181,7 +1170,7 @@ struct GenTree
11811170

11821171
static bool OperIsConst(genTreeOps gtOper)
11831172
{
1184-
static_assert_no_msg(OpersAreContiguous(GT_CNS_INT, GT_CNS_LNG, GT_CNS_DBL, GT_CNS_STR));
1173+
static_assert_no_msg(AreContiguous(GT_CNS_INT, GT_CNS_LNG, GT_CNS_DBL, GT_CNS_STR));
11851174
return (GT_CNS_INT <= gtOper) && (gtOper <= GT_CNS_STR);
11861175
}
11871176

@@ -1202,8 +1191,7 @@ struct GenTree
12021191

12031192
static bool OperIsLocal(genTreeOps gtOper)
12041193
{
1205-
static_assert_no_msg(
1206-
OpersAreContiguous(GT_PHI_ARG, GT_LCL_VAR, GT_LCL_FLD, GT_STORE_LCL_VAR, GT_STORE_LCL_FLD));
1194+
static_assert_no_msg(AreContiguous(GT_PHI_ARG, GT_LCL_VAR, GT_LCL_FLD, GT_STORE_LCL_VAR, GT_STORE_LCL_FLD));
12071195
return (GT_PHI_ARG <= gtOper) && (gtOper <= GT_STORE_LCL_FLD);
12081196
}
12091197

@@ -1375,7 +1363,7 @@ struct GenTree
13751363

13761364
static bool OperIsCompare(genTreeOps gtOper)
13771365
{
1378-
static_assert_no_msg(OpersAreContiguous(GT_EQ, GT_NE, GT_LT, GT_LE, GT_GE, GT_GT, GT_TEST_EQ, GT_TEST_NE));
1366+
static_assert_no_msg(AreContiguous(GT_EQ, GT_NE, GT_LT, GT_LE, GT_GE, GT_GT, GT_TEST_EQ, GT_TEST_NE));
13791367
return (GT_EQ <= gtOper) && (gtOper <= GT_TEST_NE);
13801368
}
13811369

src/coreclr/jit/utils.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,18 @@ inline bool isPow2(T i)
4444
return (i > 0 && ((i - 1) & i) == 0);
4545
}
4646

47+
template <typename T>
48+
constexpr bool AreContiguous(T val1, T val2)
49+
{
50+
return (val1 + 1) == val2;
51+
}
52+
53+
template <typename T, typename... Ts>
54+
constexpr bool AreContiguous(T val1, T val2, Ts... rest)
55+
{
56+
return ((val1 + 1) == val2) && AreContiguous(val2, rest...);
57+
}
58+
4759
// Adapter for iterators to a type that is compatible with C++11
4860
// range-based for loops.
4961
template <typename TIterator>

src/coreclr/jit/valuenum.cpp

Lines changed: 115 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -1686,16 +1686,16 @@ ValueNumStore::Chunk::Chunk(CompAllocator alloc, ValueNum* pNextBaseVN, var_type
16861686
break;
16871687

16881688
case CEA_Func1:
1689-
m_defs = new (alloc) VNDefFunc1Arg[ChunkSize];
1689+
m_defs = alloc.allocate<char>((sizeof(VNDefFuncAppFlexible) + sizeof(ValueNum) * 1) * ChunkSize);
16901690
break;
16911691
case CEA_Func2:
1692-
m_defs = new (alloc) VNDefFunc2Arg[ChunkSize];
1692+
m_defs = alloc.allocate<char>((sizeof(VNDefFuncAppFlexible) + sizeof(ValueNum) * 2) * ChunkSize);
16931693
break;
16941694
case CEA_Func3:
1695-
m_defs = new (alloc) VNDefFunc3Arg[ChunkSize];
1695+
m_defs = alloc.allocate<char>((sizeof(VNDefFuncAppFlexible) + sizeof(ValueNum) * 3) * ChunkSize);
16961696
break;
16971697
case CEA_Func4:
1698-
m_defs = new (alloc) VNDefFunc4Arg[ChunkSize];
1698+
m_defs = alloc.allocate<char>((sizeof(VNDefFuncAppFlexible) + sizeof(ValueNum) * 4) * ChunkSize);
16991699
break;
17001700
default:
17011701
unreached();
@@ -2009,17 +2009,17 @@ ValueNum ValueNumStore::VNForFunc(var_types typ, VNFunc func, ValueNum arg0VN)
20092009
ValueNum resultVN;
20102010

20112011
// Have we already assigned a ValueNum for 'func'('arg0VN') ?
2012-
VNDefFunc1Arg fstruct(func, arg0VN);
2012+
VNDefFuncApp<1> fstruct(func, arg0VN);
20132013
if (!GetVNFunc1Map()->Lookup(fstruct, &resultVN))
20142014
{
20152015
// Otherwise, Allocate a new ValueNum for 'func'('arg0VN')
20162016
//
2017-
Chunk* const c = GetAllocChunk(typ, CEA_Func1);
2018-
unsigned const offsetWithinChunk = c->AllocVN();
2019-
VNDefFunc1Arg* const chunkSlots = reinterpret_cast<VNDefFunc1Arg*>(c->m_defs);
2020-
2021-
chunkSlots[offsetWithinChunk] = fstruct;
2022-
resultVN = c->m_baseVN + offsetWithinChunk;
2017+
Chunk* const c = GetAllocChunk(typ, CEA_Func1);
2018+
unsigned const offsetWithinChunk = c->AllocVN();
2019+
VNDefFuncAppFlexible* fapp = c->PointerToFuncApp(offsetWithinChunk, 1);
2020+
fapp->m_func = func;
2021+
fapp->m_args[0] = arg0VN;
2022+
resultVN = c->m_baseVN + offsetWithinChunk;
20232023

20242024
// Record 'resultVN' in the Func1Map
20252025
GetVNFunc1Map()->Set(fstruct, resultVN);
@@ -2117,7 +2117,7 @@ ValueNum ValueNumStore::VNForFunc(var_types typ, VNFunc func, ValueNum arg0VN, V
21172117

21182118
// Have we already assigned a ValueNum for 'func'('arg0VN','arg1VN') ?
21192119
//
2120-
VNDefFunc2Arg fstruct(func, arg0VN, arg1VN);
2120+
VNDefFuncApp<2> fstruct(func, arg0VN, arg1VN);
21212121
if (!GetVNFunc2Map()->Lookup(fstruct, &resultVN))
21222122
{
21232123
if (func == VNF_CastClass)
@@ -2136,12 +2136,13 @@ ValueNum ValueNumStore::VNForFunc(var_types typ, VNFunc func, ValueNum arg0VN, V
21362136
{
21372137
// Otherwise, Allocate a new ValueNum for 'func'('arg0VN','arg1VN')
21382138
//
2139-
Chunk* const c = GetAllocChunk(typ, CEA_Func2);
2140-
unsigned const offsetWithinChunk = c->AllocVN();
2141-
VNDefFunc2Arg* const chunkSlots = reinterpret_cast<VNDefFunc2Arg*>(c->m_defs);
2142-
2143-
chunkSlots[offsetWithinChunk] = fstruct;
2144-
resultVN = c->m_baseVN + offsetWithinChunk;
2139+
Chunk* const c = GetAllocChunk(typ, CEA_Func2);
2140+
unsigned const offsetWithinChunk = c->AllocVN();
2141+
VNDefFuncAppFlexible* fapp = c->PointerToFuncApp(offsetWithinChunk, 2);
2142+
fapp->m_func = func;
2143+
fapp->m_args[0] = arg0VN;
2144+
fapp->m_args[1] = arg1VN;
2145+
resultVN = c->m_baseVN + offsetWithinChunk;
21452146
// Record 'resultVN' in the Func2Map
21462147
GetVNFunc2Map()->Set(fstruct, resultVN);
21472148
}
@@ -2194,17 +2195,19 @@ ValueNum ValueNumStore::VNForFunc(var_types typ, VNFunc func, ValueNum arg0VN, V
21942195

21952196
// Have we already assigned a ValueNum for 'func'('arg0VN','arg1VN','arg2VN') ?
21962197
//
2197-
VNDefFunc3Arg fstruct(func, arg0VN, arg1VN, arg2VN);
2198+
VNDefFuncApp<3> fstruct(func, arg0VN, arg1VN, arg2VN);
21982199
if (!GetVNFunc3Map()->Lookup(fstruct, &resultVN))
21992200
{
22002201
// Otherwise, Allocate a new ValueNum for 'func'('arg0VN','arg1VN','arg2VN')
22012202
//
2202-
Chunk* const c = GetAllocChunk(typ, CEA_Func3);
2203-
unsigned const offsetWithinChunk = c->AllocVN();
2204-
VNDefFunc3Arg* const chunkSlots = reinterpret_cast<VNDefFunc3Arg*>(c->m_defs);
2205-
2206-
chunkSlots[offsetWithinChunk] = fstruct;
2207-
resultVN = c->m_baseVN + offsetWithinChunk;
2203+
Chunk* const c = GetAllocChunk(typ, CEA_Func3);
2204+
unsigned const offsetWithinChunk = c->AllocVN();
2205+
VNDefFuncAppFlexible* fapp = c->PointerToFuncApp(offsetWithinChunk, 3);
2206+
fapp->m_func = func;
2207+
fapp->m_args[0] = arg0VN;
2208+
fapp->m_args[1] = arg1VN;
2209+
fapp->m_args[2] = arg2VN;
2210+
resultVN = c->m_baseVN + offsetWithinChunk;
22082211

22092212
// Record 'resultVN' in the Func3Map
22102213
GetVNFunc3Map()->Set(fstruct, resultVN);
@@ -2245,17 +2248,20 @@ ValueNum ValueNumStore::VNForFunc(
22452248

22462249
// Have we already assigned a ValueNum for 'func'('arg0VN','arg1VN','arg2VN','arg3VN') ?
22472250
//
2248-
VNDefFunc4Arg fstruct(func, arg0VN, arg1VN, arg2VN, arg3VN);
2251+
VNDefFuncApp<4> fstruct(func, arg0VN, arg1VN, arg2VN, arg3VN);
22492252
if (!GetVNFunc4Map()->Lookup(fstruct, &resultVN))
22502253
{
22512254
// Otherwise, Allocate a new ValueNum for 'func'('arg0VN','arg1VN','arg2VN','arg3VN')
22522255
//
2253-
Chunk* const c = GetAllocChunk(typ, CEA_Func4);
2254-
unsigned const offsetWithinChunk = c->AllocVN();
2255-
VNDefFunc4Arg* const chunkSlots = reinterpret_cast<VNDefFunc4Arg*>(c->m_defs);
2256-
2257-
chunkSlots[offsetWithinChunk] = fstruct;
2258-
resultVN = c->m_baseVN + offsetWithinChunk;
2256+
Chunk* const c = GetAllocChunk(typ, CEA_Func4);
2257+
unsigned const offsetWithinChunk = c->AllocVN();
2258+
VNDefFuncAppFlexible* fapp = c->PointerToFuncApp(offsetWithinChunk, 4);
2259+
fapp->m_func = func;
2260+
fapp->m_args[0] = arg0VN;
2261+
fapp->m_args[1] = arg1VN;
2262+
fapp->m_args[2] = arg2VN;
2263+
fapp->m_args[3] = arg3VN;
2264+
resultVN = c->m_baseVN + offsetWithinChunk;
22592265

22602266
// Record 'resultVN' in the Func4Map
22612267
GetVNFunc4Map()->Set(fstruct, resultVN);
@@ -2375,7 +2381,7 @@ ValueNum ValueNumStore::VNForMapSelectWork(
23752381
#endif
23762382
ValueNum res;
23772383

2378-
VNDefFunc2Arg fstruct(VNF_MapSelect, map, index);
2384+
VNDefFuncApp<2> fstruct(VNF_MapSelect, map, index);
23792385
if (GetVNFunc2Map()->Lookup(fstruct, &res))
23802386
{
23812387
return res;
@@ -2462,7 +2468,7 @@ ValueNum ValueNumStore::VNForMapSelectWork(
24622468
// Get the first argument of the phi.
24632469

24642470
// We need to be careful about breaking infinite recursion. Record the outer select.
2465-
m_fixedPointMapSels.Push(VNDefFunc2Arg(VNF_MapSelect, map, index));
2471+
m_fixedPointMapSels.Push(VNDefFuncApp<2>(VNF_MapSelect, map, index));
24662472

24672473
assert(IsVNConstant(phiFuncApp.m_args[0]));
24682474
unsigned phiArgSsaNum = ConstantValue<unsigned>(phiFuncApp.m_args[0]);
@@ -2580,12 +2586,13 @@ ValueNum ValueNumStore::VNForMapSelectWork(
25802586
if (!GetVNFunc2Map()->Lookup(fstruct, &res))
25812587
{
25822588
// Otherwise, assign a new VN for the function application.
2583-
Chunk* const c = GetAllocChunk(type, CEA_Func2);
2584-
unsigned const offsetWithinChunk = c->AllocVN();
2585-
VNDefFunc2Arg* const chunkSlots = reinterpret_cast<VNDefFunc2Arg*>(c->m_defs);
2586-
2587-
chunkSlots[offsetWithinChunk] = fstruct;
2588-
res = c->m_baseVN + offsetWithinChunk;
2589+
Chunk* const c = GetAllocChunk(type, CEA_Func2);
2590+
unsigned const offsetWithinChunk = c->AllocVN();
2591+
VNDefFuncAppFlexible* fapp = c->PointerToFuncApp(offsetWithinChunk, 2);
2592+
fapp->m_func = fstruct.m_func;
2593+
fapp->m_args[0] = fstruct.m_args[0];
2594+
fapp->m_args[1] = fstruct.m_args[1];
2595+
res = c->m_baseVN + offsetWithinChunk;
25892596

25902597
GetVNFunc2Map()->Set(fstruct, res);
25912598
}
@@ -2697,25 +2704,72 @@ bool ValueNumStore::SelectIsBeingEvaluatedRecursively(ValueNum map, ValueNum ind
26972704
{
26982705
for (unsigned i = 0; i < m_fixedPointMapSels.Size(); i++)
26992706
{
2700-
VNDefFunc2Arg& elem = m_fixedPointMapSels.GetRef(i);
2707+
VNDefFuncApp<2>& elem = m_fixedPointMapSels.GetRef(i);
27012708
assert(elem.m_func == VNF_MapSelect);
2702-
if (elem.m_arg0 == map && elem.m_arg1 == ind)
2709+
if (elem.m_args[0] == map && elem.m_args[1] == ind)
27032710
{
27042711
return true;
27052712
}
27062713
}
27072714
return false;
27082715
}
27092716

2717+
// Specialized here as MSVC does not do well with naive version with loop.
2718+
template <>
2719+
bool ValueNumStore::VNDefFuncApp<1>::operator==(const ValueNumStore::VNDefFuncApp<1>& y) const
2720+
{
2721+
return m_func == y.m_func && m_args[0] == y.m_args[0];
2722+
}
2723+
2724+
template <>
2725+
bool ValueNumStore::VNDefFuncApp<2>::operator==(const ValueNumStore::VNDefFuncApp<2>& y) const
2726+
{
2727+
return m_func == y.m_func && m_args[0] == y.m_args[0] && m_args[1] == y.m_args[1];
2728+
}
2729+
2730+
template <>
2731+
bool ValueNumStore::VNDefFuncApp<3>::operator==(const ValueNumStore::VNDefFuncApp<3>& y) const
2732+
{
2733+
return m_func == y.m_func && m_args[0] == y.m_args[0] && m_args[1] == y.m_args[1] && m_args[2] == y.m_args[2];
2734+
}
2735+
2736+
template <>
2737+
bool ValueNumStore::VNDefFuncApp<4>::operator==(const ValueNumStore::VNDefFuncApp<4>& y) const
2738+
{
2739+
return m_func == y.m_func && m_args[0] == y.m_args[0] && m_args[1] == y.m_args[1] && m_args[2] == y.m_args[2] &&
2740+
m_args[3] == y.m_args[3];
2741+
}
2742+
2743+
template <>
2744+
unsigned ValueNumStore::VNDefFuncAppKeyFuncs<1>::GetHashCode(const ValueNumStore::VNDefFuncApp<1>& val)
2745+
{
2746+
return (val.m_func << 24) + val.m_args[0];
2747+
}
2748+
template <>
2749+
unsigned ValueNumStore::VNDefFuncAppKeyFuncs<2>::GetHashCode(const ValueNumStore::VNDefFuncApp<2>& val)
2750+
{
2751+
return (val.m_func << 24) + (val.m_args[0] << 8) + val.m_args[1];
2752+
}
2753+
template <>
2754+
unsigned ValueNumStore::VNDefFuncAppKeyFuncs<3>::GetHashCode(const ValueNumStore::VNDefFuncApp<3>& val)
2755+
{
2756+
return (val.m_func << 24) + (val.m_args[0] << 16) + (val.m_args[1] << 8) + val.m_args[2];
2757+
}
2758+
template <>
2759+
unsigned ValueNumStore::VNDefFuncAppKeyFuncs<4>::GetHashCode(const ValueNumStore::VNDefFuncApp<4>& val)
2760+
{
2761+
return (val.m_func << 24) + (val.m_args[0] << 16) + (val.m_args[1] << 8) + val.m_args[2] + (val.m_args[3] << 12);
2762+
}
2763+
27102764
#ifdef DEBUG
27112765
bool ValueNumStore::FixedPointMapSelsTopHasValue(ValueNum map, ValueNum index)
27122766
{
27132767
if (m_fixedPointMapSels.Size() == 0)
27142768
{
27152769
return false;
27162770
}
2717-
VNDefFunc2Arg& top = m_fixedPointMapSels.TopRef();
2718-
return top.m_func == VNF_MapSelect && top.m_arg0 == map && top.m_arg1 == index;
2771+
VNDefFuncApp<2>& top = m_fixedPointMapSels.TopRef();
2772+
return top.m_func == VNF_MapSelect && top.m_args[0] == map && top.m_args[1] == index;
27192773
}
27202774
#endif
27212775

@@ -3916,12 +3970,11 @@ ValueNum ValueNumStore::VNForExpr(BasicBlock* block, var_types typ)
39163970

39173971
// VNForFunc(typ, func, vn) but bypasses looking in the cache
39183972
//
3919-
VNDefFunc1Arg fstruct(VNF_MemOpaque, loopNum);
3920-
Chunk* const c = GetAllocChunk(typ, CEA_Func1);
3921-
unsigned const offsetWithinChunk = c->AllocVN();
3922-
VNDefFunc1Arg* const chunkSlots = reinterpret_cast<VNDefFunc1Arg*>(c->m_defs);
3923-
3924-
chunkSlots[offsetWithinChunk] = fstruct;
3973+
Chunk* const c = GetAllocChunk(typ, CEA_Func1);
3974+
unsigned const offsetWithinChunk = c->AllocVN();
3975+
VNDefFuncAppFlexible* fapp = c->PointerToFuncApp(offsetWithinChunk, 1);
3976+
fapp->m_func = VNF_MemOpaque;
3977+
fapp->m_args[0] = loopNum;
39253978

39263979
ValueNum resultVN = c->m_baseVN + offsetWithinChunk;
39273980
return resultVN;
@@ -5916,56 +5969,19 @@ bool ValueNumStore::GetVNFunc(ValueNum vn, VNFuncApp* funcApp)
59165969
Chunk* c = m_chunks.GetNoExpand(GetChunkNum(vn));
59175970
unsigned offset = ChunkOffset(vn);
59185971
assert(offset < c->m_numUsed);
5919-
switch (c->m_attribs)
5920-
{
5921-
case CEA_Func4:
5922-
{
5923-
VNDefFunc4Arg* farg4 = &reinterpret_cast<VNDefFunc4Arg*>(c->m_defs)[offset];
5924-
funcApp->m_func = farg4->m_func;
5925-
funcApp->m_arity = 4;
5926-
funcApp->m_args[0] = farg4->m_arg0;
5927-
funcApp->m_args[1] = farg4->m_arg1;
5928-
funcApp->m_args[2] = farg4->m_arg2;
5929-
funcApp->m_args[3] = farg4->m_arg3;
5930-
return true;
5931-
}
5932-
case CEA_Func3:
5933-
{
5934-
VNDefFunc3Arg* farg3 = &reinterpret_cast<VNDefFunc3Arg*>(c->m_defs)[offset];
5935-
funcApp->m_func = farg3->m_func;
5936-
funcApp->m_arity = 3;
5937-
funcApp->m_args[0] = farg3->m_arg0;
5938-
funcApp->m_args[1] = farg3->m_arg1;
5939-
funcApp->m_args[2] = farg3->m_arg2;
5940-
return true;
5941-
}
5942-
case CEA_Func2:
5943-
{
5944-
VNDefFunc2Arg* farg2 = &reinterpret_cast<VNDefFunc2Arg*>(c->m_defs)[offset];
5945-
funcApp->m_func = farg2->m_func;
5946-
funcApp->m_arity = 2;
5947-
funcApp->m_args[0] = farg2->m_arg0;
5948-
funcApp->m_args[1] = farg2->m_arg1;
5949-
return true;
5950-
}
5951-
case CEA_Func1:
5952-
{
5953-
VNDefFunc1Arg* farg1 = &reinterpret_cast<VNDefFunc1Arg*>(c->m_defs)[offset];
5954-
funcApp->m_func = farg1->m_func;
5955-
funcApp->m_arity = 1;
5956-
funcApp->m_args[0] = farg1->m_arg0;
5957-
return true;
5958-
}
5959-
case CEA_Func0:
5960-
{
5961-
VNDefFunc0Arg* farg0 = &reinterpret_cast<VNDefFunc0Arg*>(c->m_defs)[offset];
5962-
funcApp->m_func = farg0->m_func;
5963-
funcApp->m_arity = 0;
5964-
return true;
5965-
}
5966-
default:
5967-
return false;
5972+
static_assert_no_msg(AreContiguous(CEA_Func0, CEA_Func1, CEA_Func2, CEA_Func3, CEA_Func4));
5973+
unsigned arity = c->m_attribs - CEA_Func0;
5974+
if (arity <= 4)
5975+
{
5976+
static_assert_no_msg(sizeof(VNFunc) == sizeof(VNDefFuncAppFlexible));
5977+
funcApp->m_arity = arity;
5978+
VNDefFuncAppFlexible* farg = c->PointerToFuncApp(offset, arity);
5979+
funcApp->m_func = farg->m_func;
5980+
funcApp->m_args = farg->m_args;
5981+
return true;
59685982
}
5983+
5984+
return false;
59695985
}
59705986

59715987
ValueNum ValueNumStore::VNForRefInAddr(ValueNum vn)

0 commit comments

Comments
 (0)