Skip to content

Commit 5f15498

Browse files
Fix a couple issues with Vector128.Get/WithElement (#52985)
* Fix an issue with Vector128.WithElement around unused nodes for pre-SSE4.1
* Fixing the expected exception for a structreturn test
* Ensure we check if the baseline SIMD ISAs are supported in morph
* Ensure TYP_SIMD12 LclVar can be cloned in lowering
* Fixing up the non SSE41 path for WithElement
* Applying formatting patch
* Ensure ReplaceWithLclVar lowers the created LclVar and assignment
* Don't check the JitLog for compiled methods when the baseline ISAs aren't supported
* Address PR feedback
* Responding to more PR feedback
* Applying formatting patch
* Fixing more PR review feedback
1 parent 959c327 commit 5f15498

File tree

12 files changed

+190
-119
lines changed

12 files changed

+190
-119
lines changed

src/coreclr/jit/compiler.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8228,6 +8228,23 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
82288228
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
82298229
*/
82308230

8231+
//------------------------------------------------------------------------
// IsBaselineSimdIsaSupported: Reports whether the minimum SIMD instruction
//    set for the target is available and hardware intrinsics are enabled.
//
// Return Value:
//    When FEATURE_SIMD is defined: true only if
//    compOpportunisticallyDependsOn(minimumIsa) succeeds and
//    JitConfig.EnableHWIntrinsic() is set, where minimumIsa is
//    InstructionSet_SSE2 on TARGET_XARCH and InstructionSet_AdvSimd on
//    TARGET_ARM64. When FEATURE_SIMD is not defined: always false.
//
// Notes:
//    With FEATURE_SIMD defined, any target other than TARGET_XARCH or
//    TARGET_ARM64 is rejected at compile time via #error.
//
bool IsBaselineSimdIsaSupported()
8232+
{
8233+
#ifdef FEATURE_SIMD
8234+
#if defined(TARGET_XARCH)
8235+
CORINFO_InstructionSet minimumIsa = InstructionSet_SSE2;
8236+
#elif defined(TARGET_ARM64)
8237+
CORINFO_InstructionSet minimumIsa = InstructionSet_AdvSimd;
8238+
#else
8239+
#error Unsupported platform
8240+
#endif // !TARGET_XARCH && !TARGET_ARM64
8241+
8242+
return compOpportunisticallyDependsOn(minimumIsa) && JitConfig.EnableHWIntrinsic();
8243+
#else
8244+
return false;
8245+
#endif
8246+
}
8247+
82318248
// Get highest available level for SIMD codegen
82328249
SIMDLevel getSIMDSupportLevel()
82338250
{

src/coreclr/jit/gentree.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6507,7 +6507,13 @@ GenTree* Compiler::gtNewLclvNode(unsigned lnum, var_types type DEBUGARG(IL_OFFSE
65076507
// should be able to remove this exception and handle the assignment mismatch in
65086508
// Lowering.
65096509
LclVarDsc* varDsc = lvaGetDesc(lnum);
6510-
assert((type == varDsc->lvType) ||
6510+
6511+
bool simd12ToSimd16Widening = false;
6512+
#if FEATURE_SIMD
6513+
// We can additionally have a SIMD12 that was widened to a SIMD16, generally as part of lowering
6514+
simd12ToSimd16Widening = (type == TYP_SIMD16) && (varDsc->lvType == TYP_SIMD12);
6515+
#endif
6516+
assert((type == varDsc->lvType) || simd12ToSimd16Widening ||
65116517
(lvaIsImplicitByRefLocal(lnum) && fgGlobalMorph && (varDsc->lvType == TYP_BYREF)) ||
65126518
((varDsc->lvType == TYP_STRUCT) && (genTypeSize(type) == varDsc->lvExactSize)));
65136519
}

src/coreclr/jit/lir.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -246,10 +246,11 @@ void LIR::Use::ReplaceWith(Compiler* compiler, GenTree* replacement)
246246
// lclNum - The local to use for temporary storage. If BAD_VAR_NUM (the
247247
// default) is provided, this method will create and use a new
248248
// local var.
249+
// assign - On return, if non null, contains the created assignment node
249250
//
250251
// Return Value: The number of the local var used for temporary storage.
251252
//
252-
unsigned LIR::Use::ReplaceWithLclVar(Compiler* compiler, unsigned lclNum)
253+
unsigned LIR::Use::ReplaceWithLclVar(Compiler* compiler, unsigned lclNum, GenTree** assign)
253254
{
254255
assert(IsInitialized());
255256
assert(compiler != nullptr);
@@ -277,6 +278,10 @@ unsigned LIR::Use::ReplaceWithLclVar(Compiler* compiler, unsigned lclNum)
277278
JITDUMP("ReplaceWithLclVar created store :\n");
278279
DISPNODE(store);
279280

281+
if (assign != nullptr)
282+
{
283+
*assign = store;
284+
}
280285
return lclNum;
281286
}
282287

src/coreclr/jit/lir.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ class LIR final
7474
bool IsDummyUse() const;
7575

7676
void ReplaceWith(Compiler* compiler, GenTree* replacement);
77-
unsigned ReplaceWithLclVar(Compiler* compiler, unsigned lclNum = BAD_VAR_NUM);
77+
unsigned ReplaceWithLclVar(Compiler* compiler, unsigned lclNum = BAD_VAR_NUM, GenTree** assign = nullptr);
7878
};
7979

8080
//------------------------------------------------------------------------

src/coreclr/jit/lower.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -217,9 +217,18 @@ class Lowering final : public Phase
217217
GenTree* oldUseNode = use.Def();
218218
if ((oldUseNode->gtOper != GT_LCL_VAR) || (tempNum != BAD_VAR_NUM))
219219
{
220-
use.ReplaceWithLclVar(comp, tempNum);
220+
GenTree* assign;
221+
use.ReplaceWithLclVar(comp, tempNum, &assign);
222+
221223
GenTree* newUseNode = use.Def();
222224
ContainCheckRange(oldUseNode->gtNext, newUseNode);
225+
226+
// We need to lower the LclVar and assignment since there may be certain
227+
// types or scenarios, such as TYP_SIMD12, that need special handling
228+
229+
LowerNode(assign);
230+
LowerNode(newUseNode);
231+
223232
return newUseNode->AsLclVar();
224233
}
225234
return oldUseNode->AsLclVar();

src/coreclr/jit/lowerxarch.cpp

Lines changed: 65 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -3007,15 +3007,16 @@ void Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node)
30073007

30083008
NamedIntrinsic resIntrinsic = NI_Illegal;
30093009

3010-
idx = comp->gtNewIconNode(imm8);
3011-
BlockRange().InsertBefore(node, idx);
3012-
30133010
switch (simdBaseType)
30143011
{
30153012
case TYP_LONG:
30163013
case TYP_ULONG:
30173014
{
3018-
op2 = idx;
3015+
idx = comp->gtNewIconNode(imm8);
3016+
BlockRange().InsertBefore(node, idx);
3017+
3018+
op1 = comp->gtNewArgList(op1, op3, idx);
3019+
op2 = nullptr;
30193020
resIntrinsic = NI_SSE41_X64_Insert;
30203021
break;
30213022
}
@@ -3033,7 +3034,7 @@ void Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node)
30333034

30343035
tmp1 = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, op3, NI_Vector128_CreateScalarUnsafe, CORINFO_TYPE_FLOAT,
30353036
16);
3036-
BlockRange().InsertBefore(idx, tmp1);
3037+
BlockRange().InsertBefore(node, tmp1);
30373038
LowerNode(tmp1);
30383039

30393040
if (!comp->compOpportunisticallyDependsOn(InstructionSet_SSE41))
@@ -3088,26 +3089,36 @@ void Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node)
30883089
ssize_t controlBits1;
30893090
ssize_t controlBits2;
30903091

3092+
// The comments beside the control bits below are listed using the managed API operands
3093+
//
3094+
// In practice, for the first step the value being inserted (op3) is in tmp1
3095+
// while the other elements of the result (op1) are in tmp2. The result ends
3096+
// up containing the value being inserted and its immediate neighbor.
3097+
//
3098+
// The second step takes that result (which is in op1) plus the other elements
3099+
// from op2 (a clone of op1/tmp2 from the previous step) and combines them to
3100+
// create the final result.
3101+
30913102
switch (imm8)
30923103
{
30933104
case 1:
30943105
{
3095-
controlBits1 = 0;
3096-
controlBits2 = 226;
3106+
controlBits1 = 0; // 00 00 00 00; op1 = { X = op3, Y = op3, Z = op1.X, W = op1.X }
3107+
controlBits2 = 226; // 11 10 00 10; node = { X = op1.X, Y = op3, Z = op1.Z, W = op1.W }
30973108
break;
30983109
}
30993110

31003111
case 2:
31013112
{
3102-
controlBits1 = 48;
3103-
controlBits2 = 132;
3113+
controlBits1 = 15; // 00 00 11 11; op1 = { X = op1.W, Y = op1.W, Z = op3, W = op3 }
3114+
controlBits2 = 36; // 00 10 01 00; node = { X = op1.X, Y = op1.Y, Z = op3, W = op1.W }
31043115
break;
31053116
}
31063117

31073118
case 3:
31083119
{
3109-
controlBits1 = 32;
3110-
controlBits2 = 36;
3120+
controlBits1 = 10; // 00 00 10 10; op1 = { X = op1.Z, Y = op1.Z, Z = op3, W = op3 }
3121+
controlBits2 = 132; // 10 00 01 00; node = { X = op1.X, Y = op1.Y, Z = op1.Z, W = op3 }
31113122
break;
31123123
}
31133124

@@ -3118,19 +3129,24 @@ void Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node)
31183129
idx = comp->gtNewIconNode(controlBits1);
31193130
BlockRange().InsertAfter(tmp2, idx);
31203131

3121-
if (imm8 == 1)
3132+
if (imm8 != 1)
31223133
{
31233134
std::swap(tmp1, tmp2);
31243135
}
31253136

3126-
op1 = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, tmp2, tmp1, idx, NI_SSE_Shuffle,
3137+
op1 = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, tmp1, tmp2, idx, NI_SSE_Shuffle,
31273138
CORINFO_TYPE_FLOAT, 16);
31283139
BlockRange().InsertAfter(idx, op1);
31293140
LowerNode(op1);
31303141

31313142
idx = comp->gtNewIconNode(controlBits2);
31323143
BlockRange().InsertAfter(op1, idx);
31333144

3145+
if (imm8 != 1)
3146+
{
3147+
std::swap(op1, op2);
3148+
}
3149+
31343150
op1 = comp->gtNewArgList(op1, op2, idx);
31353151
op2 = nullptr;
31363152
resIntrinsic = NI_SSE_Shuffle;
@@ -3139,8 +3155,8 @@ void Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node)
31393155
}
31403156
else
31413157
{
3142-
op3 = tmp1;
3143-
idx->AsIntCon()->SetIconValue(imm8 * 16);
3158+
imm8 = imm8 * 16;
3159+
op3 = tmp1;
31443160
FALLTHROUGH;
31453161
}
31463162
}
@@ -3150,6 +3166,9 @@ void Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node)
31503166
case TYP_INT:
31513167
case TYP_UINT:
31523168
{
3169+
idx = comp->gtNewIconNode(imm8);
3170+
BlockRange().InsertBefore(node, idx);
3171+
31533172
op1 = comp->gtNewArgList(op1, op3, idx);
31543173
op2 = nullptr;
31553174
resIntrinsic = NI_SSE41_Insert;
@@ -3159,6 +3178,9 @@ void Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node)
31593178
case TYP_SHORT:
31603179
case TYP_USHORT:
31613180
{
3181+
idx = comp->gtNewIconNode(imm8);
3182+
BlockRange().InsertBefore(node, idx);
3183+
31623184
op1 = comp->gtNewArgList(op1, op3, idx);
31633185
op2 = nullptr;
31643186
resIntrinsic = NI_SSE2_Insert;
@@ -3178,7 +3200,7 @@ void Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node)
31783200

31793201
tmp1 = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, op3, NI_Vector128_CreateScalarUnsafe, CORINFO_TYPE_DOUBLE,
31803202
16);
3181-
BlockRange().InsertBefore(idx, tmp1);
3203+
BlockRange().InsertBefore(node, tmp1);
31823204
LowerNode(tmp1);
31833205

31843206
op2 = tmp1;
@@ -5474,8 +5496,11 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* containingNode, Ge
54745496

54755497
default:
54765498
{
5477-
// These intrinsics only expect 16 or 32-byte nodes for containment
5478-
assert((genTypeSize(node->TypeGet()) == 16) || (genTypeSize(node->TypeGet()) == 32));
5499+
if ((genTypeSize(node->TypeGet()) != 16) && (genTypeSize(node->TypeGet()) != 32))
5500+
{
5501+
// These intrinsics only expect 16 or 32-byte nodes for containment
5502+
break;
5503+
}
54795504

54805505
if (!comp->canUseVexEncoding())
54815506
{
@@ -5535,9 +5560,12 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* containingNode, Ge
55355560
case NI_AVX2_ShuffleHigh:
55365561
case NI_AVX2_ShuffleLow:
55375562
{
5538-
// These intrinsics only expect 16 or 32-byte nodes for containment
5539-
assert((genTypeSize(node->TypeGet()) == 16) || (genTypeSize(node->TypeGet()) == 32));
5540-
assert(supportsSIMDScalarLoads == false);
5563+
if ((genTypeSize(node->TypeGet()) != 16) && (genTypeSize(node->TypeGet()) != 32))
5564+
{
5565+
// These intrinsics only expect 16 or 32-byte nodes for containment
5566+
break;
5567+
}
5568+
assert(!supportsSIMDScalarLoads);
55415569

55425570
supportsAlignedSIMDLoads = !comp->canUseVexEncoding() || !comp->opts.MinOpts();
55435571
supportsUnalignedSIMDLoads = comp->canUseVexEncoding();
@@ -5553,7 +5581,12 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* containingNode, Ge
55535581
if (containingNode->GetSimdBaseType() == TYP_FLOAT)
55545582
{
55555583
assert(containingIntrinsicId == NI_SSE41_Insert);
5556-
assert(genTypeSize(node->TypeGet()) == 16);
5584+
5585+
if (genTypeSize(node->TypeGet()) != 16)
5586+
{
5587+
// These intrinsics only expect 16-byte nodes for containment
5588+
break;
5589+
}
55575590

55585591
// Sse41.Insert(V128<float>, V128<float>, byte) is a bit special
55595592
// in that it has different behavior depending on whether the
@@ -5620,8 +5653,11 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* containingNode, Ge
56205653

56215654
case NI_AVX_CompareScalar:
56225655
{
5623-
// These intrinsics only expect 16 or 32-byte nodes for containment
5624-
assert((genTypeSize(node->TypeGet()) == 16) || (genTypeSize(node->TypeGet()) == 32));
5656+
if ((genTypeSize(node->TypeGet()) != 16) && (genTypeSize(node->TypeGet()) != 32))
5657+
{
5658+
// These intrinsics only expect 16 or 32-byte nodes for containment
5659+
break;
5660+
}
56255661

56265662
assert(supportsAlignedSIMDLoads == false);
56275663
assert(supportsUnalignedSIMDLoads == false);
@@ -5700,8 +5736,11 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* containingNode, Ge
57005736

57015737
default:
57025738
{
5703-
// These intrinsics only expect 16 or 32-byte nodes for containment
5704-
assert((genTypeSize(node->TypeGet()) == 16) || (genTypeSize(node->TypeGet()) == 32));
5739+
if ((genTypeSize(node->TypeGet()) != 16) && (genTypeSize(node->TypeGet()) != 32))
5740+
{
5741+
// These intrinsics only expect 16 or 32-byte nodes for containment
5742+
break;
5743+
}
57055744

57065745
supportsSIMDScalarLoads = true;
57075746
supportsGeneralLoads = supportsSIMDScalarLoads;

src/coreclr/jit/morph.cpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6009,11 +6009,14 @@ GenTree* Compiler::fgMorphField(GenTree* tree, MorphAddrContext* mac)
60096009
// if this field belongs to simd struct, translate it to simd intrinsic.
60106010
if (mac == nullptr)
60116011
{
6012-
GenTree* newTree = fgMorphFieldToSimdGetElement(tree);
6013-
if (newTree != tree)
6012+
if (IsBaselineSimdIsaSupported())
60146013
{
6015-
newTree = fgMorphSmpOp(newTree);
6016-
return newTree;
6014+
GenTree* newTree = fgMorphFieldToSimdGetElement(tree);
6015+
if (newTree != tree)
6016+
{
6017+
newTree = fgMorphSmpOp(newTree);
6018+
return newTree;
6019+
}
60176020
}
60186021
}
60196022
else if ((objRef != nullptr) && (objRef->OperGet() == GT_ADDR) && varTypeIsSIMD(objRef->gtGetOp1()))
@@ -12238,6 +12241,7 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac)
1223812241
op2 = tree->AsOp()->gtOp2;
1223912242

1224012243
#ifdef FEATURE_SIMD
12244+
if (IsBaselineSimdIsaSupported())
1224112245
{
1224212246
// We should check whether op2 should be assigned to a SIMD field or not.
1224312247
// If it is, we should translate the tree to simd intrinsic.

src/coreclr/jit/simd.cpp

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1901,15 +1901,7 @@ GenTree* Compiler::impSIMDIntrinsic(OPCODE opcode,
19011901
return nullptr;
19021902
}
19031903

1904-
#if defined(TARGET_XARCH)
1905-
CORINFO_InstructionSet minimumIsa = InstructionSet_SSE2;
1906-
#elif defined(TARGET_ARM64)
1907-
CORINFO_InstructionSet minimumIsa = InstructionSet_AdvSimd;
1908-
#else
1909-
#error Unsupported platform
1910-
#endif // !TARGET_XARCH && !TARGET_ARM64
1911-
1912-
if (!compOpportunisticallyDependsOn(minimumIsa) || !JitConfig.EnableHWIntrinsic())
1904+
if (!IsBaselineSimdIsaSupported())
19131905
{
19141906
// The user disabled support for the baseline ISA so
19151907
// don't emit any SIMD intrinsics as they all require

src/coreclr/jit/simdashwintrinsic.cpp

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -176,15 +176,7 @@ GenTree* Compiler::impSimdAsHWIntrinsic(NamedIntrinsic intrinsic,
176176
return nullptr;
177177
}
178178

179-
#if defined(TARGET_XARCH)
180-
CORINFO_InstructionSet minimumIsa = InstructionSet_SSE2;
181-
#elif defined(TARGET_ARM64)
182-
CORINFO_InstructionSet minimumIsa = InstructionSet_AdvSimd;
183-
#else
184-
#error Unsupported platform
185-
#endif // !TARGET_XARCH && !TARGET_ARM64
186-
187-
if (!compOpportunisticallyDependsOn(minimumIsa) || !JitConfig.EnableHWIntrinsic())
179+
if (!IsBaselineSimdIsaSupported())
188180
{
189181
// The user disabled support for the baseline ISA so
190182
// don't emit any SIMD intrinsics as they all require

src/tests/JIT/Directed/StructABI/structreturn.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1332,7 +1332,7 @@ private static void TestReturnViaThrowing<T>() where T : struct
13321332
T value = vector[Vector<T>.Count];
13331333
System.Diagnostics.Debug.Assert(false);
13341334
}
1335-
catch (IndexOutOfRangeException)
1335+
catch (ArgumentOutOfRangeException)
13361336
{
13371337
return;
13381338
}

0 commit comments

Comments
 (0)