Skip to content

Commit d1f333e

Browse files
Fix a couple issues with Vector.WithElement (#115648)
* Reapply "Vector128.WithElement codegen regression in .NET 9.0 (#115645)" This reverts commit 2fbc9e9. * Ensure that WithElement always uses an in range constant during codegen * Ensure WithElement picks a valid store instruction * Apply suggestions from code review * Ensure that Mono doesn't assert for bounds checked indices * Ensure that WithElement restricts itself to allByteRegs if the base type is byte or sbyte
1 parent b13d90d commit d1f333e

File tree

11 files changed

+257
-94
lines changed

11 files changed

+257
-94
lines changed

src/coreclr/jit/gentree.cpp

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27867,14 +27867,8 @@ GenTree* Compiler::gtNewSimdWithElementNode(
2786727867
var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType);
2786827868

2786927869
assert(varTypeIsArithmetic(simdBaseType));
27870-
assert(op2->IsCnsIntOrI());
2787127870
assert(varTypeIsArithmetic(op3));
2787227871

27873-
ssize_t imm8 = op2->AsIntCon()->IconValue();
27874-
ssize_t count = simdSize / genTypeSize(simdBaseType);
27875-
27876-
assert((0 <= imm8) && (imm8 < count));
27877-
2787827872
#if defined(TARGET_XARCH)
2787927873
switch (simdBaseType)
2788027874
{
@@ -27940,6 +27934,20 @@ GenTree* Compiler::gtNewSimdWithElementNode(
2794027934
#error Unsupported platform
2794127935
#endif // !TARGET_XARCH && !TARGET_ARM64
2794227936

27937+
int immUpperBound = getSIMDVectorLength(simdSize, simdBaseType) - 1;
27938+
bool rangeCheckNeeded = !op2->OperIsConst();
27939+
27940+
if (!rangeCheckNeeded)
27941+
{
27942+
ssize_t imm8 = op2->AsIntCon()->IconValue();
27943+
rangeCheckNeeded = (imm8 < 0) || (imm8 > immUpperBound);
27944+
}
27945+
27946+
if (rangeCheckNeeded)
27947+
{
27948+
op2 = addRangeCheckForHWIntrinsic(op2, 0, immUpperBound);
27949+
}
27950+
2794327951
return gtNewSimdHWIntrinsicNode(type, op1, op2, op3, hwIntrinsicID, simdBaseJitType, simdSize);
2794427952
}
2794527953

src/coreclr/jit/hwintrinsiccodegenxarch.cpp

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1832,6 +1832,7 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions)
18321832

18331833
GenTree* op1 = (node->GetOperandCount() >= 1) ? node->Op(1) : nullptr;
18341834
GenTree* op2 = (node->GetOperandCount() >= 2) ? node->Op(2) : nullptr;
1835+
GenTree* op3 = (node->GetOperandCount() >= 3) ? node->Op(3) : nullptr;
18351836

18361837
genConsumeMultiOpOperands(node);
18371838
regNumber op1Reg = (op1 == nullptr) ? REG_NA : op1->GetRegNum();
@@ -1968,6 +1969,58 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions)
19681969
break;
19691970
}
19701971

1972+
case NI_Vector128_WithElement:
1973+
case NI_Vector256_WithElement:
1974+
case NI_Vector512_WithElement:
1975+
{
1976+
// Optimize the case where op2 is not a constant.
1977+
1978+
assert(!op1->isContained());
1979+
assert(!op2->OperIsConst());
1980+
1981+
// We don't have an instruction to implement this intrinsic if the index is not a constant.
1982+
// So we will use the SIMD temp location to store the vector, set the value and then reload it.
1983+
// The range check will already have been performed, so at this point we know we have an index
1984+
// within the bounds of the vector.
1985+
1986+
unsigned simdInitTempVarNum = compiler->lvaSIMDInitTempVarNum;
1987+
noway_assert(simdInitTempVarNum != BAD_VAR_NUM);
1988+
1989+
bool isEBPbased;
1990+
unsigned offs = compiler->lvaFrameAddress(simdInitTempVarNum, &isEBPbased);
1991+
1992+
#if !FEATURE_FIXED_OUT_ARGS
1993+
if (!isEBPbased)
1994+
{
1995+
// Adjust the offset by the amount currently pushed on the CPU stack
1996+
offs += genStackLevel;
1997+
}
1998+
#else
1999+
assert(genStackLevel == 0);
2000+
#endif // !FEATURE_FIXED_OUT_ARGS
2001+
2002+
regNumber indexReg = op2->GetRegNum();
2003+
regNumber valueReg = op3->GetRegNum(); // New element value to be stored
2004+
2005+
// Store the vector to the temp location.
2006+
GetEmitter()->emitIns_S_R(ins_Store(simdType, compiler->isSIMDTypeLocalAligned(simdInitTempVarNum)),
2007+
emitTypeSize(simdType), op1Reg, simdInitTempVarNum, 0);
2008+
2009+
// Set the desired element.
2010+
GetEmitter()->emitIns_ARX_R(ins_Store(op3->TypeGet()), // Store
2011+
emitTypeSize(baseType), // Of the vector baseType
2012+
valueReg, // From valueReg
2013+
(isEBPbased) ? REG_EBP : REG_ESP, // Stack-based
2014+
indexReg, // Indexed
2015+
genTypeSize(baseType), // by the size of the baseType
2016+
offs); // Offset
2017+
2018+
// Write back the modified vector to the original location.
2019+
GetEmitter()->emitIns_R_S(ins_Load(simdType, compiler->isSIMDTypeLocalAligned(simdInitTempVarNum)),
2020+
emitTypeSize(simdType), targetReg, simdInitTempVarNum, 0);
2021+
break;
2022+
}
2023+
19712024
case NI_Vector128_GetElement:
19722025
case NI_Vector256_GetElement:
19732026
case NI_Vector512_GetElement:

src/coreclr/jit/hwintrinsicxarch.cpp

Lines changed: 0 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -4273,34 +4273,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
42734273
assert(sig->numArgs == 3);
42744274
GenTree* indexOp = impStackTop(1).val;
42754275

4276-
if (!indexOp->OperIsConst())
4277-
{
4278-
if (!opts.OptimizationEnabled())
4279-
{
4280-
// Only enable late stage rewriting if optimizations are enabled
4281-
// as we won't otherwise encounter a constant at the later point
4282-
return nullptr;
4283-
}
4284-
4285-
op3 = impPopStack().val;
4286-
op2 = impPopStack().val;
4287-
op1 = impSIMDPopStack();
4288-
4289-
retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize);
4290-
4291-
retNode->AsHWIntrinsic()->SetMethodHandle(this, method R2RARG(*entryPoint));
4292-
break;
4293-
}
4294-
4295-
ssize_t imm8 = indexOp->AsIntCon()->IconValue();
4296-
ssize_t count = simdSize / genTypeSize(simdBaseType);
4297-
4298-
if ((imm8 >= count) || (imm8 < 0))
4299-
{
4300-
// Using software fallback if index is out of range (throw exception)
4301-
return nullptr;
4302-
}
4303-
43044276
switch (simdBaseType)
43054277
{
43064278
// Using software fallback if simdBaseType is not supported by hardware

src/coreclr/jit/lowerxarch.cpp

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5172,8 +5172,8 @@ GenTree* Lowering::LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node)
51725172
return LowerNode(node);
51735173
}
51745174

5175-
uint32_t count = simdSize / genTypeSize(simdBaseType);
51765175
uint32_t elemSize = genTypeSize(simdBaseType);
5176+
uint32_t count = simdSize / elemSize;
51775177

51785178
if (op1->OperIs(GT_IND))
51795179
{
@@ -5658,14 +5658,24 @@ GenTree* Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node)
56585658
GenTree* op2 = node->Op(2);
56595659
GenTree* op3 = node->Op(3);
56605660

5661-
assert(op2->OperIsConst());
5661+
if (!op2->OperIsConst())
5662+
{
5663+
// We will specially handle WithElement in codegen when op2 isn't a constant
5664+
ContainCheckHWIntrinsic(node);
5665+
return node->gtNext;
5666+
}
5667+
5668+
// We should have a bounds check inserted for any index outside the allowed range
5669+
// but we need to generate some code anyways, and so we'll simply mask here for simplicity.
56625670

5663-
ssize_t count = simdSize / genTypeSize(simdBaseType);
5664-
ssize_t imm8 = op2->AsIntCon()->IconValue();
5665-
ssize_t simd16Cnt = 16 / genTypeSize(simdBaseType);
5666-
ssize_t simd16Idx = imm8 / simd16Cnt;
5671+
uint32_t elemSize = genTypeSize(simdBaseType);
5672+
uint32_t count = simdSize / elemSize;
56675673

5668-
assert(0 <= imm8 && imm8 < count);
5674+
uint32_t imm8 = static_cast<uint8_t>(op2->AsIntCon()->IconValue()) % count;
5675+
uint32_t simd16Cnt = 16 / elemSize;
5676+
uint32_t simd16Idx = imm8 / simd16Cnt;
5677+
5678+
assert((0 <= imm8) && (imm8 < count));
56695679

56705680
switch (simdBaseType)
56715681
{

src/coreclr/jit/lsraxarch.cpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2317,6 +2317,33 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
23172317
break;
23182318
}
23192319

2320+
case NI_Vector128_WithElement:
2321+
case NI_Vector256_WithElement:
2322+
case NI_Vector512_WithElement:
2323+
{
2324+
assert(numArgs == 3);
2325+
2326+
assert(!op1->isContained());
2327+
assert(!op2->OperIsConst());
2328+
2329+
// If the index is not a constant
2330+
// we will use the SIMD temp location to store the vector.
2331+
2332+
var_types requiredSimdTempType = intrinsicTree->TypeGet();
2333+
compiler->getSIMDInitTempVarNum(requiredSimdTempType);
2334+
2335+
// We then customize the uses as we will effectively be spilling
2336+
// op1, storing op3 to that spill location based on op2. Then
2337+
// reloading the updated value to the destination
2338+
2339+
srcCount += BuildOperandUses(op1);
2340+
srcCount += BuildOperandUses(op2);
2341+
srcCount += BuildOperandUses(op3, varTypeIsByte(baseType) ? allByteRegs() : RBM_NONE);
2342+
2343+
buildUses = false;
2344+
break;
2345+
}
2346+
23202347
case NI_Vector128_AsVector128Unsafe:
23212348
case NI_Vector128_AsVector2:
23222349
case NI_Vector128_AsVector3:

src/coreclr/jit/rationalize.cpp

Lines changed: 0 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -385,57 +385,6 @@ void Rationalizer::RewriteHWIntrinsicAsUserCall(GenTree** use, ArrayStack<GenTre
385385
break;
386386
}
387387

388-
case NI_Vector128_WithElement:
389-
#if defined(TARGET_XARCH)
390-
case NI_Vector256_WithElement:
391-
case NI_Vector512_WithElement:
392-
#elif defined(TARGET_ARM64)
393-
case NI_Vector64_WithElement:
394-
#endif
395-
{
396-
assert(operandCount == 3);
397-
398-
GenTree* op1 = operands[0];
399-
GenTree* op2 = operands[1];
400-
GenTree* op3 = operands[2];
401-
402-
if (op2->OperIsConst())
403-
{
404-
ssize_t imm8 = op2->AsIntCon()->IconValue();
405-
ssize_t count = simdSize / genTypeSize(simdBaseType);
406-
407-
if ((imm8 >= count) || (imm8 < 0))
408-
{
409-
// Using software fallback if index is out of range (throw exception)
410-
break;
411-
}
412-
413-
#if defined(TARGET_XARCH)
414-
if (varTypeIsIntegral(simdBaseType))
415-
{
416-
if (varTypeIsLong(simdBaseType))
417-
{
418-
if (!comp->compOpportunisticallyDependsOn(InstructionSet_SSE41_X64))
419-
{
420-
break;
421-
}
422-
}
423-
else if (!varTypeIsShort(simdBaseType))
424-
{
425-
if (!comp->compOpportunisticallyDependsOn(InstructionSet_SSE41))
426-
{
427-
break;
428-
}
429-
}
430-
}
431-
#endif // TARGET_XARCH
432-
433-
result = comp->gtNewSimdWithElementNode(retType, op1, op2, op3, simdBaseJitType, simdSize);
434-
break;
435-
}
436-
break;
437-
}
438-
439388
default:
440389
{
441390
if (sigInfo.numArgs == 0)

src/mono/mono/mini/simd-intrinsics.c

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2553,6 +2553,9 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
25532553
if (index < 0 || index >= elems) {
25542554
MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, args [1]->dreg, elems);
25552555
MONO_EMIT_NEW_COND_EXC (cfg, GE_UN, "ArgumentOutOfRangeException");
2556+
2557+
// Fixup the index to be in range so codegen is still valid
2558+
index %= elems;
25562559
}
25572560

25582561
// Bounds check is elided if we know the index is safe.
@@ -3220,8 +3223,11 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
32203223
// If the index is provably a constant, we can generate vastly better code.
32213224
int index = GTMREG_TO_INT (args[1]->inst_c0);
32223225
if (index < 0 || index >= elems) {
3223-
MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, args [1]->dreg, elems);
3224-
MONO_EMIT_NEW_COND_EXC (cfg, GE_UN, "ArgumentOutOfRangeException");
3226+
MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, args [1]->dreg, elems);
3227+
MONO_EMIT_NEW_COND_EXC (cfg, GE_UN, "ArgumentOutOfRangeException");
3228+
3229+
// Fixup the index to be in range so codegen is still valid
3230+
index %= elems;
32253231
}
32263232

32273233
return emit_vector_insert_element (cfg, klass, args [0], arg0_type, args [2], index, FALSE);
@@ -3766,6 +3772,9 @@ emit_vector_2_3_4 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f
37663772
if (index < 0 || index >= len) {
37673773
MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, args [1]->dreg, len);
37683774
MONO_EMIT_NEW_COND_EXC (cfg, GE_UN, "ArgumentOutOfRangeException");
3775+
3776+
// Fixup the index to be in range so codegen is still valid
3777+
index %= len;
37693778
}
37703779

37713780
int opcode = type_to_extract_op (ty);
@@ -3852,6 +3861,9 @@ emit_vector_2_3_4 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f
38523861
if (index < 0 || index >= len) {
38533862
MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, args [1]->dreg, len);
38543863
MONO_EMIT_NEW_COND_EXC (cfg, GE_UN, "ArgumentOutOfRangeException");
3864+
3865+
// Fixup the index to be in range so codegen is still valid
3866+
index %= len;
38553867
}
38563868

38573869
if (args [0]->dreg == dreg) {
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
// Found by Antigen
5+
// Reduced from 313.92 KB to 1.05 KB.
6+
7+
using System;
8+
using System.Collections.Generic;
9+
using System.Runtime.CompilerServices;
10+
using System.Runtime.Intrinsics;
11+
using System.Runtime.Intrinsics.Arm;
12+
using System.Runtime.Intrinsics.X86;
13+
using System.Numerics;
14+
using Xunit;
15+
16+
public class Runtime_115487
17+
{
18+
static int s_int_13 = 0;
19+
static Vector128<ushort> s_v128_ushort_25 = Vector128.Create((ushort)5, 1, 1, 1, 5, 4, 53, 2);
20+
ushort ushort_81 = 1;
21+
Vector128<ushort> v128_ushort_88 = Vector128.Create((ushort)2, 53, 5, 5, 0, 1, 1, 4);
22+
private static List<string> toPrint = new List<string>();
23+
24+
private void Method0()
25+
{
26+
unchecked
27+
{
28+
s_v128_ushort_25 = Vector128.WithElement(s_v128_ushort_25 * v128_ushort_88 ^ s_v128_ushort_25 & Vector128.MaxNative(s_v128_ushort_25, s_v128_ushort_25), s_int_13 >> s_int_13 & 3, ushort_81);
29+
return;
30+
}
31+
}
32+
33+
[Fact]
34+
public static void Problem()
35+
{
36+
Assert.Equal(Vector128.Create((ushort)5, 1, 1, 1, 5, 4, 53, 2), s_v128_ushort_25);
37+
_ = Antigen();
38+
Assert.Equal(Vector128.Create((ushort)1, 52, 4, 4, 5, 0, 0, 10), s_v128_ushort_25);
39+
}
40+
41+
private static int Antigen()
42+
{
43+
new Runtime_115487().Method0();
44+
return string.Join(Environment.NewLine, toPrint).GetHashCode();
45+
}
46+
}
47+
/*
48+
Environment:
49+
50+
set DOTNET_TieredCompilation=0
51+
52+
JIT assert failed:
53+
Assertion failed '(insCodesMR[ins] != BAD_CODE)' in 'TestClass:Method0():this' during 'Generate code' (IL size 83; hash 0x46e9aa75; MinOpts)
54+
55+
File: D:\a\_work\1\s\src\coreclr\jit\emitxarch.cpp Line: 4203
56+
57+
*/
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
2+
<PropertyGroup>
3+
<Optimize>True</Optimize>
4+
</PropertyGroup>
5+
<ItemGroup>
6+
<Compile Include="$(MSBuildProjectName).cs" />
7+
</ItemGroup>
8+
</Project>

0 commit comments

Comments
 (0)