Skip to content

Commit 7219954

Browse files
Adding more SIMD constant folding support (#82190)
* Adding more SIMD constant folding support
* Adding tests for the new SIMD constant folding paths
* Ensure bitcasting float/double is using well-defined behavior
1 parent 2ffe0fe commit 7219954

File tree

6 files changed

+1687
-100
lines changed

6 files changed

+1687
-100
lines changed

src/coreclr/jit/simd.h

Lines changed: 170 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,39 @@ struct simd32_t
149149
}
150150
};
151151

152+
template <typename TBase>
153+
TBase EvaluateUnaryScalarSpecialized(genTreeOps oper, TBase arg0)
154+
{
155+
switch (oper)
156+
{
157+
case GT_NOT:
158+
{
159+
return ~arg0;
160+
}
161+
162+
default:
163+
{
164+
unreached();
165+
}
166+
}
167+
}
168+
169+
template <>
170+
inline float EvaluateUnaryScalarSpecialized<float>(genTreeOps oper, float arg0)
171+
{
172+
uint32_t arg0Bits = BitOperations::SingleToUInt32Bits(arg0);
173+
uint32_t resultBits = EvaluateUnaryScalarSpecialized<uint32_t>(oper, arg0Bits);
174+
return BitOperations::UInt32BitsToSingle(resultBits);
175+
}
176+
177+
template <>
178+
inline double EvaluateUnaryScalarSpecialized<double>(genTreeOps oper, double arg0)
179+
{
180+
uint64_t arg0Bits = BitOperations::DoubleToUInt64Bits(arg0);
181+
uint64_t resultBits = EvaluateUnaryScalarSpecialized<uint64_t>(oper, arg0Bits);
182+
return BitOperations::UInt64BitsToDouble(resultBits);
183+
}
184+
152185
template <typename TBase>
153186
TBase EvaluateUnaryScalar(genTreeOps oper, TBase arg0)
154187
{
@@ -161,7 +194,7 @@ TBase EvaluateUnaryScalar(genTreeOps oper, TBase arg0)
161194

162195
default:
163196
{
164-
unreached();
197+
return EvaluateUnaryScalarSpecialized<TBase>(oper, arg0);
165198
}
166199
}
167200
}
@@ -268,6 +301,119 @@ void EvaluateUnarySimd(genTreeOps oper, bool scalar, var_types baseType, TSimd*
268301
}
269302
}
270303

304+
//------------------------------------------------------------------------
// EvaluateBinaryScalarRSZ: Evaluates a logical (zero-filling) right shift of a single scalar element.
//
// Type Parameters:
//    TBase - The element type
//
// Arguments:
//    arg0 - The value to shift
//    arg1 - The shift amount; only the bits selected by (bit width of TBase - 1) are used
//
// Return Value:
//    arg0 shifted right by the masked shift amount.
//
// Notes:
//    The signed integer types are explicitly specialized below so that the shift is carried out
//    on the unsigned type of the same width and the result is cast back.
//
template <typename TBase>
TBase EvaluateBinaryScalarRSZ(TBase arg0, TBase arg1)
{
    const auto shiftMask = (sizeof(TBase) * 8) - 1;
    return arg0 >> (arg1 & shiftMask);
}

// int8_t: shift as uint8_t so that zeros, not sign bits, are shifted in.
template <>
inline int8_t EvaluateBinaryScalarRSZ<int8_t>(int8_t arg0, int8_t arg1)
{
    const uint8_t value  = static_cast<uint8_t>(arg0);
    const uint8_t amount = static_cast<uint8_t>(arg1);

    return static_cast<int8_t>(EvaluateBinaryScalarRSZ<uint8_t>(value, amount));
}

// int16_t: shift as uint16_t so that zeros, not sign bits, are shifted in.
template <>
inline int16_t EvaluateBinaryScalarRSZ<int16_t>(int16_t arg0, int16_t arg1)
{
    const uint16_t value  = static_cast<uint16_t>(arg0);
    const uint16_t amount = static_cast<uint16_t>(arg1);

    return static_cast<int16_t>(EvaluateBinaryScalarRSZ<uint16_t>(value, amount));
}

// int32_t: shift as uint32_t so that zeros, not sign bits, are shifted in.
template <>
inline int32_t EvaluateBinaryScalarRSZ<int32_t>(int32_t arg0, int32_t arg1)
{
    const uint32_t value  = static_cast<uint32_t>(arg0);
    const uint32_t amount = static_cast<uint32_t>(arg1);

    return static_cast<int32_t>(EvaluateBinaryScalarRSZ<uint32_t>(value, amount));
}

// int64_t: shift as uint64_t so that zeros, not sign bits, are shifted in.
template <>
inline int64_t EvaluateBinaryScalarRSZ<int64_t>(int64_t arg0, int64_t arg1)
{
    const uint64_t value  = static_cast<uint64_t>(arg0);
    const uint64_t amount = static_cast<uint64_t>(arg1);

    return static_cast<int64_t>(EvaluateBinaryScalarRSZ<uint64_t>(value, amount));
}
349+
350+
template <typename TBase>
351+
TBase EvaluateBinaryScalarSpecialized(genTreeOps oper, TBase arg0, TBase arg1)
352+
{
353+
switch (oper)
354+
{
355+
case GT_AND:
356+
{
357+
return arg0 & arg1;
358+
}
359+
360+
case GT_AND_NOT:
361+
{
362+
return arg0 & ~arg1;
363+
}
364+
365+
case GT_LSH:
366+
{
367+
return arg0 << (arg1 & ((sizeof(TBase) * 8) - 1));
368+
}
369+
370+
case GT_OR:
371+
{
372+
return arg0 | arg1;
373+
}
374+
375+
case GT_RSH:
376+
{
377+
return arg0 >> (arg1 & ((sizeof(TBase) * 8) - 1));
378+
}
379+
380+
case GT_RSZ:
381+
{
382+
return EvaluateBinaryScalarRSZ<TBase>(arg0, arg1);
383+
}
384+
385+
case GT_XOR:
386+
{
387+
return arg0 ^ arg1;
388+
}
389+
390+
default:
391+
{
392+
unreached();
393+
}
394+
}
395+
}
396+
397+
template <>
398+
inline float EvaluateBinaryScalarSpecialized<float>(genTreeOps oper, float arg0, float arg1)
399+
{
400+
uint32_t arg0Bits = BitOperations::SingleToUInt32Bits(arg0);
401+
uint32_t arg1Bits = BitOperations::SingleToUInt32Bits(arg1);
402+
403+
uint32_t resultBits = EvaluateBinaryScalarSpecialized<uint32_t>(oper, arg0Bits, arg1Bits);
404+
return BitOperations::UInt32BitsToSingle(resultBits);
405+
}
406+
407+
template <>
408+
inline double EvaluateBinaryScalarSpecialized<double>(genTreeOps oper, double arg0, double arg1)
409+
{
410+
uint64_t arg0Bits = BitOperations::DoubleToUInt64Bits(arg0);
411+
uint64_t arg1Bits = BitOperations::DoubleToUInt64Bits(arg1);
412+
413+
uint64_t resultBits = EvaluateBinaryScalarSpecialized<uint64_t>(oper, arg0Bits, arg1Bits);
414+
return BitOperations::UInt64BitsToDouble(resultBits);
415+
}
416+
271417
template <typename TBase>
272418
TBase EvaluateBinaryScalar(genTreeOps oper, TBase arg0, TBase arg1)
273419
{
@@ -278,14 +424,24 @@ TBase EvaluateBinaryScalar(genTreeOps oper, TBase arg0, TBase arg1)
278424
return arg0 + arg1;
279425
}
280426

427+
case GT_DIV:
428+
{
429+
return arg0 / arg1;
430+
}
431+
432+
case GT_MUL:
433+
{
434+
return arg0 * arg1;
435+
}
436+
281437
case GT_SUB:
282438
{
283439
return arg0 - arg1;
284440
}
285441

286442
default:
287443
{
288-
unreached();
444+
return EvaluateBinaryScalarSpecialized<TBase>(oper, arg0, arg1);
289445
}
290446
}
291447
}
@@ -395,6 +551,18 @@ void EvaluateBinarySimd(genTreeOps oper, bool scalar, var_types baseType, TSimd*
395551
}
396552
}
397553

554+
//------------------------------------------------------------------------
// BroadcastConstantToSimd: Fills a SIMD value with repeated copies of one scalar.
//
// Type Parameters:
//    TSimd - The SIMD type; it must expose a `u8` member that can be indexed by byte offset
//    TBase - The element type being replicated
//
// Arguments:
//    result - The SIMD value to fill
//    arg0   - The scalar written to each of the sizeof(TSimd) / sizeof(TBase) elements
//
template <typename TSimd, typename TBase>
void BroadcastConstantToSimd(TSimd* result, TBase arg0)
{
    const uint32_t elementSize  = sizeof(TBase);
    const uint32_t elementCount = sizeof(TSimd) / sizeof(TBase);

    uint32_t byteOffset = 0;

    for (uint32_t element = 0; element < elementCount; element++)
    {
        // Store the element through memcpy instead of a typed `result[element] = arg0` write.
        memcpy(&result->u8[byteOffset], &arg0, elementSize);
        byteOffset += elementSize;
    }
}
565+
398566
#ifdef FEATURE_SIMD
399567

400568
#ifdef TARGET_XARCH

src/coreclr/jit/utils.cpp

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2645,6 +2645,22 @@ uint32_t BitOperations::BitScanReverse(uint64_t value)
26452645
#endif
26462646
}
26472647

2648+
//------------------------------------------------------------------------
2649+
// BitOperations::DoubleToUInt64Bits: Gets the underlying bits for a double-precision floating-point value.
2650+
//
2651+
// Arguments:
2652+
// value - The number to convert
2653+
//
2654+
// Return Value:
2655+
// The underlying bits for value.
2656+
//
2657+
uint64_t BitOperations::DoubleToUInt64Bits(double value)
2658+
{
2659+
uint64_t result;
2660+
memcpy(&result, &value, sizeof(double));
2661+
return result;
2662+
}
2663+
26482664
//------------------------------------------------------------------------
26492665
// BitOperations::LeadingZeroCount: Count the number of leading zero bits in a mask.
26502666
//
@@ -2932,6 +2948,22 @@ uint64_t BitOperations::RotateRight(uint64_t value, uint32_t offset)
29322948
return (value >> (offset & 0x3F)) | (value << ((64 - offset) & 0x3F));
29332949
}
29342950

2951+
//------------------------------------------------------------------------
2952+
// BitOperations::SingleToUInt32Bits: Gets the underlying bits for a single-precision floating-point value.
2953+
//
2954+
// Arguments:
2955+
// value - The number to convert
2956+
//
2957+
// Return Value:
2958+
// The underlying bits for value.
2959+
//
2960+
uint32_t BitOperations::SingleToUInt32Bits(float value)
2961+
{
2962+
uint32_t result;
2963+
memcpy(&result, &value, sizeof(float));
2964+
return result;
2965+
}
2966+
29352967
//------------------------------------------------------------------------
29362968
// BitOperations::TrailingZeroCount: Count the number of trailing zero bits in an integer value.
29372969
//
@@ -2980,6 +3012,38 @@ uint32_t BitOperations::TrailingZeroCount(uint64_t value)
29803012
#endif
29813013
}
29823014

3015+
//------------------------------------------------------------------------
3016+
// BitOperations::UInt32BitsToSingle: Gets a single-precision floating-point from its underlying bit value.
3017+
//
3018+
// Arguments:
3019+
// value - The underlying bit value.
3020+
//
3021+
// Return Value:
3022+
// The single-precision floating-point from value.
3023+
//
3024+
float BitOperations::UInt32BitsToSingle(uint32_t value)
3025+
{
3026+
float result;
3027+
memcpy(&result, &value, sizeof(uint32_t));
3028+
return result;
3029+
}
3030+
3031+
//------------------------------------------------------------------------
3032+
// BitOperations::UInt64BitsToDouble: Gets a double-precision floating-point from its underlying bit value.
3033+
//
3034+
// Arguments:
3035+
// value - The underlying bit value.
3036+
//
3037+
// Return Value:
3038+
// The double-precision floating-point from value.
3039+
//
3040+
double BitOperations::UInt64BitsToDouble(uint64_t value)
3041+
{
3042+
double result;
3043+
memcpy(&result, &value, sizeof(uint64_t));
3044+
return result;
3045+
}
3046+
29833047
namespace MagicDivide
29843048
{
29853049
template <int TableBase = 0, int TableSize, typename Magic>

src/coreclr/jit/utils.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -751,6 +751,8 @@ class BitOperations
751751

752752
static uint32_t BitScanReverse(uint64_t value);
753753

754+
static uint64_t DoubleToUInt64Bits(double value);
755+
754756
static uint32_t LeadingZeroCount(uint32_t value);
755757

756758
static uint32_t LeadingZeroCount(uint64_t value);
@@ -775,9 +777,15 @@ class BitOperations
775777

776778
static uint64_t RotateRight(uint64_t value, uint32_t offset);
777779

780+
static uint32_t SingleToUInt32Bits(float value);
781+
778782
static uint32_t TrailingZeroCount(uint32_t value);
779783

780784
static uint32_t TrailingZeroCount(uint64_t value);
785+
786+
static float UInt32BitsToSingle(uint32_t value);
787+
788+
static double UInt64BitsToDouble(uint64_t value);
781789
};
782790

783791
// The CLR requires that critical section locks be initialized via its ClrCreateCriticalSection API...but

0 commit comments

Comments
 (0)