@@ -149,6 +149,39 @@ struct simd32_t
     }
 };
 
+template <typename TBase>
+TBase EvaluateUnaryScalarSpecialized(genTreeOps oper, TBase arg0)
+{
+    switch (oper)
+    {
+        case GT_NOT:
+        {
+            return ~arg0;
+        }
+
+        default:
+        {
+            unreached();
+        }
+    }
+}
+
+template <>
+inline float EvaluateUnaryScalarSpecialized<float>(genTreeOps oper, float arg0)
+{
+    uint32_t arg0Bits   = BitOperations::SingleToUInt32Bits(arg0);
+    uint32_t resultBits = EvaluateUnaryScalarSpecialized<uint32_t>(oper, arg0Bits);
+    return BitOperations::UInt32BitsToSingle(resultBits);
+}
+
+template <>
+inline double EvaluateUnaryScalarSpecialized<double>(genTreeOps oper, double arg0)
+{
+    uint64_t arg0Bits   = BitOperations::DoubleToUInt64Bits(arg0);
+    uint64_t resultBits = EvaluateUnaryScalarSpecialized<uint64_t>(oper, arg0Bits);
+    return BitOperations::UInt64BitsToDouble(resultBits);
+}
+
 template <typename TBase>
 TBase EvaluateUnaryScalar(genTreeOps oper, TBase arg0)
 {
@@ -161,7 +194,7 @@ TBase EvaluateUnaryScalar(genTreeOps oper, TBase arg0)
 
         default:
         {
-            unreached();
+            return EvaluateUnaryScalarSpecialized<TBase>(oper, arg0);
         }
     }
 }
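
Note on the float/double specializations above: C++ has no `~` for floating-point types, so GT_NOT (and the other bitwise operators) are folded by reinterpreting the IEEE-754 bit pattern, running the integer path, and converting back. Below is a minimal standalone sketch of that round trip; it is not JIT code, and the `FloatBits`/`FloatFromBits` helpers are illustrative stand-ins for `BitOperations::SingleToUInt32Bits`/`UInt32BitsToSingle`.

```cpp
#include <cstdint>
#include <cstring>

// Reinterpret a float's bits as uint32_t without undefined behavior.
static uint32_t FloatBits(float value)
{
    uint32_t bits;
    std::memcpy(&bits, &value, sizeof(bits));
    return bits;
}

// Reinterpret a uint32_t bit pattern as a float.
static float FloatFromBits(uint32_t bits)
{
    float value;
    std::memcpy(&value, &bits, sizeof(value));
    return value;
}

// Fold bitwise NOT over a float constant, mirroring what the
// EvaluateUnaryScalarSpecialized<float> specialization does for GT_NOT.
float FoldNotOnFloat(float arg0)
{
    uint32_t arg0Bits   = FloatBits(arg0);
    uint32_t resultBits = ~arg0Bits; // integer path handles the actual operator
    return FloatFromBits(resultBits);
}
```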
@@ -268,6 +301,119 @@ void EvaluateUnarySimd(genTreeOps oper, bool scalar, var_types baseType, TSimd*
     }
 }
 
+template <typename TBase>
+TBase EvaluateBinaryScalarRSZ(TBase arg0, TBase arg1)
+{
+    return arg0 >> (arg1 & ((sizeof(TBase) * 8) - 1));
+}
+
+template <>
+inline int8_t EvaluateBinaryScalarRSZ<int8_t>(int8_t arg0, int8_t arg1)
+{
+    uint8_t arg0Bits = static_cast<uint8_t>(arg0);
+    uint8_t arg1Bits = static_cast<uint8_t>(arg1);
+
+    uint8_t resultBits = EvaluateBinaryScalarRSZ<uint8_t>(arg0Bits, arg1Bits);
+    return static_cast<int8_t>(resultBits);
+}
+
+template <>
+inline int16_t EvaluateBinaryScalarRSZ<int16_t>(int16_t arg0, int16_t arg1)
+{
+    uint16_t arg0Bits = static_cast<uint16_t>(arg0);
+    uint16_t arg1Bits = static_cast<uint16_t>(arg1);
+
+    uint16_t resultBits = EvaluateBinaryScalarRSZ<uint16_t>(arg0Bits, arg1Bits);
+    return static_cast<int16_t>(resultBits);
+}
+
+template <>
+inline int32_t EvaluateBinaryScalarRSZ<int32_t>(int32_t arg0, int32_t arg1)
+{
+    uint32_t arg0Bits = static_cast<uint32_t>(arg0);
+    uint32_t arg1Bits = static_cast<uint32_t>(arg1);
+
+    uint32_t resultBits = EvaluateBinaryScalarRSZ<uint32_t>(arg0Bits, arg1Bits);
+    return static_cast<int32_t>(resultBits);
+}
+
+template <>
+inline int64_t EvaluateBinaryScalarRSZ<int64_t>(int64_t arg0, int64_t arg1)
+{
+    uint64_t arg0Bits = static_cast<uint64_t>(arg0);
+    uint64_t arg1Bits = static_cast<uint64_t>(arg1);
+
+    uint64_t resultBits = EvaluateBinaryScalarRSZ<uint64_t>(arg0Bits, arg1Bits);
+    return static_cast<int64_t>(resultBits);
+}
+
+template <typename TBase>
+TBase EvaluateBinaryScalarSpecialized(genTreeOps oper, TBase arg0, TBase arg1)
+{
+    switch (oper)
+    {
+        case GT_AND:
+        {
+            return arg0 & arg1;
+        }
+
+        case GT_AND_NOT:
+        {
+            return arg0 & ~arg1;
+        }
+
+        case GT_LSH:
+        {
+            return arg0 << (arg1 & ((sizeof(TBase) * 8) - 1));
+        }
+
+        case GT_OR:
+        {
+            return arg0 | arg1;
+        }
+
+        case GT_RSH:
+        {
+            return arg0 >> (arg1 & ((sizeof(TBase) * 8) - 1));
+        }
+
+        case GT_RSZ:
+        {
+            return EvaluateBinaryScalarRSZ<TBase>(arg0, arg1);
+        }
+
+        case GT_XOR:
+        {
+            return arg0 ^ arg1;
+        }
+
+        default:
+        {
+            unreached();
+        }
+    }
+}
+
+template <>
+inline float EvaluateBinaryScalarSpecialized<float>(genTreeOps oper, float arg0, float arg1)
+{
+    uint32_t arg0Bits = BitOperations::SingleToUInt32Bits(arg0);
+    uint32_t arg1Bits = BitOperations::SingleToUInt32Bits(arg1);
+
+    uint32_t resultBits = EvaluateBinaryScalarSpecialized<uint32_t>(oper, arg0Bits, arg1Bits);
+    return BitOperations::UInt32BitsToSingle(resultBits);
+}
+
+template <>
+inline double EvaluateBinaryScalarSpecialized<double>(genTreeOps oper, double arg0, double arg1)
+{
+    uint64_t arg0Bits = BitOperations::DoubleToUInt64Bits(arg0);
+    uint64_t arg1Bits = BitOperations::DoubleToUInt64Bits(arg1);
+
+    uint64_t resultBits = EvaluateBinaryScalarSpecialized<uint64_t>(oper, arg0Bits, arg1Bits);
+    return BitOperations::UInt64BitsToDouble(resultBits);
+}
+
 template <typename TBase>
 TBase EvaluateBinaryScalar(genTreeOps oper, TBase arg0, TBase arg1)
 {
@@ -278,14 +424,24 @@ TBase EvaluateBinaryScalar(genTreeOps oper, TBase arg0, TBase arg1)
             return arg0 + arg1;
         }
 
+        case GT_DIV:
+        {
+            return arg0 / arg1;
+        }
+
+        case GT_MUL:
+        {
+            return arg0 * arg1;
+        }
+
         case GT_SUB:
         {
            return arg0 - arg1;
         }
 
         default:
         {
-            unreached();
+            return EvaluateBinaryScalarSpecialized<TBase>(oper, arg0, arg1);
         }
     }
 }
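
Note on GT_RSZ: for signed element types, C++ `>>` is an arithmetic (sign-extending) shift, whereas RSZ is the logical shift-right-zero, so `EvaluateBinaryScalarRSZ` round-trips through the corresponding unsigned type and casts back. The shift count is also masked to `bit width - 1`, matching the masking in GT_LSH/GT_RSH and avoiding undefined behavior on over-wide shifts. A standalone sketch (not JIT code) contrasting the two shifts for 32-bit elements:

```cpp
#include <cstdint>
#include <cstdio>

// Arithmetic shift: the sign bit is replicated (what GT_RSH folds to).
int32_t ShiftRightArithmetic(int32_t value, int32_t shift)
{
    return value >> (shift & 31);
}

// Logical shift: zeroes are shifted in (what GT_RSZ folds to),
// implemented via the unsigned type as in EvaluateBinaryScalarRSZ.
int32_t ShiftRightLogical(int32_t value, int32_t shift)
{
    uint32_t bits = static_cast<uint32_t>(value);
    return static_cast<int32_t>(bits >> (shift & 31));
}

int main()
{
    int32_t value = -16; // 0xFFFFFFF0
    std::printf("%08X\n", static_cast<unsigned>(ShiftRightArithmetic(value, 4))); // FFFFFFFF
    std::printf("%08X\n", static_cast<unsigned>(ShiftRightLogical(value, 4)));    // 0FFFFFFF
    return 0;
}
```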
@@ -395,6 +551,18 @@ void EvaluateBinarySimd(genTreeOps oper, bool scalar, var_types baseType, TSimd*
     }
 }
 
+template <typename TSimd, typename TBase>
+void BroadcastConstantToSimd(TSimd* result, TBase arg0)
+{
+    uint32_t count = sizeof(TSimd) / sizeof(TBase);
+
+    for (uint32_t i = 0; i < count; i++)
+    {
+        // Safely execute `result[i] = arg0`
+        memcpy(&result->u8[i * sizeof(TBase)], &arg0, sizeof(TBase));
+    }
+}
+
 #ifdef FEATURE_SIMD
 
 #ifdef TARGET_XARCH
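
Note on `BroadcastConstantToSimd`: it writes the scalar constant into each lane through the simd value's `u8` byte array with `memcpy`, which sidesteps the strict-aliasing and alignment issues a direct typed store into the union could hit. A standalone sketch (not JIT code) of the same pattern, with a hypothetical `simd16` struct standing in for the real `TSimd` types:

```cpp
#include <cstdint>
#include <cstring>

// Stand-in for the JIT's simd types: a 16-byte value exposed as raw bytes.
struct simd16
{
    uint8_t u8[16];
};

template <typename TBase>
void Broadcast(simd16* result, TBase arg0)
{
    uint32_t count = sizeof(simd16) / sizeof(TBase);

    for (uint32_t i = 0; i < count; i++)
    {
        // Equivalent to "lane i = arg0", done byte-wise to avoid aliasing issues.
        std::memcpy(&result->u8[i * sizeof(TBase)], &arg0, sizeof(TBase));
    }
}

// Usage: fill all four float lanes with 1.0f
//   simd16 value;
//   Broadcast<float>(&value, 1.0f);
```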