@@ -141,9 +141,12 @@ static T Vectorized128(ref T xRef, nuint remainder)
141
141
142
142
// We need to ensure the underlying data can be aligned and only align
143
143
// it if it can. It is possible we have an unaligned ref, in which case we
144
- // can never achieve the required SIMD alignment.
144
+ // can never achieve the required SIMD alignment. This cannot be done for
145
+ // float or double since that changes how results compound together.
145
146
146
- bool canAlign = ( ( nuint ) xPtr % ( nuint ) sizeof ( T ) ) == 0 ;
147
+ bool canAlign = ( typeof ( T ) != typeof ( float ) ) &&
148
+ ( typeof ( T ) != typeof ( double ) ) &&
149
+ ( ( nuint ) xPtr % ( nuint ) sizeof ( T ) ) == 0 ;
147
150
148
151
if ( canAlign )
149
152
{
@@ -156,11 +159,20 @@ static T Vectorized128(ref T xRef, nuint remainder)
156
159
misalignment = ( ( uint ) sizeof ( Vector128 < T > ) - ( ( nuint ) xPtr % ( uint ) sizeof ( Vector128 < T > ) ) ) / ( uint ) sizeof ( T ) ;
157
160
158
161
xPtr += misalignment ;
159
-
160
162
Debug . Assert ( ( ( nuint ) xPtr % ( uint ) sizeof ( Vector128 < T > ) ) == 0 ) ;
161
163
162
164
remainder -= misalignment ;
163
165
}
166
+ else
167
+ {
168
+ // We can't align, but this also means we're processing the full data from the beginning,
169
+ // so account for that to ensure we don't double process and include them in the
170
+ // aggregate twice.
171
+
172
+ misalignment = ( uint ) Vector128 < T > . Count ;
173
+ xPtr += misalignment ;
174
+ remainder -= misalignment ;
175
+ }
164
176
165
177
Vector128 < T > vector1 ;
166
178
Vector128 < T > vector2 ;
@@ -310,9 +322,12 @@ static T Vectorized256(ref T xRef, nuint remainder)
310
322
311
323
// We need to ensure the underlying data can be aligned and only align
312
324
// it if it can. It is possible we have an unaligned ref, in which case we
313
- // can never achieve the required SIMD alignment.
325
+ // can never achieve the required SIMD alignment. This cannot be done for
326
+ // float or double since that changes how results compound together.
314
327
315
- bool canAlign = ( ( nuint ) xPtr % ( nuint ) sizeof ( T ) ) == 0 ;
328
+ bool canAlign = ( typeof ( T ) != typeof ( float ) ) &&
329
+ ( typeof ( T ) != typeof ( double ) ) &&
330
+ ( ( nuint ) xPtr % ( nuint ) sizeof ( T ) ) == 0 ;
316
331
317
332
if ( canAlign )
318
333
{
@@ -330,6 +345,16 @@ static T Vectorized256(ref T xRef, nuint remainder)
330
345
331
346
remainder -= misalignment ;
332
347
}
348
+ else
349
+ {
350
+ // We can't align, but this also means we're processing the full data from the beginning,
351
+ // so account for that to ensure we don't double process and include them in the
352
+ // aggregate twice.
353
+
354
+ misalignment = ( uint ) Vector256 < T > . Count ;
355
+ xPtr += misalignment ;
356
+ remainder -= misalignment ;
357
+ }
333
358
334
359
Vector256 < T > vector1 ;
335
360
Vector256 < T > vector2 ;
@@ -479,9 +504,12 @@ static T Vectorized512(ref T xRef, nuint remainder)
479
504
480
505
// We need to ensure the underlying data can be aligned and only align
481
506
// it if it can. It is possible we have an unaligned ref, in which case we
482
- // can never achieve the required SIMD alignment.
507
+ // can never achieve the required SIMD alignment. This cannot be done for
508
+ // float or double since that changes how results compound together.
483
509
484
- bool canAlign = ( ( nuint ) xPtr % ( nuint ) sizeof ( T ) ) == 0 ;
510
+ bool canAlign = ( typeof ( T ) != typeof ( float ) ) &&
511
+ ( typeof ( T ) != typeof ( double ) ) &&
512
+ ( ( nuint ) xPtr % ( nuint ) sizeof ( T ) ) == 0 ;
485
513
486
514
if ( canAlign )
487
515
{
@@ -499,6 +527,16 @@ static T Vectorized512(ref T xRef, nuint remainder)
499
527
500
528
remainder -= misalignment ;
501
529
}
530
+ else
531
+ {
532
+ // We can't align, but this also means we're processing the full data from the beginning,
533
+ // so account for that to ensure we don't double process and include them in the
534
+ // aggregate twice.
535
+
536
+ misalignment = ( uint ) Vector512 < T > . Count ;
537
+ xPtr += misalignment ;
538
+ remainder -= misalignment ;
539
+ }
502
540
503
541
Vector512 < T > vector1 ;
504
542
Vector512 < T > vector2 ;
@@ -1227,9 +1265,12 @@ static T Vectorized128(ref T xRef, ref T yRef, nuint remainder)
1227
1265
1228
1266
// We need to ensure the underlying data can be aligned and only align
1229
1267
// it if it can. It is possible we have an unaligned ref, in which case we
1230
- // can never achieve the required SIMD alignment.
1268
+ // can never achieve the required SIMD alignment. This cannot be done for
1269
+ // float or double since that changes how results compound together.
1231
1270
1232
- bool canAlign = ( ( nuint ) xPtr % ( nuint ) sizeof ( T ) ) == 0 ;
1271
+ bool canAlign = ( typeof ( T ) != typeof ( float ) ) &&
1272
+ ( typeof ( T ) != typeof ( double ) ) &&
1273
+ ( ( nuint ) xPtr % ( nuint ) sizeof ( T ) ) == 0 ;
1233
1274
1234
1275
if ( canAlign )
1235
1276
{
@@ -1248,6 +1289,19 @@ static T Vectorized128(ref T xRef, ref T yRef, nuint remainder)
1248
1289
1249
1290
remainder -= misalignment ;
1250
1291
}
1292
+ else
1293
+ {
1294
+ // We can't align, but this also means we're processing the full data from the beginning,
1295
+ // so account for that to ensure we don't double process and include them in the
1296
+ // aggregate twice.
1297
+
1298
+ misalignment = ( uint ) Vector128 < T > . Count ;
1299
+
1300
+ xPtr += misalignment ;
1301
+ yPtr += misalignment ;
1302
+
1303
+ remainder -= misalignment ;
1304
+ }
1251
1305
1252
1306
Vector128 < T > vector1 ;
1253
1307
Vector128 < T > vector2 ;
@@ -1418,9 +1472,12 @@ static T Vectorized256(ref T xRef, ref T yRef, nuint remainder)
1418
1472
1419
1473
// We need to ensure the underlying data can be aligned and only align
1420
1474
// it if it can. It is possible we have an unaligned ref, in which case we
1421
- // can never achieve the required SIMD alignment.
1475
+ // can never achieve the required SIMD alignment. This cannot be done for
1476
+ // float or double since that changes how results compound together.
1422
1477
1423
- bool canAlign = ( ( nuint ) xPtr % ( nuint ) sizeof ( T ) ) == 0 ;
1478
+ bool canAlign = ( typeof ( T ) != typeof ( float ) ) &&
1479
+ ( typeof ( T ) != typeof ( double ) ) &&
1480
+ ( ( nuint ) xPtr % ( nuint ) sizeof ( T ) ) == 0 ;
1424
1481
1425
1482
if ( canAlign )
1426
1483
{
@@ -1439,6 +1496,19 @@ static T Vectorized256(ref T xRef, ref T yRef, nuint remainder)
1439
1496
1440
1497
remainder -= misalignment ;
1441
1498
}
1499
+ else
1500
+ {
1501
+ // We can't align, but this also means we're processing the full data from the beginning,
1502
+ // so account for that to ensure we don't double process and include them in the
1503
+ // aggregate twice.
1504
+
1505
+ misalignment = ( uint ) Vector256 < T > . Count ;
1506
+
1507
+ xPtr += misalignment ;
1508
+ yPtr += misalignment ;
1509
+
1510
+ remainder -= misalignment ;
1511
+ }
1442
1512
1443
1513
Vector256 < T > vector1 ;
1444
1514
Vector256 < T > vector2 ;
@@ -1609,9 +1679,12 @@ static T Vectorized512(ref T xRef, ref T yRef, nuint remainder)
1609
1679
1610
1680
// We need to ensure the underlying data can be aligned and only align
1611
1681
// it if it can. It is possible we have an unaligned ref, in which case we
1612
- // can never achieve the required SIMD alignment.
1682
+ // can never achieve the required SIMD alignment. This cannot be done for
1683
+ // float or double since that changes how results compound together.
1613
1684
1614
- bool canAlign = ( ( nuint ) xPtr % ( nuint ) sizeof ( T ) ) == 0 ;
1685
+ bool canAlign = ( typeof ( T ) != typeof ( float ) ) &&
1686
+ ( typeof ( T ) != typeof ( double ) ) &&
1687
+ ( ( nuint ) xPtr % ( nuint ) sizeof ( T ) ) == 0 ;
1615
1688
1616
1689
if ( canAlign )
1617
1690
{
@@ -1630,6 +1703,19 @@ static T Vectorized512(ref T xRef, ref T yRef, nuint remainder)
1630
1703
1631
1704
remainder -= misalignment ;
1632
1705
}
1706
+ else
1707
+ {
1708
+ // We can't align, but this also means we're processing the full data from the beginning,
1709
+ // so account for that to ensure we don't double process and include them in the
1710
+ // aggregate twice.
1711
+
1712
+ misalignment = ( uint ) Vector512 < T > . Count ;
1713
+
1714
+ xPtr += misalignment ;
1715
+ yPtr += misalignment ;
1716
+
1717
+ remainder -= misalignment ;
1718
+ }
1633
1719
1634
1720
Vector512 < T > vector1 ;
1635
1721
Vector512 < T > vector2 ;
0 commit comments