Skip to content

Commit 05abb76

Browse files
Ensure that aggregation is consistent regardless of data alignment (#106166)
* Ensure that aggregation is consistent regardless of data alignment * Ensure we handle for all aggregation helpers * Ensure we don't process beg twice * Ensure that we properly track in the case we can't align * Add missing semicolon * Fix the handling on .NET Framework * Ensure yptr on .NET Framework is incremented as well
1 parent bfb674e commit 05abb76

File tree

2 files changed

+116
-54
lines changed

2 files changed

+116
-54
lines changed

src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/Common/TensorPrimitives.IAggregationOperator.cs

Lines changed: 99 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -141,9 +141,12 @@ static T Vectorized128(ref T xRef, nuint remainder)
141141

142142
// We need to the ensure the underlying data can be aligned and only align
143143
// it if it can. It is possible we have an unaligned ref, in which case we
144-
// can never achieve the required SIMD alignment.
144+
// can never achieve the required SIMD alignment. This cannot be done for
145+
// float or double since that changes how results compound together.
145146

146-
bool canAlign = ((nuint)xPtr % (nuint)sizeof(T)) == 0;
147+
bool canAlign = (typeof(T) != typeof(float)) &&
148+
(typeof(T) != typeof(double)) &&
149+
((nuint)xPtr % (nuint)sizeof(T)) == 0;
147150

148151
if (canAlign)
149152
{
@@ -156,11 +159,20 @@ static T Vectorized128(ref T xRef, nuint remainder)
156159
misalignment = ((uint)sizeof(Vector128<T>) - ((nuint)xPtr % (uint)sizeof(Vector128<T>))) / (uint)sizeof(T);
157160

158161
xPtr += misalignment;
159-
160162
Debug.Assert(((nuint)xPtr % (uint)sizeof(Vector128<T>)) == 0);
161163

162164
remainder -= misalignment;
163165
}
166+
else
167+
{
168+
// We can't align, but this also means we're processing the full data from beg
169+
// so account for that to ensure we don't double process and include them in the
170+
// aggregate twice.
171+
172+
misalignment = (uint)Vector128<T>.Count;
173+
xPtr += misalignment;
174+
remainder -= misalignment;
175+
}
164176

165177
Vector128<T> vector1;
166178
Vector128<T> vector2;
@@ -310,9 +322,12 @@ static T Vectorized256(ref T xRef, nuint remainder)
310322

311323
// We need to the ensure the underlying data can be aligned and only align
312324
// it if it can. It is possible we have an unaligned ref, in which case we
313-
// can never achieve the required SIMD alignment.
325+
// can never achieve the required SIMD alignment. This cannot be done for
326+
// float or double since that changes how results compound together.
314327

315-
bool canAlign = ((nuint)xPtr % (nuint)sizeof(T)) == 0;
328+
bool canAlign = (typeof(T) != typeof(float)) &&
329+
(typeof(T) != typeof(double)) &&
330+
((nuint)xPtr % (nuint)sizeof(T)) == 0;
316331

317332
if (canAlign)
318333
{
@@ -330,6 +345,16 @@ static T Vectorized256(ref T xRef, nuint remainder)
330345

331346
remainder -= misalignment;
332347
}
348+
else
349+
{
350+
// We can't align, but this also means we're processing the full data from beg
351+
// so account for that to ensure we don't double process and include them in the
352+
// aggregate twice.
353+
354+
misalignment = (uint)Vector256<T>.Count;
355+
xPtr += misalignment;
356+
remainder -= misalignment;
357+
}
333358

334359
Vector256<T> vector1;
335360
Vector256<T> vector2;
@@ -479,9 +504,12 @@ static T Vectorized512(ref T xRef, nuint remainder)
479504

480505
// We need to the ensure the underlying data can be aligned and only align
481506
// it if it can. It is possible we have an unaligned ref, in which case we
482-
// can never achieve the required SIMD alignment.
507+
// can never achieve the required SIMD alignment. This cannot be done for
508+
// float or double since that changes how results compound together.
483509

484-
bool canAlign = ((nuint)xPtr % (nuint)sizeof(T)) == 0;
510+
bool canAlign = (typeof(T) != typeof(float)) &&
511+
(typeof(T) != typeof(double)) &&
512+
((nuint)xPtr % (nuint)sizeof(T)) == 0;
485513

486514
if (canAlign)
487515
{
@@ -499,6 +527,16 @@ static T Vectorized512(ref T xRef, nuint remainder)
499527

500528
remainder -= misalignment;
501529
}
530+
else
531+
{
532+
// We can't align, but this also means we're processing the full data from beg
533+
// so account for that to ensure we don't double process and include them in the
534+
// aggregate twice.
535+
536+
misalignment = (uint)Vector512<T>.Count;
537+
xPtr += misalignment;
538+
remainder -= misalignment;
539+
}
502540

503541
Vector512<T> vector1;
504542
Vector512<T> vector2;
@@ -1227,9 +1265,12 @@ static T Vectorized128(ref T xRef, ref T yRef, nuint remainder)
12271265

12281266
// We need to the ensure the underlying data can be aligned and only align
12291267
// it if it can. It is possible we have an unaligned ref, in which case we
1230-
// can never achieve the required SIMD alignment.
1268+
// can never achieve the required SIMD alignment. This cannot be done for
1269+
// float or double since that changes how results compound together.
12311270

1232-
bool canAlign = ((nuint)xPtr % (nuint)sizeof(T)) == 0;
1271+
bool canAlign = (typeof(T) != typeof(float)) &&
1272+
(typeof(T) != typeof(double)) &&
1273+
((nuint)xPtr % (nuint)sizeof(T)) == 0;
12331274

12341275
if (canAlign)
12351276
{
@@ -1248,6 +1289,19 @@ static T Vectorized128(ref T xRef, ref T yRef, nuint remainder)
12481289

12491290
remainder -= misalignment;
12501291
}
1292+
else
1293+
{
1294+
// We can't align, but this also means we're processing the full data from beg
1295+
// so account for that to ensure we don't double process and include them in the
1296+
// aggregate twice.
1297+
1298+
misalignment = (uint)Vector128<T>.Count;
1299+
1300+
xPtr += misalignment;
1301+
yPtr += misalignment;
1302+
1303+
remainder -= misalignment;
1304+
}
12511305

12521306
Vector128<T> vector1;
12531307
Vector128<T> vector2;
@@ -1418,9 +1472,12 @@ static T Vectorized256(ref T xRef, ref T yRef, nuint remainder)
14181472

14191473
// We need to the ensure the underlying data can be aligned and only align
14201474
// it if it can. It is possible we have an unaligned ref, in which case we
1421-
// can never achieve the required SIMD alignment.
1475+
// can never achieve the required SIMD alignment. This cannot be done for
1476+
// float or double since that changes how results compound together.
14221477

1423-
bool canAlign = ((nuint)xPtr % (nuint)sizeof(T)) == 0;
1478+
bool canAlign = (typeof(T) != typeof(float)) &&
1479+
(typeof(T) != typeof(double)) &&
1480+
((nuint)xPtr % (nuint)sizeof(T)) == 0;
14241481

14251482
if (canAlign)
14261483
{
@@ -1439,6 +1496,19 @@ static T Vectorized256(ref T xRef, ref T yRef, nuint remainder)
14391496

14401497
remainder -= misalignment;
14411498
}
1499+
else
1500+
{
1501+
// We can't align, but this also means we're processing the full data from beg
1502+
// so account for that to ensure we don't double process and include them in the
1503+
// aggregate twice.
1504+
1505+
misalignment = (uint)Vector256<T>.Count;
1506+
1507+
xPtr += misalignment;
1508+
yPtr += misalignment;
1509+
1510+
remainder -= misalignment;
1511+
}
14421512

14431513
Vector256<T> vector1;
14441514
Vector256<T> vector2;
@@ -1609,9 +1679,12 @@ static T Vectorized512(ref T xRef, ref T yRef, nuint remainder)
16091679

16101680
// We need to the ensure the underlying data can be aligned and only align
16111681
// it if it can. It is possible we have an unaligned ref, in which case we
1612-
// can never achieve the required SIMD alignment.
1682+
// can never achieve the required SIMD alignment. This cannot be done for
1683+
// float or double since that changes how results compound together.
16131684

1614-
bool canAlign = ((nuint)xPtr % (nuint)sizeof(T)) == 0;
1685+
bool canAlign = (typeof(T) != typeof(float)) &&
1686+
(typeof(T) != typeof(double)) &&
1687+
((nuint)xPtr % (nuint)sizeof(T)) == 0;
16151688

16161689
if (canAlign)
16171690
{
@@ -1630,6 +1703,19 @@ static T Vectorized512(ref T xRef, ref T yRef, nuint remainder)
16301703

16311704
remainder -= misalignment;
16321705
}
1706+
else
1707+
{
1708+
// We can't align, but this also means we're processing the full data from beg
1709+
// so account for that to ensure we don't double process and include them in the
1710+
// aggregate twice.
1711+
1712+
misalignment = (uint)Vector512<T>.Count;
1713+
1714+
xPtr += misalignment;
1715+
yPtr += misalignment;
1716+
1717+
remainder -= misalignment;
1718+
}
16331719

16341720
Vector512<T> vector1;
16351721
Vector512<T> vector2;

src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netstandard/TensorPrimitives.Single.netstandard.cs

Lines changed: 17 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -175,28 +175,15 @@ static float Vectorized(ref float xRef, nuint remainder, TTransformOperator tran
175175
{
176176
float* xPtr = px;
177177

178-
// We need to the ensure the underlying data can be aligned and only align
179-
// it if it can. It is possible we have an unaligned ref, in which case we
180-
// can never achieve the required SIMD alignment.
178+
// Unlike many other vectorization algorithms, we cannot align for aggregation
179+
// because that changes how results compound together and can cause a significant
180+
// difference in the output. This also means we're processing the full data from beg
181+
// so account for that to ensure we don't double process and include them in the
182+
// aggregate twice.
181183

182-
bool canAlign = ((nuint)(xPtr) % sizeof(float)) == 0;
183-
184-
if (canAlign)
185-
{
186-
// Compute by how many elements we're misaligned and adjust the pointers accordingly
187-
//
188-
// Noting that we are only actually aligning dPtr. This is because unaligned stores
189-
// are more expensive than unaligned loads and aligning both is significantly more
190-
// complex.
191-
192-
misalignment = ((uint)(sizeof(Vector<float>)) - ((nuint)(xPtr) % (uint)(sizeof(Vector<float>)))) / sizeof(float);
193-
194-
xPtr += misalignment;
195-
196-
Debug.Assert(((nuint)(xPtr) % (uint)(sizeof(Vector<float>))) == 0);
197-
198-
remainder -= misalignment;
199-
}
184+
misalignment = (uint)Vector<float>.Count;
185+
xPtr += misalignment;
186+
remainder -= misalignment;
200187

201188
Vector<float> vector1;
202189
Vector<float> vector2;
@@ -480,29 +467,18 @@ static float Vectorized(ref float xRef, ref float yRef, nuint remainder, TBinary
480467
float* xPtr = px;
481468
float* yPtr = py;
482469

483-
// We need to the ensure the underlying data can be aligned and only align
484-
// it if it can. It is possible we have an unaligned ref, in which case we
485-
// can never achieve the required SIMD alignment.
486-
487-
bool canAlign = ((nuint)(xPtr) % sizeof(float)) == 0;
488-
489-
if (canAlign)
490-
{
491-
// Compute by how many elements we're misaligned and adjust the pointers accordingly
492-
//
493-
// Noting that we are only actually aligning dPtr. This is because unaligned stores
494-
// are more expensive than unaligned loads and aligning both is significantly more
495-
// complex.
496-
497-
misalignment = ((uint)(sizeof(Vector<float>)) - ((nuint)(xPtr) % (uint)(sizeof(Vector<float>)))) / sizeof(float);
470+
// Unlike many other vectorization algorithms, we cannot align for aggregation
471+
// because that changes how results compound together and can cause a significant
472+
// difference in the output. This also means we're processing the full data from beg
473+
// so account for that to ensure we don't double process and include them in the
474+
// aggregate twice.
498475

499-
xPtr += misalignment;
500-
yPtr += misalignment;
476+
misalignment = (uint)Vector<float>.Count;
501477

502-
Debug.Assert(((nuint)(xPtr) % (uint)(sizeof(Vector<float>))) == 0);
478+
xPtr += misalignment;
479+
yPtr += misalignment;
503480

504-
remainder -= misalignment;
505-
}
481+
remainder -= misalignment;
506482

507483
Vector<float> vector1;
508484
Vector<float> vector2;

0 commit comments

Comments
 (0)