Skip to content

Commit b4e258a

Browse files
authored
Improve vectorization of String.Split (#64899)
1 parent 7508080 commit b4e258a

File tree

3 files changed

+48
-53
lines changed

3 files changed

+48
-53
lines changed

src/libraries/System.Private.CoreLib/src/System/Collections/Generic/ValueListBuilder.cs

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,24 @@ public ref T this[int index]
4444
public void Append(T item)
4545
{
4646
int pos = _pos;
47-
if (pos >= _span.Length)
48-
Grow();
47+
if ((uint)pos < (uint)_span.Length)
48+
{
49+
_span[pos] = item;
50+
_pos = pos + 1;
51+
}
52+
else
53+
{
54+
AddWithResize(item);
55+
}
56+
}
4957

58+
// Hide the uncommon path: the resize case lives in a separate non-inlined method, out of Append's fast path.
59+
[MethodImpl(MethodImplOptions.NoInlining)]
60+
private void AddWithResize(T item)
61+
{
62+
Debug.Assert(_pos == _span.Length);
63+
int pos = _pos;
64+
Grow();
5065
_span[pos] = item;
5166
_pos = pos + 1;
5267
}

src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs

Lines changed: 29 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -1615,8 +1615,7 @@ private void MakeSeparatorList(ReadOnlySpan<char> separators, ref ValueListBuild
16151615
sep0 = separators[0];
16161616
sep1 = separators.Length > 1 ? separators[1] : sep0;
16171617
sep2 = separators.Length > 2 ? separators[2] : sep1;
1618-
1619-
if (Length >= 16 && Sse41.IsSupported)
1618+
if (Vector128.IsHardwareAccelerated && Length >= Vector128<ushort>.Count * 2)
16201619
{
16211620
MakeSeparatorListVectorized(ref sepListBuilder, sep0, sep1, sep2);
16221621
return;
@@ -1659,75 +1658,54 @@ private void MakeSeparatorList(ReadOnlySpan<char> separators, ref ValueListBuild
16591658
private void MakeSeparatorListVectorized(ref ValueListBuilder<int> sepListBuilder, char c, char c2, char c3)
16601659
{
16611660
// Redundant test so we won't prejit remainder of this method
1662-
// on platforms without SSE.
1663-
if (!Sse41.IsSupported)
1661+
// on platforms where Vector128 is not hardware accelerated.
1662+
if (!Vector128.IsHardwareAccelerated)
16641663
{
16651664
throw new PlatformNotSupportedException();
16661665
}
16671666

1668-
// Constant that allows for the truncation of 16-bit (FFFF/0000) values within a register to 4-bit (F/0)
1669-
Vector128<byte> shuffleConstant = Vector128.Create(0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
1667+
Debug.Assert(Length >= Vector128<ushort>.Count);
1668+
1669+
nuint offset = 0;
1670+
nuint lengthToExamine = (nuint)(uint)Length;
1671+
1672+
ref ushort source = ref Unsafe.As<char, ushort>(ref _firstChar);
16701673

16711674
Vector128<ushort> v1 = Vector128.Create((ushort)c);
16721675
Vector128<ushort> v2 = Vector128.Create((ushort)c2);
16731676
Vector128<ushort> v3 = Vector128.Create((ushort)c3);
16741677

1675-
ref char c0 = ref MemoryMarshal.GetReference(this.AsSpan());
1676-
int cond = Length & -Vector128<ushort>.Count;
1677-
int i = 0;
1678-
1679-
for (; i < cond; i += Vector128<ushort>.Count)
1678+
do
16801679
{
1681-
Vector128<ushort> charVector = ReadVector(ref c0, i);
1682-
Vector128<ushort> cmp = Sse2.CompareEqual(charVector, v1);
1683-
1684-
cmp = Sse2.Or(Sse2.CompareEqual(charVector, v2), cmp);
1685-
cmp = Sse2.Or(Sse2.CompareEqual(charVector, v3), cmp);
1680+
Vector128<ushort> vector = Vector128.LoadUnsafe(ref source, offset);
1681+
Vector128<ushort> v1Eq = Vector128.Equals(vector, v1);
1682+
Vector128<ushort> v2Eq = Vector128.Equals(vector, v2);
1683+
Vector128<ushort> v3Eq = Vector128.Equals(vector, v3);
1684+
Vector128<byte> cmp = (v1Eq | v2Eq | v3Eq).AsByte();
16861685

1687-
if (Sse41.TestZ(cmp, cmp)) { continue; }
1688-
1689-
Vector128<byte> mask = Sse2.ShiftRightLogical(cmp.AsUInt64(), 4).AsByte();
1690-
mask = Ssse3.Shuffle(mask, shuffleConstant);
1691-
1692-
uint lowBits = Sse2.ConvertToUInt32(mask.AsUInt32());
1693-
mask = Sse2.ShiftRightLogical(mask.AsUInt64(), 32).AsByte();
1694-
uint highBits = Sse2.ConvertToUInt32(mask.AsUInt32());
1695-
1696-
for (int idx = i; lowBits != 0; idx++)
1686+
if (cmp != Vector128<byte>.Zero)
16971687
{
1698-
if ((lowBits & 0xF) != 0)
1688+
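// Each matching 16-bit char sets two adjacent bits in the byte-wise mask; keep only the even bits (0x5555) so every match is reported once.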
1689+
uint mask = cmp.ExtractMostSignificantBits() & 0x5555;
1690+
do
16991691
{
1700-
sepListBuilder.Append(idx);
1701-
}
1702-
1703-
lowBits >>= 8;
1692+
uint bitPos = (uint)BitOperations.TrailingZeroCount(mask) / sizeof(char);
1693+
sepListBuilder.Append((int)(offset + bitPos));
1694+
mask = BitOperations.ResetLowestSetBit(mask);
1695+
} while (mask != 0);
17041696
}
17051697

1706-
for (int idx = i + 4; highBits != 0; idx++)
1707-
{
1708-
if ((highBits & 0xF) != 0)
1709-
{
1710-
sepListBuilder.Append(idx);
1711-
}
1712-
1713-
highBits >>= 8;
1714-
}
1715-
}
1698+
offset += (nuint)Vector128<ushort>.Count;
1699+
} while (offset <= lengthToExamine - (nuint)Vector128<ushort>.Count);
17161700

1717-
for (; i < Length; i++)
1701+
while (offset < lengthToExamine)
17181702
{
1719-
char curr = Unsafe.Add(ref c0, (IntPtr)(uint)i);
1703+
char curr = (char)Unsafe.Add(ref source, offset);
17201704
if (curr == c || curr == c2 || curr == c3)
17211705
{
1722-
sepListBuilder.Append(i);
1706+
sepListBuilder.Append((int)offset);
17231707
}
1724-
}
1725-
1726-
static Vector128<ushort> ReadVector(ref char c0, int offset)
1727-
{
1728-
ref char ci = ref Unsafe.Add(ref c0, (IntPtr)(uint)offset);
1729-
ref byte b = ref Unsafe.As<char, byte>(ref ci);
1730-
return Unsafe.ReadUnaligned<Vector128<ushort>>(ref b);
1708+
offset++;
17311709
}
17321710
}
17331711

src/libraries/System.Runtime/tests/System/String.SplitTests.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -530,6 +530,7 @@ public static void SplitNullCharArraySeparator_BindsToCharArrayOverload()
530530
[InlineData("this, is, a, string, with some spaces", new[] { ',', 's', 'a' }, M, StringSplitOptions.RemoveEmptyEntries, new[] { "thi", " i", " ", " ", "tring", " with ", "ome ", "p", "ce" })]
531531
[InlineData("this, is, a, string, with some spaces", new[] { ',', 's', 'a' }, M, StringSplitOptions.TrimEntries, new[] { "thi", "", "i", "", "", "", "", "tring", "with", "ome", "p", "ce", "" })]
532532
[InlineData("this, is, a, string, with some spaces", new[] { ',', 's', 'a' }, M, StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries, new[] { "thi", "i", "tring", "with", "ome", "p", "ce" })]
533+
[InlineData("this, is, a, very long string, with some spaces, commas and more spaces", new[] { ',', 's' }, M, StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries, new[] { "thi", "i", "a", "very long", "tring", "with", "ome", "pace", "comma", "and more", "pace" })]
533534
public static void SplitCharArraySeparator(string value, char[] separators, int count, StringSplitOptions options, string[] expected)
534535
{
535536
Assert.Equal(expected, value.Split(separators, count, options));
@@ -561,6 +562,7 @@ public static void SplitCharArraySeparator(string value, char[] separators, int
561562
[InlineData("this, is, a, string, with some spaces, ", new[] { ",", " s" }, M, StringSplitOptions.RemoveEmptyEntries, new[] { "this", " is", " a", "tring", " with", "ome", "paces", " " })]
562563
[InlineData("this, is, a, string, with some spaces, ", new[] { ",", " s" }, M, StringSplitOptions.TrimEntries, new[] { "this", "is", "a", "", "tring", "with", "ome", "paces", "" })]
563564
[InlineData("this, is, a, string, with some spaces, ", new[] { ",", " s" }, M, StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries, new[] { "this", "is", "a", "tring", "with", "ome", "paces" })]
565+
[InlineData("this, is, a, very long string, with some spaces, commas and more spaces", new[] { ",", " s" }, M, StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries, new[] { "this", "is", "a", "very long", "tring", "with", "ome", "paces", "commas and more", "paces" })]
564566
public static void SplitStringArraySeparator(string value, string[] separators, int count, StringSplitOptions options, string[] expected)
565567
{
566568
Assert.Equal(expected, value.Split(separators, count, options));

0 commit comments

Comments
 (0)