Skip to content

Commit 5ad9c00

Browse files
authored
Vectorized HttpUserAgentParser.TryExtractVersion (#79)
* Vectorized HttpUserAgentParser.TryExtractVersion * Added a comment
1 parent 4a82130 commit 5ad9c00

File tree

2 files changed

+197
-28
lines changed

2 files changed

+197
-28
lines changed

src/HttpUserAgentParser/HttpUserAgentParser.cs

Lines changed: 119 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
// Copyright © https://myCSharp.de - all rights reserved
22

3+
using System.Diagnostics;
34
using System.Diagnostics.CodeAnalysis;
45
using System.Runtime.CompilerServices;
6+
using System.Runtime.InteropServices;
7+
using System.Runtime.Intrinsics;
58

69
namespace MyCSharp.HttpUserAgentParser;
710

@@ -206,45 +209,133 @@ private static bool TryExtractVersion(ReadOnlySpan<char> haystack, out Range ran
206209
{
207210
range = default;
208211

209-
// Limit search window to avoid scanning entire UA string unnecessarily
210-
const int Window = 128;
211-
if (haystack.Length > Window)
212-
{
213-
haystack = haystack.Slice(0, Window);
214-
}
212+
// Vectorization is used in an optimistic way and is specialized for common (trimmed-down) user agents.
213+
// When the first two char-vectors don't yield any success, we fall back to the scalar path.
214+
// This penalizes inputs in which no version is found, but is an advantage when a version is found.
215+
// Vector512 is left out, because there are no common inputs with length 128 or more.
216+
//
217+
// Two short (same size as char) vectors are read, then packed to byte vectors on which the
218+
// operation is done. For short / char the higher byte is not of interest and zero or outside
219+
// the target characters, thus with bytes we can process twice as many elements at once.
215220

216-
// Find first digit
217-
int start = -1;
218-
for (int i = 0; i < haystack.Length; i++)
221+
if (Vector256.IsHardwareAccelerated && haystack.Length >= 2 * Vector256<short>.Count)
219222
{
220-
char c = haystack[i];
221-
if (c >= '0' && c <= '9')
223+
ref char ptr = ref MemoryMarshal.GetReference(haystack);
224+
225+
Vector256<byte> vec = ptr.ReadVector256AsBytes(0);
226+
Vector256<byte> between0and9 = Vector256.LessThan(vec - Vector256.Create((byte)'0'), Vector256.Create((byte)('9' - '0' + 1)));
227+
228+
if (between0and9 == Vector256<byte>.Zero)
222229
{
223-
start = i;
224-
break;
230+
goto Scalar;
225231
}
226-
}
227232

228-
if (start < 0)
233+
uint bitMask = between0and9.ExtractMostSignificantBits();
234+
int idx = (int)uint.TrailingZeroCount(bitMask);
235+
Debug.Assert(idx is >= 0 and <= 32);
236+
int start = idx;
237+
238+
Vector256<byte> byteMask = between0and9 | Vector256.Equals(vec, Vector256.Create((byte)'.'));
239+
byteMask = ~byteMask;
240+
241+
if (byteMask == Vector256<byte>.Zero)
242+
{
243+
goto Scalar;
244+
}
245+
246+
bitMask = byteMask.ExtractMostSignificantBits();
247+
bitMask >>= start;
248+
249+
idx = start + (int)uint.TrailingZeroCount(bitMask);
250+
Debug.Assert(idx is >= 0 and <= 32);
251+
int end = idx;
252+
253+
range = new Range(start, end);
254+
return true;
255+
}
256+
else if (Vector128.IsHardwareAccelerated && haystack.Length >= 2 * Vector128<short>.Count)
229257
{
230-
// No digit found => no version
231-
return false;
258+
ref char ptr = ref MemoryMarshal.GetReference(haystack);
259+
260+
Vector128<byte> vec = ptr.ReadVector128AsBytes(0);
261+
Vector128<byte> between0and9 = Vector128.LessThan(vec - Vector128.Create((byte)'0'), Vector128.Create((byte)('9' - '0' + 1)));
262+
263+
if (between0and9 == Vector128<byte>.Zero)
264+
{
265+
goto Scalar;
266+
}
267+
268+
uint bitMask = between0and9.ExtractMostSignificantBits();
269+
int idx = (int)uint.TrailingZeroCount(bitMask);
270+
Debug.Assert(idx is >= 0 and <= 16);
271+
int start = idx;
272+
273+
Vector128<byte> byteMask = between0and9 | Vector128.Equals(vec, Vector128.Create((byte)'.'));
274+
byteMask = ~byteMask;
275+
276+
if (byteMask == Vector128<byte>.Zero)
277+
{
278+
goto Scalar;
279+
}
280+
281+
bitMask = byteMask.ExtractMostSignificantBits();
282+
bitMask >>= start;
283+
284+
idx = start + (int)uint.TrailingZeroCount(bitMask);
285+
Debug.Assert(idx is >= 0 and <= 16);
286+
int end = idx;
287+
288+
range = new Range(start, end);
289+
return true;
232290
}
233291

234-
// Consume digits and dots after first digit
235-
int end = start + 1;
236-
while (end < haystack.Length)
292+
Scalar:
237293
{
238-
char c = haystack[end];
239-
if (!((c >= '0' && c <= '9') || c == '.'))
294+
// Limit search window to avoid scanning entire UA string unnecessarily
295+
const int Windows = 128;
296+
if (haystack.Length > Windows)
240297
{
241-
break;
298+
haystack = haystack.Slice(0, Windows);
299+
}
300+
301+
int start = -1;
302+
int i = 0;
303+
304+
for (; i < haystack.Length; ++i)
305+
{
306+
char c = haystack[i];
307+
if (char.IsBetween(c, '0', '9'))
308+
{
309+
start = i;
310+
break;
311+
}
312+
}
313+
314+
if (start < 0)
315+
{
316+
// No digit found => no version
317+
return false;
318+
}
319+
320+
haystack = haystack.Slice(i + 1);
321+
for (i = 0; i < haystack.Length; ++i)
322+
{
323+
char c = haystack[i];
324+
if (!(char.IsBetween(c, '0', '9') || c == '.'))
325+
{
326+
break;
327+
}
242328
}
243-
end++;
244-
}
245329

246-
// Create exclusive end range
247-
range = new Range(start, end);
248-
return true;
330+
i += start + 1; // shift back the previous domain
331+
332+
if (i == start)
333+
{
334+
return false;
335+
}
336+
337+
range = new Range(start, i);
338+
return true;
339+
}
249340
}
250341
}
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
// Copyright © https://myCSharp.de - all rights reserved
2+
3+
using System.Runtime.CompilerServices;
4+
using System.Runtime.Intrinsics;
5+
using System.Runtime.Intrinsics.Arm;
6+
using System.Runtime.Intrinsics.X86;
7+
8+
namespace MyCSharp.HttpUserAgentParser;
9+
10+
// Helpers that read two consecutive vectors of UTF-16 code units starting at a `ref char`
// and pack them into a single byte vector (one byte per char), so byte-wise vector
// comparisons can cover twice as many characters per operation.
internal static class VectorExtensions
11+
{
12+
extension(ref char c)
13+
{
14+
/// <summary>
/// Loads 2 * Vector128&lt;short&gt;.Count chars beginning at element <paramref name="offset"/>
/// (relative to the receiver) and narrows them to one <see cref="Vector128{Byte}"/>.
/// </summary>
/// <param name="offset">Element (char) offset of the first char to read; it is cast to uint, so presumably expected to be non-negative.</param>
/// <returns>A byte vector holding one narrowed value per char that was read, in source order.</returns>
/// <remarks>
/// The loads use LoadUnsafe, i.e. no bounds check is performed here; the caller has to
/// guarantee that enough chars are readable from the given offset.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
15+
public Vector128<byte> ReadVector128AsBytes(int offset)
16+
{
17+
// Reinterpret the char reference as short, because the narrowing / packing APIs below take Vector128<short>.
ref short ptr = ref Unsafe.As<char, short>(ref c);
18+
19+
#if NET10_0_OR_GREATER
20+
// Cross-platform saturating narrow: out-of-range values are clamped instead of truncated.
return Vector128.NarrowWithSaturation(
21+
Vector128.LoadUnsafe(ref ptr, (uint)offset),
22+
Vector128.LoadUnsafe(ref ptr, (uint)(offset + Vector128<short>.Count))
23+
).AsByte();
24+
#else
25+
if (Sse2.IsSupported)
26+
{
27+
// x86: pack both short vectors to bytes with unsigned saturation.
return Sse2.PackUnsignedSaturate(
28+
Vector128.LoadUnsafe(ref ptr, (uint)offset),
29+
Vector128.LoadUnsafe(ref ptr, (uint)(offset + Vector128<short>.Count)));
30+
}
31+
else if (AdvSimd.Arm64.IsSupported)
32+
{
33+
// Arm64: keep the even-indexed bytes of both vectors, which are the low bytes of the 16-bit chars.
// NOTE(review): this truncates instead of saturating, so a char whose low byte matches a target
// (e.g. U+0130 -> 0x30 '0') would be indistinguishable from the ASCII character -- confirm whether
// callers can receive such input.
return AdvSimd.Arm64.UnzipEven(
34+
Vector128.LoadUnsafe(ref ptr, (uint)offset).AsByte(),
35+
Vector128.LoadUnsafe(ref ptr, (uint)(offset + Vector128<short>.Count)).AsByte());
36+
}
37+
else
38+
{
39+
// Generic fallback.
// NOTE(review): Vector128.Narrow is a non-saturating narrow, so the truncation concern noted
// for the Arm64 branch presumably applies here as well -- confirm.
return Vector128.Narrow(
40+
Vector128.LoadUnsafe(ref ptr, (uint)offset),
41+
Vector128.LoadUnsafe(ref ptr, (uint)(offset + Vector128<short>.Count))
42+
).AsByte();
43+
}
44+
#endif
45+
}
46+
47+
/// <summary>
/// Loads 2 * Vector256&lt;short&gt;.Count chars beginning at element <paramref name="offset"/>
/// (relative to the receiver) and narrows them to one <see cref="Vector256{Byte}"/>.
/// </summary>
/// <param name="offset">Element (char) offset of the first char to read; it is cast to uint, so presumably expected to be non-negative.</param>
/// <returns>A byte vector holding one narrowed value per char that was read, in source order.</returns>
/// <remarks>
/// The loads use LoadUnsafe, i.e. no bounds check is performed here; the caller has to
/// guarantee that enough chars are readable from the given offset.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
48+
public Vector256<byte> ReadVector256AsBytes(int offset)
49+
{
50+
// Reinterpret the char reference as short, because the narrowing / packing APIs below take Vector256<short>.
ref short ptr = ref Unsafe.As<char, short>(ref c);
51+
52+
#if NET10_0_OR_GREATER
53+
// Cross-platform saturating narrow: out-of-range values are clamped instead of truncated.
return Vector256.NarrowWithSaturation(
54+
Vector256.LoadUnsafe(ref ptr, (uint)offset),
55+
Vector256.LoadUnsafe(ref ptr, (uint)offset + (uint)Vector256<short>.Count)
56+
).AsByte();
57+
#else
58+
if (Avx2.IsSupported)
59+
{
60+
// AVX2 packs within each 128-bit lane, so the packed result has its 64-bit blocks
// in the order (a.low, b.low, a.high, b.high) rather than in source order.
Vector256<byte> tmp = Avx2.PackUnsignedSaturate(
61+
Vector256.LoadUnsafe(ref ptr, (uint)offset),
62+
Vector256.LoadUnsafe(ref ptr, (uint)offset + (uint)Vector256<short>.Count));
63+
64+
// Reorder the 64-bit blocks as 0, 2, 1, 3 to restore the sequential order of the source chars.
Vector256<long> tmp1 = Avx2.Permute4x64(tmp.AsInt64(), 0b_11_01_10_00);
65+
66+
return tmp1.AsByte();
67+
}
68+
else
69+
{
70+
// Generic fallback.
// NOTE(review): Vector256.Narrow is a non-saturating narrow, so a char whose low byte matches
// a target (e.g. U+0130 -> 0x30 '0') would presumably be indistinguishable from the ASCII
// character -- confirm whether callers can receive such input.
return Vector256.Narrow(
71+
Vector256.LoadUnsafe(ref ptr, (uint)offset),
72+
Vector256.LoadUnsafe(ref ptr, (uint)offset + (uint)Vector256<short>.Count)
73+
).AsByte();
74+
}
75+
#endif
76+
}
77+
}
78+
}

0 commit comments

Comments
 (0)