Skip to content

Commit

Permalink
Rework ProbabilisticMap character checks in SearchValues (#101001)
Browse files Browse the repository at this point in the history
* Rework ProbabilisticMap character checks in SearchValues

* Reduce footprint of ProbMap SearchValues

* Update misleading comment
  • Loading branch information
MihaZupan authored May 16, 2024
1 parent f4eba6b commit b95c8e1
Show file tree
Hide file tree
Showing 8 changed files with 487 additions and 125 deletions.
28 changes: 26 additions & 2 deletions src/libraries/System.Memory/tests/Span/SearchValues.cs
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,31 @@ public static IEnumerable<object[]> Values_MemberData()
"\uFFFF\uFFFE\uFFFD\uFFFC\uFFFB\uFFFA",
"\uFFFF\uFFFE\uFFFD\uFFFC\uFFFB\uFFFB",
"\uFFFF\uFFFE\uFFFD\uFFFC\uFFFB\uFFF9",
new string('\u0080', 256) + '\u0082',
new string('\u0080', 100) + '\uF000',
new string('\u0080', 256) + '\uF000',
string.Concat(Enumerable.Range(128, 255).Select(i => (char)i)),
string.Concat(Enumerable.Range(128, 257).Select(i => (char)i)),
string.Concat(Enumerable.Range(128, 254).Select(i => (char)i)) + '\uF000',
string.Concat(Enumerable.Range(128, 256).Select(i => (char)i)) + '\uF000',
'\0' + string.Concat(Enumerable.Range(2, char.MaxValue - 1).Select(i => (char)i)),
};

return values.Select(v => new object[] { v, Encoding.Latin1.GetBytes(v) });
foreach (string value in values)
{
yield return Pair(value);
yield return Pair('a' + value);

// Test some more duplicates
if (value.Length > 0)
{
yield return Pair(value + value[0]);
yield return Pair(value[0] + value);
yield return Pair(value + value);
}
}

// Builds one object[] row for Values_MemberData: the string itself followed by
// the bytes produced by encoding that same string as Latin1.
static object[] Pair(string value)
{
    byte[] latin1Bytes = Encoding.Latin1.GetBytes(value);
    return new object[] { value, latin1Bytes };
}
}

[Theory]
Expand Down Expand Up @@ -192,10 +214,12 @@ public static void SearchValues_Contains(string needle, byte[] byteNeedle)

// Checks that values.Contains agrees with the needle for every value
// from T.MaxValue down to zero.
static void Test<T>(ReadOnlySpan<T> needle, SearchValues<T> values) where T : struct, INumber<T>, IMinMaxValue<T>
{
    // Snapshot the needle into a set once, so that each membership check in the loop
    // is a hash lookup rather than a linear scan of the whole span.
    HashSet<T> needleSet = needle.ToArray().ToHashSet();

    for (int i = int.CreateChecked(T.MaxValue); i >= 0; i--)
    {
        T t = T.CreateChecked(i);
        Assert.Equal(needleSet.Contains(t), values.Contains(t));
    }
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -433,6 +433,7 @@
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Any2SearchValues.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Any3SearchValues.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\BitVector256.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\ProbabilisticMapState.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\ProbabilisticWithAsciiCharSearchValues.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Any4SearchValues.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Any5SearchValues.cs" />
Expand All @@ -443,7 +444,7 @@
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\RangeByteSearchValues.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\RangeCharSearchValues.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\ProbabilisticCharSearchValues.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Latin1CharSearchValues.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\BitmapCharSearchValues.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\SearchValues.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\SearchValues.T.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\SearchValuesDebugView.cs" />
Expand Down
Original file line number Diff line number Diff line change
@@ -1,34 +1,55 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Collections.Generic;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

namespace System.Buffers
{
internal sealed class Latin1CharSearchValues : SearchValues<char>
internal sealed class BitmapCharSearchValues : SearchValues<char>
{
private readonly BitVector256 _lookup;
private readonly uint[] _bitmap;

public Latin1CharSearchValues(ReadOnlySpan<char> values)
public BitmapCharSearchValues(ReadOnlySpan<char> values, int maxInclusive)
{
Debug.Assert(maxInclusive <= char.MaxValue);

_bitmap = new uint[maxInclusive / 32 + 1];

foreach (char c in values)
{
if (c > 255)
_bitmap[c >> 5] |= 1u << c;
}
}

internal override char[] GetValues()
{
var chars = new List<char>();
uint[] bitmap = _bitmap;

for (int i = 0; i < _bitmap.Length * 32; i++)
{
if (Contains(bitmap, i))
{
// The values were modified concurrent with the call to SearchValues.Create
ThrowHelper.ThrowInvalidOperationException_InvalidOperation_EnumFailedVersion();
chars.Add((char)i);
}

_lookup.Set(c);
}
}

internal override char[] GetValues() => _lookup.GetCharValues();
return chars.ToArray();
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal override bool ContainsCore(char value) =>
_lookup.Contains256(value);
Contains(_bitmap, value);

// Tests whether the bit for 'value' is set in 'bitmap', where each uint covers 32 consecutive values.
// A value whose word index falls outside the array (including any negative value) is reported as absent.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static bool Contains(uint[] bitmap, int value)
{
    uint wordIndex = (uint)(value >> 5);

    if (wordIndex >= (uint)bitmap.Length)
    {
        return false;
    }

    // A shift on uint only uses the low 5 bits of the count, so this selects bit (value & 31).
    uint bit = 1u << value;
    return (bitmap[wordIndex] & bit) != 0;
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal override int IndexOfAny(ReadOnlySpan<char> span) =>
Expand All @@ -51,11 +72,12 @@ private int IndexOfAny<TNegator>(ref char searchSpace, int searchSpaceLength)
{
ref char searchSpaceEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength);
ref char cur = ref searchSpace;
uint[] bitmap = _bitmap;

while (!Unsafe.AreSame(ref cur, ref searchSpaceEnd))
{
char c = cur;
if (TNegator.NegateIfNeeded(_lookup.Contains256(c)))
if (TNegator.NegateIfNeeded(Contains(bitmap, c)))
{
return (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref cur) / sizeof(char));
}
Expand All @@ -69,16 +91,18 @@ private int IndexOfAny<TNegator>(ref char searchSpace, int searchSpaceLength)
private int LastIndexOfAny<TNegator>(ref char searchSpace, int searchSpaceLength)
where TNegator : struct, IndexOfAnyAsciiSearcher.INegator
{
for (int i = searchSpaceLength - 1; i >= 0; i--)
uint[] bitmap = _bitmap;

while (--searchSpaceLength >= 0)
{
char c = Unsafe.Add(ref searchSpace, i);
if (TNegator.NegateIfNeeded(_lookup.Contains256(c)))
char c = Unsafe.Add(ref searchSpace, searchSpaceLength);
if (TNegator.NegateIfNeeded(Contains(bitmap, c)))
{
return i;
break;
}
}

return -1;
return searchSpaceLength;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,31 +8,30 @@ namespace System.Buffers
{
internal sealed class ProbabilisticCharSearchValues : SearchValues<char>
{
private ProbabilisticMap _map;
private readonly string _values;
private ProbabilisticMapState _map;

public ProbabilisticCharSearchValues(scoped ReadOnlySpan<char> values)
public ProbabilisticCharSearchValues(ReadOnlySpan<char> values, int maxInclusive)
{
_values = new string(values);
_map = new ProbabilisticMap(_values);
_map = new ProbabilisticMapState(values, maxInclusive);
}

internal override char[] GetValues() => _values.ToCharArray();
internal override char[] GetValues() =>
_map.GetValues();

[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal override bool ContainsCore(char value) =>
ProbabilisticMap.Contains(ref Unsafe.As<ProbabilisticMap, uint>(ref _map), _values, value);
_map.FastContains(value);

internal override int IndexOfAny(ReadOnlySpan<char> span) =>
ProbabilisticMap.IndexOfAny(ref Unsafe.As<ProbabilisticMap, uint>(ref _map), ref MemoryMarshal.GetReference(span), span.Length, _values);
ProbabilisticMap.IndexOfAny<SearchValues.TrueConst>(ref MemoryMarshal.GetReference(span), span.Length, ref _map);

internal override int IndexOfAnyExcept(ReadOnlySpan<char> span) =>
ProbabilisticMap.IndexOfAnySimpleLoop<IndexOfAnyAsciiSearcher.Negate>(ref MemoryMarshal.GetReference(span), span.Length, _values);
ProbabilisticMapState.IndexOfAnySimpleLoop<SearchValues.TrueConst, IndexOfAnyAsciiSearcher.Negate>(ref MemoryMarshal.GetReference(span), span.Length, ref _map);

internal override int LastIndexOfAny(ReadOnlySpan<char> span) =>
ProbabilisticMap.LastIndexOfAny(ref Unsafe.As<ProbabilisticMap, uint>(ref _map), ref MemoryMarshal.GetReference(span), span.Length, _values);
ProbabilisticMap.LastIndexOfAny<SearchValues.TrueConst>(ref MemoryMarshal.GetReference(span), span.Length, ref _map);

internal override int LastIndexOfAnyExcept(ReadOnlySpan<char> span) =>
ProbabilisticMap.LastIndexOfAnySimpleLoop<IndexOfAnyAsciiSearcher.Negate>(ref MemoryMarshal.GetReference(span), span.Length, _values);
ProbabilisticMapState.LastIndexOfAnySimpleLoop<SearchValues.TrueConst, IndexOfAnyAsciiSearcher.Negate>(ref MemoryMarshal.GetReference(span), span.Length, ref _map);
}
}
Loading

0 comments on commit b95c8e1

Please sign in to comment.