Skip to content

Commit 7060c23

Browse files
committed
Use trait-based classes to get everything inlined
This costs one extra `HashSet<string>` and one extra `SubstringComparer` allocation, but might make the code run faster. It uses the GSW (generic specialized wrapper) strategy to flatten virtual calls.
1 parent c1dd5d9 commit 7060c23

File tree

5 files changed

+254
-55
lines changed

5 files changed

+254
-55
lines changed

src/libraries/System.Collections.Immutable/src/System.Collections.Immutable.csproj

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ The System.Collections.Immutable library is built-in as part of the shared frame
1010

1111
<ItemGroup>
1212
<Compile Include="Properties\InternalsVisibleTo.cs" />
13+
<Compile Include="System\Collections\Frozen\String\SubstringComparers\SubstringComparer.cs" />
14+
<Compile Include="System\Collections\Frozen\String\SubstringComparers\SubstringComparerBase.cs" />
1315

1416
<Compile Include="System\Polyfills.cs" />
1517
<Compile Include="System\Collections\ThrowHelper.cs" />

src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/String/KeyAnalyzer.cs

Lines changed: 40 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
using System.Collections.Generic;
66
using System.Diagnostics;
77
using System.Runtime.CompilerServices;
8+
using System.Collections.Frozen.String.SubstringComparers;
89

910
namespace System.Collections.Frozen
1011
{
@@ -29,40 +30,40 @@ internal static class KeyAnalyzer
2930
public static AnalysisResults Analyze(
3031
ReadOnlySpan<string> uniqueStrings, bool ignoreCase, int minLength, int maxLength)
3132
{
32-
Debug.Assert(uniqueStrings.Length > 0);
33+
Debug.Assert(!uniqueStrings.IsEmpty);
3334

3435
if (minLength > 0)
3536
{
3637
const int MaxSubstringLengthLimit = 8; // arbitrary small-ish limit...it's not worth the increase in algorithmic complexity to analyze longer substrings
38+
int uniqueStringsLength = uniqueStrings.Length;
3739

3840
// Sufficient uniqueness factor of 95% is good enough.
3941
// Instead of ensuring that 95% of data is good, we stop when we know that at least 5% is bad.
40-
int acceptableNonUniqueCount = uniqueStrings.Length / 20;
42+
int acceptableNonUniqueCount = uniqueStringsLength / 20;
4143

42-
// Try to pick a substring comparer.
43-
SubstringComparer comparer = ignoreCase ? new JustifiedCaseInsensitiveSubstringComparer() : new JustifiedSubstringComparer();
44-
HashSet<string> set = new HashSet<string>(
45-
#if NET6_0_OR_GREATER
46-
uniqueStrings.Length,
47-
#endif
48-
comparer);
44+
ISubstringComparer leftComparer = ignoreCase ? new LeftSubstringCaseInsensitiveComparer() : new LeftSubstringOrdinalComparer();
45+
HashSet<string> leftSet = MakeHashSet(uniqueStringsLength, leftComparer);
46+
47+
// We lazily create the right-justified comparer and its set only when/if they are needed.
48+
ISubstringComparer? rightComparer = null;
49+
HashSet<string>? rightSet = null;
4950

5051
// For each substring length...preferring the shortest length that provides
5152
// enough uniqueness
5253
int maxSubstringLength = Math.Min(minLength, MaxSubstringLengthLimit);
5354
for (int count = 1; count <= maxSubstringLength; count++)
5455
{
55-
comparer.Count = count;
56+
leftComparer.Count = count;
5657

5758
// For each index, get a uniqueness factor for the left-justified substrings.
5859
// If any is above our threshold, we're done.
5960
for (int index = 0; index <= minLength - count; index++)
6061
{
61-
comparer.Index = index;
62+
leftComparer.Index = index;
6263

63-
if (HasSufficientUniquenessFactor(set, uniqueStrings, acceptableNonUniqueCount))
64+
if (HasSufficientUniquenessFactor(leftSet, uniqueStrings, acceptableNonUniqueCount))
6465
{
65-
return CreateAnalysisResults(uniqueStrings, ignoreCase, minLength, maxLength, index, count);
66+
return CreateAnalysisResults(uniqueStrings, ignoreCase, minLength, maxLength, leftComparer);
6667
}
6768
}
6869

@@ -72,29 +73,42 @@ public static AnalysisResults Analyze(
7273
// right-justified substrings, and so we also check right-justification.
7374
if (minLength != maxLength)
7475
{
76+
rightComparer ??= ignoreCase ? new RightSubstringCaseInsensitiveComparer() : new RightSubstringOrdinalComparer();
77+
rightSet ??= MakeHashSet(uniqueStringsLength, rightComparer);
78+
7579
// when Index is negative, we're offsetting from the right, ensure we're at least
7680
// far enough from the right that we have count characters available
77-
comparer.Index = -count;
81+
rightComparer!.Count = count;
82+
rightComparer!.Index = -count;
7883

7984
// For each index, get a uniqueness factor for the right-justified substrings.
8085
// If any is above our threshold, we're done.
81-
for (int offset = 0; offset <= minLength - count; offset++, comparer.Index--)
86+
for (int offset = 0; offset <= minLength - count; offset++, rightComparer!.Index--)
8287
{
83-
if (HasSufficientUniquenessFactor(set, uniqueStrings, acceptableNonUniqueCount))
88+
if (HasSufficientUniquenessFactor(rightSet!, uniqueStrings, acceptableNonUniqueCount))
8489
{
85-
return CreateAnalysisResults(uniqueStrings, ignoreCase, minLength, maxLength, comparer.Index, count);
90+
return CreateAnalysisResults(uniqueStrings, ignoreCase, minLength, maxLength, rightComparer);
8691
}
8792
}
8893
}
8994
}
9095
}
9196

9297
// Could not find a substring index/length that was good enough, use the entire string.
93-
return CreateAnalysisResults(uniqueStrings, ignoreCase, minLength, maxLength, 0, 0);
98+
return CreateAnalysisResults(uniqueStrings, ignoreCase, minLength, maxLength, s_FullStringComparer);
99+
}
100+
101+
/// <summary>Creates an empty string set that uses <paramref name="comparer"/> for equality and hashing.</summary>
/// <param name="length">Initial capacity for the set; only used when NET6_0_OR_GREATER is defined.</param>
/// <param name="comparer">The equality comparer the set is constructed with.</param>
/// <returns>A new, empty <see cref="HashSet{T}"/> of strings.</returns>
private static HashSet<string> MakeHashSet(int length, IEqualityComparer<string> comparer)
{
#if NET6_0_OR_GREATER
    // The capacity-taking constructor is only used on this target.
    return new HashSet<string>(length, comparer);
#else
    return new HashSet<string>(comparer);
#endif
}
95109

96110
private static AnalysisResults CreateAnalysisResults(
97-
ReadOnlySpan<string> uniqueStrings, bool ignoreCase, int minLength, int maxLength, int index, int count)
111+
ReadOnlySpan<string> uniqueStrings, bool ignoreCase, int minLength, int maxLength, ISubstringComparer comparer)
98112
{
99113
// Start off by assuming all strings are ASCII
100114
bool allAsciiIfIgnoreCase = true;
@@ -113,7 +127,7 @@ private static AnalysisResults CreateAnalysisResults(
113127
foreach (string s in uniqueStrings)
114128
{
115129
// Get the span for the substring.
116-
ReadOnlySpan<char> substring = count == 0 ? s.AsSpan() : Slicer(s, index, count);
130+
ReadOnlySpan<char> substring = comparer.Slice(s);
117131

118132
// If the substring isn't ASCII, bail out to return the results.
119133
if (!IsAllAscii(substring))
@@ -139,7 +153,7 @@ private static AnalysisResults CreateAnalysisResults(
139153
}
140154

141155
// Return the analysis results.
142-
return new AnalysisResults(ignoreCase, allAsciiIfIgnoreCase, index, count, minLength, maxLength);
156+
return new AnalysisResults(ignoreCase, allAsciiIfIgnoreCase, comparer.Index, comparer.Count, minLength, maxLength);
143157
}
144158

145159
internal static unsafe bool IsAllAscii(ReadOnlySpan<char> s)
@@ -184,7 +198,7 @@ internal static unsafe bool IsAllAscii(ReadOnlySpan<char> s)
184198
#if NET8_0_OR_GREATER
185199
private static readonly SearchValues<char> s_asciiLetters = SearchValues.Create("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");
186200
#endif
187-
private static bool ContainsAnyLetters(ReadOnlySpan<char> s)
201+
internal static bool ContainsAnyLetters(ReadOnlySpan<char> s)
188202
{
189203
Debug.Assert(IsAllAscii(s));
190204

@@ -203,14 +217,13 @@ private static bool ContainsAnyLetters(ReadOnlySpan<char> s)
203217
#endif
204218
}
205219

206-
private static bool HasSufficientUniquenessFactor(HashSet<string> set, ReadOnlySpan<string> uniqueStrings, int acceptableNonUniqueCount)
220+
internal static bool HasSufficientUniquenessFactor(HashSet<string> set, ReadOnlySpan<string> uniqueStrings, int acceptableNonUniqueCount)
207221
{
208-
set.Clear();
209-
210222
foreach (string s in uniqueStrings)
211223
{
212-
if (!set.Add(s) && acceptableNonUniqueCount-- <= 0)
224+
if (!set.Add(s) && --acceptableNonUniqueCount < 0)
213225
{
226+
set.Clear();
214227
return false;
215228
}
216229
}
@@ -241,34 +254,6 @@ public AnalysisResults(bool ignoreCase, bool allAsciiIfIgnoreCase, int hashIndex
241254
public bool RightJustifiedSubstring => HashIndex < 0;
242255
}
243256

244-
[MethodImpl(MethodImplOptions.AggressiveInlining)]
245-
public static ReadOnlySpan<char> Slicer(this string s, int index, int count) => s.AsSpan((index >= 0 ? index : s.Length + index), count);
246-
247-
private abstract class SubstringComparer : IEqualityComparer<string>
248-
{
249-
public int Index; // offset from left side (if positive) or right side (if negative) of the string
250-
public int Count; // number of characters in the span
251-
252-
public abstract bool Equals(string? x, string? y);
253-
public abstract int GetHashCode(string s);
254-
}
255-
256-
private sealed class JustifiedSubstringComparer : SubstringComparer
257-
{
258-
[MethodImpl(MethodImplOptions.AggressiveInlining)]
259-
public override bool Equals(string? x, string? y) => x!.Slicer(Index, Count).SequenceEqual(y!.Slicer(Index, Count));
260-
261-
[MethodImpl(MethodImplOptions.AggressiveInlining)]
262-
public override int GetHashCode(string s) => Hashing.GetHashCodeOrdinal(s.Slicer(Index, Count));
263-
}
264-
265-
private sealed class JustifiedCaseInsensitiveSubstringComparer : SubstringComparer
266-
{
267-
[MethodImpl(MethodImplOptions.AggressiveInlining)]
268-
public override bool Equals(string? x, string? y) => x!.Slicer(Index, Count).Equals(y!.Slicer(Index, Count), StringComparison.OrdinalIgnoreCase);
269-
270-
[MethodImpl(MethodImplOptions.AggressiveInlining)]
271-
public override int GetHashCode(string s) => Hashing.GetHashCodeOrdinalIgnoreCase(s.Slicer(Index, Count));
272-
}
257+
// Shared instance passed to CreateAnalysisResults when no substring was selective enough.
// Its Index and Count are never set, so they stay at 0 and are reported as the hash index/count.
// Marked readonly because the field is assigned once and only read afterwards.
private static readonly FullStringComparer s_FullStringComparer = new FullStringComparer();
273258
}
274259
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
using System.Runtime.CompilerServices;
5+
6+
namespace System.Collections.Frozen.String.SubstringComparers
7+
{
8+
/// <summary>
/// Ordinal (case-sensitive) comparer over the substring of <c>Count</c> characters that starts
/// at offset <c>Index</c> from the left of each string.
/// </summary>
internal sealed class LeftSubstringOrdinalComparer : SubstringComparerBase<LeftSubstringOrdinalComparer.GSW>
{
    /// <summary>Struct wrapper handed to the base class; it reads Index and Count from the stored comparer.</summary>
    internal struct GSW : IGenericSpecializedWrapper
    {
        private LeftSubstringOrdinalComparer _owner;

        /// <summary>Remembers the comparer whose Index/Count this wrapper reads.</summary>
        public void Store(ISubstringComparer @this)
        {
            _owner = (LeftSubstringOrdinalComparer)@this;
        }

        /// <summary>Returns the span of <paramref name="s"/> starting at Index with length Count.</summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public ReadOnlySpan<char> Slice(string s)
        {
            return s.AsSpan(_owner.Index, _owner.Count);
        }

        /// <summary>Compares the slices of both strings character by character.</summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public bool Equals(string? x, string? y)
        {
            ReadOnlySpan<char> first = Slice(x!);
            ReadOnlySpan<char> second = Slice(y!);
            return first.SequenceEqual(second);
        }

        /// <summary>Hashes the slice of <paramref name="s"/> with the ordinal hash helper.</summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public int GetHashCode(string s)
        {
            return Hashing.GetHashCodeOrdinal(Slice(s));
        }
    }
}
25+
26+
/// <summary>
/// Ordinal (case-sensitive) comparer over the substring of <c>Count</c> characters that starts
/// at <c>s.Length + Index</c>; KeyAnalyzer assigns a negative Index so the offset is measured
/// from the right end of each string.
/// </summary>
internal sealed class RightSubstringOrdinalComparer : SubstringComparerBase<RightSubstringOrdinalComparer.GSW>
{
    /// <summary>Struct wrapper handed to the base class; it reads Index and Count from the stored comparer.</summary>
    internal struct GSW : IGenericSpecializedWrapper
    {
        private RightSubstringOrdinalComparer _owner;

        /// <summary>Remembers the comparer whose Index/Count this wrapper reads.</summary>
        public void Store(ISubstringComparer @this)
        {
            _owner = (RightSubstringOrdinalComparer)@this;
        }

        /// <summary>Returns the span of <paramref name="s"/> starting at <c>s.Length + Index</c> with length Count.</summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public ReadOnlySpan<char> Slice(string s)
        {
            return s.AsSpan(s.Length + _owner.Index, _owner.Count);
        }

        /// <summary>Compares the slices of both strings character by character.</summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public bool Equals(string? x, string? y)
        {
            ReadOnlySpan<char> first = Slice(x!);
            ReadOnlySpan<char> second = Slice(y!);
            return first.SequenceEqual(second);
        }

        /// <summary>Hashes the slice of <paramref name="s"/> with the ordinal hash helper.</summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public int GetHashCode(string s)
        {
            return Hashing.GetHashCodeOrdinal(Slice(s));
        }
    }
}
44+
45+
/// <summary>
/// Case-insensitive (OrdinalIgnoreCase) comparer over the substring of <c>Count</c> characters
/// that starts at offset <c>Index</c> from the left of each string.
/// </summary>
internal sealed class LeftSubstringCaseInsensitiveComparer : SubstringComparerBase<LeftSubstringCaseInsensitiveComparer.GSW>
{
    /// <summary>Struct wrapper handed to the base class; it reads Index and Count from the stored comparer.</summary>
    internal struct GSW : IGenericSpecializedWrapper
    {
        private LeftSubstringCaseInsensitiveComparer _owner;

        /// <summary>Remembers the comparer whose Index/Count this wrapper reads.</summary>
        public void Store(ISubstringComparer @this)
        {
            _owner = (LeftSubstringCaseInsensitiveComparer)@this;
        }

        /// <summary>Returns the span of <paramref name="s"/> starting at Index with length Count.</summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public ReadOnlySpan<char> Slice(string s)
        {
            return s.AsSpan(_owner.Index, _owner.Count);
        }

        /// <summary>Compares the slices of both strings using OrdinalIgnoreCase.</summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public bool Equals(string? x, string? y)
        {
            ReadOnlySpan<char> first = Slice(x!);
            ReadOnlySpan<char> second = Slice(y!);
            return first.Equals(second, StringComparison.OrdinalIgnoreCase);
        }

        /// <summary>Hashes the slice of <paramref name="s"/> with the ordinal-ignore-case hash helper.</summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public int GetHashCode(string s)
        {
            return Hashing.GetHashCodeOrdinalIgnoreCase(Slice(s));
        }
    }
}
62+
63+
/// <summary>
/// Case-insensitive (OrdinalIgnoreCase) comparer over the substring of <c>Count</c> characters
/// that starts at <c>s.Length + Index</c>; KeyAnalyzer assigns a negative Index so the offset is
/// measured from the right end of each string.
/// </summary>
internal sealed class RightSubstringCaseInsensitiveComparer : SubstringComparerBase<RightSubstringCaseInsensitiveComparer.GSW>
{
    /// <summary>Struct wrapper handed to the base class; it reads Index and Count from the stored comparer.</summary>
    internal struct GSW : IGenericSpecializedWrapper
    {
        private RightSubstringCaseInsensitiveComparer _owner;

        /// <summary>Remembers the comparer whose Index/Count this wrapper reads.</summary>
        public void Store(ISubstringComparer @this)
        {
            _owner = (RightSubstringCaseInsensitiveComparer)@this;
        }

        /// <summary>Returns the span of <paramref name="s"/> starting at <c>s.Length + Index</c> with length Count.</summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public ReadOnlySpan<char> Slice(string s)
        {
            return s.AsSpan(s.Length + _owner.Index, _owner.Count);
        }

        /// <summary>Compares the slices of both strings using OrdinalIgnoreCase.</summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public bool Equals(string? x, string? y)
        {
            ReadOnlySpan<char> first = Slice(x!);
            ReadOnlySpan<char> second = Slice(y!);
            return first.Equals(second, StringComparison.OrdinalIgnoreCase);
        }

        /// <summary>Hashes the slice of <paramref name="s"/> with the ordinal-ignore-case hash helper.</summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public int GetHashCode(string s)
        {
            return Hashing.GetHashCodeOrdinalIgnoreCase(Slice(s));
        }
    }
}
80+
81+
/// <summary>
/// Comparer whose slice is the entire string; Index and Count are not consulted by this type.
/// </summary>
/// <remarks>
/// NOTE(review): Equals and GetHashCode here are always OrdinalIgnoreCase, regardless of the
/// caller's ignoreCase setting. In the visible KeyAnalyzer code only Slice, Index and Count of
/// this comparer are consumed, so this has no observable effect there. Confirm whether an
/// ordinal variant is needed before using this type as a set or dictionary comparer.
/// </remarks>
internal sealed class FullStringComparer : SubstringComparerBase<FullStringComparer.GSW>
{
    /// <summary>Struct wrapper handed to the base class.</summary>
    internal struct GSW : IGenericSpecializedWrapper
    {
        private FullStringComparer _owner;

        /// <summary>Remembers the owning comparer.</summary>
        public void Store(ISubstringComparer @this)
        {
            _owner = (FullStringComparer)@this;
        }

        /// <summary>Returns a span over all of <paramref name="s"/>.</summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public ReadOnlySpan<char> Slice(string s)
        {
            return s.AsSpan();
        }

        /// <summary>Compares both full strings using OrdinalIgnoreCase.</summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public bool Equals(string? x, string? y)
        {
            ReadOnlySpan<char> first = Slice(x!);
            ReadOnlySpan<char> second = Slice(y!);
            return first.Equals(second, StringComparison.OrdinalIgnoreCase);
        }

        /// <summary>Hashes the full string with the ordinal-ignore-case hash helper.</summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public int GetHashCode(string s)
        {
            return Hashing.GetHashCodeOrdinalIgnoreCase(Slice(s));
        }
    }
}
98+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
using System.Collections.Generic;
5+
using System.Runtime.CompilerServices;
6+
7+
namespace System.Collections.Frozen.String.SubstringComparers
8+
{
9+
/// <summary>
/// A string equality comparer that compares a configurable substring, and that can also
/// hand back that substring as a span.
/// </summary>
internal interface ISubstringComparer : IEqualityComparer<string>
{
    /// <summary>Offset from the left side (if positive) or right side (if negative) of the string.</summary>
    int Index { get; set; }

    /// <summary>Number of characters in the span.</summary>
    int Count { get; set; }

    /// <summary>Returns the portion of <paramref name="s"/> that this comparer compares.</summary>
    ReadOnlySpan<char> Slice(string s);
}
16+
17+
/// <summary>
/// Base class for the substring comparers. It owns the Index/Count state and forwards
/// Slice, Equals and GetHashCode to a struct wrapper supplied by the derived type.
/// </summary>
/// <typeparam name="TThisWrapper">The derived type's struct wrapper through which all calls are made.</typeparam>
internal abstract class SubstringComparerBase<TThisWrapper> : ISubstringComparer
    where TThisWrapper : struct, SubstringComparerBase<TThisWrapper>.IGenericSpecializedWrapper
{
    /// <summary>Wrapper around this instance, intended to give access to its members without virtual calls.</summary>
    private readonly TThisWrapper _wrapper;

    /// <summary>Creates the wrapper and hands it a reference back to this instance.</summary>
    protected SubstringComparerBase()
    {
        _wrapper = default;
        _wrapper.Store(this);
    }

    /// <summary>Offset of the compared substring; read by the wrapper when slicing.</summary>
    public int Index { get; set; }

    /// <summary>Length of the compared substring; read by the wrapper when slicing.</summary>
    public int Count { get; set; }

    /// <summary>Returns the wrapper's slice of <paramref name="s"/>.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public ReadOnlySpan<char> Slice(string s)
    {
        return _wrapper.Slice(s);
    }

    /// <summary>Returns the wrapper's equality result for the two strings.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public bool Equals(string? x, string? y)
    {
        return _wrapper.Equals(x, y);
    }

    /// <summary>Returns the wrapper's hash code for <paramref name="s"/>.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public int GetHashCode(string s)
    {
        return _wrapper.GetHashCode(s);
    }

    /// <summary>Contract for the struct wrapper used to specialize this base class per derived type.</summary>
    /// <remarks>
    /// The derived type passes a struct implementing this interface as the generic argument, and
    /// the base class performs every call through it. The stated goal is to avoid virtual
    /// dispatch to the derived type by letting the generic struct be specialized.
    /// </remarks>
    internal interface IGenericSpecializedWrapper
    {
        /// <summary>Gives the wrapper the comparer instance it should read from.</summary>
        void Store(ISubstringComparer @this);

        /// <summary>Returns the portion of <paramref name="s"/> that is compared.</summary>
        ReadOnlySpan<char> Slice(string s);

        /// <summary>Determines whether the compared portions of the two strings are equal.</summary>
        bool Equals(string? x, string? y);

        /// <summary>Computes a hash code over the compared portion of <paramref name="s"/>.</summary>
        int GetHashCode(string s);
    }
}
55+
}

0 commit comments

Comments
 (0)