Skip to content

Commit 4f7dbe4

Browse files
committed
Add some KeyAnalyzer tests.
Made ContainsAnyLetters and SufficientUniquenessFactor internal so they can be tested. Hoisted the calculation of the maximum collisions allowed outside the HasSufficientUniquenessFactor for testability (the calculation only needs to be done once). Added tests for ContainsAnyLetters and SufficientUniquenessFactor are working correctly.
1 parent 3be8fa1 commit 4f7dbe4

File tree

2 files changed

+87
-14
lines changed

2 files changed

+87
-14
lines changed

src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/String/KeyAnalyzer.cs

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -46,29 +46,35 @@ public static AnalysisResults Analyze(
4646
/// <summary>Try to find the minimal unique substring index/length to use for comparisons.</summary>
4747
private static bool TryUseSubstring(ReadOnlySpan<string> uniqueStrings, bool ignoreCase, int minLength, int maxLength, out AnalysisResults results)
4848
{
49-
const int MaxSubstringLengthLimit = 8; // arbitrary small-ish limit... t's not worth the increase in algorithmic complexity to analyze longer substrings
49+
const int MaxSubstringLengthLimit = 8; // arbitrary small-ish limit... it's not worth the increase in algorithmic complexity to analyze longer substrings
50+
int uniqueStringsLength = uniqueStrings.Length;
51+
52+
// Sufficient uniqueness factor of 95% is good enough.
53+
// Instead of ensuring that 95% of data is good, we stop when we know that at least 5% is bad.
54+
int acceptableNonUniqueCount = uniqueStringsLength / 20;
5055

5156
SubstringComparer comparer = ignoreCase ? new JustifiedCaseInsensitiveSubstringComparer() : new JustifiedSubstringComparer();
5257
HashSet<string> set = new HashSet<string>(
5358
#if NET6_0_OR_GREATER
54-
uniqueStrings.Length,
59+
uniqueStringsLength,
5560
#endif
5661
comparer);
5762

58-
// For each substring length...
63+
// For each substring length...preferring the shortest length that provides
64+
// enough uniqueness
5965
int maxSubstringLength = Math.Min(minLength, MaxSubstringLengthLimit);
6066
for (int count = 1; count <= maxSubstringLength; count++)
6167
{
6268
comparer.IsLeft = true;
6369
comparer.Count = count;
6470

65-
// For each index, get a uniqueness factor for the left-justified substrings.
71+
// For each index from, get a uniqueness factor for the left-justified substrings.
6672
// If any is above our threshold, we're done.
6773
for (int index = 0; index <= minLength - count; index++)
6874
{
6975
comparer.Index = index;
7076

71-
if (HasSufficientUniquenessFactor(set, uniqueStrings))
77+
if (HasSufficientUniquenessFactor(set, uniqueStrings, acceptableNonUniqueCount))
7278
{
7379
results = CreateAnalysisResults(
7480
uniqueStrings, ignoreCase, minLength, maxLength, index, count,
@@ -90,10 +96,9 @@ private static bool TryUseSubstring(ReadOnlySpan<string> uniqueStrings, bool ign
9096
// If any is above our threshold, we're done.
9197
for (int index = 0; index <= minLength - count; index++)
9298
{
93-
// Get a uniqueness factor for the right-justified substrings.
94-
// If it's above our threshold, we're done.
9599
comparer.Index = -index - count;
96-
if (HasSufficientUniquenessFactor(set, uniqueStrings))
100+
101+
if (HasSufficientUniquenessFactor(set, uniqueStrings, acceptableNonUniqueCount))
97102
{
98103
results = CreateAnalysisResults(
99104
uniqueStrings, ignoreCase, minLength, maxLength, comparer.Index, count,
@@ -202,7 +207,7 @@ internal static unsafe bool IsAllAscii(ReadOnlySpan<char> s)
202207
#if NET8_0_OR_GREATER
203208
private static readonly SearchValues<char> s_asciiLetters = SearchValues.Create("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");
204209
#endif
205-
private static bool ContainsAnyLetters(ReadOnlySpan<char> s)
210+
internal static bool ContainsAnyLetters(ReadOnlySpan<char> s)
206211
{
207212
Debug.Assert(IsAllAscii(s));
208213

@@ -221,14 +226,10 @@ private static bool ContainsAnyLetters(ReadOnlySpan<char> s)
221226
#endif
222227
}
223228

224-
private static bool HasSufficientUniquenessFactor(HashSet<string> set, ReadOnlySpan<string> uniqueStrings)
229+
internal static bool HasSufficientUniquenessFactor(HashSet<string> set, ReadOnlySpan<string> uniqueStrings, int acceptableNonUniqueCount)
225230
{
226231
set.Clear();
227232

228-
// Sufficient uniqueness factor of 95% is good enough.
229-
// Instead of ensuring that 95% of data is good, we stop when we know that at least 5% is bad.
230-
int acceptableNonUniqueCount = uniqueStrings.Length / 20;
231-
232233
foreach (string s in uniqueStrings)
233234
{
234235
if (!set.Add(s) && --acceptableNonUniqueCount < 0)

src/libraries/System.Collections.Immutable/tests/Frozen/KeyAnalyzerTests.cs

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
// The .NET Foundation licenses this file to you under the MIT license.
33

44
using System;
5+
using System.Collections.Generic;
6+
using System.Linq;
57
using Xunit;
68

79
namespace System.Collections.Frozen.Tests
@@ -212,5 +214,75 @@ public static void IsAllAscii()
212214
Assert.True(KeyAnalyzer.IsAllAscii("abcdefghij".AsSpan()));
213215
Assert.False(KeyAnalyzer.IsAllAscii("abcdéfghij".AsSpan()));
214216
}
217+
218+
[Fact]
219+
public static void ContainsAnyLetters()
220+
{
221+
Assert.True(KeyAnalyzer.ContainsAnyLetters("abc".AsSpan()));
222+
Assert.True(KeyAnalyzer.ContainsAnyLetters("ABC".AsSpan()));
223+
Assert.False(KeyAnalyzer.ContainsAnyLetters("123".AsSpan()));
224+
// note, must only pass ASCII to ContainsAnyLetters, anything else is a
225+
// Debug.Assert and would not have been called in the actual implementation
226+
}
227+
228+
[Fact]
229+
public static void HasSufficientUniquenessFactor()
230+
{
231+
HashSet<string> set = new HashSet<string>(StringComparer.Ordinal);
232+
233+
Assert.True(KeyAnalyzer.HasSufficientUniquenessFactor(set, new[] { "a", "b", "c" }, 0));
234+
Assert.Equal(3, set.Count);
235+
236+
Assert.True(KeyAnalyzer.HasSufficientUniquenessFactor(set, new[] { "a", "b", "a" }, 1));
237+
Assert.Equal(2, set.Count); // set should only have the non-collided ones
238+
239+
Assert.False(KeyAnalyzer.HasSufficientUniquenessFactor(set, new[] { "aa", "ab", "aa" }, 0));
240+
Assert.Equal(0, set.Count); // if we failed it should empty the set
241+
}
242+
243+
[Fact]
244+
public static void HasSufficientUniquenessFactorInsensitive()
245+
{
246+
HashSet<string> set = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
247+
248+
Assert.True(KeyAnalyzer.HasSufficientUniquenessFactor(set, new[] { "a", "B", "c" }, 0));
249+
Assert.Equal(3, set.Count);
250+
251+
Assert.True(KeyAnalyzer.HasSufficientUniquenessFactor(set, new[] { "aa", "AA" }, 1));
252+
Assert.Equal(1, set.Count); // set should only have the non-collided ones
253+
254+
Assert.False(KeyAnalyzer.HasSufficientUniquenessFactor(set, new[] { "aa", "AA" }, 0));
255+
}
256+
257+
[Fact]
258+
public static void HasSufficientUniquenessFactorLarge()
259+
{
260+
HashSet<string> set = new HashSet<string>(StringComparer.Ordinal);
261+
262+
Assert.True(KeyAnalyzer.HasSufficientUniquenessFactor(set, new[] { "a", "b", "c" }, 1));
263+
Assert.Equal(3, set.Count);
264+
265+
Assert.True(KeyAnalyzer.HasSufficientUniquenessFactor(set, new[] { "a", "b", "a" }, 1));
266+
Assert.Equal(2, set.Count); // set should only have the non-collided ones
267+
268+
Assert.False(KeyAnalyzer.HasSufficientUniquenessFactor(set, new[] { "a", "a", "a" }, 1));
269+
}
270+
271+
// reuse the typical data declared in the FrozenFromKnownValuesTests
272+
public static IEnumerable<object[]> TypicalData() => FrozenFromKnownValuesTests.StringStringData();
273+
274+
[Theory]
275+
[MemberData(nameof(TypicalData))]
276+
public static void HasSufficientUniquenessKnownData(Dictionary<string, string> source)
277+
{
278+
string[] keys = source.Keys.ToArray();
279+
HashSet<string> set = new HashSet<string>(source.Comparer);
280+
281+
int allowedCollisions = keys.Length / 20;
282+
bool passable = KeyAnalyzer.HasSufficientUniquenessFactor(set, keys.AsSpan(), allowedCollisions);
283+
284+
if (passable)
285+
Assert.InRange(set.Count, keys.Length - allowedCollisions, keys.Length);
286+
}
215287
}
216288
}

0 commit comments

Comments
 (0)