Skip to content

Address feedback on dense FrozenDictionary optimization #112298

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Feb 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -123,9 +123,9 @@ private static FrozenDictionary<TKey, TValue> CreateFromDictionary<TKey, TValue>
if (typeof(TKey).IsValueType && ReferenceEquals(comparer, EqualityComparer<TKey>.Default))
{
#if NET
if (DenseIntegralFrozenDictionary.TryCreate(source, out FrozenDictionary<TKey, TValue>? result))
if (DenseIntegralFrozenDictionary.CreateIfValid(source) is { } denseResult)
{
return result;
return denseResult;
}
#endif

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,63 +17,46 @@ namespace System.Collections.Frozen
internal sealed class DenseIntegralFrozenDictionary
{
/// <summary>
/// Maximum allowed ratio of the number of key/value pairs to the range between the minimum and maximum keys.
/// Maximum allowed factor by which the spread between the min and max of keys in the dictionary may exceed the count.
/// </summary>
/// <remarks>
/// <para>
/// This is dialable. The closer the value gets to 0, the more likely this implementation will be used,
/// and the more memory will be consumed to store the values. The value of 0.1 means that up to 90% of the
/// This is dialable. The larger this value, the more likely this implementation will be used,
/// and the more memory will be consumed to store the values. The value of 10 means that up to 90% of the
/// slots in the values array may be unused.
/// </para>
/// <para>
/// As an example, DaysOfWeek's min is 0, its max is 6, and it has 7 values, such that 7 / (6 - 0 + 1) = 1.0; thus
/// with a threshold of 0.1, DaysOfWeek will use this implementation. But SocketError's min is -1, its max is 11004, and
/// it has 47 values, such that 47 / (11004 - (-1) + 1) = 0.004; thus, SocketError will not use this implementation.
/// </para>
/// </remarks>
private const double CountToLengthRatio = 0.1;
private const int LengthToCountFactor = 10;

public static bool TryCreate<TKey, TValue>(Dictionary<TKey, TValue> source, [NotNullWhen(true)] out FrozenDictionary<TKey, TValue>? result)
public static FrozenDictionary<TKey, TValue>? CreateIfValid<TKey, TValue>(Dictionary<TKey, TValue> source)
where TKey : notnull
{
// Int32 and integer types that fit within Int32. This is to minimize difficulty later validating that
// inputs are in range of int: we can always cast everything to Int32 without loss of information.

if (typeof(TKey) == typeof(byte) || (typeof(TKey).IsEnum && typeof(TKey).GetEnumUnderlyingType() == typeof(byte)))
return TryCreate<TKey, byte, TValue>(source, out result);

if (typeof(TKey) == typeof(sbyte) || (typeof(TKey).IsEnum && typeof(TKey).GetEnumUnderlyingType() == typeof(sbyte)))
return TryCreate<TKey, sbyte, TValue>(source, out result);

if (typeof(TKey) == typeof(ushort) || (typeof(TKey).IsEnum && typeof(TKey).GetEnumUnderlyingType() == typeof(ushort)))
return TryCreate<TKey, ushort, TValue>(source, out result);

if (typeof(TKey) == typeof(short) || (typeof(TKey).IsEnum && typeof(TKey).GetEnumUnderlyingType() == typeof(short)))
return TryCreate<TKey, short, TValue>(source, out result);

if (typeof(TKey) == typeof(int) || (typeof(TKey).IsEnum && typeof(TKey).GetEnumUnderlyingType() == typeof(int)))
return TryCreate<TKey, int, TValue>(source, out result);

result = null;
return false;
// inputs are in range of int: we can always cast everything here to Int32 without loss of information.
return
typeof(TKey) == typeof(byte) || (typeof(TKey).IsEnum && typeof(TKey).GetEnumUnderlyingType() == typeof(byte)) ? CreateIfValid<TKey, byte, TValue>(source) :
typeof(TKey) == typeof(sbyte) || (typeof(TKey).IsEnum && typeof(TKey).GetEnumUnderlyingType() == typeof(sbyte)) ? CreateIfValid<TKey, sbyte, TValue>(source) :
typeof(TKey) == typeof(ushort) || (typeof(TKey).IsEnum && typeof(TKey).GetEnumUnderlyingType() == typeof(ushort)) ? CreateIfValid<TKey, ushort, TValue>(source) :
typeof(TKey) == typeof(short) || (typeof(TKey).IsEnum && typeof(TKey).GetEnumUnderlyingType() == typeof(short)) ? CreateIfValid<TKey, short, TValue>(source) :
typeof(TKey) == typeof(char) ? CreateIfValid<TKey, char, TValue>(source) :
typeof(TKey) == typeof(int) || (typeof(TKey).IsEnum && typeof(TKey).GetEnumUnderlyingType() == typeof(int)) ? CreateIfValid<TKey, int, TValue>(source) :
null;
}

private static bool TryCreate<TKey, TKeyUnderlying, TValue>(Dictionary<TKey, TValue> source, [NotNullWhen(true)] out FrozenDictionary<TKey, TValue>? result)
private static FrozenDictionary<TKey, TValue>? CreateIfValid<TKey, TKeyUnderlying, TValue>(Dictionary<TKey, TValue> source)
where TKey : notnull
where TKeyUnderlying : unmanaged, IBinaryInteger<TKeyUnderlying>
{
// Start enumerating the dictionary to ensure it has at least one element.
int count = source.Count;

Dictionary<TKey, TValue>.Enumerator e = source.GetEnumerator();
if (e.MoveNext())
{
// Get that element and treat it as the min and max. Then continue enumerating the remainder
// of the dictionary to count the number of elements and track the full min and max.
int count = 1;
// Get the first element and treat it as the min and max. Then continue enumerating the remainder
// of the dictionary to track the full min and max. Along the way, bail if at any point the length
// exceeds the allowed limit based on the known count.
int min = int.CreateTruncating((TKeyUnderlying)(object)e.Current.Key);
int max = min;
while (e.MoveNext())
{
count++;
int key = int.CreateTruncating((TKeyUnderlying)(object)e.Current.Key);
if (key < min)
{
Expand All @@ -85,72 +68,49 @@ private static bool TryCreate<TKey, TKeyUnderlying, TValue>(Dictionary<TKey, TVa
}
}

// Based on the min and max, determine the spread. If the range fits within a non-negative Int32
// and the ratio of the number of elements in the dictionary to the length is within the allowed
// threshold, create the new dictionary.
long maxAllowedLength = Math.Min((long)count * LengthToCountFactor, Array.MaxLength);
long length = (long)max - min + 1;
Debug.Assert(length > 0);
if (length <= int.MaxValue &&
(double)count / length >= CountToLengthRatio)
if (length <= maxAllowedLength)
{
// Create arrays of the keys and values, sorted ascending by key.
var keys = new TKey[count];
var values = new TValue[keys.Length];
int i = 0;
foreach (KeyValuePair<TKey, TValue> entry in source)
{
keys[i] = entry.Key;
values[i] = entry.Value;
i++;
}

if (i != keys.Length)
{
throw new InvalidOperationException(SR.CollectionModifiedDuringEnumeration);
}

// Sort the values so that we can more easily check for contiguity but also so that
// the keys/values returned from various properties/enumeration are in a predictable order.
Array.Sort(keys, values);

// Determine whether all of the keys are contiguous starting at 0.
bool isFull = true;
for (i = 0; i < keys.Length; i++)
{
if (int.CreateTruncating((TKeyUnderlying)(object)keys[i]) != i)
{
isFull = false;
break;
}
}

if (isFull)
if (min == 0 && length == count)
{
// All of the keys are contiguous starting at 0, so we can use an implementation that
// just stores all the values in an array indexed by key. This both provides faster access
// and allows the single values array to be used for lookups and for ValuesCore.
result = new WithFullValues<TKey, TKeyUnderlying, TValue>(keys, values);
foreach (KeyValuePair<TKey, TValue> entry in source)
{
int index = int.CreateTruncating((TKeyUnderlying)(object)entry.Key);
keys[index] = entry.Key;
values[index] = entry.Value;
}

return new WithFullValues<TKey, TKeyUnderlying, TValue>(keys, values);
}
else
{
// Some of the keys in the length are missing, so create an array to hold optional values
// and populate the entries just for the elements we have. The 0th element of the optional
// values array corresponds to the element with the min key.
var optionalValues = new Optional<TValue>[length];
for (i = 0; i < keys.Length; i++)
int i = 0;
foreach (KeyValuePair<TKey, TValue> entry in source)
{
optionalValues[int.CreateTruncating((TKeyUnderlying)(object)keys[i]) - min] = new(values[i], hasValue: true);
keys[i] = entry.Key;
values[i] = entry.Value;
i++;

optionalValues[int.CreateTruncating((TKeyUnderlying)(object)entry.Key) - min] = new(entry.Value, hasValue: true);
}

result = new WithOptionalValues<TKey, TKeyUnderlying, TValue>(keys, values, optionalValues, min);
return new WithOptionalValues<TKey, TKeyUnderlying, TValue>(keys, values, optionalValues, min);
}

return true;
}
}

result = null;
return false;
return null;
}

/// <summary>Implementation used when all keys are contiguous starting at 0.</summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,28 @@ namespace System.Collections.Frozen.Tests
{
public class FrozenFromKnownValuesTests
{
public static IEnumerable<object[]> Int32StringData() =>
from keys in new int[][]
public static IEnumerable<object[]> Int32StringData()
{
IEnumerable<int>[] ints =
[
[0],
[0, 1],
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
[0, 2, 4, 6, 8, 10],
[-1, 0, 2, 4, 6, 8, 10],
Enumerable.Range(42, 100),
Enumerable.Range(-42, 100),
Enumerable.Range(0, 20).Select(i => i * 11),
];

for (int i = 0; i < ints.Length; i++)
{
new int[] { 0 },
new int[] { 0, 1 },
new int[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 },
new int[] { 0, 2, 4, 6, 8, 10 },
new int[] { -1, 0, 2, 4, 6, 8, 10 },
Enumerable.Range(42, 100).ToArray(),
ints[i] = ints[i].OrderBy(_ => Guid.NewGuid());
}
select new object[] { keys.ToDictionary(i => i, i => i.ToString()) };

return from keys in ints
select new object[] { keys.ToDictionary(i => i, i => i.ToString()) };
}

public static IEnumerable<object[]> StringStringData() =>
from comparer in new[] { StringComparer.Ordinal, StringComparer.OrdinalIgnoreCase }
Expand Down Expand Up @@ -168,9 +179,11 @@ public void FrozenDictionary_Int32String(Dictionary<int, string> source)
FrozenDictionaryWorker(source.ToDictionary(i => (short)i.Key, i => i.Value));
FrozenDictionaryWorker(source.ToDictionary(i => i.Key, i => i.Value));
FrozenDictionaryWorker(source.ToDictionary(i => (long)i.Key, i => i.Value));
FrozenDictionaryWorker(source.ToDictionary(i => (DayOfWeek)i.Key, i => i.Value));

FrozenDictionaryWorker(source.ToDictionary(i => (byte)i.Key, i => i.Value));
FrozenDictionaryWorker(source.ToDictionary(i => (ushort)i.Key, i => i.Value));
FrozenDictionaryWorker(source.ToDictionary(i => (char)i.Key, i => i.Value));
FrozenDictionaryWorker(source.ToDictionary(i => (uint)i.Key, i => i.Value));
FrozenDictionaryWorker(source.ToDictionary(i => (ulong)i.Key, i => i.Value));

Expand Down
Loading