Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix SortedSet.IsSubsetOf (and friends) 102118 #102249

Open
wants to merge 17 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,11 @@ internal override int InternalIndexOf(T item)
return -1;
}

// Reports Count as the bit-helper length for this subset.
// NOTE(review): presumably valid because this type's InternalIndexOf only yields indices below Count - confirm against InternalIndexOf.
internal override int GetInternalIndexOfBitHelperLength() => Count;

/// <summary>
/// Checks whether this subset is out of date, and updates it if necessary.
/// </summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ public partial class SortedSet<T> : ISet<T>, ICollection<T>, ICollection, IReadO

// Length, in ints, of the stackalloc'ed scratch span used by CheckUniqueAndUnfoundElements;
// a BitHelper needing more ints than this is backed by a heap-allocated array instead.
internal const int StackAllocThreshold = 100;

// NOTE(review): GetInternalIndexOfBitHelperLength, as written, compares Count against a literal 255 and
// does not reference this constant - confirm whether the two are meant to be tied together.
internal const int BitHelperThreshold = 8191; // See GetInternalIndexOfBitHelperLength on how this relates to count and max tree height

#endregion

#region Constructors
Expand Down Expand Up @@ -714,11 +716,13 @@ private void ReplaceNode(Node match, Node parentOfMatch, Node successor, Node pa
/// <returns>The item's zero-based index in this set, or -1 if it isn't found.</returns>
/// <remarks>
/// <para>
/// This implementation is based off of http://en.wikipedia.org/wiki/Binary_Tree#Methods_for_storing_binary_trees.
/// This implementation is based off of http://en.wikipedia.org/wiki/Binary_Tree#Methods_for_storing_binary_trees
/// if Count is not greater than InternalIndexOfCountThreshold, otherwise it returns the index according to the order of the set's elements.
/// </para>
/// <para>
/// This method is used with the <see cref="BitHelper"/> class. Note that this implementation is
/// completely different from <see cref="TreeSubSet"/>'s, and that the two should not be mixed.
/// If this method is overridden, <see cref="GetInternalIndexOfBitHelperLength"/> needs to be changed accordingly as well.
/// </para>
/// </remarks>
internal virtual int InternalIndexOf(T item)
Expand All @@ -740,6 +744,39 @@ internal virtual int InternalIndexOf(T item)
return -1;
}

/// <summary>
/// Computes how many bits CheckUniqueAndUnfoundElements needs in order to flag found elements by their
/// InternalIndexOf value, i.e. the largest possible InternalIndexOf result plus one.
/// </summary>
/// <returns>
/// The largest possible InternalIndexOf result plus one, or -1 when Count exceeds 255.
/// </returns>
/// <remarks>
/// A result of -1 signals that InternalIndexOf should not be used; the caller then records the
/// found elements' nodes in a HashSet instead of a bit set.
/// </remarks>
internal virtual int GetInternalIndexOfBitHelperLength()
{
    // Element count, the tree's maximum height for that count, and the node count of a perfect tree of that height:
    //
    // | Count      | MaxHeight | Perfect tree count
    // | 1 - 2      | 2         | 3
    // | 3 - 6      | 4         | 15
    // | 7 - 14     | 6         | 63
    // | 15 - 30    | 8         | 255
    // | 31 - 62    | 10        | 1023
    // | 63 - 126   | 12        | 4095
    // | 127 - 254  | 14        | 16383
    // | 255 - 510  | 16        | 65535
    // | 511 - 1022 | 18        | 262143

    if (Count <= 255)
    {
        // A red-black tree with n nodes is at most 2*lg(n+1) levels high
        // (page 264 of "Introduction to algorithms" by Thomas H. Cormen).
        int heightBound = 2 * Log2(Count + 1);

        // A perfect tree of height H holds 2^0 + 2^1 + ... + 2^(H-1) = 2^H - 1 nodes.
        return (1 << heightBound) - 1;
    }

    // Too many elements for the bit-set approach.
    return -1;
}

/// <summary>
/// Forwards to the four-argument FindRange overload with both the lower and the upper bound marked as active.
/// </summary>
internal Node? FindRange(T? from, T? to)
{
    return FindRange(from, to, lowerBoundActive: true, upperBoundActive: true);
}

internal Node? FindRange(T? from, T? to, bool lowerBoundActive, bool upperBoundActive)
Expand Down Expand Up @@ -1385,43 +1422,73 @@ private unsafe ElementCount CheckUniqueAndUnfoundElements(IEnumerable<T> other,
return result;
}

int originalLastIndex = Count;
int intArrayLength = BitHelper.ToIntArrayLength(originalLastIndex);

Span<int> span = stackalloc int[StackAllocThreshold];
BitHelper bitHelper = intArrayLength <= StackAllocThreshold ?
new BitHelper(span.Slice(0, intArrayLength), clear: true) :
new BitHelper(new int[intArrayLength], clear: false);

// count of items in other not found in this
int UnfoundCount = 0;
int unfoundCount = 0;
// count of unique items in other found in this
int uniqueFoundCount = 0;

foreach (T item in other)
int bitHelperLength = GetInternalIndexOfBitHelperLength();

if (bitHelperLength == -1)
{
int index = InternalIndexOf(item);
if (index >= 0)
HashSet<Node> foundNodes = new HashSet<Node>();

foreach(T item in other)
{
if (!bitHelper.IsMarked(index))
Node? node = FindNode(item);
if (node != null)
{
// item hasn't been seen yet
bitHelper.MarkBit(index);
uniqueFoundCount++;
if (foundNodes.Add(node))
{
// item hasn't been seen yet
uniqueFoundCount++;
}
}
else
{
unfoundCount++;
if (returnIfUnfound)
{
break;
}
}
}
else
}
else
{
int intArrayLength = BitHelper.ToIntArrayLength(bitHelperLength);

Span<int> span = stackalloc int[StackAllocThreshold];
BitHelper bitHelper = intArrayLength <= StackAllocThreshold ?
new BitHelper(span.Slice(0, intArrayLength), clear: true) :
new BitHelper(new int[intArrayLength], clear: false);

foreach (T item in other)
{
UnfoundCount++;
if (returnIfUnfound)
int index = InternalIndexOf(item);
if (index >= 0)
{
break;
Debug.Assert(index < bitHelperLength);
if (!bitHelper.IsMarked(index))
{
// item hasn't been seen yet
bitHelper.MarkBit(index);
uniqueFoundCount++;
}
}
else
{
unfoundCount++;
if (returnIfUnfound)
{
break;
}
}
}
}

result.UniqueCount = uniqueFoundCount;
result.UnfoundCount = UnfoundCount;
result.UnfoundCount = unfoundCount;
return result;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,50 @@ public void SortedSet_Generic_IntersectWith_SupersetEnumerableWithDups()
Assert.Equal(new[] { 3, 5, 7 }, set);
}

// Regression test for https://github.com/dotnet/runtime/issues/102118
[Fact]
public void SortedSet_Generic_SetEquals_UnbalancedWithDup()
{
    // Ascending values 0..17 with the final value repeated once.
    int[] values = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 17 };
    SortedSet<int> sortedSet = new SortedSet<int>();

    // Elements are inserted one at a time, in ascending order, rather than through the collection constructor.
    for (int i = 0; i < values.Length; i++)
    {
        sortedSet.Add(values[i]);
    }

    // The duplicate in the input must not stop the set from comparing equal to it.
    bool isEqual = sortedSet.SetEquals(values);
    Assert.True(isEqual);
}

// Exercises the HashSet-based fallback of CheckUniqueAndUnfoundElements, which is only taken when
// GetInternalIndexOfBitHelperLength returns -1, i.e. when the set's Count is strictly greater than 255.
[Fact]
public void SortedSet_Generic_SetEquals_CountAboveThresholdInGetInternalIndexOfBitHelperLength()
{
    // 256 is the smallest Count satisfying the `Count > 255` guard in GetInternalIndexOfBitHelperLength.
    // A value of 255 would still go through the BitHelper path and leave the fallback untested.
    const int AboveInternalIndexOfCountThreshold = 256;

    var setData = new int[AboveInternalIndexOfCountThreshold];
    for (int i = 0; i < AboveInternalIndexOfCountThreshold; i++)
    {
        setData[i] = i;
    }
    var set = new SortedSet<int>(setData);

    // Every element of the set plus one element (-1) that is not in the set.
    var superSetData = new int[AboveInternalIndexOfCountThreshold + 1];
    Array.Copy(setData, superSetData, AboveInternalIndexOfCountThreshold);
    superSetData[AboveInternalIndexOfCountThreshold] = -1;

    // Every element of the set except the last one.
    var subSetData = new int[AboveInternalIndexOfCountThreshold - 1];
    Array.Copy(setData, subSetData, AboveInternalIndexOfCountThreshold - 1);

    // Same length as the set, but with one element swapped for a value that is not in the set.
    var differentSet = new int[AboveInternalIndexOfCountThreshold];
    Array.Copy(setData, differentSet, AboveInternalIndexOfCountThreshold);
    differentSet[0] = -1;

    // Every element of the set with the last one repeated; the repeat must be counted only once.
    var withDuplicateData = new int[AboveInternalIndexOfCountThreshold + 1];
    Array.Copy(setData, withDuplicateData, AboveInternalIndexOfCountThreshold);
    withDuplicateData[AboveInternalIndexOfCountThreshold] = setData[AboveInternalIndexOfCountThreshold - 1];

    Assert.True(set.SetEquals(setData));
    Assert.True(set.SetEquals(withDuplicateData));
    Assert.False(set.SetEquals(superSetData));
    Assert.False(set.SetEquals(subSetData));
    Assert.False(set.SetEquals(differentSet));
}

[Fact]
public void SortedSet_Generic_GetViewBetween_MinMax_Exhaustive()
{
Expand Down
Loading