Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use next sibling in SyntaxNode.GetChildPosition() if available #66876

Merged
merged 14 commits into from
Mar 8, 2023
89 changes: 89 additions & 0 deletions src/Compilers/CSharp/Test/Syntax/Syntax/SyntaxListTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@

using Microsoft.CodeAnalysis.CSharp.Syntax;
using Microsoft.CodeAnalysis.CSharp.Test.Utilities;
using Microsoft.CodeAnalysis.PooledObjects;
using Microsoft.CodeAnalysis.Test.Utilities;
using System;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
Expand Down Expand Up @@ -305,5 +307,92 @@ public void WithLotsOfChildrenTest()
}
}
}

[Theory]
[CombinatorialData]
public void EnumerateWithManyChildren_Forward(bool trailingSeparator)
Copy link
Contributor

@AlekseyTs AlekseyTs Feb 17, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

EnumerateWithManyChildren_Forward

What would be the failure for these tests without the fix? Timeout? If so, consider adding a comment about that. #Resolved

{
const int n = 200000;
var builder = new StringBuilder();
builder.Append("int[] values = new[] { ");
for (int i = 0; i < n; i++) builder.Append("0, ");
if (!trailingSeparator) builder.Append("0 ");
builder.AppendLine("};");

var tree = CSharpSyntaxTree.ParseText(builder.ToString());
// Do not descend into InitializerExpressionSyntax since that will populate SeparatedWithManyChildren._children.
var node = tree.GetRoot().DescendantNodes().OfType<InitializerExpressionSyntax>().First();

foreach (var child in node.ChildNodesAndTokens())
{
_ = child.ToString();
}
}

// Tests should timeout when using SeparatedWithManyChildren.GetChildPosition()
// instead of GetChildPositionFromEnd().
[WorkItem(66475, "https://github.com/dotnet/roslyn/issues/66475")]
[Theory]
[CombinatorialData]
public void EnumerateWithManyChildren_Reverse(bool trailingSeparator)
{
const int n = 200000;
var builder = new StringBuilder();
builder.Append("int[] values = new[] { ");
for (int i = 0; i < n; i++) builder.Append("0, ");
if (!trailingSeparator) builder.Append("0 ");
builder.AppendLine("};");

var tree = CSharpSyntaxTree.ParseText(builder.ToString());
// Do not descend into InitializerExpressionSyntax since that will populate SeparatedWithManyChildren._children.
var node = tree.GetRoot().DescendantNodes().OfType<InitializerExpressionSyntax>().First();

foreach (var child in node.ChildNodesAndTokens().Reverse())
{
_ = child.ToString();
}
}

[Theory]
[InlineData("int[] values = new[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };")]
[InlineData("int[] values = new[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, };")]
public void EnumerateWithManyChildren_Compare(string source)
{
CSharpSyntaxTree.ParseText(source).VerifyChildNodePositions();

var builder = ArrayBuilder<SyntaxNodeOrToken>.GetInstance();
foreach (var node in parseAndGetInitializer(source).ChildNodesAndTokens().Reverse())
{
builder.Add(node);
}
builder.ReverseContents();
var childNodes1 = builder.ToImmutableAndFree();

builder = ArrayBuilder<SyntaxNodeOrToken>.GetInstance();
foreach (var node in parseAndGetInitializer(source).ChildNodesAndTokens())
{
builder.Add(node);
}
var childNodes2 = builder.ToImmutableAndFree();

Assert.Equal(childNodes1.Length, childNodes2.Length);

for (int i = 0; i < childNodes1.Length; i++)
{
var child1 = childNodes1[i];
var child2 = childNodes2[i];
Assert.Equal(child1.Position, child2.Position);
Assert.Equal(child1.EndPosition, child2.EndPosition);
Assert.Equal(child1.Width, child2.Width);
Assert.Equal(child1.FullWidth, child2.FullWidth);
}

static InitializerExpressionSyntax parseAndGetInitializer(string source)
{
var tree = CSharpSyntaxTree.ParseText(source);
// Do not descend into InitializerExpressionSyntax since that will populate SeparatedWithManyChildren._children.
return tree.GetRoot().DescendantNodes().OfType<InitializerExpressionSyntax>().First();
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,11 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;

namespace Microsoft.CodeAnalysis.Syntax
{
internal partial class SyntaxList
{
internal class SeparatedWithManyChildren : SyntaxList
internal sealed class SeparatedWithManyChildren : SyntaxList
{
private readonly ArrayElement<SyntaxNode?>[] _children;

Expand Down Expand Up @@ -39,6 +37,25 @@ internal SeparatedWithManyChildren(InternalSyntax.SyntaxList green, SyntaxNode?

return _children[i >> 1].Value;
}

internal override int GetChildPosition(int index)
{
// If the previous sibling (ignoring separator) is not cached, but the next sibling
// (ignoring separator) is cached, use the next sibling to determine position.
int valueIndex = (index & 1) != 0 ? index - 1 : index;
// The check for valueIndex >= Green.SlotCount - 2 ignores the last item because the last item
// is a separator and separators are not cached. In those cases, when the index represents
// the last or next to last item, we still want to calculate the position from the end of
// the list rather than the start.
if (valueIndex > 1
&& GetCachedSlot(valueIndex - 2) is null
&& (valueIndex >= Green.SlotCount - 2 || GetCachedSlot(valueIndex + 2) is { }))
{
return GetChildPositionFromEnd(index);
Copy link
Contributor

@AlekseyTs AlekseyTs Feb 16, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

GetChildPositionFromEnd

It feels like the correlation between cache checks performed by GetChildPositionFromEnd and cache checks performed here would be more obvious if we were using GetCachedSlot helper rather than working with _children on the low level. Also, could we instead adjust implementation of GetChildPosition to perform the same cache tests for the following child? Then all the logic would be in one place, and, perhaps, we wouldn't need to provide a general GetChildPositionFromEnd helper, since it looks like we really want to handle just 3 specific situations.

It looks like the optimization targets very limited set of scenarios. For example, when we are far from the end and the next sibling isn't cached, but the one after the next is cached, we won't take advantage of that cached sibling. Would it make sense to generalize the optimization. For example, even if nothing is cached, it still might be faster to calculate from end simply because the item is closer to the end than to the front. Similarly, we could make decision based on the proximity of the first cached item going backwards vs. going forward. Perhaps we could maintain a bitmap of cached items to speed-up the process of finding closest cached items. #Closed

Copy link
Member Author

@cston cston Feb 16, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like the optimization targets very limited set of scenarios.

Yes, the optimization specifically targets the scenario where the first iteration through the child list is in reverse, so the previous siblings are not in the cache. That's the one scenario we have currently where there is a perf issue. The optimization here is simply to calculate the position based on the offset from the immediately following sibling when that sibling is in the cache; otherwise, we use the existing approach.

We should limit the change here to fixing this one scenario, to avoid perf regressions in other cases. If additional scenarios arise, we can consider further optimizations.

Copy link
Member Author

@cston cston Feb 16, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, could we instead adjust implementation of GetChildPosition to perform the same cache tests for the following child?

GetChildPosition() doesn't know about separated lists (where the separator is not in the cache) so it would be surprising for that method to skip over some siblings when determining whether using the following sibling is a better choice.

Then all the logic would be in one place, and, perhaps, we wouldn't need to provide a general GetChildPositionFromEnd helper ...

I added GetChildPositionFromEnd() next to GetChildPostion() so the two loops were together. And I think we'd still need the cases that are handled in GetChildPositionFromEnd(), even if the methods were combined. I'll keep the two methods separate so that both implementations are clear.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It feels like the correlation between cache checks performed by GetChildPositionFromEnd and cache checks performed here would be more obvious if we were using GetCachedSlot helper rather than working with _children on the low level.

Updated.

}

return base.GetChildPosition(index);
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ namespace Microsoft.CodeAnalysis.Syntax
{
internal partial class SyntaxList
{
internal class SeparatedWithManyWeakChildren : SyntaxList
internal sealed class SeparatedWithManyWeakChildren : SyntaxList
{
private readonly ArrayElement<WeakReference<SyntaxNode>?>[] _children;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ namespace Microsoft.CodeAnalysis.Syntax
{
internal partial class SyntaxList
{
internal class WithManyChildren : SyntaxList
internal sealed class WithManyChildren : SyntaxList
{
private readonly ArrayElement<SyntaxNode?>[] _children;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ namespace Microsoft.CodeAnalysis.Syntax
{
internal partial class SyntaxList
{
internal class WithManyWeakChildren : SyntaxList
internal sealed class WithManyWeakChildren : SyntaxList
{
private readonly ArrayElement<WeakReference<SyntaxNode>?>[] _children;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ namespace Microsoft.CodeAnalysis.Syntax
{
internal partial class SyntaxList
{
internal class WithThreeChildren : SyntaxList
internal sealed class WithThreeChildren : SyntaxList
{
private SyntaxNode? _child0;
private SyntaxNode? _child1;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ namespace Microsoft.CodeAnalysis.Syntax
{
internal partial class SyntaxList
{
internal class WithTwoChildren : SyntaxList
internal sealed class WithTwoChildren : SyntaxList
{
private SyntaxNode? _child0;
private SyntaxNode? _child1;
Expand Down
35 changes: 35 additions & 0 deletions src/Compilers/Core/Portable/Syntax/SyntaxNode.cs
Original file line number Diff line number Diff line change
Expand Up @@ -612,6 +612,11 @@ internal int GetChildIndex(int slot)
/// </summary>
internal virtual int GetChildPosition(int index)
{
if (this.GetCachedSlot(index) is { } node)
{
return node.Position;
}

int offset = 0;
var green = this.Green;
while (index > 0)
Expand All @@ -632,6 +637,36 @@ internal virtual int GetChildPosition(int index)
return this.Position + offset;
}

// Similar to GetChildPosition() but calculating based on the positions of
// following siblings rather than previous siblings.
internal int GetChildPositionFromEnd(int index)
{
if (this.GetCachedSlot(index) is { } node)
{
return node.Position;
}

var green = this.Green;
int offset = green.GetSlot(index)?.FullWidth ?? 0;
Copy link
Contributor

@AlekseyTs AlekseyTs Feb 16, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

green.GetSlot(index)

It looks like we completely ignore the fact that the item could be cached by now and, therefore, there is no need to iterate. #Closed

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This matches the behavior of GetChildPosition() above. It looks like GetChildPosition() for a syntax list is typically called when creating the corresponding red node, which is then cached.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This matches the behavior of GetChildPosition() above.

I won't object to adjusting that one too.

int slotCount = green.SlotCount;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider moving the -1 to this expression.

while (index < slotCount - 1)
{
index++;
var nextSibling = this.GetCachedSlot(index);
if (nextSibling != null)
{
return nextSibling.Position - offset;
}
var greenChild = green.GetSlot(index);
if (greenChild != null)
{
offset += greenChild.FullWidth;
}
}

return this.EndPosition - offset;
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

overall, i have no issue with the approach. but i think we def need tests around ensuring that reverse iterating produces nodes/tokens in the correct location. can you add a bunch of tests that show that if we reverse iterate the positions of things are all correct?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added verifying results from GetChildPosition() and GetChildPositionFromEnd() to tests in IOperation verification.


public Location GetLocation()
{
return this.SyntaxTree.GetLocation(this.Span);
Expand Down
69 changes: 69 additions & 0 deletions src/Compilers/Test/Core/Compilation/CompilationExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
using Roslyn.Test.Utilities;
using Roslyn.Utilities;
using Xunit;
using SeparatedWithManyChildren = Microsoft.CodeAnalysis.Syntax.SyntaxList.SeparatedWithManyChildren;

namespace Microsoft.CodeAnalysis.Test.Utilities
{
Expand Down Expand Up @@ -315,6 +316,8 @@ void checkTimeout()
}
}
}

tree.VerifyChildNodePositions();
}

var explicitNodeMap = new Dictionary<SyntaxNode, IOperation>();
Expand Down Expand Up @@ -381,6 +384,72 @@ void checkControlFlowGraph(IOperation root, ISymbol associatedSymbol)
}
}

internal static void VerifyChildNodePositions(this SyntaxTree tree)
{
var nodes = tree.GetRoot().DescendantNodesAndSelf();
foreach (var node in nodes)
{
var childNodesAndTokens = node.ChildNodesAndTokens();
if (childNodesAndTokens.Node is { } container)
{
for (int i = 0; i < childNodesAndTokens.Count; i++)
{
if (container.GetNodeSlot(i) is SeparatedWithManyChildren separatedList)
{
verifyPositions(separatedList);
}
}
}
}

static void verifyPositions(SeparatedWithManyChildren separatedList)
{
var green = (Microsoft.CodeAnalysis.Syntax.InternalSyntax.SyntaxList)separatedList.Green;

// Calculate positions from start, using existing cache.
int[] positions = getPositionsFromStart(separatedList);

// Calculate positions from end, using existing cache.
AssertEx.Equal(positions, getPositionsFromEnd(separatedList));

// Avoid testing without caches if the number of children is large.
if (separatedList.SlotCount > 100)
{
return;
}

// Calculate positions from start, with empty cache.
AssertEx.Equal(positions, getPositionsFromStart(new SeparatedWithManyChildren(green, null, separatedList.Position)));

// Calculate positions from end, with empty cache.
AssertEx.Equal(positions, getPositionsFromEnd(new SeparatedWithManyChildren(green, null, separatedList.Position)));
}

// Calculate positions from start, using any existing cache of red nodes on separated list.
static int[] getPositionsFromStart(SeparatedWithManyChildren separatedList)
{
int n = separatedList.SlotCount;
var positions = new int[n];
for (int i = 0; i < n; i++)
{
positions[i] = separatedList.GetChildPosition(i);
}
return positions;
}

// Calculate positions from end, using any existing cache of red nodes on separated list.
static int[] getPositionsFromEnd(SeparatedWithManyChildren separatedList)
{
int n = separatedList.SlotCount;
var positions = new int[n];
for (int i = n - 1; i >= 0; i--)
{
positions[i] = separatedList.GetChildPositionFromEnd(i);
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and just to check. this actually computes, and is unaffected by anything potentially cached with the first for-loop?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch, thanks.

Actually, neither of the two loops cache any nodes, but both loops get the positions of the nodes using the cache is there at the time the verify method is called.

Add comments and also added verification without empty caches for both directions.

return positions;
}
}

/// <summary>
/// The reference assembly System.Runtime.InteropServices.WindowsRuntime was removed in net5.0. This builds
/// up <see cref="CompilationReference"/> which contains all of the well known types that were used from that
Expand Down
50 changes: 50 additions & 0 deletions src/Compilers/VisualBasic/Test/Syntax/Syntax/SyntaxListTests.vb
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
' See the LICENSE file in the project root for more information.

Imports Microsoft.CodeAnalysis.VisualBasic.Syntax
Imports Roslyn.Test.Utilities

Namespace Microsoft.CodeAnalysis.VisualBasic.UnitTests
Public Class SyntaxListTests
Expand Down Expand Up @@ -241,5 +242,54 @@ Namespace Microsoft.CodeAnalysis.VisualBasic.UnitTests
Next
End Sub

<Fact>
Public Sub EnumerateWithManyChildren_Forward()
Const n = 200000
Dim builder As New System.Text.StringBuilder()
builder.AppendLine("Module M")
builder.AppendLine(" Sub Main")
builder.Append(" Dim values As Integer() = {")
For i = 0 To n - 1
builder.Append("0, ")
Next
builder.AppendLine("}")
builder.AppendLine(" End Sub")
builder.AppendLine("End Module")

Dim tree = VisualBasicSyntaxTree.ParseText(builder.ToString())
' Do not descend into CollectionInitializerSyntax since that will populate SeparatedWithManyChildren._children.
Dim node = tree.GetRoot().DescendantNodes().OfType(Of CollectionInitializerSyntax)().First()

For Each child In node.ChildNodesAndTokens()
child.ToString()
Next
End Sub

' Tests should timeout when using SeparatedWithManyChildren.GetChildPosition()
' instead of GetChildPositionFromEnd().
<WorkItem(66475, "https://github.com/dotnet/roslyn/issues/66475")>
<Fact>
Public Sub EnumerateWithManyChildren_Reverse()
Const n = 200000
Dim builder As New System.Text.StringBuilder()
builder.AppendLine("Module M")
builder.AppendLine(" Sub Main")
builder.Append(" Dim values As Integer() = {")
For i = 0 To n - 1
builder.Append("0, ")
Next
builder.AppendLine("}")
builder.AppendLine(" End Sub")
builder.AppendLine("End Module")

Dim tree = VisualBasicSyntaxTree.ParseText(builder.ToString())
' Do not descend into CollectionInitializerSyntax since that will populate SeparatedWithManyChildren._children.
Dim node = tree.GetRoot().DescendantNodes().OfType(Of CollectionInitializerSyntax)().First()

For Each child In node.ChildNodesAndTokens().Reverse()
child.ToString()
Next
End Sub

End Class
End Namespace