Skip to content

[Perf] Improve SyntaxRewriter visitation performance #2726

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Jul 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,13 @@ let syntaxRewriterFile = SourceFileSyntax(leadingTrivia: copyrightHeader) {
"""
)

DeclSyntax(
"""
/// 'Syntax' object factory recycling 'Syntax.Info' instances.
private let nodeFactory: SyntaxNodeFactory = SyntaxNodeFactory()
"""
)

DeclSyntax(
"""
public init(viewMode: SyntaxTreeViewMode = .sourceAccurate) {
Expand All @@ -65,7 +72,8 @@ let syntaxRewriterFile = SourceFileSyntax(leadingTrivia: copyrightHeader) {
"""
/// Rewrite `node`, keeping its parent unless `detach` is `true`.
public func rewrite(_ node: some SyntaxProtocol, detach: Bool = false) -> Syntax {
let rewritten = self.dispatchVisit(Syntax(node))
var rewritten = Syntax(node)
self.dispatchVisit(&rewritten)
if detach {
return rewritten
}
Expand Down Expand Up @@ -126,15 +134,19 @@ let syntaxRewriterFile = SourceFileSyntax(leadingTrivia: copyrightHeader) {
/// - Returns: the rewritten node
@available(*, deprecated, renamed: "rewrite(_:detach:)")
public func visit(_ node: Syntax) -> Syntax {
return dispatchVisit(node)
var rewritten = node
dispatchVisit(&rewritten)
return rewritten
}
"""
)

DeclSyntax(
"""
public func visit<T: SyntaxChildChoices>(_ node: T) -> T {
return dispatchVisit(Syntax(node)).cast(T.self)
var rewritten = Syntax(node)
dispatchVisit(&rewritten)
return rewritten.cast(T.self)
}
"""
)
Expand All @@ -148,7 +160,7 @@ let syntaxRewriterFile = SourceFileSyntax(leadingTrivia: copyrightHeader) {
/// - Returns: the rewritten node
\(node.apiAttributes())\
open func visit(_ node: \(node.kind.syntaxType)) -> \(node.kind.syntaxType) {
return visitChildren(node)
return visitChildren(node._syntaxNode).cast(\(node.kind.syntaxType).self)
}
"""
)
Expand All @@ -160,7 +172,7 @@ let syntaxRewriterFile = SourceFileSyntax(leadingTrivia: copyrightHeader) {
/// - Returns: the rewritten node
\(node.apiAttributes())\
open func visit(_ node: \(node.kind.syntaxType)) -> \(node.baseType.syntaxBaseName) {
return \(node.baseType.syntaxBaseName)(visitChildren(node))
return \(node.baseType.syntaxBaseName)(visitChildren(node._syntaxNode).cast(\(node.kind.syntaxType).self))
}
"""
)
Expand All @@ -177,7 +189,9 @@ let syntaxRewriterFile = SourceFileSyntax(leadingTrivia: copyrightHeader) {
/// - Returns: the rewritten node
\(baseNode.apiAttributes())\
public func visit(_ node: \(baseKind.syntaxType)) -> \(baseKind.syntaxType) {
return dispatchVisit(Syntax(node)).cast(\(baseKind.syntaxType).self)
var node: Syntax = Syntax(node)
dispatchVisit(&node)
return node.cast(\(baseKind.syntaxType).self)
}
"""
)
Expand All @@ -187,21 +201,16 @@ let syntaxRewriterFile = SourceFileSyntax(leadingTrivia: copyrightHeader) {
"""
/// Interpret `node` as a node of type `nodeType`, visit it, calling
/// the `visit` to transform the node.
@inline(__always)
private func visitImpl<NodeType: SyntaxProtocol>(
_ node: Syntax,
_ node: inout Syntax,
_ nodeType: NodeType.Type,
_ visit: (NodeType) -> some SyntaxProtocol
) -> Syntax {
let castedNode = node.cast(NodeType.self)
// Accessing _syntaxNode directly is faster than calling Syntax(node)
visitPre(node)
defer {
visitPost(node)
}
if let newNode = visitAny(node) {
return newNode
}
return Syntax(visit(castedNode))
) {
let origNode = node
visitPre(origNode)
node = visitAny(origNode) ?? Syntax(visit(origNode.cast(NodeType.self)))
visitPost(origNode)
}
"""
)
Expand Down Expand Up @@ -242,26 +251,26 @@ let syntaxRewriterFile = SourceFileSyntax(leadingTrivia: copyrightHeader) {
/// that determines the correct visitation function will be popped off the
/// stack before the function is being called, making the switch's stack
/// space transient instead of having it linger in the call stack.
private func visitationFunc(for node: Syntax) -> ((Syntax) -> Syntax)
private func visitationFunc(for node: Syntax) -> ((inout Syntax) -> Void)
"""
) {
try SwitchExprSyntax("switch node.raw.kind") {
SwitchCaseSyntax("case .token:") {
StmtSyntax("return { self.visitImpl($0, TokenSyntax.self, self.visit) }")
StmtSyntax("return { self.visitImpl(&$0, TokenSyntax.self, self.visit) }")
}

for node in NON_BASE_SYNTAX_NODES {
SwitchCaseSyntax("case .\(node.varOrCaseName):") {
StmtSyntax("return { self.visitImpl($0, \(node.kind.syntaxType).self, self.visit) }")
StmtSyntax("return { self.visitImpl(&$0, \(node.kind.syntaxType).self, self.visit) }")
}
}
}
}

DeclSyntax(
"""
private func dispatchVisit(_ node: Syntax) -> Syntax {
return visitationFunc(for: node)(node)
private func dispatchVisit(_ node: inout Syntax) {
visitationFunc(for: node)(&node)
}
"""
)
Expand All @@ -272,15 +281,15 @@ let syntaxRewriterFile = SourceFileSyntax(leadingTrivia: copyrightHeader) {
poundKeyword: .poundElseToken(),
elements: .statements(
CodeBlockItemListSyntax {
try! FunctionDeclSyntax("private func dispatchVisit(_ node: Syntax) -> Syntax") {
try! FunctionDeclSyntax("private func dispatchVisit(_ node: inout Syntax)") {
try SwitchExprSyntax("switch node.raw.kind") {
SwitchCaseSyntax("case .token:") {
StmtSyntax("return visitImpl(node, TokenSyntax.self, visit)")
StmtSyntax("return visitImpl(&node, TokenSyntax.self, visit)")
}

for node in NON_BASE_SYNTAX_NODES {
SwitchCaseSyntax("case .\(node.varOrCaseName):") {
StmtSyntax("return visitImpl(node, \(node.kind.syntaxType).self, visit)")
StmtSyntax("return visitImpl(&node, \(node.kind.syntaxType).self, visit)")
}
}
}
Expand All @@ -293,9 +302,7 @@ let syntaxRewriterFile = SourceFileSyntax(leadingTrivia: copyrightHeader) {

DeclSyntax(
"""
private func visitChildren<SyntaxType: SyntaxProtocol>(
_ node: SyntaxType
) -> SyntaxType {
private func visitChildren(_ node: Syntax) -> Syntax {
// Walk over all children of this node and rewrite them. Don't store any
// rewritten nodes until the first non-`nil` value is encountered. When this
// happens, retrieve all previous syntax nodes from the parent node to
Expand All @@ -305,73 +312,48 @@ let syntaxRewriterFile = SourceFileSyntax(leadingTrivia: copyrightHeader) {

// newLayout is nil until the first child node is rewritten and rewritten
// nodes are being collected.
var newLayout: ContiguousArray<RawSyntax?>?

// Rewritten children just to keep their 'SyntaxArena' alive until they are
// wrapped with 'Syntax'
var rewrittens: ContiguousArray<Syntax> = []
var newLayout: UnsafeMutableBufferPointer<RawSyntax?> = .init(start: nil, count: 0)

let syntaxNode = node._syntaxNode
// Keep 'SyntaxArena' of rewritten nodes alive until they are wrapped
// with 'Syntax'
var rewrittens: ContiguousArray<RetainedSyntaxArena> = []

// Incrementing i manually is faster than using .enumerated()
var childIndex = 0
for (raw, info) in RawSyntaxChildren(syntaxNode) {
defer { childIndex += 1 }

guard let child = raw, viewMode.shouldTraverse(node: child) else {
// Node does not exist or should not be visited. If we are collecting
// rewritten nodes, we need to collect this one as well, otherwise we
// can ignore it.
if newLayout != nil {
newLayout!.append(raw)
}
continue
}
for case let (child?, info) in RawSyntaxChildren(node) where viewMode.shouldTraverse(node: child) {

// Build the Syntax node to rewrite
let absoluteRaw = AbsoluteRawSyntax(raw: child, info: info)
var childNode = nodeFactory.create(parent: node, raw: child, absoluteInfo: info)

let rewritten = dispatchVisit(Syntax(absoluteRaw, parent: syntaxNode))
if rewritten.id != info.nodeId {
dispatchVisit(&childNode)
if childNode.raw.id != child.id {
Copy link
Member Author

@rintaro rintaro Jul 15, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Previously it compared SyntaxIdentifier which takes the entire tree into account.
Now it compares RawSyntax.ID because we only care about the RawSyntax here.

I.e. returning node.detached is NOT considered "rewritten"

// The node was rewritten, let's handle it
if newLayout == nil {

if newLayout.baseAddress == nil {
// We have not yet collected any previous rewritten nodes. Initialize
// the new layout with the previous nodes of the parent. This is
// possible, since we know they were not rewritten.

// The below implementation is based on Collection.map but directly
// reserves enough capacity for the entire layout.
newLayout = ContiguousArray<RawSyntax?>()
newLayout!.reserveCapacity(node.raw.layoutView!.children.count)
for j in 0..<childIndex {
newLayout!.append(node.raw.layoutView!.children[j])
}
// the new layout with the previous nodes of the parent.
newLayout = .allocate(capacity: node.raw.layoutView!.children.count)
_ = newLayout.initialize(fromContentsOf: node.raw.layoutView!.children)
}

// Now that we know we have a new layout in which we collect rewritten
// nodes, add it.
rewrittens.append(rewritten)
newLayout!.append(rewritten.raw)
} else {
// The node was not changed by the rewriter. Only store it if a previous
// node has been rewritten and we are collecting a rewritten layout.
if newLayout != nil {
newLayout!.append(raw)
}
// Update the rewritten child.
newLayout[Int(info.indexInParent)] = childNode.raw
// Retain the syntax arena of the new node until it's wrapped with Syntax node.
rewrittens.append(childNode.raw.arenaReference.retained)
}

// Recycle 'childNode.info'
nodeFactory.dispose(&childNode)
}

if let newLayout {
if newLayout.baseAddress != nil {
// A child node was rewritten. Build the updated node.

// Sanity check, ensure the new children are the same length.
precondition(newLayout.count == node.raw.layoutView!.children.count)

let arena = self.arena ?? SyntaxArena()
let newRaw = node.raw.layoutView!.replacingLayout(with: Array(newLayout), arena: arena)
let newRaw = node.raw.layoutView!.replacingLayout(with: newLayout, arena: arena)
newLayout.deinitialize()
newLayout.deallocate()
// 'withExtendedLifetime' to keep 'SyntaxArena's of them alive until here.
return withExtendedLifetime(rewrittens) {
Syntax(raw: newRaw, rawNodeArena: arena).cast(SyntaxType.self)
Syntax(raw: newRaw, rawNodeArena: arena)
}
} else {
// No child node was rewritten. So no need to change this node as well.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,27 +34,8 @@ let syntaxVisitorFile = SourceFileSyntax(leadingTrivia: copyrightHeader) {

DeclSyntax(
"""
/// `Syntax.Info` objects created in `visitChildren` but whose `Syntax` nodes were not retained by the `visit`
/// functions implemented by a subclass of `SyntaxVisitor`.
///
/// Instead of deallocating them and allocating memory for new syntax nodes, store the allocated memory in an array.
/// We can then re-use them to create new syntax nodes.
///
/// The array's size should be a typical nesting depth of a Swift file. That way we can store all allocated syntax
/// nodes when unwinding the visitation stack.
///
/// The actual `info` stored in the `Syntax.Info` objects is garbage. It needs to be set when any of the `Syntax.Info`
/// objects get re-used.
private var recyclableNodeInfos: ContiguousArray<Syntax.Info?> = ContiguousArray(repeating: nil, count: 64)
"""
)

DeclSyntax(
"""
/// A bit is set to 1 if the corresponding index in `recyclableNodeInfos` is occupied and ready to be reused.
///
/// The last bit in this UInt64 corresponds to index 0 in `recyclableNodeInfos`.
private var recyclableNodeInfosUsageBitmap: UInt64 = 0
/// 'Syntax' object factory recycling 'Syntax.Info' instances.
private let nodeFactory: SyntaxNodeFactory = SyntaxNodeFactory()
"""
)

Expand Down Expand Up @@ -261,65 +242,14 @@ let syntaxVisitorFile = SourceFileSyntax(leadingTrivia: copyrightHeader) {
DeclSyntax(
"""
/// - Note: `node` is `inout` to avoid reference counting. See comment in `visitImpl`.
private func visitChildren(_ syntaxNode: inout Syntax) {
for childRaw in NonNilRawSyntaxChildren(syntaxNode, viewMode: viewMode) {
// syntaxNode gets retained here. That seems unnecessary but I don't know how to remove it.
var childNode: Syntax
if let recycledInfoIndex = recyclableNodeInfosUsageBitmap.indexOfRightmostOne {
var recycledInfo: Syntax.Info? = nil
// Use `swap` to extract the recyclable syntax node without incurring ref-counting.
swap(&recycledInfo, &recyclableNodeInfos[recycledInfoIndex])
assert(recycledInfo != nil, "Slot indicated by the bitmap did not contain a value")
recyclableNodeInfosUsageBitmap.setBitToZero(at: recycledInfoIndex)
// syntaxNode.info gets retained here. This is necessary because we build up the parent tree.
recycledInfo!.info = .nonRoot(.init(parent: syntaxNode, absoluteInfo: childRaw.info))
childNode = Syntax(childRaw.raw, info: recycledInfo!)
} else {
childNode = Syntax(childRaw, parent: syntaxNode)
}
private func visitChildren(_ node: inout Syntax) {
for case let (child?, info) in RawSyntaxChildren(node) where viewMode.shouldTraverse(node: child) {
var childNode = nodeFactory.create(parent: node, raw: child, absoluteInfo: info)
visit(&childNode)
if isKnownUniquelyReferenced(&childNode.info) {
// The node didn't get stored by the subclass's visit method. We can re-use the memory of its `Syntax.Info`
// for future syntax nodes.
childNode.info.info = nil
if let emptySlot = recyclableNodeInfosUsageBitmap.indexOfRightmostZero {
// Use `swap` to store the recyclable syntax node without incurring ref-counting.
swap(&recyclableNodeInfos[emptySlot], &childNode.info)
assert(childNode.info == nil, "Slot should not have contained a value")
recyclableNodeInfosUsageBitmap.setBitToOne(at: emptySlot)
}
}
nodeFactory.dispose(&childNode)
}
}
"""
)
}

DeclSyntax(
"""
fileprivate extension UInt64 {
  /// Position of the lowest-order bit that is 0, counted from the least
  /// significant bit, or `nil` when every bit is 1.
  var indexOfRightmostZero: Int? {
    let inverted = ~self
    return inverted.indexOfRightmostOne
  }

  /// Position of the lowest-order bit that is 1, counted from the least
  /// significant bit, or `nil` when every bit is 0.
  var indexOfRightmostOne: Int? {
    guard self != 0 else {
      // No bit is set, so there is no index to report.
      return nil
    }
    return self.trailingZeroBitCount
  }

  /// Clears the bit at `index`, leaving all other bits untouched.
  mutating func setBitToZero(at index: Int) {
    self = self & ~(1 << index)
  }

  /// Sets the bit at `index`, leaving all other bits untouched.
  mutating func setBitToOne(at index: Int) {
    self = self | (1 << index)
  }
}

"""
)
}
1 change: 1 addition & 0 deletions Sources/SwiftSyntax/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ add_swift_syntax_library(SwiftSyntax
SyntaxCollection.swift
SyntaxHashable.swift
SyntaxIdentifier.swift
SyntaxNodeFactory.swift
SyntaxNodeStructure.swift
SyntaxProtocol.swift
SyntaxText.swift
Expand Down
4 changes: 4 additions & 0 deletions Sources/SwiftSyntax/SyntaxArenaAllocatedBuffer.swift
Original file line number Diff line number Diff line change
Expand Up @@ -107,4 +107,8 @@ public struct SyntaxArenaAllocatedBufferPointer<Element: Sendable>: RandomAccess
var unsafeRawBufferPointer: UnsafeRawBufferPointer {
return UnsafeRawBufferPointer(buffer)
}

/// Invokes `body` with this pointer's `buffer` and hands back whatever `body` produces.
///
/// - Parameter body: A closure that receives the elements as an `UnsafeBufferPointer`.
/// - Returns: The value returned by `body`.
/// - Throws: Rethrows any error thrown by `body`.
public func withContiguousStorageIfAvailable<R>(_ body: (UnsafeBufferPointer<Element>) throws -> R) rethrows -> R? {
  let result = try body(buffer)
  return result
}
}
Loading