Skip to content

Commit

Permalink
perf(semantic): avoid counting nodes
Browse files Browse the repository at this point in the history
  • Loading branch information
overlookmotel committed Sep 11, 2024
1 parent 64f9575 commit 23cfb0f
Show file tree
Hide file tree
Showing 7 changed files with 147 additions and 20 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ memoffset = "0.9.1"
miette = { version = "7.2.0", features = ["fancy-no-syscall"] }
mimalloc = "0.1.43"
mime_guess = "2.0.5"
more_asserts = "0.3.1"
nonmax = "0.5.5"
num-bigint = "0.4.6"
num-traits = "0.2.19"
Expand Down
1 change: 1 addition & 0 deletions crates/oxc_semantic/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ oxc_syntax = { workspace = true }
assert-unchecked = { workspace = true }
indexmap = { workspace = true }
itertools = { workspace = true }
more_asserts = { workspace = true }
phf = { workspace = true, features = ["macros"] }
rustc-hash = { workspace = true }
serde = { workspace = true, features = ["derive"], optional = true }
Expand Down
36 changes: 20 additions & 16 deletions crates/oxc_semantic/src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -231,26 +231,30 @@ impl<'a> SemanticBuilder<'a> {
// Avoiding this growth produces up to 30% perf boost on our benchmarks.
// TODO: It would be even more efficient to calculate counts in parser to avoid
// this extra AST traversal.
let mut counter = Counter::default();
counter.visit_program(program);
self.nodes.reserve(counter.nodes_count);
self.scope.reserve(counter.scopes_count);
self.symbols.reserve(counter.symbols_count, counter.references_count);
let estimated_counts = Counter::count(program, self.source_text);
self.nodes.reserve(estimated_counts.nodes_count);
self.scope.reserve(estimated_counts.scopes_count);
self.symbols.reserve(estimated_counts.symbols_count, estimated_counts.references_count);

// Visit AST to generate scopes tree etc
self.visit_program(program);

// Check that `Counter` got accurate counts
debug_assert_eq!(self.nodes.len(), counter.nodes_count);
debug_assert_eq!(self.scope.len(), counter.scopes_count);
debug_assert_eq!(self.symbols.references.len(), counter.references_count);
// `Counter` may overestimate number of symbols, because multiple `BindingIdentifier`s
// can result in only a single symbol.
// e.g. `var x; var x;` = 2 x `BindingIdentifier` but 1 x symbol.
// This is not a big problem - allocating a `Vec` with excess capacity is cheap.
// It's allocating with *not enough* capacity which is costly, as then the `Vec`
// will grow and reallocate.
debug_assert!(self.symbols.len() <= counter.symbols_count);
// Debug assert that `Counter` got accurate counts
#[cfg(debug_assertions)]
{
let actual_counts = Counter {
nodes_count: self.nodes.len(),
scopes_count: self.scope.len(),
symbols_count: self.symbols.len(),
references_count: self.symbols.references.len(),
};
Counter::check(actual_counts, estimated_counts);
}

// Shrink allocations to required size, to free memory
self.nodes.shrink_to(self.nodes.len());
self.scope.shrink_to(self.scope.len());
self.symbols.shrink_to(self.symbols.len(), self.symbols.references.len());

// Checking syntax error on module record requires scope information from the previous AST pass
if self.check_syntax_error {
Expand Down
101 changes: 97 additions & 4 deletions crates/oxc_semantic/src/counter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,117 @@
//! These counts can be used to pre-allocate sufficient capacity in `AstNodes`,
//! `ScopeTree`, and `SymbolTable` to store info for all these items.

use std::cell::Cell;
use std::{cell::Cell, cmp::max};

use more_asserts::assert_le;

use oxc_ast::{
ast::{BindingIdentifier, IdentifierReference, TSEnumMemberName, TSModuleDeclarationName},
ast::{
BindingIdentifier, IdentifierReference, Program, TSEnumMemberName, TSModuleDeclarationName,
},
visit::walk::{walk_ts_enum_member_name, walk_ts_module_declaration_name},
AstKind, Visit,
};
use oxc_syntax::scope::{ScopeFlags, ScopeId};

#[allow(clippy::struct_field_names)]
#[derive(Default, Debug)]
#[expect(clippy::struct_field_names)]
#[derive(Clone, Copy, Default, Debug)]
pub(crate) struct Counter {
pub nodes_count: usize,
pub scopes_count: usize,
pub symbols_count: usize,
pub references_count: usize,
}

impl Counter {
/// Calculate counts as probable over-estimates based on size of source text
pub fn count(_program: &Program, source_text: &str) -> Self {
let source_len = source_text.len();

// Calculate maximum number of nodes, scopes, symbols, references that's possible
// from length of source code.
// These will almost always be a large over-estimate, but will never be an under-estimate,
// which is the most important thing, as the `Vec`s in `AstNodes`, `ScopeTree` and `SymbolTable`
// will not need to resize during building `Semantic`, which avoids expensive memory copying.

// The most node-intensive code is:
// `` = 0 bytes, 1 nodes
// `0` = 1 bytes, 3 nodes
// `0+0` = 3 bytes, 5 nodes
// `0+0+0` = 5 bytes, 7 nodes
// `a:a:x` = 5 bytes, 7 nodes
// `!0` = 2 bytes, 4 nodes
// `!!0` = 3 bytes, 5 nodes
// `!!!0` = 4 bytes, 6 nodes
let nodes_count = source_len + 2;

// The most scope-intensive code is:
// `` = 0 bytes, 1 scopes
// `{}` = 2 bytes, 2 scopes
// `{{}}` = 4 bytes, 3 scopes
// `{{{}}}` = 6 bytes, 4 scopes
let scopes_count = source_len / 2 + 1;

// The most symbol-intensive code is:
// `` = 0 bytes, 0 symbols
// `a=>0` = 4 bytes, 1 symbols
// `(a,a)=>0` = 8 bytes, 2 symbols
// `var a` = 5 bytes, 1 symbols
// `var a,a` = 7 bytes, 2 symbols
let symbols_count = max(source_len / 2, 1) - 1;

// The most reference-intensive code is:
// `a` = 1 bytes, 1 references
// `a,a` = 3 bytes, 2 references
// `a,a,a` = 5 bytes, 3 references
let references_count = source_len / 2 + 1;

Counter { nodes_count, scopes_count, symbols_count, references_count }
}

/// Debug assert that estimated counts were not an under-estimate
pub fn check(actual: Self, estimate: Self) {
assert_le!(actual.nodes_count, estimate.nodes_count);
assert_le!(actual.scopes_count, estimate.scopes_count);
assert_le!(actual.references_count, estimate.references_count);
assert_le!(actual.symbols_count, estimate.symbols_count);
}
}

/// Counts of nodes, scopes, symbols and references, as gathered by visiting the AST
/// (see `CounterByVisit::count`).
// All fields deliberately share the `_count` suffix, which is what
// `clippy::struct_field_names` reports - hence the `expect` below.
#[expect(clippy::struct_field_names)]
#[derive(Clone, Copy, Default, Debug)]
pub(crate) struct CounterByVisit {
/// Number of AST nodes.
pub nodes_count: usize,
/// Number of scopes.
pub scopes_count: usize,
/// Number of symbols. When produced by counting, this may exceed the real number of
/// symbols (`CounterByVisit::check` only requires `actual <= estimate` for this field).
pub symbols_count: usize,
/// Number of references.
pub references_count: usize,
}

impl CounterByVisit {
    /// Produce counts by walking the AST of `program`.
    ///
    /// Starts from `Self::default()` and lets `visit_program` update it.
    /// `_source_text` is accepted but never read.
    // NOTE(review): this relies on a `Visit` impl for `CounterByVisit` - confirm one exists.
    #[expect(dead_code)]
    pub fn count(program: &Program, _source_text: &str) -> Self {
        let mut tally = Self::default();
        tally.visit_program(program);
        tally
    }

    /// Assert that the `estimate` counts agree with the `actual` counts.
    ///
    /// Nodes, scopes and references must match exactly. Symbols only need
    /// `actual <= estimate`.
    ///
    /// # Panics
    ///
    /// Panics as soon as one of those comparisons fails.
    #[expect(dead_code)]
    pub fn check(actual: Self, estimate: Self) {
        let Self { nodes_count, scopes_count, symbols_count, references_count } = estimate;

        assert_eq!(actual.nodes_count, nodes_count);
        assert_eq!(actual.scopes_count, scopes_count);
        assert_eq!(actual.references_count, references_count);
        // The symbol estimate is allowed to be too high: several `BindingIdentifier`s can
        // map to a single symbol, e.g. `var x; var x;` has 2 x `BindingIdentifier` but only
        // 1 x symbol. Spare `Vec` capacity is cheap; it is *too little* capacity that is
        // costly, because the `Vec` then has to grow and reallocate.
        assert_le!(actual.symbols_count, symbols_count);
    }
}

impl<'a> Visit<'a> for Counter {
#[inline]
fn enter_node(&mut self, _: AstKind<'a>) {
Expand Down
5 changes: 5 additions & 0 deletions crates/oxc_semantic/src/node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,11 @@ impl<'a> AstNodes<'a> {
self.nodes.reserve(additional);
self.parent_ids.reserve(additional);
}

pub fn shrink_to(&mut self, capacity: usize) {
self.nodes.shrink_to(capacity);
self.parent_ids.shrink_to(capacity);
}
}

#[derive(Debug, Clone)]
Expand Down
11 changes: 11 additions & 0 deletions crates/oxc_semantic/src/scope.rs
Original file line number Diff line number Diff line change
Expand Up @@ -309,4 +309,15 @@ impl ScopeTree {
self.child_ids.reserve(additional);
}
}

/// Shrink allocations to a certain number of scopes.
pub fn shrink_to(&mut self, capacity: usize) {
self.parent_ids.shrink_to(capacity);
self.flags.shrink_to(capacity);
self.bindings.shrink_to(capacity);
self.node_ids.shrink_to(capacity);
if self.build_child_ids {
self.child_ids.shrink_to(capacity);
}
}
}
12 changes: 12 additions & 0 deletions crates/oxc_semantic/src/symbol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,18 @@ impl SymbolTable {

self.references.reserve(additional_references);
}

pub fn shrink_to(&mut self, capacity_symbols: usize, capacity_references: usize) {
self.spans.shrink_to(capacity_symbols);
self.names.shrink_to(capacity_symbols);
self.flags.shrink_to(capacity_symbols);
self.scope_ids.shrink_to(capacity_symbols);
self.declarations.shrink_to(capacity_symbols);
self.resolved_references.shrink_to(capacity_symbols);
self.redeclarations.shrink_to(capacity_symbols);

self.references.shrink_to(capacity_references);
}
}

Checks whether an identifier reference is a global value or not.
Expand Down

0 comments on commit 23cfb0f

Please sign in to comment.