Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf(semantic): avoid counting nodes #5703

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 12 additions & 5 deletions crates/oxc/src/compiler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -156,12 +156,13 @@ pub trait CompilerInterface {
/* Compress */

if let Some(options) = self.compress_options() {
self.compress(&allocator, &mut program, options);
self.compress(&allocator, &mut program, source_text, options);
}

/* Mangler */

let mangler = self.mangle_options().map(|options| self.mangle(&mut program, options));
let mangler =
self.mangle_options().map(|options| self.mangle(&mut program, source_text, options));

/* Codegen */

Expand Down Expand Up @@ -214,13 +215,19 @@ pub trait CompilerInterface {
&self,
allocator: &'a Allocator,
program: &mut Program<'a>,
source_text: &str,
options: CompressOptions,
) {
Compressor::new(allocator, options).build(program);
Compressor::new(allocator, options).build(program, source_text);
}

fn mangle(&self, program: &mut Program<'_>, options: MangleOptions) -> Mangler {
Mangler::new().with_options(options).build(program)
fn mangle(
&self,
program: &mut Program<'_>,
source_text: &str,
options: MangleOptions,
) -> Mangler {
Mangler::new().with_options(options).build(program, source_text)
}

fn codegen<'a>(
Expand Down
4 changes: 2 additions & 2 deletions crates/oxc_mangler/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,8 @@ impl Mangler {
}

#[must_use]
pub fn build<'a>(mut self, program: &'a Program<'a>) -> Mangler {
let semantic = SemanticBuilder::new("").build(program).semantic;
pub fn build<'a>(mut self, program: &'a Program<'a>, source_text: &str) -> Mangler {
let semantic = SemanticBuilder::new(source_text).build(program).semantic;

// Mangle the symbol table by computing slots from the scope tree.
// A slot is the occurrence index of a binding identifier inside a scope.
Expand Down
2 changes: 1 addition & 1 deletion crates/oxc_minifier/examples/mangler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,6 @@ fn mangler(source_text: &str, source_type: SourceType, debug: bool) -> String {
let allocator = Allocator::default();
let ret = Parser::new(&allocator, source_text, source_type).parse();
let program = allocator.alloc(ret.program);
let mangler = Mangler::new().with_options(MangleOptions { debug }).build(program);
let mangler = Mangler::new().with_options(MangleOptions { debug }).build(program, source_text);
CodeGenerator::new().with_mangler(Some(mangler)).build(program).source_text
}
2 changes: 1 addition & 1 deletion crates/oxc_minifier/examples/minifier.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,6 @@ fn minify(source_text: &str, source_type: SourceType, mangle: bool) -> String {
let ret = Parser::new(&allocator, source_text, source_type).parse();
let program = allocator.alloc(ret.program);
let options = MinifierOptions { mangle, compress: CompressOptions::all_true() };
let ret = Minifier::new(options).build(&allocator, program);
let ret = Minifier::new(options).build(&allocator, program, source_text);
CodeGenerator::new().with_mangler(ret.mangler).build(program).source_text
}
8 changes: 5 additions & 3 deletions crates/oxc_minifier/src/compressor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,11 @@ impl<'a> Compressor<'a> {
Self { allocator, options }
}

pub fn build(self, program: &mut Program<'a>) {
let (symbols, scopes) =
SemanticBuilder::new("").build(program).semantic.into_symbol_table_and_scope_tree();
pub fn build(self, program: &mut Program<'a>, source_text: &str) {
let (symbols, scopes) = SemanticBuilder::new(source_text)
.build(program)
.semantic
.into_symbol_table_and_scope_tree();
self.build_with_symbols_and_scopes(symbols, scopes, program);
}

Expand Down
11 changes: 8 additions & 3 deletions crates/oxc_minifier/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,14 @@ impl Minifier {
Self { options }
}

pub fn build<'a>(self, allocator: &'a Allocator, program: &mut Program<'a>) -> MinifierReturn {
Compressor::new(allocator, self.options.compress).build(program);
let mangler = self.options.mangle.then(|| Mangler::default().build(program));
pub fn build<'a>(
self,
allocator: &'a Allocator,
program: &mut Program<'a>,
source_text: &str,
) -> MinifierReturn {
Compressor::new(allocator, self.options.compress).build(program, source_text);
let mangler = self.options.mangle.then(|| Mangler::default().build(program, source_text));
MinifierReturn { mangler }
}
}
2 changes: 1 addition & 1 deletion crates/oxc_minifier/tests/mangler/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ fn mangle(source_text: &str) -> String {
let source_type = SourceType::mjs();
let ret = Parser::new(&allocator, source_text, source_type).parse();
let program = ret.program;
let mangler = Mangler::new().build(&program);
let mangler = Mangler::new().build(&program, source_text);
CodeGenerator::new().with_mangler(Some(mangler)).build(&program).source_text
}

Expand Down
2 changes: 1 addition & 1 deletion crates/oxc_minifier/tests/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ fn run(source_text: &str, source_type: SourceType, options: Option<CompressOptio
let ret = Parser::new(&allocator, source_text, source_type).parse();
let program = allocator.alloc(ret.program);
if let Some(options) = options {
Compressor::new(&allocator, options).build(program);
Compressor::new(&allocator, options).build(program, source_text);
}
CodeGenerator::new()
.with_options(CodegenOptions { single_quote: true, ..CodegenOptions::default() })
Expand Down
19 changes: 9 additions & 10 deletions crates/oxc_semantic/src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use crate::{
binder::Binder,
checker,
class::ClassTableBuilder,
counter::Counts,
counts::Counts,
diagnostics::redeclaration,
jsdoc::JSDocBuilder,
label::UnusedLabels,
Expand Down Expand Up @@ -221,18 +221,16 @@ impl<'a> SemanticBuilder<'a> {
let scope_id = self.scope.add_scope(None, NodeId::DUMMY, ScopeFlags::Top);
program.scope_id.set(Some(scope_id));
} else {
// Count the number of nodes, scopes, symbols, and references.
// Estimate the number of nodes, scopes, symbols, and references.
// Use these counts to reserve sufficient capacity in `AstNodes`, `ScopeTree`
// and `SymbolTable` to store them.
// This means that as we traverse the AST and fill up these structures with data,
// they never need to grow and reallocate - which is an expensive operation as it
// involves copying all the memory from the old allocation to the new one.
// For large source files, these structures are very large, so growth is very costly
// as it involves copying massive chunks of memory.
// Avoiding this growth produces up to 30% perf boost on our benchmarks.
// TODO: It would be even more efficient to calculate counts in parser to avoid
// this extra AST traversal.
let counts = Counts::count(program);
// Avoiding this growth produces up to 40% perf boost on our benchmarks.
let counts = Counts::count(program, self.source_text);
self.nodes.reserve(counts.nodes);
self.scope.reserve(counts.scopes);
self.symbols.reserve(counts.symbols, counts.references);
Expand All @@ -243,11 +241,12 @@ impl<'a> SemanticBuilder<'a> {
// Check that estimated counts accurately
#[cfg(debug_assertions)]
{
#[allow(clippy::cast_possible_truncation)]
let actual_counts = Counts {
nodes: self.nodes.len(),
scopes: self.scope.len(),
symbols: self.symbols.len(),
references: self.symbols.references.len(),
nodes: self.nodes.len() as u32,
scopes: self.scope.len() as u32,
symbols: self.symbols.len() as u32,
references: self.symbols.references.len() as u32,
};
Counts::assert_accurate(&actual_counts, &counts);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
//! Visitor to count nodes, scopes, symbols and references in AST.
//! These counts can be used to pre-allocate sufficient capacity in `AstNodes`,
//! `ScopeTree`, and `SymbolTable` to store info for all these items.
//! Counter to estimate counts of nodes, scopes, symbols and references.
//!
//! Produces an accurate count, by visiting AST and counting these items.
//!
//! Doing a full traverse of AST has a sizeable performance cost, but is necessary on platforms
//! which are 32-bit or do not have virtual memory (e.g. WASM) and so the "standard" version of
//! `Counts` is not suitable.

use std::cell::Cell;

Expand All @@ -13,38 +17,37 @@ use oxc_ast::{
};
use oxc_syntax::scope::{ScopeFlags, ScopeId};

use super::assert_le;

#[derive(Default, Debug)]
pub(crate) struct Counts {
pub nodes: usize,
pub scopes: usize,
pub symbols: usize,
pub references: usize,
pub struct Counts {
pub nodes: u32,
pub scopes: u32,
pub symbols: u32,
pub references: u32,
}

impl Counts {
pub fn count(program: &Program) -> Self {
let mut counts = Counts::default();
/// Calculate counts by visiting AST
pub fn count(program: &Program, _source_text: &str) -> Self {
let mut counts = Self::default();
counts.visit_program(program);
counts
}

/// Assert that estimated counts were accurate
#[cfg_attr(not(debug_assertions), expect(dead_code))]
pub fn assert_accurate(actual: &Self, estimated: &Self) {
assert_eq!(actual.nodes, estimated.nodes, "nodes count mismatch");
assert_eq!(actual.scopes, estimated.scopes, "scopes count mismatch");
assert_eq!(actual.references, estimated.references, "references count mismatch");
// `Counts` may overestimate number of symbols, because multiple `BindingIdentifier`s
// can result in only a single symbol.
// e.g. `var x; var x;` = 2 x `BindingIdentifier` but 1 x symbol.
// This is not a big problem - allocating a `Vec` with excess capacity is cheap.
// This is not a big problem - allocating a `Vec` with excess capacity is fairly cheap.
// It's allocating with *not enough* capacity which is costly, as then the `Vec`
// will grow and reallocate.
assert!(
actual.symbols <= estimated.symbols,
"symbols count mismatch {} <= {}",
actual.symbols,
estimated.symbols
);
assert_le!(actual.symbols, estimated.symbols, "symbols count mismatch");
assert_eq!(actual.references, estimated.references, "references count mismatch");
}
}

Expand Down
54 changes: 54 additions & 0 deletions crates/oxc_semantic/src/counts/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
//! Counter to estimate counts of nodes, scopes, symbols and references.
//!
//! These counts can be used to pre-allocate sufficient capacity in `AstNodes`,
//! `ScopeTree`, and `SymbolTable`, to store data for all these items.
//! If sufficient capacity is not reserved in advance, these structures can grow and reallocate
//! during their construction, which involves copying large chunks of memory.
//! This produces a large performance cost - around 30% on our benchmarks for large source files.
//!
//! `Counts` has 2 implementations.
//! * `standard` - preferred version, for 64-bit platforms with virtual memory.
//! * `visitor` - fallback version, for 32-bit platforms, and platforms with no virtual memory (e.g. WASM).
//!
//! Please see docs in each module for the differences between them.

#[cfg(all(target_pointer_width = "64", not(target_arch = "wasm32")))]
mod standard;
#[cfg(all(target_pointer_width = "64", not(target_arch = "wasm32")))]
pub use standard::Counts;

#[cfg(not(all(target_pointer_width = "64", not(target_arch = "wasm32"))))]
mod fallback;
#[cfg(not(all(target_pointer_width = "64", not(target_arch = "wasm32"))))]
pub use fallback::Counts;

/// Macro to assert that `left <= right`
macro_rules! assert_le {
($left:expr, $right:expr, $($msg_args:tt)+) => {
match (&$left, &$right) {
(left, right) => if !(left <= right) {
panic!(
"assertion failed: `(left <= right)`\n left: `{:?}`,\n right: `{:?}`: {}",
left, right,
::std::format_args!($($msg_args)+),
);
}
}
};

($left:expr, $right:expr) => {
match (&$left, &$right) {
(left, right) => if !(left <= right) {
panic!(
"assertion failed: `(left <= right)`\n left: `{:?}`,\n right: `{:?}`",
left, right,
);
}
}
};

($lhs:expr, $rhs:expr,) => {
assert_le!($lhs, $rhs);
};
}
use assert_le;
83 changes: 83 additions & 0 deletions crates/oxc_semantic/src/counts/standard.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
//! Counter to estimate counts of nodes, scopes, symbols and references.
//!
//! Estimate counts based on size of source text.
//!
//! These will almost always be a large over-estimate, but will never be an under-estimate.
//! Not under-estimating is the most important thing, as the `Vec`s in `AstNodes`, `ScopeTree`
//! and `SymbolTable` will not need to resize during building `Semantic`, which avoids expensive
//! memory copying.
//!
//! This implementation of `Counts` can be used on 64-bit platforms with virtual memory, where it's
//! not a big problem to reserve excessively large blocks of virtual memory, because:
//! 1. The memory space is so large that it's almost impossible to exhaust.
//! 2. Allocating memory only consumes virtual memory, not physical memory.
//!
//! Note: Ideally, we'd shrink the allocations to fit once the actual size required is known.
//! But found that shrinking caused memory to be reallocated, which had a large perf cost
//! (~10% on semantic benchmarks).

use std::cmp::max;

use oxc_ast::ast::Program;

use super::assert_le;

#[derive(Default, Debug)]
pub struct Counts {
pub nodes: u32,
pub scopes: u32,
pub symbols: u32,
pub references: u32,
}

impl Counts {
/// Calculate counts as probable over-estimates based on size of source text
pub fn count(_program: &Program, source_text: &str) -> Self {
#[allow(clippy::cast_possible_truncation)]
let source_len = source_text.len() as u32;

// Calculate maximum number of nodes, scopes, symbols and references that's possible
// for given length of source code.
// These will almost always be a large over-estimate, but will never be an under-estimate.

// The most node-intensive code is:
// `` = 0 bytes, 1 nodes
// `x` = 1 bytes, 3 nodes
// `x=x` = 3 bytes, 7 nodes
// `x=x=x` = 5 bytes, 11 nodes
#[allow(clippy::cast_lossless)]
let nodes = u32::try_from(source_len as u64 * 2 + 1).unwrap_or(u32::MAX);

// The most scope-intensive code is:
// `` = 0 bytes, 1 scopes
// `{}` = 2 bytes, 2 scopes
// `{{}}` = 4 bytes, 3 scopes
// `{{{}}}` = 6 bytes, 4 scopes
let scopes = source_len / 2 + 1;

// The most symbol-intensive code is:
// `` = 0 bytes, 0 symbols
// `a=>0` = 4 bytes, 1 symbols
// `(a,a)=>0` = 8 bytes, 2 symbols
// `var a` = 5 bytes, 1 symbols
// `var a,a` = 7 bytes, 2 symbols
let symbols = max(source_len / 2, 1) - 1;

// The most reference-intensive code is:
// `a` = 1 bytes, 1 references
// `a,a` = 3 bytes, 2 references
// `a,a,a` = 5 bytes, 3 references
let references = source_len / 2 + 1;

Self { nodes, scopes, symbols, references }
}

/// Assert that estimated counts were not an under-estimate
#[cfg_attr(not(debug_assertions), expect(dead_code))]
pub fn assert_accurate(actual: &Self, estimated: &Self) {
assert_le!(actual.nodes, estimated.nodes, "nodes count mismatch");
assert_le!(actual.scopes, estimated.scopes, "scopes count mismatch");
assert_le!(actual.symbols, estimated.symbols, "symbols count mismatch");
assert_le!(actual.references, estimated.references, "references count mismatch");
}
}
2 changes: 1 addition & 1 deletion crates/oxc_semantic/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ mod binder;
mod builder;
mod checker;
mod class;
mod counter;
mod counts;
mod diagnostics;
mod jsdoc;
mod label;
Expand Down
3 changes: 2 additions & 1 deletion crates/oxc_semantic/src/node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,8 @@ impl<'a> AstNodes<'a> {
ast_node_id
}

pub fn reserve(&mut self, additional: usize) {
pub fn reserve(&mut self, additional: u32) {
let additional = additional as usize;
self.nodes.reserve(additional);
self.parent_ids.reserve(additional);
}
Expand Down
Loading