Skip to content

Commit

Permalink
perf(semantic): avoid counting nodes
Browse files Browse the repository at this point in the history
  • Loading branch information
overlookmotel committed Sep 11, 2024
1 parent 1b4412f commit 19ab835
Show file tree
Hide file tree
Showing 20 changed files with 144 additions and 27 deletions.
17 changes: 12 additions & 5 deletions crates/oxc/src/compiler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -156,12 +156,13 @@ pub trait CompilerInterface {
/* Compress */

if let Some(options) = self.compress_options() {
self.compress(&allocator, &mut program, options);
self.compress(&allocator, &mut program, source_text, options);
}

/* Mangler */

let mangler = self.mangle_options().map(|options| self.mangle(&mut program, options));
let mangler =
self.mangle_options().map(|options| self.mangle(&mut program, source_text, options));

/* Codegen */

Expand Down Expand Up @@ -214,13 +215,19 @@ pub trait CompilerInterface {
&self,
allocator: &'a Allocator,
program: &mut Program<'a>,
source_text: &str,
options: CompressOptions,
) {
Compressor::new(allocator, options).build(program);
Compressor::new(allocator, options).build(program, source_text);
}

fn mangle(&self, program: &mut Program<'_>, options: MangleOptions) -> Mangler {
Mangler::new().with_options(options).build(program)
fn mangle(
&self,
program: &mut Program<'_>,
source_text: &str,
options: MangleOptions,
) -> Mangler {
Mangler::new().with_options(options).build(program, source_text)
}

fn codegen<'a>(
Expand Down
6 changes: 6 additions & 0 deletions crates/oxc_index/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,12 @@ impl<I: Idx, T> IndexVec<I, T> {
self.raw.shrink_to_fit();
}

/// Shrinks the capacity of the vector with a lower bound.
#[inline]
pub fn shrink_to(&mut self, min_capacity: usize) {
self.raw.shrink_to(min_capacity);
}

/// Shortens the vector, keeping the first `len` elements and dropping
/// the rest. See [`Vec::truncate`]
#[inline]
Expand Down
4 changes: 2 additions & 2 deletions crates/oxc_mangler/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,8 @@ impl Mangler {
}

#[must_use]
pub fn build<'a>(mut self, program: &'a Program<'a>) -> Mangler {
let semantic = SemanticBuilder::new("").build(program).semantic;
pub fn build<'a>(mut self, program: &'a Program<'a>, source_text: &str) -> Mangler {
let semantic = SemanticBuilder::new(source_text).build(program).semantic;

// Mangle the symbol table by computing slots from the scope tree.
// A slot is the occurrence index of a binding identifier inside a scope.
Expand Down
2 changes: 1 addition & 1 deletion crates/oxc_minifier/examples/mangler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,6 @@ fn mangler(source_text: &str, source_type: SourceType, debug: bool) -> String {
let allocator = Allocator::default();
let ret = Parser::new(&allocator, source_text, source_type).parse();
let program = allocator.alloc(ret.program);
let mangler = Mangler::new().with_options(MangleOptions { debug }).build(program);
let mangler = Mangler::new().with_options(MangleOptions { debug }).build(program, source_text);
CodeGenerator::new().with_mangler(Some(mangler)).build(program).source_text
}
2 changes: 1 addition & 1 deletion crates/oxc_minifier/examples/minifier.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,6 @@ fn minify(source_text: &str, source_type: SourceType, mangle: bool) -> String {
let ret = Parser::new(&allocator, source_text, source_type).parse();
let program = allocator.alloc(ret.program);
let options = MinifierOptions { mangle, compress: CompressOptions::all_true() };
let ret = Minifier::new(options).build(&allocator, program);
let ret = Minifier::new(options).build(&allocator, program, source_text);
CodeGenerator::new().with_mangler(ret.mangler).build(program).source_text
}
8 changes: 5 additions & 3 deletions crates/oxc_minifier/src/compressor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,11 @@ impl<'a> Compressor<'a> {
Self { allocator, options }
}

pub fn build(self, program: &mut Program<'a>) {
let (symbols, scopes) =
SemanticBuilder::new("").build(program).semantic.into_symbol_table_and_scope_tree();
pub fn build(self, program: &mut Program<'a>, source_text: &str) {
let (symbols, scopes) = SemanticBuilder::new(source_text)
.build(program)
.semantic
.into_symbol_table_and_scope_tree();
self.build_with_symbols_and_scopes(symbols, scopes, program);
}

Expand Down
11 changes: 8 additions & 3 deletions crates/oxc_minifier/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,14 @@ impl Minifier {
Self { options }
}

pub fn build<'a>(self, allocator: &'a Allocator, program: &mut Program<'a>) -> MinifierReturn {
Compressor::new(allocator, self.options.compress).build(program);
let mangler = self.options.mangle.then(|| Mangler::default().build(program));
pub fn build<'a>(
self,
allocator: &'a Allocator,
program: &mut Program<'a>,
source_text: &str,
) -> MinifierReturn {
Compressor::new(allocator, self.options.compress).build(program, source_text);
let mangler = self.options.mangle.then(|| Mangler::default().build(program, source_text));
MinifierReturn { mangler }
}
}
2 changes: 1 addition & 1 deletion crates/oxc_minifier/tests/mangler/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ fn mangle(source_text: &str) -> String {
let source_type = SourceType::mjs();
let ret = Parser::new(&allocator, source_text, source_type).parse();
let program = ret.program;
let mangler = Mangler::new().build(&program);
let mangler = Mangler::new().build(&program, source_text);
CodeGenerator::new().with_mangler(Some(mangler)).build(&program).source_text
}

Expand Down
2 changes: 1 addition & 1 deletion crates/oxc_minifier/tests/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ fn run(source_text: &str, source_type: SourceType, options: Option<CompressOptio
let ret = Parser::new(&allocator, source_text, source_type).parse();
let program = allocator.alloc(ret.program);
if let Some(options) = options {
Compressor::new(&allocator, options).build(program);
Compressor::new(&allocator, options).build(program, source_text);
}
CodeGenerator::new()
.with_options(CodegenOptions { single_quote: true, ..CodegenOptions::default() })
Expand Down
2 changes: 1 addition & 1 deletion crates/oxc_semantic/src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ impl<'a> SemanticBuilder<'a> {
// Avoiding this growth produces up to 30% perf boost on our benchmarks.
// TODO: It would be even more efficient to calculate counts in parser to avoid
// this extra AST traversal.
let counts = Counts::count(program);
let counts = Counts::count(program, self.source_text);
self.nodes.reserve(counts.nodes);
self.scope.reserve(counts.scopes);
self.symbols.reserve(counts.symbols, counts.references);
Expand Down
59 changes: 56 additions & 3 deletions crates/oxc_semantic/src/counter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
//! These counts can be used to pre-allocate sufficient capacity in `AstNodes`,
//! `ScopeTree`, and `SymbolTable` to store info for all these items.

use std::cell::Cell;
use std::{cell::Cell, cmp::max};

use more_asserts::assert_le;

Expand All @@ -24,13 +24,66 @@ pub(crate) struct Counts {
}

impl Counts {
pub fn count(program: &Program) -> Self {
let mut counts = Counts::default();
/// Calculate counts as probable over-estimates based on size of source text
pub fn count(_program: &Program, source_text: &str) -> Self {
let source_len = source_text.len();

// Calculate maximum number of nodes, scopes, symbols, references that's possible
// from length of source code.
// These will almost always be a large over-estimate, but will never be an under-estimate,
// which is the most important thing, as the `Vec`s in `AstNodes`, `ScopeTree` and `SymbolTable`
// will not need to resize during building `Semantic`, which avoids expensive memory copying.

// The most node-intensive code is:
// `` = 0 bytes, 1 nodes
// `x` = 1 bytes, 3 nodes
// `x=x` = 3 bytes, 7 nodes
// `x=x=x` = 5 bytes, 11 nodes
let nodes = source_len * 2 + 1;

// The most scope-intensive code is:
// `` = 0 bytes, 1 scopes
// `{}` = 2 bytes, 2 scopes
// `{{}}` = 4 bytes, 3 scopes
// `{{{}}}` = 6 bytes, 4 scopes
let scopes = source_len / 2 + 1;

// The most symbol-intensive code is:
// `` = 0 bytes, 0 symbols
// `a=>0` = 4 bytes, 1 symbols
// `(a,a)=>0` = 8 bytes, 2 symbols
// `var a` = 5 bytes, 1 symbols
// `var a,a` = 7 bytes, 2 symbols
let symbols = max(source_len / 2, 1) - 1;

// The most reference-intensive code is:
// `a` = 1 bytes, 1 references
// `a,a` = 3 bytes, 2 references
// `a,a,a` = 5 bytes, 3 references
let references = source_len / 2 + 1;

Self { nodes, scopes, symbols, references }
}

/// Calculate counts by visiting AST
#[expect(dead_code)]
pub fn count_by_visit(program: &Program, _source_text: &str) -> Self {
let mut counts = Self::default();
counts.visit_program(program);
counts
}

/// Assert that estimated counts were not an under-estimate
pub fn assert_accurate(actual: &Self, estimated: &Self) {
assert_le!(actual.nodes, estimated.nodes, "nodes count mismatch");
assert_le!(actual.scopes, estimated.scopes, "scopes count mismatch");
assert_le!(actual.references, estimated.references, "references count mismatch");
assert_le!(actual.symbols, estimated.symbols, "symbols count mismatch");
}

/// Assert that estimated counts were accurate
#[expect(dead_code)]
pub fn assert_accurate_by_visit(actual: &Self, estimated: &Self) {
assert_eq!(actual.nodes, estimated.nodes, "nodes count mismatch");
assert_eq!(actual.scopes, estimated.scopes, "scopes count mismatch");
assert_eq!(actual.references, estimated.references, "references count mismatch");
Expand Down
5 changes: 5 additions & 0 deletions crates/oxc_semantic/src/node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,11 @@ impl<'a> AstNodes<'a> {
self.nodes.reserve(additional);
self.parent_ids.reserve(additional);
}

pub fn shrink_to(&mut self, capacity: usize) {
self.nodes.shrink_to(capacity);
self.parent_ids.shrink_to(capacity);
}
}

#[derive(Debug, Clone)]
Expand Down
7 changes: 5 additions & 2 deletions crates/oxc_semantic/src/post_transform_checker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,11 +108,14 @@ use crate::{ScopeTree, SemanticBuilder, SymbolTable};

type FxIndexMap<K, V> = IndexMap<K, V, BuildHasherDefault<FxHasher>>;

/// Check `ScopeTree` and `SymbolTable` are correct after transform
/// Check `ScopeTree` and `SymbolTable` are correct after transform.
///
/// `code_after_transform` must be the *post-transform* AST printed to string.
pub fn check_semantic_after_transform(
symbols_after_transform: &SymbolTable,
scopes_after_transform: &ScopeTree,
program: &Program<'_>,
code_after_transform: &str,
) -> Option<Vec<OxcDiagnostic>> {
let mut errors = Errors::default();

Expand All @@ -128,7 +131,7 @@ pub fn check_semantic_after_transform(
// so the cloned AST will be "clean" of all semantic data, as if it had come fresh from the parser.
let allocator = Allocator::default();
let program = program.clone_in(&allocator);
let (symbols_rebuilt, scopes_rebuilt) = SemanticBuilder::new("")
let (symbols_rebuilt, scopes_rebuilt) = SemanticBuilder::new(code_after_transform)
.with_scope_tree_child_ids(scopes_after_transform.has_child_ids())
.build(&program)
.semantic
Expand Down
11 changes: 11 additions & 0 deletions crates/oxc_semantic/src/scope.rs
Original file line number Diff line number Diff line change
Expand Up @@ -309,4 +309,15 @@ impl ScopeTree {
self.child_ids.reserve(additional);
}
}

/// Shrink allocations to a certain number of scopes.
pub fn shrink_to(&mut self, capacity: usize) {
self.parent_ids.shrink_to(capacity);
self.flags.shrink_to(capacity);
self.bindings.shrink_to(capacity);
self.node_ids.shrink_to(capacity);
if self.build_child_ids {
self.child_ids.shrink_to(capacity);
}
}
}
12 changes: 12 additions & 0 deletions crates/oxc_semantic/src/symbol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,18 @@ impl SymbolTable {

self.references.reserve(additional_references);
}

pub fn shrink_to(&mut self, capacity_symbols: usize, capacity_references: usize) {
self.spans.shrink_to(capacity_symbols);
self.names.shrink_to(capacity_symbols);
self.flags.shrink_to(capacity_symbols);
self.scope_ids.shrink_to(capacity_symbols);
self.declarations.shrink_to(capacity_symbols);
self.resolved_references.shrink_to(capacity_symbols);
self.redeclarations.shrink_to(capacity_symbols);

self.references.shrink_to(capacity_references);
}
}

/// Checks whether the a identifier reference is a global value or not.
Expand Down
2 changes: 1 addition & 1 deletion crates/oxc_wasm/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ impl Oxc {
CompressOptions::all_false()
},
};
Minifier::new(options).build(&allocator, &mut program).mangler
Minifier::new(options).build(&allocator, &mut program, source_text).mangler
} else {
None
};
Expand Down
2 changes: 1 addition & 1 deletion tasks/benchmark/benches/minifier.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ fn bench_minifier(criterion: &mut Criterion) {
let allocator = Allocator::default();
let program = Parser::new(&allocator, source_text, source_type).parse().program;
let program = allocator.alloc(program);
Compressor::new(&allocator, options).build(program);
Compressor::new(&allocator, options).build(program, source_text);
allocator
});
},
Expand Down
8 changes: 7 additions & 1 deletion tasks/coverage/src/driver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use oxc::{
ast::{Program, RegExpFlags},
Trivias,
},
codegen::CodegenOptions,
codegen::{CodeGenerator, CodegenOptions},
diagnostics::OxcDiagnostic,
minifier::CompressOptions,
parser::{ParseOptions, ParserReturn},
Expand Down Expand Up @@ -101,10 +101,16 @@ impl CompilerInterface for Driver {
transformer_return: &mut TransformerReturn,
) -> ControlFlow<()> {
if self.check_semantic {
let code = CodeGenerator::new()
.with_options(CodegenOptions { minify: true, ..CodegenOptions::default() })
.build(program)
.source_text;

if let Some(errors) = check_semantic_after_transform(
&transformer_return.symbols,
&transformer_return.scopes,
program,
&code,
) {
self.errors.extend(errors);
return ControlFlow::Break(());
Expand Down
2 changes: 1 addition & 1 deletion tasks/minsize/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ fn minify(source_text: &str, source_type: SourceType, options: MinifierOptions)
let allocator = Allocator::default();
let ret = Parser::new(&allocator, source_text, source_type).parse();
let program = allocator.alloc(ret.program);
let ret = Minifier::new(options).build(&allocator, program);
let ret = Minifier::new(options).build(&allocator, program, source_text);
CodeGenerator::new()
.with_options(CodegenOptions { minify: true, ..CodegenOptions::default() })
.with_mangler(ret.mangler)
Expand Down
7 changes: 7 additions & 0 deletions tasks/transform_conformance/src/driver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use std::{mem, ops::ControlFlow, path::Path};

use oxc::{
ast::ast::Program,
codegen::{CodeGenerator, CodegenOptions},
diagnostics::OxcDiagnostic,
semantic::post_transform_checker::check_semantic_after_transform,
span::SourceType,
Expand Down Expand Up @@ -43,10 +44,16 @@ impl CompilerInterface for Driver {
transformer_return: &mut TransformerReturn,
) -> ControlFlow<()> {
if self.check_semantic {
let code = CodeGenerator::new()
.with_options(CodegenOptions { minify: true, ..CodegenOptions::default() })
.build(program)
.source_text;

if let Some(errors) = check_semantic_after_transform(
&transformer_return.symbols,
&transformer_return.scopes,
program,
&code,
) {
self.errors.extend(errors);
}
Expand Down

0 comments on commit 19ab835

Please sign in to comment.