perf(semantic): calculate number of nodes, scopes, symbols, references before visiting AST (#4367)

context: #4328
This commit is contained in:
Dunqing 2024-07-22 10:47:54 +00:00
parent 910f2a0a0a
commit 40f93564ac
6 changed files with 147 additions and 0 deletions

View file

@ -20,6 +20,7 @@ use crate::{
binder::Binder,
checker,
class::ClassTableBuilder,
counter::Counter,
diagnostics::redeclaration,
jsdoc::JSDocBuilder,
label::LabelBuilder,
@ -178,8 +179,38 @@ impl<'a> SemanticBuilder<'a> {
let scope_id = self.scope.add_root_scope(AstNodeId::DUMMY, ScopeFlags::Top);
program.scope_id.set(Some(scope_id));
} else {
// Count the number of nodes, scopes, symbols, and references.
// Use these counts to reserve sufficient capacity in `AstNodes`, `ScopeTree`
// and `SymbolTable` to store them.
// This means that as we traverse the AST and fill up these structures with data,
// they never need to grow and reallocate - which is an expensive operation as it
// involves copying all the memory from the old allocation to the new one.
// For large source files, these structures are very large, so growth is very costly
// as it involves copying massive chunks of memory.
// Avoiding this growth produces up to 30% perf boost on our benchmarks.
// TODO: It would be even more efficient to calculate counts in parser to avoid
// this extra AST traversal.
let mut counter = Counter::default();
counter.visit_program(program);
self.nodes.reserve(counter.nodes_count);
self.scope.reserve(counter.scopes_count);
self.symbols.reserve(counter.symbols_count, counter.references_count);
// Visit AST to generate scopes tree etc
self.visit_program(program);
// Check that `Counter` got accurate counts
debug_assert_eq!(self.nodes.len(), counter.nodes_count);
debug_assert_eq!(self.scope.len(), counter.scopes_count);
debug_assert_eq!(self.symbols.references.len(), counter.references_count);
// `Counter` may overestimate number of symbols, because multiple `BindingIdentifier`s
// can result in only a single symbol.
// e.g. `var x; var x;` = 2 x `BindingIdentifier` but 1 x symbol.
// This is not a big problem - allocating a `Vec` with excess capacity is cheap.
// It's allocating with *not enough* capacity which is costly, as then the `Vec`
// will grow and reallocate.
debug_assert!(self.symbols.len() <= counter.symbols_count);
// Checking syntax error on module record requires scope information from the previous AST pass
if self.check_syntax_error {
checker::check_module_record(&self);

View file

@ -0,0 +1,90 @@
//! Visitor to count nodes, scopes, symbols and references in AST.
//! These counts can be used to pre-allocate sufficient capacity in `AstNodes`,
//! `ScopeTree`, and `SymbolTable` to store info for all these items.
use std::cell::Cell;
use oxc_ast::{
ast::{
BindingIdentifier, IdentifierReference, JSXElementName, JSXMemberExpressionObject,
TSEnumMemberName, TSModuleDeclarationName,
},
visit::walk::{walk_ts_enum_member_name, walk_ts_module_declaration_name},
AstKind, Visit,
};
use oxc_syntax::scope::{ScopeFlags, ScopeId};
/// Running totals collected by a counting pass over the AST.
///
/// The totals start at zero (via `Default`) and are only ever incremented by
/// the `Visit` implementation for this type.
// All four fields deliberately share the `_count` suffix, which is the naming
// pattern this lint reports.
#[allow(clippy::struct_field_names)]
#[derive(Default, Debug)]
pub struct Counter {
/// Number of AST nodes seen: incremented in `enter_node` and in each of the
/// overridden `visit_*` methods that count a node.
pub nodes_count: usize,
/// Number of scopes seen: incremented once per `enter_scope` call.
pub scopes_count: usize,
/// Number of potential symbols: incremented for every `BindingIdentifier`,
/// every `TSModuleDeclarationName`, and every `TSEnumMemberName` for which
/// `is_expression()` is false. Consumers should treat this as an upper bound
/// rather than an exact figure.
pub symbols_count: usize,
/// Number of references: incremented for every `IdentifierReference`, every
/// identifier used as a JSX member-expression object, and every JSX element
/// name identifier whose first character is uppercase.
pub references_count: usize,
}
/// Counting pass: walks the AST and only bumps the totals stored in `Counter`.
///
/// Methods that are overridden without calling a `walk_*` function do not
/// descend any further, so they add their own node to `nodes_count` directly.
impl<'a> Visit<'a> for Counter {
    /// Adds one node for every `enter_node` callback.
    #[inline]
    fn enter_node(&mut self, _kind: AstKind<'a>) {
        self.nodes_count += 1;
    }

    /// Adds one scope for every `enter_scope` callback.
    #[inline]
    fn enter_scope(&mut self, _flags: ScopeFlags, _scope_id: &Cell<Option<ScopeId>>) {
        self.scopes_count += 1;
    }

    /// A binding identifier contributes one node and one (potential) symbol.
    #[inline]
    fn visit_binding_identifier(&mut self, _ident: &BindingIdentifier<'a>) {
        self.symbols_count += 1;
        self.nodes_count += 1;
    }

    /// An identifier reference contributes one node and one reference.
    #[inline]
    fn visit_identifier_reference(&mut self, _ident: &IdentifierReference<'a>) {
        self.references_count += 1;
        self.nodes_count += 1;
    }

    /// Counts the object part of a JSX member expression.
    ///
    /// Either way one node is added for the object itself. An identifier adds
    /// a second node plus a reference; a nested member expression is visited
    /// recursively instead.
    #[inline]
    fn visit_jsx_member_expression_object(&mut self, it: &JSXMemberExpressionObject<'a>) {
        match it {
            JSXMemberExpressionObject::Identifier(_) => {
                // Two nodes in total for this case, and the identifier is a reference.
                self.nodes_count += 2;
                self.references_count += 1;
            }
            JSXMemberExpressionObject::MemberExpression(expr) => {
                self.nodes_count += 1;
                self.visit_jsx_member_expression(expr);
            }
        }
    }

    /// Counts a JSX element name.
    ///
    /// Every variant adds one node for the name itself. An identifier adds a
    /// second node, and additionally a reference when its first character is
    /// uppercase. Namespaced names and member expressions are visited through
    /// their own `visit_*` methods.
    #[inline]
    fn visit_jsx_element_name(&mut self, it: &JSXElementName<'a>) {
        match it {
            JSXElementName::MemberExpression(expr) => {
                self.nodes_count += 1;
                self.visit_jsx_member_expression(expr);
            }
            JSXElementName::NamespacedName(name) => {
                self.nodes_count += 1;
                self.visit_jsx_namespaced_name(name);
            }
            JSXElementName::Identifier(ident) => {
                self.nodes_count += 2;
                // Only names starting with an uppercase character are counted as references.
                let starts_uppercase =
                    matches!(ident.name.chars().next(), Some(first) if first.is_uppercase());
                if starts_uppercase {
                    self.references_count += 1;
                }
            }
        }
    }

    /// Adds a symbol unless the member name is an expression, then walks the name.
    #[inline]
    fn visit_ts_enum_member_name(&mut self, it: &TSEnumMemberName<'a>) {
        self.symbols_count += usize::from(!it.is_expression());
        walk_ts_enum_member_name(self, it);
    }

    /// Adds a symbol for the module declaration name, then walks the name.
    #[inline]
    fn visit_ts_module_declaration_name(&mut self, it: &TSModuleDeclarationName<'a>) {
        self.symbols_count += 1;
        walk_ts_module_declaration_name(self, it);
    }
}

View file

@ -2,6 +2,7 @@ mod binder;
mod builder;
mod checker;
mod class;
mod counter;
mod diagnostics;
mod jsdoc;
mod label;

View file

@ -171,6 +171,11 @@ impl<'a> AstNodes<'a> {
self.nodes.push(node);
ast_node_id
}
/// Reserves room for `additional` more nodes before they are added.
///
/// Forwards `additional` to `reserve` on both `nodes` and `parent_ids`, so
/// that the two collections are sized together.
// NOTE(review): assumes the fields' `reserve` follows `Vec::reserve` semantics
// (capacity for at least `additional` further elements) - confirm against
// the field types.
pub fn reserve(&mut self, additional: usize) {
self.nodes.reserve(additional);
self.parent_ids.reserve(additional);
}
}
#[derive(Debug)]

View file

@ -236,4 +236,12 @@ impl ScopeTree {
pub fn remove_binding(&mut self, scope_id: ScopeId, name: &CompactStr) {
self.bindings[scope_id].shift_remove(name);
}
/// Reserves room for `additional` more scopes before they are added.
///
/// Forwards `additional` to `reserve` on every per-scope collection touched
/// here: `parent_ids`, `child_ids`, `flags`, `bindings` and `node_ids`.
// NOTE(review): assumes each field's `reserve` follows `Vec::reserve`-style
// semantics (capacity for at least `additional` further entries) - confirm
// against the field types.
pub fn reserve(&mut self, additional: usize) {
self.parent_ids.reserve(additional);
self.child_ids.reserve(additional);
self.flags.reserve(additional);
self.bindings.reserve(additional);
self.node_ids.reserve(additional);
}
}

View file

@ -193,4 +193,16 @@ impl SymbolTable {
_ => false,
}
}
/// Reserves room for more symbols and references before they are added.
///
/// * `additional_symbols` is forwarded to `reserve` on the collections sized
///   by symbol count: `spans`, `names`, `flags`, `scope_ids`, `declarations`,
///   `resolved_references` and `redeclare_variables`.
/// * `additional_references` is forwarded to `reserve` on `references` only.
// NOTE(review): assumes each field's `reserve` follows `Vec::reserve`-style
// semantics (capacity for at least that many further entries) - confirm
// against the field types.
pub fn reserve(&mut self, additional_symbols: usize, additional_references: usize) {
self.spans.reserve(additional_symbols);
self.names.reserve(additional_symbols);
self.flags.reserve(additional_symbols);
self.scope_ids.reserve(additional_symbols);
self.declarations.reserve(additional_symbols);
self.resolved_references.reserve(additional_symbols);
self.redeclare_variables.reserve(additional_symbols);
self.references.reserve(additional_references);
}
}