perf(semantic): remove bounds checks on unresolved references stack (#4390)

`SemanticBuilder` keeps a stack of hashmaps to track unresolved references. This stack only grows, never shrinks. Use that invariant to remove bounds checks.
This commit is contained in:
overlookmotel 2024-07-21 15:49:51 +00:00
parent f7da22da18
commit da13d9338f
5 changed files with 119 additions and 38 deletions

1
Cargo.lock generated
View file

@ -1712,6 +1712,7 @@ dependencies = [
name = "oxc_semantic"
version = "0.21.0"
dependencies = [
"assert-unchecked",
"indexmap",
"insta",
"itertools 0.13.0",

View file

@ -27,11 +27,12 @@ oxc_diagnostics = { workspace = true }
oxc_index = { workspace = true }
oxc_allocator = { workspace = true }
indexmap = { workspace = true }
phf = { workspace = true, features = ["macros"] }
rustc-hash = { workspace = true }
serde = { workspace = true, features = ["derive"], optional = true }
itertools = { workspace = true }
assert-unchecked = { workspace = true }
indexmap = { workspace = true }
phf = { workspace = true, features = ["macros"] }
rustc-hash = { workspace = true }
serde = { workspace = true, features = ["derive"], optional = true }
itertools = { workspace = true }
tsify = { workspace = true, optional = true }
wasm-bindgen = { workspace = true, optional = true }

View file

@ -26,8 +26,9 @@ use crate::{
module_record::ModuleRecordBuilder,
node::{AstNodeId, AstNodes, NodeFlags},
reference::{Reference, ReferenceFlag, ReferenceId},
scope::{ScopeFlags, ScopeId, ScopeTree, UnresolvedReferences},
scope::{ScopeFlags, ScopeId, ScopeTree},
symbol::{SymbolFlags, SymbolId, SymbolTable},
unresolved_stack::UnresolvedReferencesStack,
JSDocFinder, Semantic,
};
@ -56,9 +57,6 @@ pub struct SemanticBuilder<'a> {
pub current_node_flags: NodeFlags,
pub current_symbol_flags: SymbolFlags,
pub current_scope_id: ScopeId,
/// Current scope depth.
/// 0 is global scope. 1 is `Program`. Incremented on entering a scope, and decremented on exit.
pub current_scope_depth: usize,
/// Stores current `AstKind::Function` and `AstKind::ArrowFunctionExpression` during AST visit
pub function_stack: Vec<AstNodeId>,
// To make a namespace/module value like
@ -73,11 +71,7 @@ pub struct SemanticBuilder<'a> {
pub scope: ScopeTree,
pub symbols: SymbolTable,
// Stack used to accumulate unresolved refs while traversing scopes.
// Indexed by scope depth. We recycle `UnresolvedReferences` instances during traversal
// to reduce allocations, so the stack grows to maximum scope depth, but never shrinks.
// See: <https://github.com/oxc-project/oxc/issues/4169>
unresolved_references: Vec<UnresolvedReferences>,
unresolved_references: UnresolvedReferencesStack,
pub(crate) module_record: Arc<ModuleRecord>,
@ -105,13 +99,6 @@ impl<'a> SemanticBuilder<'a> {
let scope = ScopeTree::default();
let current_scope_id = scope.root_scope_id();
// Most programs will have at least 1 place where scope depth reaches 16,
// so initialize `unresolved_references` with this length, to reduce reallocations as it grows.
// This is just an estimate of a good initial size, but certainly better than
// `Vec`'s default initial capacity of 4.
let mut unresolved_references = vec![];
unresolved_references.resize_with(16, Default::default);
let trivias = Trivias::default();
Self {
source_text,
@ -123,13 +110,12 @@ impl<'a> SemanticBuilder<'a> {
current_symbol_flags: SymbolFlags::empty(),
current_reference_flag: ReferenceFlag::empty(),
current_scope_id,
current_scope_depth: 1, // Start with depth for `Program`
function_stack: vec![],
namespace_stack: vec![],
nodes: AstNodes::default(),
scope,
symbols: SymbolTable::default(),
unresolved_references,
unresolved_references: UnresolvedReferencesStack::new(),
module_record: Arc::new(ModuleRecord::default()),
label_builder: LabelBuilder::default(),
build_jsdoc: false,
@ -200,9 +186,8 @@ impl<'a> SemanticBuilder<'a> {
}
}
debug_assert_eq!(self.current_scope_depth, 1);
self.scope.root_unresolved_references =
self.unresolved_references.into_iter().next().unwrap();
debug_assert_eq!(self.unresolved_references.scope_depth(), 1);
self.scope.root_unresolved_references = self.unresolved_references.into_root();
let jsdoc = if self.build_jsdoc { self.jsdoc.build() } else { JSDocFinder::default() };
@ -356,7 +341,8 @@ impl<'a> SemanticBuilder<'a> {
let reference_flag = *reference.flag();
let reference_id = self.symbols.create_reference(reference);
self.unresolved_references[self.current_scope_depth]
self.unresolved_references
.current_mut()
.entry(reference_name)
.or_default()
.push((reference_id, reference_flag));
@ -381,10 +367,7 @@ impl<'a> SemanticBuilder<'a> {
}
fn resolve_references_for_current_scope(&mut self) {
// `iter_mut` to get mut references to 2 entries of `unresolved_references` simultaneously
let mut iter = self.unresolved_references.iter_mut();
let parent_refs = iter.nth(self.current_scope_depth - 1).unwrap();
let current_refs = iter.next().unwrap();
let (current_refs, parent_refs) = self.unresolved_references.current_and_parent_mut();
for (name, mut references) in current_refs.drain() {
// Try to resolve a reference.
@ -481,12 +464,7 @@ impl<'a> Visit<'a> for SemanticBuilder<'a> {
self.scope.get_bindings_mut(self.current_scope_id).extend(bindings);
}
// Increment scope depth, and ensure stack is large enough that
// `self.unresolved_references[self.current_scope_depth]` is initialized
self.current_scope_depth += 1;
if self.unresolved_references.len() <= self.current_scope_depth {
self.unresolved_references.push(UnresolvedReferences::default());
}
self.unresolved_references.increment_scope_depth();
}
// NB: Not called for `Program`
@ -502,7 +480,7 @@ impl<'a> Visit<'a> for SemanticBuilder<'a> {
self.current_scope_id = parent_id;
}
self.current_scope_depth -= 1;
self.unresolved_references.decrement_scope_depth();
}
// Setup all the context for the binder.
@ -554,6 +532,8 @@ impl<'a> Visit<'a> for SemanticBuilder<'a> {
}
self.current_scope_id = self.scope.add_root_scope(self.current_node_id, flags);
program.scope_id.set(Some(self.current_scope_id));
// NB: Don't call `self.unresolved_references.increment_scope_depth()`
// as scope depth is initialized as 1 already (the scope depth for `Program`).
if let Some(hashbang) = &program.hashbang {
self.visit_hashbang(hashbang);
@ -572,6 +552,8 @@ impl<'a> Visit<'a> for SemanticBuilder<'a> {
// Don't call `leave_scope` here as `Program` is a special case - scope has no `parent_id`.
// This simplifies `leave_scope`.
self.resolve_references_for_current_scope();
// NB: Don't call `self.unresolved_references.decrement_scope_depth()`
// as scope depth must remain >= 1.
self.leave_node(kind);
}

View file

@ -10,6 +10,7 @@ mod node;
mod reference;
mod scope;
mod symbol;
mod unresolved_stack;
pub mod dot;

View file

@ -0,0 +1,96 @@
use assert_unchecked::assert_unchecked;
use crate::scope::UnresolvedReferences;
// Stack used to accumulate unresolved refs while traversing scopes.
// Indexed by scope depth. We recycle `UnresolvedReferences` instances during traversal
// to reduce allocations, so the stack grows to maximum scope depth, but never shrinks.
// See: <https://github.com/oxc-project/oxc/issues/4169>
// This stack abstraction uses the invariant that stack only grows to avoid bounds checks.
pub(crate) struct UnresolvedReferencesStack {
stack: Vec<UnresolvedReferences>,
/// Current scope depth.
/// 0 is global scope. 1 is `Program`.
/// Incremented on entering a scope, and decremented on exit.
current_scope_depth: usize,
}
impl UnresolvedReferencesStack {
// Most programs will have at least 1 place where scope depth reaches 16,
// so initialize `stack` with this length, to reduce reallocations as it grows.
// This is just an estimate of a good initial size, but certainly better than
// `Vec`'s default initial capacity of 4.
// SAFETY: Must be >= 2 to ensure soundness of `current_and_parent_mut`.
const INITIAL_SIZE: usize = 16;
// Initial scope depth.
// Start on 1 (`Program` scope depth).
// SAFETY: Must be >= 1 to ensure soundness of `current_and_parent_mut`.
const INITIAL_DEPTH: usize = 1;
#[allow(clippy::assertions_on_constants)]
const _SIZE_CHECK: () = assert!(Self::INITIAL_SIZE > Self::INITIAL_DEPTH);
pub(crate) fn new() -> Self {
let mut stack = vec![];
stack.resize_with(Self::INITIAL_SIZE, UnresolvedReferences::default);
Self { stack, current_scope_depth: Self::INITIAL_DEPTH }
}
pub(crate) fn increment_scope_depth(&mut self) {
self.current_scope_depth += 1;
// Grow stack if required to ensure `self.stack[self.depth]` is in bounds
if self.stack.len() <= self.current_scope_depth {
self.stack.push(UnresolvedReferences::default());
}
}
pub(crate) fn decrement_scope_depth(&mut self) {
self.current_scope_depth -= 1;
// This assertion is required to ensure depth does not go below 1.
// If it did, would cause UB in `current_and_parent_mut`, which relies on
// `current_scope_depth - 1` always being a valid index into `self.stack`.
assert!(self.current_scope_depth > 0);
}
pub(crate) fn scope_depth(&self) -> usize {
self.current_scope_depth
}
/// Get unresolved references hash map for current scope
pub(crate) fn current_mut(&mut self) -> &mut UnresolvedReferences {
// SAFETY: `stack.len() > current_scope_depth` initially.
// Thereafter, `stack` never shrinks, only grows.
// `current_scope_depth` is only increased in `increment_scope_depth`,
// and it grows `stack` to ensure `stack.len()` always exceeds `current_scope_depth`.
// So this read is always guaranteed to be in bounds.
unsafe { self.stack.get_unchecked_mut(self.current_scope_depth) }
}
/// Get unresolved references hash maps for current scope, and parent scope
pub(crate) fn current_and_parent_mut(
&mut self,
) -> (&mut UnresolvedReferences, &mut UnresolvedReferences) {
// Assert invariants to remove bounds checks in code below.
// https://godbolt.org/z/vv5Wo5csv
// SAFETY: `current_scope_depth` starts at 1, and is only decremented
// in `decrement_scope_depth` which checks it doesn't go below 1.
unsafe { assert_unchecked!(self.current_scope_depth > 0) };
// SAFETY: `stack.len() > current_scope_depth` initially.
// Thereafter, `stack` never shrinks, only grows.
// `current_scope_depth` is only increased in `increment_scope_depth`,
// and it grows `stack` to ensure `stack.len()` always exceeds `current_scope_depth`.
unsafe { assert_unchecked!(self.stack.len() > self.current_scope_depth) };
let mut iter = self.stack.iter_mut();
let parent = iter.nth(self.current_scope_depth - 1).unwrap();
let current = iter.next().unwrap();
(current, parent)
}
pub(crate) fn into_root(self) -> UnresolvedReferences {
// SAFETY: Stack starts with a non-zero size and never shrinks.
// This assertion removes bounds check in `.next()`.
unsafe { assert_unchecked!(!self.stack.is_empty()) };
self.stack.into_iter().next().unwrap()
}
}