perf(semantic): reduce storage size for symbol redeclarations (#4463)

Most symbols don't have redeclarations.

So instead of storing `Vec<Span>` directly in `redeclare_variables` (24 bytes per symbol), store `Option<RedeclarationId>` (4 bytes).

`RedeclarationId` indexes into `redeclaration_spans`, where the actual `Vec<Span>` is stored. Symbols with no redeclarations (the vast majority) therefore cost only 4 bytes each.
This commit is contained in:
overlookmotel 2024-07-26 00:14:35 +00:00
parent f17254ae2a
commit 6a9f4db609
5 changed files with 46 additions and 12 deletions

View file

@ -66,7 +66,7 @@ impl Rule for NoRedeclare {
AstKind::VariableDeclarator(var) => {
if let BindingPatternKind::BindingIdentifier(ident) = &var.id.kind {
if symbol_name == ident.name.as_str() {
for span in ctx.symbols().get_redeclare_variables(symbol_id) {
for span in ctx.symbols().get_redeclarations(symbol_id) {
self.report_diagnostic(ctx, *span, ident);
}
}
@ -75,7 +75,7 @@ impl Rule for NoRedeclare {
AstKind::FormalParameter(param) => {
if let BindingPatternKind::BindingIdentifier(ident) = &param.pattern.kind {
if symbol_name == ident.name.as_str() {
for span in ctx.symbols().get_redeclare_variables(symbol_id) {
for span in ctx.symbols().get_redeclarations(symbol_id) {
self.report_diagnostic(ctx, *span, ident);
}
}

View file

@ -64,7 +64,7 @@ impl Rule for NoShadowRestrictedNames {
}
check_and_diagnostic(name, ctx.symbols().get_span(symbol_id), ctx);
for span in ctx.symbols().get_redeclare_variables(symbol_id) {
for span in ctx.symbols().get_redeclarations(symbol_id) {
check_and_diagnostic(name, *span, ctx);
}
});

View file

@ -465,7 +465,7 @@ impl<'a> SemanticBuilder<'a> {
}
pub fn add_redeclare_variable(&mut self, symbol_id: SymbolId, span: Span) {
self.symbols.add_redeclare_variable(symbol_id, span);
self.symbols.add_redeclaration(symbol_id, span);
}
fn add_export_flag_to_export_identifiers(&mut self, program: &Program<'a>) {

View file

@ -5,7 +5,7 @@ use oxc_index::IndexVec;
use oxc_span::{CompactStr, Span};
pub use oxc_syntax::{
scope::ScopeId,
symbol::{SymbolFlags, SymbolId},
symbol::{RedeclarationId, SymbolFlags, SymbolId},
};
#[cfg(feature = "serialize")]
use serde::Serialize;
@ -26,6 +26,10 @@ export type IndexVec<I, T> = Array<T>;
/// Symbol Table
///
/// `SoA` (Struct of Arrays) for memory efficiency.
///
/// Most symbols won't have redeclarations, so instead of storing a `Vec<Span>` per symbol
/// (24 bytes on 64-bit targets), `redeclarations` stores an `Option<RedeclarationId>`
/// (4 bytes). That ID indexes into `redeclaration_spans`, where the actual `Vec<Span>` lives.
#[derive(Debug, Default)]
#[cfg_attr(feature = "serialize", derive(Serialize, Tsify), serde(rename_all = "camelCase"))]
pub struct SymbolTable {
@ -36,7 +40,9 @@ pub struct SymbolTable {
/// Pointer to the AST Node where this symbol is declared
pub declarations: IndexVec<SymbolId, AstNodeId>,
pub resolved_references: IndexVec<SymbolId, Vec<ReferenceId>>,
pub redeclare_variables: IndexVec<SymbolId, Vec<Span>>,
redeclarations: IndexVec<SymbolId, Option<RedeclarationId>>,
redeclaration_spans: IndexVec<RedeclarationId, Vec<Span>>,
pub references: IndexVec<ReferenceId, Reference>,
}
@ -90,8 +96,13 @@ impl SymbolTable {
self.flags[symbol_id]
}
pub fn get_redeclare_variables(&self, symbol_id: SymbolId) -> &Vec<Span> {
&self.redeclare_variables[symbol_id]
/// Returns the spans of every redeclaration recorded for `symbol_id`.
///
/// A symbol with no entry in `redeclarations` yields an empty slice.
pub fn get_redeclarations(&self, symbol_id: SymbolId) -> &[Span] {
    match self.redeclarations[symbol_id] {
        // The symbol has been redeclared at least once: hand out its span list.
        Some(spans_id) => &self.redeclaration_spans[spans_id],
        // Never redeclared: nothing was allocated for it.
        None => &[],
    }
}
pub fn union_flag(&mut self, symbol_id: SymbolId, includes: SymbolFlags) {
@ -128,11 +139,16 @@ impl SymbolTable {
self.scope_ids.push(scope_id);
self.declarations.push(node_id);
self.resolved_references.push(vec![]);
self.redeclare_variables.push(vec![])
self.redeclarations.push(None)
}
pub fn add_redeclare_variable(&mut self, symbol_id: SymbolId, span: Span) {
self.redeclare_variables[symbol_id].push(span);
/// Records `span` as a redeclaration of `symbol_id`.
///
/// The first redeclaration of a symbol pushes a new span list onto
/// `redeclaration_spans` and stores its ID in `redeclarations`; later ones are
/// appended to that existing list.
pub fn add_redeclaration(&mut self, symbol_id: SymbolId, span: Span) {
    match self.redeclarations[symbol_id] {
        Some(existing_id) => self.redeclaration_spans[existing_id].push(span),
        None => {
            let new_id = self.redeclaration_spans.push(vec![span]);
            self.redeclarations[symbol_id] = Some(new_id);
        }
    }
}
pub fn create_reference(&mut self, reference: Reference) -> ReferenceId {
@ -200,7 +216,7 @@ impl SymbolTable {
self.scope_ids.reserve(additional_symbols);
self.declarations.reserve(additional_symbols);
self.resolved_references.reserve(additional_symbols);
self.redeclare_variables.reserve(additional_symbols);
self.redeclarations.reserve(additional_symbols);
self.references.reserve(additional_references);
}

View file

@ -23,11 +23,29 @@ impl Idx for SymbolId {
}
}
/// Typed index identifying one list of redeclaration spans.
///
/// Wraps a `NonZeroU32`; the `Idx` implementation stores the zero-based index
/// as `index + 1`, so the wrapped value is never zero.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serialize", derive(Serialize))]
pub struct RedeclarationId(NonZeroU32);
impl Idx for RedeclarationId {
    /// Converts a zero-based index into a `RedeclarationId`.
    ///
    /// The index is stored as `idx + 1` so that the wrapped value is never zero.
    ///
    /// # Panics
    ///
    /// Panics if `idx + 1` does not fit in a `u32`.
    // The cast cannot truncate: the range is checked by the assert below.
    #[allow(clippy::cast_possible_truncation)]
    fn from_usize(idx: usize) -> Self {
        // Without this check, `idx as u32 + 1` wraps to 0 in release builds when
        // `idx >= u32::MAX`, which would make `new_unchecked` undefined behaviour.
        assert!(idx < u32::MAX as usize, "RedeclarationId index out of range");
        // SAFETY: `idx < u32::MAX` was asserted above, so `idx as u32 + 1` lies in
        // `1..=u32::MAX` and is therefore non-zero.
        unsafe { Self(NonZeroU32::new_unchecked(idx as u32 + 1)) }
    }

    /// Returns the zero-based index this ID was created from.
    fn index(self) -> usize {
        // Undo the `+ 1` offset applied in `from_usize`.
        self.0.get() as usize - 1
    }
}
// TypeScript declarations emitted as a wasm-bindgen custom section.
// Only compiled when the `serialize` feature is enabled.
// Declares TypeScript aliases for the ID and flag types defined in this module.
#[cfg(feature = "serialize")]
#[wasm_bindgen::prelude::wasm_bindgen(typescript_custom_section)]
const TS_APPEND_CONTENT: &'static str = r#"
export type SymbolId = number;
export type SymbolFlags = unknown;
export type RedeclarationId = unknown;
"#;
bitflags! {