diff --git a/crates/oxc_semantic/examples/simple.rs b/crates/oxc_semantic/examples/simple.rs index 6f6271c36..eaed27b59 100644 --- a/crates/oxc_semantic/examples/simple.rs +++ b/crates/oxc_semantic/examples/simple.rs @@ -16,10 +16,12 @@ fn main() -> std::io::Result<()> { let name = env::args().nth(1).unwrap_or_else(|| "test.js".to_string()); let path = Path::new(&name); let source_text = Arc::new(std::fs::read_to_string(path)?); - let allocator = Allocator::default(); let source_type = SourceType::from_path(path).unwrap(); - let parser_ret = Parser::new(&allocator, &source_text, source_type).parse(); + // Memory arena where Semantic and Parser allocate objects + let allocator = Allocator::default(); + // Parse the source text into an AST + let parser_ret = Parser::new(&allocator, &source_text, source_type).parse(); if !parser_ret.errors.is_empty() { let error_message: String = parser_ret .errors @@ -34,7 +36,9 @@ fn main() -> std::io::Result<()> { let program = allocator.alloc(parser_ret.program); let semantic = SemanticBuilder::new(&source_text, source_type) + // Enable additional syntax checks not performed by the parser .with_check_syntax_error(true) + // Inform Semantic about comments found while parsing .with_trivias(parser_ret.trivias) .build(program); diff --git a/crates/oxc_semantic/src/builder.rs b/crates/oxc_semantic/src/builder.rs index ce53b0e30..f855445c7 100644 --- a/crates/oxc_semantic/src/builder.rs +++ b/crates/oxc_semantic/src/builder.rs @@ -44,9 +44,25 @@ macro_rules! control_flow { }; } +/// Semantic Builder +/// +/// Traverses a parsed AST and builds a [`Semantic`] representation of the +/// program. +/// +/// The main API is the [`build`] method. 
+/// +/// # Example +/// +/// ```rust +#[doc = include_str!("../examples/simple.rs")] +/// ``` +/// +/// [`build`]: SemanticBuilder::build pub struct SemanticBuilder<'a> { + /// source code of the parsed program pub source_text: &'a str, + /// source type of the parsed program pub source_type: SourceType, trivias: Trivias, @@ -83,6 +99,9 @@ pub struct SemanticBuilder<'a> { build_jsdoc: bool, jsdoc: JSDocBuilder<'a>, + /// Should additional syntax checks be performed? + /// + /// See: [`crate::checker::check`] check_syntax_error: bool, pub cfg: Option>, @@ -92,6 +111,7 @@ pub struct SemanticBuilder<'a> { ast_node_records: Vec, } +/// Data returned by [`SemanticBuilder::build`]. pub struct SemanticBuilderReturn<'a> { pub semantic: Semantic<'a>, pub errors: Vec, @@ -138,12 +158,20 @@ impl<'a> SemanticBuilder<'a> { self } + /// Enable/disable additional syntax checks. + /// + /// Set this to `true` to enable additional syntax checks. Without these, + /// there is no guarantee that the parsed program follows the ECMAScript + /// spec. + /// + /// By default, this is `false`. #[must_use] pub fn with_check_syntax_error(mut self, yes: bool) -> Self { self.check_syntax_error = yes; self } + /// Enable/disable JSDoc parsing. #[must_use] pub fn with_build_jsdoc(mut self, yes: bool) -> Self { self.build_jsdoc = yes; @@ -312,6 +340,7 @@ impl<'a> SemanticBuilder<'a> { self.scope.get_flags(self.current_scope_id) } + /// Is the current scope in strict mode? pub fn strict_mode(&self) -> bool { self.current_scope_flags().is_strict_mode() } @@ -355,6 +384,7 @@ impl<'a> SemanticBuilder<'a> { symbol_id } + /// Declare a new symbol on the current scope. pub fn declare_symbol( &mut self, span: Span, @@ -365,6 +395,10 @@ impl<'a> SemanticBuilder<'a> { self.declare_symbol_on_scope(span, name, self.current_scope_id, includes, excludes) } + /// Check if a symbol with the same name has already been declared in the + /// current scope. 
Returns the symbol id if it exists and is not excluded by `excludes`. + /// + /// Only records a redeclaration error if `report_error` is `true`. pub fn check_redeclaration( &self, scope_id: ScopeId, @@ -419,6 +453,10 @@ impl<'a> SemanticBuilder<'a> { symbol_id } + /// Try to resolve all references from the current scope that are not + /// already resolved. + /// + /// This gets called every time [`SemanticBuilder`] exits a scope. fn resolve_references_for_current_scope(&mut self) { let (current_refs, parent_refs) = self.unresolved_references.current_and_parent_mut(); @@ -501,6 +539,7 @@ impl<'a> SemanticBuilder<'a> { } } + /// Flag the symbol bound to an identifier in the current scope as exported. fn add_export_flag_to_identifier(&mut self, name: &str) { if let Some(symbol_id) = self.scope.get_binding(self.current_scope_id, name) { self.symbols.union_flag(symbol_id, SymbolFlags::Export); diff --git a/crates/oxc_semantic/src/lib.rs b/crates/oxc_semantic/src/lib.rs index e17b02f0c..c99d362ce 100644 --- a/crates/oxc_semantic/src/lib.rs +++ b/crates/oxc_semantic/src/lib.rs @@ -1,3 +1,9 @@ +//! Semantic analysis of a JavaScript/TypeScript program. +//! +//! # Example +//! ```rust +#![doc = include_str!("../examples/simple.rs")] +//! ``` mod binder; mod builder; mod checker; @@ -37,47 +43,76 @@ pub use crate::{ symbol::SymbolTable, }; +/// Semantic analysis of a JavaScript/TypeScript program. +/// +/// [`Semantic`] contains the results of analyzing a program, including the +/// [`Abstract Syntax Tree (AST)`], [`scope tree`], [`symbol table`], and +/// [`control flow graph (CFG)`]. +/// +/// Do not construct this struct directly; instead, use [`SemanticBuilder`]. +/// +/// [`Abstract Syntax Tree (AST)`]: crate::AstNodes +/// [`scope tree`]: crate::ScopeTree +/// [`symbol table`]: crate::SymbolTable +/// [`control flow graph (CFG)`]: crate::ControlFlowGraph pub struct Semantic<'a> { + /// Source code of the JavaScript/TypeScript program being analyzed. 
source_text: &'a str, + /// What kind of source code is being analyzed. Comes from the parser. source_type: SourceType, + /// The Abstract Syntax Tree (AST) nodes. nodes: AstNodes<'a>, + /// The scope tree containing scopes and what identifier names are bound in + /// each one. scopes: ScopeTree, + /// Symbol table containing all symbols in the program and their references. symbols: SymbolTable, classes: ClassTable, + /// Parsed comments. trivias: Trivias, module_record: Arc, + /// Parsed JSDoc comments. jsdoc: JSDocFinder<'a>, unused_labels: FxHashSet, + /// Control flow graph. Only present if [`Semantic`] is built with cfg + /// creation enabled using [`SemanticBuilder::with_cfg`]. cfg: Option, } impl<'a> Semantic<'a> { + /// Extract the [`SymbolTable`] and [`ScopeTree`] from the [`Semantic`] + /// instance, consuming `self`. pub fn into_symbol_table_and_scope_tree(self) -> (SymbolTable, ScopeTree) { (self.symbols, self.scopes) } + /// Source code of the JavaScript/TypeScript program being analyzed. pub fn source_text(&self) -> &'a str { self.source_text } + /// What kind of source code is being analyzed. Comes from the parser. pub fn source_type(&self) -> &SourceType { &self.source_type } + /// Nodes in the Abstract Syntax Tree (AST) pub fn nodes(&self) -> &AstNodes<'a> { &self.nodes } + /// The [`ScopeTree`] containing scopes and what identifier names are bound in + /// each one. pub fn scopes(&self) -> &ScopeTree { &self.scopes } @@ -86,22 +121,30 @@ impl<'a> Semantic<'a> { &self.classes } + /// Get a mutable reference to the [`ScopeTree`]. pub fn scopes_mut(&mut self) -> &mut ScopeTree { &mut self.scopes } + /// Trivias (comments) found while parsing pub fn trivias(&self) -> &Trivias { &self.trivias } + /// Parsed [`JSDoc`] comments. + /// + /// Will be empty if JSDoc parsing is disabled. pub fn jsdoc(&self) -> &JSDocFinder<'a> { &self.jsdoc } + /// ESM module record containing imports and exports. 
pub fn module_record(&self) -> &ModuleRecord { self.module_record.as_ref() } + /// [`SymbolTable`] containing all symbols in the program and their + /// [`Reference`]s. pub fn symbols(&self) -> &SymbolTable { &self.symbols } @@ -110,6 +153,10 @@ impl<'a> Semantic<'a> { &self.unused_labels } + /// Control flow graph. + /// + /// Only present if [`Semantic`] is built with cfg creation enabled using + /// [`SemanticBuilder::with_cfg`]. pub fn cfg(&self) -> Option<&ControlFlowGraph> { self.cfg.as_ref() } diff --git a/crates/oxc_semantic/src/reference.rs b/crates/oxc_semantic/src/reference.rs index 8db136a2e..8608d729a 100644 --- a/crates/oxc_semantic/src/reference.rs +++ b/crates/oxc_semantic/src/reference.rs @@ -9,11 +9,39 @@ use tsify::Tsify; use crate::{symbol::SymbolId, AstNodeId}; +/// Describes where and how a Symbol is used in the AST. +/// +/// References indicate how they are being used using [`ReferenceFlag`]. Refer +/// to the documentation for [`ReferenceFlag`] for more information. +/// +/// ## Resolution +/// References to symbols that could be resolved have their `symbol_id` field +/// populated. [`None`] indicates that either a global variable or a +/// non-existent symbol is being referenced. +/// +/// In most cases, the node identified by `node_id` will be an +/// [`IdentifierReference`], but it could be some special reference type like a +/// [`JSXIdentifier`]. Note that declarations do not count as references, even +/// if the declaration is being used in an expression. +/// +/// ```ts +/// const arr = [1, 2, 3].map(function mapper(x) { return x + 1; }); +/// // Not considered a reference ^^^^^^ +/// ``` +/// +/// [`IdentifierReference`]: oxc_ast::ast::IdentifierReference +/// [`JSXIdentifier`]: oxc_ast::ast::JSXIdentifier #[derive(Debug, Clone)] #[cfg_attr(feature = "serialize", derive(Serialize, Tsify))] #[cfg_attr(feature = "serialize", serde(rename_all = "camelCase"))] pub struct Reference { + /// The AST node making the reference. 
node_id: AstNodeId, + /// The symbol being referenced. + /// + /// This will be [`None`] if no symbol could be found within + /// the reference's scope tree. Usually this indicates a global variable or + /// a reference to a non-existent symbol. symbol_id: Option, /// Describes how this referenced is used by other AST nodes. References can /// be reads, writes, or both. @@ -21,11 +49,13 @@ pub struct Reference { } impl Reference { + /// Create a new unresolved reference. #[inline] pub fn new(node_id: AstNodeId, flag: ReferenceFlag) -> Self { Self { node_id, symbol_id: None, flag } } + /// Create a new resolved reference on a symbol. #[inline] pub fn new_with_symbol_id( node_id: AstNodeId, @@ -35,11 +65,21 @@ impl Reference { Self { node_id, symbol_id: Some(symbol_id), flag } } + /// Get the id of the node that is referencing the symbol. + /// + /// This will usually point to an [`IdentifierReference`] node, but it could + /// be some specialized reference type like a [`JSXIdentifier`]. + /// + /// [`IdentifierReference`]: oxc_ast::ast::IdentifierReference + /// [`JSXIdentifier`]: oxc_ast::ast::JSXIdentifier #[inline] pub fn node_id(&self) -> AstNodeId { self.node_id } + /// Get the id of the symbol being referenced. + /// + /// Will return [`None`] if the symbol could not be resolved. #[inline] pub fn symbol_id(&self) -> Option { self.symbol_id @@ -74,6 +114,7 @@ impl Reference { self.flag.is_write() } + /// Returns `true` if this reference is used in a type context. #[inline] pub fn is_type(&self) -> bool { self.flag.is_type() || self.flag.is_ts_type_query() diff --git a/crates/oxc_semantic/src/scope.rs b/crates/oxc_semantic/src/scope.rs index 521efb64a..4ab38807c 100644 --- a/crates/oxc_semantic/src/scope.rs +++ b/crates/oxc_semantic/src/scope.rs @@ -17,6 +17,13 @@ pub type UnresolvedReferences = FxHashMap>; /// Scope Tree /// +/// The scope tree stores lexical scopes created by a program, and all the +/// variable bindings each scope creates. 
+/// +/// - All scopes have a parent scope, except the root scope. +/// - Scopes can have 0 or more child scopes. +/// - Nodes that create a scope store the [`ScopeId`] of the scope they create. +/// /// `SoA` (Struct of Arrays) for memory efficiency. #[derive(Debug, Default)] pub struct ScopeTree { @@ -27,6 +34,9 @@ pub struct ScopeTree { /// Maps a scope to its node id. node_ids: IndexVec, flags: IndexVec, + /// Symbol bindings in a scope. + /// + /// A binding is a mapping from an identifier name to its [`SymbolId`] bindings: IndexVec, pub(crate) root_unresolved_references: UnresolvedReferences, } @@ -34,20 +44,35 @@ pub struct ScopeTree { impl ScopeTree { const ROOT_SCOPE_ID: ScopeId = ScopeId::new(0); + /// Returns the number of scopes found in the program. Includes the root + /// program scope. #[inline] pub fn len(&self) -> usize { self.parent_ids.len() } + /// Returns `true` if there are no scopes in the program. + /// + /// This will always return `false` when semantic analysis has completed + /// since there is a root scope. #[inline] pub fn is_empty(&self) -> bool { self.parent_ids.is_empty() } + /// Iterate over the scopes that contain a scope. + /// + /// The first element of this iterator will be the scope itself. This + /// guarantees the iterator will have at least 1 element. pub fn ancestors(&self, scope_id: ScopeId) -> impl Iterator + '_ { std::iter::successors(Some(scope_id), |scope_id| self.parent_ids[*scope_id]) } + /// Iterate over scopes contained by a scope in breadth-first order. + /// + /// Unlike [`ancestors`], this iterator will not include the scope itself. + /// + /// [`ancestors`]: ScopeTree::ancestors pub fn descendants(&self, scope_id: ScopeId) -> impl Iterator + '_ { // Has to be a `fn` and pass arguments because we can't // have recursive closures @@ -71,11 +96,21 @@ impl ScopeTree { list.into_iter() } + /// Get the child scopes of a scope. 
+ /// + /// Will return [`None`] if no scope exists, which should never happen if + /// you obtained `scope_id` through valid means. Scopes with no children + /// return [`Some`] empty [`Vec`]. #[inline] pub fn get_child_ids(&self, scope_id: ScopeId) -> Option<&Vec> { self.child_ids.get(scope_id) } + /// Get a mutable reference to a scope's children. + /// + /// Will return [`None`] if no scope exists, which should never happen if + /// you obtained `scope_id` through valid means. Scopes with no children + /// return [`Some`] empty [`Vec`]. #[inline] pub fn get_child_ids_mut(&mut self, scope_id: ScopeId) -> Option<&mut Vec> { self.child_ids.get_mut(scope_id) @@ -85,11 +120,17 @@ impl ScopeTree { self.parent_ids.iter_enumerated().map(|(scope_id, _)| scope_id) } + /// Get the root [`Program`] scope id. + /// + /// [`Program`]: oxc_ast::ast::Program #[inline] pub const fn root_scope_id(&self) -> ScopeId { Self::ROOT_SCOPE_ID } + /// Get the flags for the root scope. + /// + /// This is a shorthand for `scope.get_flags(scope.root_scope_id())`. #[inline] pub fn root_flags(&self) -> ScopeFlags { self.flags[self.root_scope_id()] @@ -116,6 +157,7 @@ impl ScopeTree { &mut self.flags[scope_id] } + /// Get [`ScopeFlags`] for a new child scope under `parent_scope_id`. pub fn get_new_scope_flags( &self, mut flags: ScopeFlags, @@ -159,14 +201,27 @@ impl ScopeTree { self.root_unresolved_references.entry(name).or_default().push(reference); } + /// Check if a symbol is declared in a certain scope. pub fn has_binding(&self, scope_id: ScopeId, name: &str) -> bool { self.bindings[scope_id].get(name).is_some() } + /// Get the symbol bound to an identifier name in a scope. + /// + /// Returns [`None`] if that name is not bound in the scope. This could be + /// because the symbol is not declared within this tree, but it could also + /// be because its declaration is in a parent scope. If you want to find a + /// binding that might be declared in a parent scope, use [`find_binding`]. 
+ /// + /// [`find_binding`]: ScopeTree::find_binding pub fn get_binding(&self, scope_id: ScopeId, name: &str) -> Option { self.bindings[scope_id].get(name).copied() } + /// Find a binding by name in a scope or its ancestors. + /// + /// Bindings are resolved by walking up the scope tree until a binding is + /// found. If no binding is found, [`None`] is returned. pub fn find_binding(&self, scope_id: ScopeId, name: &str) -> Option { for scope_id in self.ancestors(scope_id) { if let Some(symbol_id) = self.bindings[scope_id].get(name) { @@ -176,16 +231,25 @@ impl ScopeTree { None } + /// Get all bound identifiers in a scope. #[inline] pub fn get_bindings(&self, scope_id: ScopeId) -> &Bindings { &self.bindings[scope_id] } + /// Get the ID of the [`AstNode`] that created a scope. + /// + /// [`AstNode`]: crate::AstNode #[inline] pub fn get_node_id(&self, scope_id: ScopeId) -> AstNodeId { self.node_ids[scope_id] } + /// Iterate over all bindings declared in the entire program. + /// + /// If you only want bindings in a specific scope, use [`iter_bindings_in`]. + /// + /// [`iter_bindings_in`]: ScopeTree::iter_bindings_in pub fn iter_bindings(&self) -> impl Iterator + '_ { self.bindings.iter_enumerated().flat_map(|(scope_id, bindings)| { bindings.iter().map(move |(name, symbol_id)| (scope_id, *symbol_id, name)) @@ -203,8 +267,12 @@ impl ScopeTree { &mut self.bindings[scope_id] } - /// Create scope. - /// For root (`Program`) scope, use `add_root_scope`. + /// Create a scope inside another scope. + /// + /// For the root [`Program`] scope, use [`add_root_scope`]. + /// + /// [`Program`]: oxc_ast::ast::Program + /// [`add_root_scope`]: ScopeTree::add_root_scope pub fn add_scope( &mut self, parent_id: ScopeId, @@ -219,7 +287,13 @@ impl ScopeTree { scope_id } - /// Create root (`Program`) scope. + /// Create the root [`Program`] scope. + /// + /// Do not use this method if a root scope already exists. Use [`add_scope`] + /// to create a new scope inside the root scope. 
+ /// + /// [`Program`]: oxc_ast::ast::Program + /// [`add_scope`]: ScopeTree::add_scope pub fn add_root_scope(&mut self, node_id: AstNodeId, flags: ScopeFlags) -> ScopeId { self.add_scope_impl(None, node_id, flags) } @@ -241,14 +315,19 @@ impl ScopeTree { scope_id } + /// Add a [`binding`] to a scope. + /// + /// [`binding`]: Bindings pub fn add_binding(&mut self, scope_id: ScopeId, name: CompactStr, symbol_id: SymbolId) { self.bindings[scope_id].insert(name, symbol_id); } + /// Remove an existing binding from a scope. pub fn remove_binding(&mut self, scope_id: ScopeId, name: &CompactStr) { self.bindings[scope_id].shift_remove(name); } + /// Reserve memory for an `additional` number of scopes. pub fn reserve(&mut self, additional: usize) { self.parent_ids.reserve(additional); self.child_ids.reserve(additional); diff --git a/crates/oxc_semantic/tests/integration/scopes.rs b/crates/oxc_semantic/tests/integration/scopes.rs index 7dd1e8808..36188103c 100644 --- a/crates/oxc_semantic/tests/integration/scopes.rs +++ b/crates/oxc_semantic/tests/integration/scopes.rs @@ -3,6 +3,33 @@ use oxc_semantic::{ScopeFlags, SymbolFlags}; use crate::util::{Expect, SemanticTester}; +#[test] +fn test_only_program() { + let tester = SemanticTester::js("let x = 1;"); + tester.has_root_symbol("x").is_in_scope(ScopeFlags::Top).test(); + + let semantic = tester.build(); + let scopes = semantic.scopes(); + let root = semantic.scopes().root_scope_id(); + + // ScopeTree contains a single root scope + assert_eq!(scopes.len(), 1); + assert!(!scopes.is_empty()); + + // Root scope is associated with the Program + let root_node_id = scopes.get_node_id(root); + let root_node = semantic.nodes().get_node(root_node_id); + assert!(matches!(root_node.kind(), AstKind::Program(_))); + + // ancestors + assert_eq!(scopes.ancestors(root).count(), 1); + assert!(scopes.get_parent_id(root).is_none()); + + // children + assert_eq!(scopes.descendants(root).count(), 0); + 
assert!(scopes.get_child_ids(root).unwrap().is_empty()); +} + #[test] fn test_top_level_strict() { // Module with top-level "use strict" diff --git a/crates/oxc_syntax/src/reference.rs b/crates/oxc_syntax/src/reference.rs index 4b8f5f55c..a6dba1433 100644 --- a/crates/oxc_syntax/src/reference.rs +++ b/crates/oxc_syntax/src/reference.rs @@ -46,16 +46,54 @@ export type ReferenceFlag = { "#; bitflags! { + /// Describes how a symbol is being referenced in the AST. + /// + /// There are three general categories of references: + /// 1. Values being referenced as values + /// 2. Types being referenced as types + /// 3. Values being referenced as types + /// + /// ## Values + /// Reading a value is indicated by [`Read`], writing a value + /// is indicated by [`Write`]. References can be both a read + /// and a write, such as in this scenario: + /// + /// ```js + /// let a = 1; + /// a++; + /// ``` + /// + /// When a value symbol is used as a type, such as in `typeof a`, it has + /// [`TSTypeQuery`] added to its flags. It is, however, still + /// considered a read. A good rule of thumb is that if a reference has [`Read`] + /// or [`Write`] in its flags, it is referencing a value symbol. + /// + /// ## Types + /// Type references are indicated by [`Type`](ReferenceFlag::Type). These are used primarily in + /// type definitions and signatures. Types can never be re-assigned, so + /// there is no read/write distinction for type references. + /// + /// [`Read`]: ReferenceFlag::Read + /// [`Write`]: ReferenceFlag::Write + /// [`TSTypeQuery`]: ReferenceFlag::TSTypeQuery #[derive(Debug, Default, Clone, Copy, Eq, PartialEq, CloneIn)] #[cfg_attr(feature = "serialize", derive(Serialize))] pub struct ReferenceFlag: u8 { const None = 0; + /// A symbol is being read as a Value. const Read = 1 << 0; + /// A symbol is being written to in a Value context. const Write = 1 << 1; // Used in type definitions. 
const Type = 1 << 2; // Used in `typeof xx` const TSTypeQuery = 1 << 3; + /// The symbol being referenced is a value. + /// + /// Note that this does not necessarily indicate the reference is used + /// in a value context, since type queries are also flagged as [`Read`]. + /// + /// [`Read`]: ReferenceFlag::Read const Value = Self::Read.bits() | Self::Write.bits(); } }