diff --git a/Cargo.lock b/Cargo.lock index 89008afbf..a49218aeb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1608,6 +1608,7 @@ version = "0.1.0" dependencies = [ "bitflags 2.3.3", "indexmap 2.0.0", + "itertools 0.11.0", "oxc_allocator", "oxc_ast", "oxc_diagnostics", diff --git a/crates/oxc_semantic/Cargo.toml b/crates/oxc_semantic/Cargo.toml index abff32fd3..94a5ded8f 100644 --- a/crates/oxc_semantic/Cargo.toml +++ b/crates/oxc_semantic/Cargo.toml @@ -22,6 +22,7 @@ bitflags = { workspace = true } rustc-hash = { workspace = true } phf = { workspace = true, features = ["macros"] } indexmap = { workspace = true } +itertools = { workspace = true } [dev-dependencies] oxc_parser = { workspace = true } diff --git a/crates/oxc_semantic/src/builder.rs b/crates/oxc_semantic/src/builder.rs index 73dd00e1e..c45600d60 100644 --- a/crates/oxc_semantic/src/builder.rs +++ b/crates/oxc_semantic/src/builder.rs @@ -2,11 +2,12 @@ use std::{cell::RefCell, rc::Rc}; +use itertools::Itertools; #[allow(clippy::wildcard_imports)] use oxc_ast::{ast::*, AstKind, Trivias, Visit}; use oxc_diagnostics::Error; use oxc_span::{Atom, SourceType, Span}; -use oxc_syntax::module_record::ModuleRecord; +use oxc_syntax::{module_record::ModuleRecord, operator::AssignmentOperator}; use rustc_hash::FxHashMap; use crate::{ @@ -520,18 +521,77 @@ impl<'a> SemanticBuilder<'a> { } fn reference_identifier(&mut self, ident: &IdentifierReference) { - let flag = if matches!( - self.nodes.parent_kind(self.current_node_id), - Some(AstKind::SimpleAssignmentTarget(_) | AstKind::AssignmentTarget(_)) - ) { - ReferenceFlag::write() - } else { - ReferenceFlag::read() - }; + let flag = self.resolve_reference_usages(); let reference = Reference::new(ident.span, ident.name.clone(), flag); self.declare_reference(reference); } + /// Resolve reference flags for the current ast node. 
+ fn resolve_reference_usages(&self) -> ReferenceFlag { + let mut flags = ReferenceFlag::None; + + if self.nodes.parent_id(self.current_node_id).is_none() { + return ReferenceFlag::Read; + } + + // This func should only get called when an IdentifierReference is + // reached + debug_assert!(matches!( + self.nodes.get_node(self.current_node_id).kind(), + AstKind::IdentifierReference(_) + )); + + for (curr, parent) in self + .nodes + .iter_parents(self.current_node_id) + .tuple_windows::<(&AstNode<'a>, &AstNode<'a>)>() + { + match (curr.kind(), parent.kind()) { + // lhs of assignment expression + (AstKind::SimpleAssignmentTarget(_), AstKind::AssignmentExpression(_)) => { + debug_assert!(!flags.is_read()); + flags = ReferenceFlag::write(); + // a lhs expr will not propagate upwards into a rhs + // expression, so we can safely break + break; + } + (AstKind::AssignmentTarget(_), AstKind::AssignmentExpression(expr)) => { + flags |= if expr.operator == AssignmentOperator::Assign { + ReferenceFlag::write() + } else { + ReferenceFlag::read_write() + }; + break; + } + (_, AstKind::SimpleAssignmentTarget(_) | AstKind::AssignmentTarget(_)) => { + flags |= ReferenceFlag::write(); + // continue up tree + } + (_, AstKind::UpdateExpression(_)) => { + flags |= ReferenceFlag::Write; + // continue up tree + } + ( + AstKind::AssignmentTarget(_), + AstKind::ForInStatement(_) | AstKind::ForOfStatement(_), + ) => { + break; + } + (_, AstKind::ParenthesizedExpression(_)) => { + // continue up tree + } + _ => { + flags |= ReferenceFlag::Read; + break; + } + } + } + + debug_assert!(flags != ReferenceFlag::None); + + flags + } + fn reference_jsx_element_name(&mut self, elem: &JSXElementName) { if matches!( self.nodes.parent_kind(self.current_node_id), diff --git a/crates/oxc_semantic/src/lib.rs b/crates/oxc_semantic/src/lib.rs index 3129c0312..ef07d511b 100644 --- a/crates/oxc_semantic/src/lib.rs +++ b/crates/oxc_semantic/src/lib.rs @@ -137,7 +137,7 @@ mod tests { 
assert!(parse.errors.is_empty()); let program = allocator.alloc(parse.program); let semantic = SemanticBuilder::new(source, source_type).build(program); - assert!(semantic.errors.is_empty()); + assert!(semantic.errors.is_empty(), "Parse error: {}", semantic.errors[0]); semantic.semantic } @@ -187,4 +187,123 @@ mod tests { } } } + + #[test] + fn test_reference_resolutions_simple_read_write() { + let alloc = Allocator::default(); + let target_symbol_name = Atom::from("a"); + let typescript = SourceType::default().with_typescript(true).with_module(true); + let sources = [ + // simple cases + (SourceType::default(), "let a = 1; a = 2", ReferenceFlag::write()), + (SourceType::default(), "let a = 1, b; b = a", ReferenceFlag::read()), + (SourceType::default(), "let a = 1, b = 1, c; c = a + b", ReferenceFlag::read()), + (SourceType::default(), "function a() { return }; a()", ReferenceFlag::read()), + (SourceType::default(), "class a {}; new a()", ReferenceFlag::read()), + (SourceType::default(), "let a; function foo() { return a }", ReferenceFlag::read()), + // pattern assignment + (SourceType::default(), "let a = 1, b; b = { a }", ReferenceFlag::read()), + (SourceType::default(), "let a, b; ({ b } = { a })", ReferenceFlag::read()), + (SourceType::default(), "let a, b; ({ a } = { b })", ReferenceFlag::write()), + (SourceType::default(), "let a, b; ([ b ] = [ a ])", ReferenceFlag::read()), + (SourceType::default(), "let a, b; ([ a ] = [ b ])", ReferenceFlag::write()), + // property access/mutation + (SourceType::default(), "let a = { b: 1 }; a.b = 2", ReferenceFlag::read()), + (SourceType::default(), "let a = { b: 1 }; a.b += 2", ReferenceFlag::read()), + // parens are pass-through + (SourceType::default(), "let a = 1, b; b = (a)", ReferenceFlag::read()), + (SourceType::default(), "let a = 1, b; b = ++(a)", ReferenceFlag::read_write()), + (SourceType::default(), "let a = 1, b; b = ++((((a))))", ReferenceFlag::read_write()), + (SourceType::default(), "let a = 1, b; b = 
((++((a))))", ReferenceFlag::read_write()), + // simple binops/calls for sanity check + (SourceType::default(), "let a, b; a + b", ReferenceFlag::read()), + (SourceType::default(), "let a, b; b(a)", ReferenceFlag::read()), + (SourceType::default(), "let a, b; a = 5", ReferenceFlag::write()), + // unary op counts as write, but checking continues up tree + (SourceType::default(), "let a = 1, b; b = ++a", ReferenceFlag::read_write()), + (SourceType::default(), "let a = 1, b; b = --a", ReferenceFlag::read_write()), + (SourceType::default(), "let a = 1, b; b = a++", ReferenceFlag::read_write()), + (SourceType::default(), "let a = 1, b; b = a--", ReferenceFlag::read_write()), + // assignment expressions count as read-write + (SourceType::default(), "let a = 1, b; b = a += 5", ReferenceFlag::read_write()), + (SourceType::default(), "let a = 1; a += 5", ReferenceFlag::read_write()), + // note: we consider a to be written, and the read of `1` propagates upwards + (SourceType::default(), "let a, b; b = a = 1", ReferenceFlag::write()), + (SourceType::default(), "let a, b; b = (a = 1)", ReferenceFlag::write()), + (SourceType::default(), "let a, b, c; b = c = a", ReferenceFlag::read()), + // sequences return last value in sequence + (SourceType::default(), "let a, b; b = (0, a++)", ReferenceFlag::read_write()), + // loops + ( + SourceType::default(), + "var a, arr = [1, 2, 3]; for(a in arr) { break }", + ReferenceFlag::write(), + ), + ( + SourceType::default(), + "var a, obj = { }; for(a of obj) { break }", + ReferenceFlag::write(), + ), + (SourceType::default(), "var a; for(; false; a++) { }", ReferenceFlag::read_write()), + (SourceType::default(), "var a = 1; while(a < 5) { break }", ReferenceFlag::read()), + // if statements + (SourceType::default(), "let a; if (a) { true } else { false }", ReferenceFlag::read()), + ( + SourceType::default(), + "let a, b; if (a == b) { true } else { false }", + ReferenceFlag::read(), + ), + ( + SourceType::default(), + "let a, b; if (b == a) 
{ true } else { false }", + ReferenceFlag::read(), + ), + // identifiers not in last value are also considered a read (at + // least, for now) + (SourceType::default(), "let a, b; b = (a, 0)", ReferenceFlag::read()), + (SourceType::default(), "let a, b; b = (--a, 0)", ReferenceFlag::read_write()), + // other reads after a is written + // a = 1 writes, but the CallExpression reads the rhs (1) so a isn't read + ( + SourceType::default(), + "let a; function foo(a) { return a }; foo(a = 1)", + ReferenceFlag::write(), + ), + // typescript + (typescript, "let a: number = 1; (a as any) = true", ReferenceFlag::write()), + (typescript, "let a: number = 1; a = true as any", ReferenceFlag::write()), + (typescript, "let a: number = 1; a = 2 as const", ReferenceFlag::write()), + (typescript, "let a: number = 1; a = 2 satisfies number", ReferenceFlag::write()), + (typescript, "let a: number; (a as any) = 1;", ReferenceFlag::write()), + ]; + + for (source_type, source, flag) in sources { + let semantic = get_semantic(&alloc, source, source_type); + let a_id = + semantic.scopes().get_root_binding(&target_symbol_name).unwrap_or_else(|| { + panic!("no references for '{target_symbol_name}' found"); + }); + let a_refs: Vec<_> = semantic.symbol_references(a_id).collect(); + let num_refs = a_refs.len(); + + assert!(num_refs == 1, "expected to find 1 reference to '{target_symbol_name}' but {num_refs} were found\n\nsource:\n{source}"); + let ref_type = a_refs[0]; + if flag.is_write() { + assert!( + ref_type.is_write(), + "expected reference to '{target_symbol_name}' to be write\n\nsource:\n{source}" + ); + } else { + assert!(!ref_type.is_write(), "expected reference to '{target_symbol_name}' not to have been written to, but it is\n\nsource:\n{source}"); + } + if flag.is_read() { + assert!( + ref_type.is_read(), + "expected reference to '{target_symbol_name}' to be read\n\nsource:\n{source}" + ); + } else { + assert!(!ref_type.is_read(), "expected reference to '{target_symbol_name}' not to 
be read, but it is\n\nsource:\n{source}"); + } + } + } } diff --git a/crates/oxc_semantic/src/node.rs b/crates/oxc_semantic/src/node.rs index 91846e8e8..fcec90848 100644 --- a/crates/oxc_semantic/src/node.rs +++ b/crates/oxc_semantic/src/node.rs @@ -94,6 +94,15 @@ impl<'a> AstNodes<'a> { self.nodes.iter() } + /// Walk up the AST, starting at the node pointed to by `node_id`. + /// + /// The first node produced by this iterator is the node itself, followed by + /// each successive parent. The last node will usually be a `Program`. + pub fn iter_parents(&self, node_id: AstNodeId) -> impl Iterator<Item = &AstNode<'a>> + '_ { + let curr = Some(self.get_node(node_id)); + AstNodeParentIter { curr, nodes: self } + } + pub fn kind(&self, ast_node_id: AstNodeId) -> AstKind<'a> { self.nodes[ast_node_id].kind } @@ -118,6 +127,10 @@ impl<'a> AstNodes<'a> { &mut self.nodes[ast_node_id] } + /// Walk up the AST, iterating over the id of each node along the way. + /// + /// The first id produced by this iterator is `ast_node_id` itself, followed + /// by the id of each successive parent. The last will usually be a `Program`. 
pub fn ancestors(&self, ast_node_id: AstNodeId) -> impl Iterator<Item = AstNodeId> + '_ { let parent_ids = &self.parent_ids; std::iter::successors(Some(ast_node_id), |node_id| parent_ids[*node_id]) @@ -131,3 +144,20 @@ impl<'a> AstNodes<'a> { ast_node_id } } + +#[derive(Debug)] +pub struct AstNodeParentIter<'s, 'a> { + curr: Option<&'s AstNode<'a>>, + nodes: &'s AstNodes<'a>, +} + +impl<'s, 'a> Iterator for AstNodeParentIter<'s, 'a> { + type Item = &'s AstNode<'a>; + + fn next(&mut self) -> Option<Self::Item> { + let next = self.curr; + self.curr = self.curr.and_then(|curr| self.nodes.parent_node(curr.id())); + + next + } +} diff --git a/crates/oxc_semantic/src/reference.rs b/crates/oxc_semantic/src/reference.rs index 0fea90aca..0e5396a98 100644 --- a/crates/oxc_semantic/src/reference.rs +++ b/crates/oxc_semantic/src/reference.rs @@ -1,3 +1,4 @@ +use bitflags::bitflags; use oxc_index::define_index_type; use oxc_span::{Atom, Span}; @@ -10,8 +11,11 @@ define_index_type! { #[derive(Debug, Clone)] pub struct Reference { span: Span, + /// The name of the identifier that was referred to name: Atom, symbol_id: Option<SymbolId>, + /// Describes how this reference is used by other AST nodes. References can + /// be reads, writes, or both. flag: ReferenceFlag, } @@ -36,28 +40,64 @@ impl Reference { self.symbol_id = Some(symbol_id); } + /// Returns `true` if the identifier value was read. This is not mutually + /// exclusive with [`Self::is_write`] pub fn is_read(&self) -> bool { - self.flag == ReferenceFlag::Read + self.flag.is_read() } + /// Returns `true` if the identifier was written to. This is not mutually + /// exclusive with [`Self::is_read`] pub fn is_write(&self) -> bool { - self.flag == ReferenceFlag::Write + self.flag.is_write() } } -#[derive(Debug, Clone, Copy, Eq, PartialEq)] -pub enum ReferenceFlag { - None, - Read, - Write, +bitflags! 
{ + #[derive(Debug, Clone, Copy, Eq, PartialEq)] + pub struct ReferenceFlag: u8 { + const None = 0; + const Read = 1 << 0; + const Write = 1 << 1; + const ReadWrite = Self::Read.bits() | Self::Write.bits(); + } } impl ReferenceFlag { - pub fn read() -> Self { + pub const fn read() -> Self { Self::Read } - pub fn write() -> Self { + pub const fn write() -> Self { Self::Write } + + pub const fn read_write() -> Self { + Self::ReadWrite + } + + /// The identifier is read from. It may also be written to. + pub const fn is_read(&self) -> bool { + self.intersects(Self::Read) + } + + /// The identifier is only read from. + pub const fn is_read_only(&self) -> bool { + self.bits() == Self::Read.bits() + } + + /// The identifier is written to. It may also be read from. + pub const fn is_write(&self) -> bool { + self.intersects(Self::Write) + } + + /// The identifier is only written to. It is not read from in this reference. + pub const fn is_write_only(&self) -> bool { + self.bits() == Self::Write.bits() + } + + /// The identifier is both read from and written to, e.g `a += 1`. + pub const fn is_read_write(&self) -> bool { + self.contains(Self::ReadWrite) + } } diff --git a/crates/oxc_semantic/src/scope.rs b/crates/oxc_semantic/src/scope.rs index 2c67fc081..6a3af3f56 100644 --- a/crates/oxc_semantic/src/scope.rs +++ b/crates/oxc_semantic/src/scope.rs @@ -74,6 +74,11 @@ impl ScopeTree { self.parent_ids[scope_id] } + /// Get a variable binding by name that was declared in the top-level scope + pub fn get_root_binding(&self, name: &Atom) -> Option<SymbolId> { + self.get_binding(self.root_scope_id(), name) + } + pub fn get_binding(&self, scope_id: ScopeId, name: &Atom) -> Option<SymbolId> { self.bindings[scope_id].get(name).copied() }