feat(ast)!: add Program::comments (#6445)

This commit is contained in:
Boshen 2024-10-11 04:47:36 +00:00
parent 2b7be08af4
commit 2808973af3
17 changed files with 294 additions and 119 deletions

View file

@ -6,6 +6,7 @@ src:
- 'crates/oxc_ast/src/ast/js.rs'
- 'crates/oxc_ast/src/ast/ts.rs'
- 'crates/oxc_ast/src/ast/jsx.rs'
- 'crates/oxc_ast/src/ast/comment.rs'
- 'crates/oxc_syntax/src/number.rs'
- 'crates/oxc_syntax/src/operator.rs'
- 'crates/oxc_span/src/span/types.rs'

View file

@ -0,0 +1,112 @@
use oxc_allocator::CloneIn;
use oxc_ast_macros::ast;
use oxc_span::{cmp::ContentEq, hash::ContentHash, Span};
/// Syntactic form of a comment.
#[ast]
#[generate_derive(CloneIn, ContentEq, ContentHash)]
#[derive(Debug, Default, Clone, Copy, Eq, PartialEq)]
pub enum CommentKind {
/// Line comment, e.g. `// comment`. This is the `Default` variant.
#[default]
Line = 0,
/// Block comment, e.g. `/* comment */`.
Block = 1,
}
/// Whether a comment is classified as leading or trailing relative to a token.
#[ast]
#[generate_derive(CloneIn, ContentEq, ContentHash)]
#[derive(Debug, Default, Clone, Copy, Eq, PartialEq)]
pub enum CommentPosition {
/// Comments prior to a token until another token or trailing comment.
///
/// This is the `Default` variant.
///
/// e.g.
///
/// ```
/// /* leading */ token;
/// /* leading */
/// // leading
/// token;
/// ```
#[default]
Leading = 0,
/// Comments trailing a token until a newline.
/// e.g. `token /* trailing */ // trailing`
Trailing = 1,
}
/// A single line or block comment, together with its classification and
/// attachment metadata.
///
/// Note that the derived `Default` yields `CommentPosition::Leading` (that
/// enum's `#[default]` variant), whereas `Comment::new` starts out as
/// `CommentPosition::Trailing`.
#[ast]
#[generate_derive(CloneIn, ContentEq, ContentHash)]
#[derive(Debug, Default, Clone, Copy, Eq, PartialEq)]
pub struct Comment {
/// The span of the comment text (without leading/trailing delimiters).
/// Use `real_span` to get the span including the delimiters.
pub span: Span,
/// Line or block comment
pub kind: CommentKind,
/// Leading or trailing comment
pub position: CommentPosition,
/// Start of token this leading comment is attached to.
/// `/* Leading */ token`
/// ^ This start
/// NOTE: Trailing comment attachment is not computed yet.
pub attached_to: u32,
/// Whether this comment has a preceding newline.
/// Used to avoid becoming a trailing comment in codegen.
pub preceded_by_newline: bool,
/// Whether this comment has a trailing newline.
pub followed_by_newline: bool,
}
impl Comment {
    /// Creates a comment whose text (without delimiters) covers `start..end`.
    ///
    /// The new comment is marked as [`CommentPosition::Trailing`], is attached
    /// to offset `0`, and has both newline flags cleared.
    #[inline]
    pub fn new(start: u32, end: u32, kind: CommentKind) -> Self {
        let span = Span::new(start, end);
        Self {
            span,
            kind,
            position: CommentPosition::Trailing,
            attached_to: 0,
            preceded_by_newline: false,
            followed_by_newline: false,
        }
    }

    /// Returns `true` if this is a line comment.
    pub fn is_line(self) -> bool {
        self.kind == CommentKind::Line
    }

    /// Returns `true` if this is a block comment.
    pub fn is_block(self) -> bool {
        self.kind == CommentKind::Block
    }

    /// Returns `true` if this comment is classified as a leading comment.
    pub fn is_leading(self) -> bool {
        self.position == CommentPosition::Leading
    }

    /// Returns `true` if this comment is classified as a trailing comment.
    pub fn is_trailing(self) -> bool {
        self.position == CommentPosition::Trailing
    }

    /// Returns the span of the whole comment, delimiters included.
    pub fn real_span(&self) -> Span {
        Span::new(self.real_span_start(), self.real_span_end())
    }

    /// Returns the end offset of the comment including its closing delimiter.
    ///
    /// Line comments have no closing delimiter, so this is simply `span.end`.
    pub fn real_span_end(&self) -> u32 {
        match self.kind {
            CommentKind::Line => self.span.end,
            // length of `*/`
            CommentKind::Block => self.span.end + 2,
        }
    }

    /// Returns the start offset of the comment including its two-byte opening
    /// delimiter.
    ///
    /// The subtraction saturates at `0`: a comment whose text span starts
    /// before offset 2 (for example `Comment::default()`, whose span is
    /// `0..0`) would otherwise underflow, panicking in debug builds and
    /// wrapping to a huge offset in release builds.
    pub fn real_span_start(&self) -> u32 {
        self.span.start.saturating_sub(2)
    }

    /// Returns `true` if this is a leading block comment whose text starts
    /// with `*`, i.e. it was written as `/** ... */`.
    ///
    /// `source_text` must be the text that `span` indexes into.
    pub fn is_jsdoc(&self, source_text: &str) -> bool {
        self.is_leading() && self.is_block() && self.span.source_text(source_text).starts_with('*')
    }
}

View file

@ -41,6 +41,9 @@ pub struct Program<'a> {
pub source_type: SourceType,
#[serde(skip)]
pub source_text: &'a str,
/// Sorted comments
#[serde(skip)]
pub comments: Vec<'a, Comment>,
pub hashbang: Option<Hashbang<'a>>,
pub directives: Vec<'a, Directive<'a>>,
pub body: Vec<'a, Statement<'a>>,

View file

@ -175,6 +175,7 @@
//!
//! If you are seeing compile-time errors in `src/ast/macros.rs`, this will be the cause.
pub(crate) mod comment;
pub(crate) mod js;
pub(crate) mod jsx;
pub(crate) mod literal;
@ -191,4 +192,4 @@ pub use oxc_syntax::{
},
};
pub use self::{js::*, jsx::*, literal::*, ts::*};
pub use self::{comment::*, js::*, jsx::*, literal::*, ts::*};

View file

@ -55,15 +55,16 @@ const _: () = {
assert!(offset_of!(StringLiteral, span) == 0usize);
assert!(offset_of!(StringLiteral, value) == 8usize);
assert!(size_of::<Program>() == 128usize);
assert!(size_of::<Program>() == 160usize);
assert!(align_of::<Program>() == 8usize);
assert!(offset_of!(Program, span) == 0usize);
assert!(offset_of!(Program, source_type) == 8usize);
assert!(offset_of!(Program, source_text) == 16usize);
assert!(offset_of!(Program, hashbang) == 32usize);
assert!(offset_of!(Program, directives) == 56usize);
assert!(offset_of!(Program, body) == 88usize);
assert!(offset_of!(Program, scope_id) == 120usize);
assert!(offset_of!(Program, comments) == 32usize);
assert!(offset_of!(Program, hashbang) == 64usize);
assert!(offset_of!(Program, directives) == 88usize);
assert!(offset_of!(Program, body) == 120usize);
assert!(offset_of!(Program, scope_id) == 152usize);
assert!(size_of::<Expression>() == 16usize);
assert!(align_of::<Expression>() == 8usize);
@ -1375,6 +1376,21 @@ const _: () = {
assert!(offset_of!(JSXText, span) == 0usize);
assert!(offset_of!(JSXText, value) == 8usize);
assert!(size_of::<CommentKind>() == 1usize);
assert!(align_of::<CommentKind>() == 1usize);
assert!(size_of::<CommentPosition>() == 1usize);
assert!(align_of::<CommentPosition>() == 1usize);
assert!(size_of::<Comment>() == 20usize);
assert!(align_of::<Comment>() == 4usize);
assert!(offset_of!(Comment, span) == 0usize);
assert!(offset_of!(Comment, kind) == 8usize);
assert!(offset_of!(Comment, position) == 9usize);
assert!(offset_of!(Comment, attached_to) == 12usize);
assert!(offset_of!(Comment, preceded_by_newline) == 16usize);
assert!(offset_of!(Comment, followed_by_newline) == 17usize);
assert!(size_of::<NumberBase>() == 1usize);
assert!(align_of::<NumberBase>() == 1usize);
@ -1598,15 +1614,16 @@ const _: () = {
assert!(offset_of!(StringLiteral, span) == 0usize);
assert!(offset_of!(StringLiteral, value) == 8usize);
assert!(size_of::<Program>() == 72usize);
assert!(size_of::<Program>() == 88usize);
assert!(align_of::<Program>() == 4usize);
assert!(offset_of!(Program, span) == 0usize);
assert!(offset_of!(Program, source_type) == 8usize);
assert!(offset_of!(Program, source_text) == 12usize);
assert!(offset_of!(Program, hashbang) == 20usize);
assert!(offset_of!(Program, directives) == 36usize);
assert!(offset_of!(Program, body) == 52usize);
assert!(offset_of!(Program, scope_id) == 68usize);
assert!(offset_of!(Program, comments) == 20usize);
assert!(offset_of!(Program, hashbang) == 36usize);
assert!(offset_of!(Program, directives) == 52usize);
assert!(offset_of!(Program, body) == 68usize);
assert!(offset_of!(Program, scope_id) == 84usize);
assert!(size_of::<Expression>() == 8usize);
assert!(align_of::<Expression>() == 4usize);
@ -2918,6 +2935,21 @@ const _: () = {
assert!(offset_of!(JSXText, span) == 0usize);
assert!(offset_of!(JSXText, value) == 8usize);
assert!(size_of::<CommentKind>() == 1usize);
assert!(align_of::<CommentKind>() == 1usize);
assert!(size_of::<CommentPosition>() == 1usize);
assert!(align_of::<CommentPosition>() == 1usize);
assert!(size_of::<Comment>() == 20usize);
assert!(align_of::<Comment>() == 4usize);
assert!(offset_of!(Comment, span) == 0usize);
assert!(offset_of!(Comment, kind) == 8usize);
assert!(offset_of!(Comment, position) == 9usize);
assert!(offset_of!(Comment, attached_to) == 12usize);
assert!(offset_of!(Comment, preceded_by_newline) == 16usize);
assert!(offset_of!(Comment, followed_by_newline) == 17usize);
assert!(size_of::<NumberBase>() == 1usize);
assert!(align_of::<NumberBase>() == 1usize);

View file

@ -222,6 +222,7 @@ impl<'a> AstBuilder<'a> {
/// - span: The [`Span`] covering this node
/// - source_type
/// - source_text
/// - comments: Sorted comments
/// - hashbang
/// - directives
/// - body
@ -231,6 +232,7 @@ impl<'a> AstBuilder<'a> {
span: Span,
source_type: SourceType,
source_text: S,
comments: Vec<'a, Comment>,
hashbang: Option<Hashbang<'a>>,
directives: Vec<'a, Directive<'a>>,
body: Vec<'a, Statement<'a>>,
@ -242,6 +244,7 @@ impl<'a> AstBuilder<'a> {
span,
source_type,
source_text: source_text.into_in(self.allocator),
comments,
hashbang,
directives,
body,
@ -257,6 +260,7 @@ impl<'a> AstBuilder<'a> {
/// - span: The [`Span`] covering this node
/// - source_type
/// - source_text
/// - comments: Sorted comments
/// - hashbang
/// - directives
/// - body
@ -266,6 +270,7 @@ impl<'a> AstBuilder<'a> {
span: Span,
source_type: SourceType,
source_text: S,
comments: Vec<'a, Comment>,
hashbang: Option<Hashbang<'a>>,
directives: Vec<'a, Directive<'a>>,
body: Vec<'a, Statement<'a>>,
@ -274,7 +279,7 @@ impl<'a> AstBuilder<'a> {
S: IntoIn<'a, &'a str>,
{
Box::new_in(
self.program(span, source_type, source_text, hashbang, directives, body),
self.program(span, source_type, source_text, comments, hashbang, directives, body),
self.allocator,
)
}

View file

@ -5,6 +5,9 @@
use oxc_allocator::{Allocator, CloneIn};
#[allow(clippy::wildcard_imports)]
use crate::ast::comment::*;
#[allow(clippy::wildcard_imports)]
use crate::ast::js::*;
@ -113,6 +116,7 @@ impl<'old_alloc, 'new_alloc> CloneIn<'new_alloc> for Program<'old_alloc> {
span: CloneIn::clone_in(&self.span, allocator),
source_type: CloneIn::clone_in(&self.source_type, allocator),
source_text: CloneIn::clone_in(&self.source_text, allocator),
comments: CloneIn::clone_in(&self.comments, allocator),
hashbang: CloneIn::clone_in(&self.hashbang, allocator),
directives: CloneIn::clone_in(&self.directives, allocator),
body: CloneIn::clone_in(&self.body, allocator),
@ -4231,3 +4235,37 @@ impl<'old_alloc, 'new_alloc> CloneIn<'new_alloc> for JSXText<'old_alloc> {
}
}
}
impl<'alloc> CloneIn<'alloc> for CommentKind {
    type Cloned = CommentKind;

    /// Returns the same variant; the allocator is not used because the enum
    /// carries no data.
    fn clone_in(&self, _: &'alloc Allocator) -> Self::Cloned {
        *self
    }
}
impl<'alloc> CloneIn<'alloc> for CommentPosition {
    type Cloned = CommentPosition;

    /// Returns the same variant; the allocator is not used because the enum
    /// carries no data.
    fn clone_in(&self, _: &'alloc Allocator) -> Self::Cloned {
        *self
    }
}
impl<'alloc> CloneIn<'alloc> for Comment {
    type Cloned = Comment;

    /// Clones every field into `allocator`, field by field, in declaration order.
    fn clone_in(&self, allocator: &'alloc Allocator) -> Self::Cloned {
        let Self {
            span,
            kind,
            position,
            attached_to,
            preceded_by_newline,
            followed_by_newline,
        } = self;
        Comment {
            span: CloneIn::clone_in(span, allocator),
            kind: CloneIn::clone_in(kind, allocator),
            position: CloneIn::clone_in(position, allocator),
            attached_to: CloneIn::clone_in(attached_to, allocator),
            preceded_by_newline: CloneIn::clone_in(preceded_by_newline, allocator),
            followed_by_newline: CloneIn::clone_in(followed_by_newline, allocator),
        }
    }
}

View file

@ -5,6 +5,9 @@
use oxc_span::cmp::ContentEq;
#[allow(clippy::wildcard_imports)]
use crate::ast::comment::*;
#[allow(clippy::wildcard_imports)]
use crate::ast::js::*;
@ -93,6 +96,7 @@ impl<'a> ContentEq for Program<'a> {
fn content_eq(&self, other: &Self) -> bool {
ContentEq::content_eq(&self.source_type, &other.source_type)
&& ContentEq::content_eq(&self.source_text, &other.source_text)
&& ContentEq::content_eq(&self.comments, &other.comments)
&& ContentEq::content_eq(&self.hashbang, &other.hashbang)
&& ContentEq::content_eq(&self.directives, &other.directives)
&& ContentEq::content_eq(&self.body, &other.body)
@ -4212,3 +4216,25 @@ impl<'a> ContentEq for JSXText<'a> {
ContentEq::content_eq(&self.value, &other.value)
}
}
impl ContentEq for CommentKind {
    /// Two kinds are content-equal exactly when they are the same variant.
    fn content_eq(&self, other: &Self) -> bool {
        match (self, other) {
            (Self::Line, Self::Line) | (Self::Block, Self::Block) => true,
            (Self::Line, Self::Block) | (Self::Block, Self::Line) => false,
        }
    }
}
impl ContentEq for CommentPosition {
    /// Two positions are content-equal exactly when they are the same variant.
    fn content_eq(&self, other: &Self) -> bool {
        match (self, other) {
            (Self::Leading, Self::Leading) | (Self::Trailing, Self::Trailing) => true,
            (Self::Leading, Self::Trailing) | (Self::Trailing, Self::Leading) => false,
        }
    }
}
impl ContentEq for Comment {
    /// Compares every field except `span`, which does not take part in
    /// content equality.
    fn content_eq(&self, other: &Self) -> bool {
        let Self {
            span: _,
            kind,
            position,
            attached_to,
            preceded_by_newline,
            followed_by_newline,
        } = self;
        ContentEq::content_eq(kind, &other.kind)
            && ContentEq::content_eq(position, &other.position)
            && ContentEq::content_eq(attached_to, &other.attached_to)
            && ContentEq::content_eq(preceded_by_newline, &other.preceded_by_newline)
            && ContentEq::content_eq(followed_by_newline, &other.followed_by_newline)
    }
}

View file

@ -7,6 +7,9 @@ use std::{hash::Hasher, mem::discriminant};
use oxc_span::hash::ContentHash;
#[allow(clippy::wildcard_imports)]
use crate::ast::comment::*;
#[allow(clippy::wildcard_imports)]
use crate::ast::js::*;
@ -71,6 +74,7 @@ impl<'a> ContentHash for Program<'a> {
fn content_hash<H: Hasher>(&self, state: &mut H) {
ContentHash::content_hash(&self.source_type, state);
ContentHash::content_hash(&self.source_text, state);
ContentHash::content_hash(&self.comments, state);
ContentHash::content_hash(&self.hashbang, state);
ContentHash::content_hash(&self.directives, state);
ContentHash::content_hash(&self.body, state);
@ -2374,3 +2378,25 @@ impl<'a> ContentHash for JSXText<'a> {
ContentHash::content_hash(&self.value, state);
}
}
impl ContentHash for CommentKind {
    /// Feeds the variant's discriminant into `state`.
    fn content_hash<H: Hasher>(&self, state: &mut H) {
        let variant_tag = discriminant(self);
        ContentHash::content_hash(&variant_tag, state);
    }
}
impl ContentHash for CommentPosition {
    /// Feeds the variant's discriminant into `state`.
    fn content_hash<H: Hasher>(&self, state: &mut H) {
        let variant_tag = discriminant(self);
        ContentHash::content_hash(&variant_tag, state);
    }
}
impl ContentHash for Comment {
    /// Hashes every field except `span`, in declaration order.
    fn content_hash<H: Hasher>(&self, state: &mut H) {
        let Self {
            span: _,
            kind,
            position,
            attached_to,
            preceded_by_newline,
            followed_by_newline,
        } = self;
        ContentHash::content_hash(kind, state);
        ContentHash::content_hash(position, state);
        ContentHash::content_hash(attached_to, state);
        ContentHash::content_hash(preceded_by_newline, state);
        ContentHash::content_hash(followed_by_newline, state);
    }
}

View file

@ -57,10 +57,11 @@ pub use generated::{ast_builder, ast_kind};
pub use num_bigint::BigUint;
pub use crate::{
ast::comment::{Comment, CommentKind, CommentPosition},
ast_builder::AstBuilder,
ast_builder_impl::NONE,
ast_kind::{AstKind, AstType},
trivia::{Comment, CommentKind, CommentPosition, SortedComments, Trivias},
trivia::{SortedComments, Trivias},
visit::{Visit, VisitMut},
};

View file

@ -8,107 +8,7 @@ use std::{
use oxc_span::Span;
/// Syntactic form of a comment.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum CommentKind {
/// Line comment, e.g. `// comment`.
Line,
/// Block comment, e.g. `/* comment */`.
Block,
}
/// Whether a comment is classified as leading or trailing relative to a token.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum CommentPosition {
/// Comments prior to a token until another token or trailing comment.
///
/// e.g.
///
/// ```
/// /* leading */ token;
/// /* leading */
/// // leading
/// token;
/// ```
Leading,
/// Comments trailing a token until a newline.
/// e.g. `token /* trailing */ // trailing`
Trailing,
}
/// Single or multiline comment, together with its classification and
/// attachment metadata.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub struct Comment {
/// The span of the comment text (without leading/trailing delimiters).
/// Use `real_span` to get the span including the delimiters.
pub span: Span,
/// Line or block comment
pub kind: CommentKind,
/// Leading or trailing comment
pub position: CommentPosition,
/// Start of token this leading comment is attached to.
/// `/* Leading */ token`
/// ^ This start
/// NOTE: Trailing comment attachment is not computed yet.
pub attached_to: u32,
/// Whether this comment has a preceding newline.
/// Used to avoid becoming a trailing comment in codegen.
pub preceded_by_newline: bool,
/// Whether this comment has a trailing newline.
pub followed_by_newline: bool,
}
impl Comment {
    /// Creates a comment whose text (without delimiters) covers `start..end`.
    ///
    /// The new comment is marked as [`CommentPosition::Trailing`], is attached
    /// to offset `0`, and has both newline flags cleared.
    #[inline]
    pub fn new(start: u32, end: u32, kind: CommentKind) -> Self {
        let span = Span::new(start, end);
        Self {
            span,
            kind,
            position: CommentPosition::Trailing,
            attached_to: 0,
            preceded_by_newline: false,
            followed_by_newline: false,
        }
    }

    /// Returns `true` if this is a line comment.
    pub fn is_line(self) -> bool {
        self.kind == CommentKind::Line
    }

    /// Returns `true` if this is a block comment.
    pub fn is_block(self) -> bool {
        self.kind == CommentKind::Block
    }

    /// Returns `true` if this comment is classified as a leading comment.
    pub fn is_leading(self) -> bool {
        self.position == CommentPosition::Leading
    }

    /// Returns `true` if this comment is classified as a trailing comment.
    pub fn is_trailing(self) -> bool {
        self.position == CommentPosition::Trailing
    }

    /// Returns the span of the whole comment, delimiters included.
    pub fn real_span(&self) -> Span {
        Span::new(self.real_span_start(), self.real_span_end())
    }

    /// Returns the end offset of the comment including its closing delimiter.
    ///
    /// Line comments have no closing delimiter, so this is simply `span.end`.
    pub fn real_span_end(&self) -> u32 {
        match self.kind {
            CommentKind::Line => self.span.end,
            // length of `*/`
            CommentKind::Block => self.span.end + 2,
        }
    }

    /// Returns the start offset of the comment including its two-byte opening
    /// delimiter.
    ///
    /// The subtraction saturates at `0`: a comment whose text span starts
    /// before offset 2 (for example `Comment::new(0, 0, kind)`) would
    /// otherwise underflow, panicking in debug builds and wrapping to a huge
    /// offset in release builds.
    pub fn real_span_start(&self) -> u32 {
        self.span.start.saturating_sub(2)
    }

    /// Returns `true` if this is a leading block comment whose text starts
    /// with `*`, i.e. it was written as `/** ... */`.
    ///
    /// `source_text` must be the text that `span` indexes into.
    pub fn is_jsdoc(&self, source_text: &str) -> bool {
        self.is_leading() && self.is_block() && self.span.source_text(source_text).starts_with('*')
    }
}
use crate::ast::comment::*;
/// Sorted set of unique trivia comments, in ascending order by starting position.
pub type SortedComments = Box<[Comment]>;

View file

@ -92,8 +92,15 @@ impl<'a> IsolatedDeclarations<'a> {
let source_type = SourceType::d_ts();
let directives = self.ast.vec();
let stmts = self.transform_program(program);
let program =
self.ast.program(SPAN, source_type, program.source_text, None, directives, stmts);
let program = self.ast.program(
SPAN,
source_type,
program.source_text,
self.ast.vec_from_iter(program.comments.iter().copied()),
None,
directives,
stmts,
);
IsolatedDeclarationsReturn { program, errors: self.take_errors() }
}

View file

@ -1,4 +1,7 @@
use oxc_ast::{Comment, CommentKind, CommentPosition, Trivias};
use oxc_ast::{
ast::{Comment, CommentKind, CommentPosition},
Trivias,
};
use oxc_span::Span;
use super::{Kind, Token};

View file

@ -405,6 +405,7 @@ impl<'a> ParserImpl<'a> {
Span::default(),
self.source_type,
self.source_text,
self.ast.vec(),
None,
self.ast.vec(),
self.ast.vec(),
@ -451,10 +452,12 @@ impl<'a> ParserImpl<'a> {
self.set_source_type_to_script_if_unambiguous();
let span = Span::new(0, self.source_text.len() as u32);
let comments = self.ast.vec_from_iter(self.lexer.trivia_builder.comments.iter().copied());
Ok(self.ast.program(
span,
self.source_type,
self.source_text,
comments,
hashbang,
directives,
statements,
@ -537,7 +540,7 @@ impl<'a> ParserImpl<'a> {
mod test {
use std::path::Path;
use oxc_ast::{ast::Expression, CommentKind};
use oxc_ast::ast::{CommentKind, Expression};
use super::*;

View file

@ -1,6 +1,6 @@
use rustc_hash::FxHashMap;
use oxc_ast::{AstKind, Comment, Trivias};
use oxc_ast::{ast::Comment, AstKind, Trivias};
use oxc_span::{GetSpan, Span};
use crate::jsdoc::JSDocFinder;

View file

@ -2202,6 +2202,7 @@ impl<'a, 't> Ancestor<'a, 't> {
pub(crate) const OFFSET_PROGRAM_SPAN: usize = offset_of!(Program, span);
pub(crate) const OFFSET_PROGRAM_SOURCE_TYPE: usize = offset_of!(Program, source_type);
pub(crate) const OFFSET_PROGRAM_SOURCE_TEXT: usize = offset_of!(Program, source_text);
pub(crate) const OFFSET_PROGRAM_COMMENTS: usize = offset_of!(Program, comments);
pub(crate) const OFFSET_PROGRAM_HASHBANG: usize = offset_of!(Program, hashbang);
pub(crate) const OFFSET_PROGRAM_DIRECTIVES: usize = offset_of!(Program, directives);
pub(crate) const OFFSET_PROGRAM_BODY: usize = offset_of!(Program, body);
@ -2230,6 +2231,11 @@ impl<'a, 't> ProgramWithoutHashbang<'a, 't> {
unsafe { &*((self.0 as *const u8).add(OFFSET_PROGRAM_SOURCE_TEXT) as *const &'a str) }
}
/// Returns a shared reference to the program's `comments` field.
#[inline]
pub fn comments(self) -> &'t Vec<'a, Comment> {
// SAFETY (review note): the pointer is advanced by `OFFSET_PROGRAM_COMMENTS`,
// which is `offset_of!(Program, comments)`, and cast to that field's declared
// type `Vec<'a, Comment>`. This assumes `self.0` points to a live `Program`
// for at least `'t` — TODO confirm against where `self.0` is set.
unsafe { &*((self.0 as *const u8).add(OFFSET_PROGRAM_COMMENTS) as *const Vec<'a, Comment>) }
}
#[inline]
pub fn directives(self) -> &'t Vec<'a, Directive<'a>> {
unsafe {
@ -2276,6 +2282,11 @@ impl<'a, 't> ProgramWithoutDirectives<'a, 't> {
unsafe { &*((self.0 as *const u8).add(OFFSET_PROGRAM_SOURCE_TEXT) as *const &'a str) }
}
/// Returns a shared reference to the program's `comments` field.
#[inline]
pub fn comments(self) -> &'t Vec<'a, Comment> {
// SAFETY (review note): the pointer is advanced by `OFFSET_PROGRAM_COMMENTS`,
// which is `offset_of!(Program, comments)`, and cast to that field's declared
// type `Vec<'a, Comment>`. This assumes `self.0` points to a live `Program`
// for at least `'t` — TODO confirm against where `self.0` is set.
unsafe { &*((self.0 as *const u8).add(OFFSET_PROGRAM_COMMENTS) as *const Vec<'a, Comment>) }
}
#[inline]
pub fn hashbang(self) -> &'t Option<Hashbang<'a>> {
unsafe {
@ -2321,6 +2332,11 @@ impl<'a, 't> ProgramWithoutBody<'a, 't> {
unsafe { &*((self.0 as *const u8).add(OFFSET_PROGRAM_SOURCE_TEXT) as *const &'a str) }
}
/// Returns a shared reference to the program's `comments` field.
#[inline]
pub fn comments(self) -> &'t Vec<'a, Comment> {
// SAFETY (review note): the pointer is advanced by `OFFSET_PROGRAM_COMMENTS`,
// which is `offset_of!(Program, comments)`, and cast to that field's declared
// type `Vec<'a, Comment>`. This assumes `self.0` points to a live `Program`
// for at least `'t` — TODO confirm against where `self.0` is set.
unsafe { &*((self.0 as *const u8).add(OFFSET_PROGRAM_COMMENTS) as *const Vec<'a, Comment>) }
}
#[inline]
pub fn hashbang(self) -> &'t Option<Hashbang<'a>> {
unsafe {

View file

@ -31,6 +31,7 @@ static SOURCE_PATHS: &[&str] = &[
"crates/oxc_ast/src/ast/js.rs",
"crates/oxc_ast/src/ast/ts.rs",
"crates/oxc_ast/src/ast/jsx.rs",
"crates/oxc_ast/src/ast/comment.rs",
"crates/oxc_syntax/src/number.rs",
"crates/oxc_syntax/src/operator.rs",
"crates/oxc_span/src/span/types.rs",