mirror of
https://github.com/danbulant/oxc
synced 2026-05-24 12:21:58 +00:00
feat(parser): calculate leading and trailing position for comments (#5785)
This commit is contained in:
parent
31e9db442d
commit
8e7556f842
6 changed files with 211 additions and 16 deletions
|
|
@ -61,7 +61,7 @@ pub use crate::{
|
||||||
ast_builder::AstBuilder,
|
ast_builder::AstBuilder,
|
||||||
ast_builder_impl::NONE,
|
ast_builder_impl::NONE,
|
||||||
ast_kind::{AstKind, AstType},
|
ast_kind::{AstKind, AstType},
|
||||||
trivia::{Comment, CommentKind, SortedComments, Trivias},
|
trivia::{Comment, CommentKind, CommentPosition, SortedComments, Trivias},
|
||||||
visit::{Visit, VisitMut},
|
visit::{Visit, VisitMut},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -14,20 +14,63 @@ pub enum CommentKind {
|
||||||
Block,
|
Block,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
|
||||||
|
pub enum CommentPosition {
|
||||||
|
/// Comments prior to a token until another token or trailing comment.
|
||||||
|
///
|
||||||
|
/// e.g.
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// /* leading */ token;
|
||||||
|
/// /* leading */
|
||||||
|
/// // leading
|
||||||
|
/// token;
|
||||||
|
/// ```
|
||||||
|
Leading,
|
||||||
|
|
||||||
|
/// Comments trailing a token until a newline.
|
||||||
|
/// e.g. `token /* trailing */ // trailing`
|
||||||
|
Trailing,
|
||||||
|
}
|
||||||
|
|
||||||
/// Single or multiline comment
|
/// Single or multiline comment
|
||||||
#[derive(Debug, Clone, Copy)]
|
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
|
||||||
pub struct Comment {
|
pub struct Comment {
|
||||||
/// The span of the comment text (without leading/trailing delimiters).
|
/// The span of the comment text (without leading/trailing delimiters).
|
||||||
pub span: Span,
|
pub span: Span,
|
||||||
|
|
||||||
|
/// Line or block comment
|
||||||
pub kind: CommentKind,
|
pub kind: CommentKind,
|
||||||
|
|
||||||
|
/// Leading or trailing comment
|
||||||
|
pub position: CommentPosition,
|
||||||
|
|
||||||
|
/// Start of token this leading comment is attached to.
|
||||||
|
/// `/* Leading */ token`
|
||||||
|
/// ^ This start
|
||||||
|
/// NOTE: Trailing comment attachment is not computed yet.
|
||||||
|
pub attached_to: u32,
|
||||||
|
|
||||||
|
/// Whether this comment has a preceding newline.
|
||||||
|
/// Used to avoid becoming a trailing comment in codegen.
|
||||||
|
pub preceded_by_newline: bool,
|
||||||
|
|
||||||
|
/// Whether this comment has a trailing newline.
|
||||||
|
pub followed_by_newline: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Comment {
|
impl Comment {
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn new(start: u32, end: u32, kind: CommentKind) -> Self {
|
pub fn new(start: u32, end: u32, kind: CommentKind) -> Self {
|
||||||
let span = Span::new(start, end);
|
let span = Span::new(start, end);
|
||||||
Self { span, kind }
|
Self {
|
||||||
|
span,
|
||||||
|
kind,
|
||||||
|
position: CommentPosition::Trailing,
|
||||||
|
attached_to: 0,
|
||||||
|
preceded_by_newline: false,
|
||||||
|
followed_by_newline: false,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn is_line(self) -> bool {
|
pub fn is_line(self) -> bool {
|
||||||
|
|
@ -38,6 +81,14 @@ impl Comment {
|
||||||
self.kind == CommentKind::Block
|
self.kind == CommentKind::Block
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn is_leading(self) -> bool {
|
||||||
|
self.position == CommentPosition::Leading
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn is_trailing(self) -> bool {
|
||||||
|
self.position == CommentPosition::Trailing
|
||||||
|
}
|
||||||
|
|
||||||
pub fn real_span(&self) -> Span {
|
pub fn real_span(&self) -> Span {
|
||||||
Span::new(self.real_span_start(), self.real_span_end())
|
Span::new(self.real_span_start(), self.real_span_end())
|
||||||
}
|
}
|
||||||
|
|
@ -55,8 +106,6 @@ impl Comment {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl CommentKind {}
|
|
||||||
|
|
||||||
/// Sorted set of unique trivia comments, in ascending order by starting position.
|
/// Sorted set of unique trivia comments, in ascending order by starting position.
|
||||||
pub type SortedComments = Box<[Comment]>;
|
pub type SortedComments = Box<[Comment]>;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -35,7 +35,7 @@ impl<'a> Lexer<'a> {
|
||||||
if next_byte != LS_OR_PS_FIRST {
|
if next_byte != LS_OR_PS_FIRST {
|
||||||
// `\r` or `\n`
|
// `\r` or `\n`
|
||||||
self.trivia_builder
|
self.trivia_builder
|
||||||
.add_single_line_comment(self.token.start, self.source.offset_of(pos));
|
.add_line_comment(self.token.start, self.source.offset_of(pos));
|
||||||
// SAFETY: Safe to consume `\r` or `\n` as both are ASCII
|
// SAFETY: Safe to consume `\r` or `\n` as both are ASCII
|
||||||
pos = unsafe { pos.add(1) };
|
pos = unsafe { pos.add(1) };
|
||||||
// We've found the end. Do not continue searching.
|
// We've found the end. Do not continue searching.
|
||||||
|
|
@ -50,7 +50,7 @@ impl<'a> Lexer<'a> {
|
||||||
if matches!(next2, LS_BYTES_2_AND_3 | PS_BYTES_2_AND_3) {
|
if matches!(next2, LS_BYTES_2_AND_3 | PS_BYTES_2_AND_3) {
|
||||||
// Irregular line break
|
// Irregular line break
|
||||||
self.trivia_builder
|
self.trivia_builder
|
||||||
.add_single_line_comment(self.token.start, self.source.offset_of(pos));
|
.add_line_comment(self.token.start, self.source.offset_of(pos));
|
||||||
// Advance `pos` to after this char.
|
// Advance `pos` to after this char.
|
||||||
// SAFETY: `0xE2` is always 1st byte of a 3-byte UTF-8 char,
|
// SAFETY: `0xE2` is always 1st byte of a 3-byte UTF-8 char,
|
||||||
// so consuming 3 bytes will place `pos` on next UTF-8 char boundary.
|
// so consuming 3 bytes will place `pos` on next UTF-8 char boundary.
|
||||||
|
|
@ -69,7 +69,7 @@ impl<'a> Lexer<'a> {
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
handle_eof: {
|
handle_eof: {
|
||||||
self.trivia_builder.add_single_line_comment(self.token.start, self.offset());
|
self.trivia_builder.add_line_comment(self.token.start, self.offset());
|
||||||
return Kind::Skip;
|
return Kind::Skip;
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
@ -145,7 +145,7 @@ impl<'a> Lexer<'a> {
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
self.trivia_builder.add_multi_line_comment(self.token.start, self.offset());
|
self.trivia_builder.add_block_comment(self.token.start, self.offset());
|
||||||
Kind::Skip
|
Kind::Skip
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -165,7 +165,7 @@ impl<'a> Lexer<'a> {
|
||||||
if let Some(index) = finder.find(remaining) {
|
if let Some(index) = finder.find(remaining) {
|
||||||
// SAFETY: `pos + index + 2` is end of `*/`, so a valid `SourcePosition`
|
// SAFETY: `pos + index + 2` is end of `*/`, so a valid `SourcePosition`
|
||||||
self.source.set_position(unsafe { pos.add(index + 2) });
|
self.source.set_position(unsafe { pos.add(index + 2) });
|
||||||
self.trivia_builder.add_multi_line_comment(self.token.start, self.offset());
|
self.trivia_builder.add_block_comment(self.token.start, self.offset());
|
||||||
Kind::Skip
|
Kind::Skip
|
||||||
} else {
|
} else {
|
||||||
self.source.advance_to_end();
|
self.source.advance_to_end();
|
||||||
|
|
|
||||||
|
|
@ -218,6 +218,7 @@ impl<'a> Lexer<'a> {
|
||||||
self.token.end = self.offset();
|
self.token.end = self.offset();
|
||||||
debug_assert!(self.token.start <= self.token.end);
|
debug_assert!(self.token.start <= self.token.end);
|
||||||
let token = self.token;
|
let token = self.token;
|
||||||
|
self.trivia_builder.handle_token(token.start);
|
||||||
self.token = Token::default();
|
self.token = Token::default();
|
||||||
token
|
token
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,13 +1,21 @@
|
||||||
use oxc_ast::{Comment, CommentKind, Trivias};
|
use oxc_ast::{Comment, CommentKind, CommentPosition, Trivias};
|
||||||
use oxc_span::Span;
|
use oxc_span::Span;
|
||||||
|
|
||||||
#[derive(Debug, Default)]
|
#[derive(Debug, Default)]
|
||||||
pub struct TriviaBuilder {
|
pub struct TriviaBuilder {
|
||||||
// NOTE(lucab): This is a set of unique comments. Duplicated
|
// This is a set of unique comments. Duplicated
|
||||||
// comments could be generated in case of rewind; they are
|
// comments could be generated in case of rewind; they are
|
||||||
// filtered out at insertion time.
|
// filtered out at insertion time.
|
||||||
pub(crate) comments: Vec<Comment>,
|
pub(crate) comments: Vec<Comment>,
|
||||||
|
|
||||||
irregular_whitespaces: Vec<Span>,
|
irregular_whitespaces: Vec<Span>,
|
||||||
|
|
||||||
|
// states
|
||||||
|
/// index of processed comments
|
||||||
|
processed: usize,
|
||||||
|
|
||||||
|
/// Saw a newline before this position
|
||||||
|
saw_newline: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl TriviaBuilder {
|
impl TriviaBuilder {
|
||||||
|
|
@ -15,16 +23,44 @@ impl TriviaBuilder {
|
||||||
Trivias::new(self.comments.into_boxed_slice(), self.irregular_whitespaces)
|
Trivias::new(self.comments.into_boxed_slice(), self.irregular_whitespaces)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn add_single_line_comment(&mut self, start: u32, end: u32) {
|
pub fn add_irregular_whitespace(&mut self, start: u32, end: u32) {
|
||||||
|
self.irregular_whitespaces.push(Span::new(start, end));
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn add_line_comment(&mut self, start: u32, end: u32) {
|
||||||
// skip leading `//`
|
// skip leading `//`
|
||||||
self.add_comment(Comment::new(start + 2, end, CommentKind::Line));
|
self.add_comment(Comment::new(start + 2, end, CommentKind::Line));
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn add_multi_line_comment(&mut self, start: u32, end: u32) {
|
pub fn add_block_comment(&mut self, start: u32, end: u32) {
|
||||||
// skip leading `/*` and trailing `*/`
|
// skip leading `/*` and trailing `*/`
|
||||||
self.add_comment(Comment::new(start + 2, end - 2, CommentKind::Block));
|
self.add_comment(Comment::new(start + 2, end - 2, CommentKind::Block));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// For block comments only. This function is not called after line comments because the lexer skips
|
||||||
|
// newline after line comments.
|
||||||
|
pub fn handle_newline(&mut self) {
|
||||||
|
// The last unprocessed comment is on a newline.
|
||||||
|
let len = self.comments.len();
|
||||||
|
if self.processed < len {
|
||||||
|
self.comments[len - 1].followed_by_newline = true;
|
||||||
|
}
|
||||||
|
self.saw_newline = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn handle_token(&mut self, token_start: u32) {
|
||||||
|
let len = self.comments.len();
|
||||||
|
if self.processed < len {
|
||||||
|
// All unprocessed preceding comments are leading comments attached to this token start.
|
||||||
|
for comment in &mut self.comments[self.processed..] {
|
||||||
|
comment.position = CommentPosition::Leading;
|
||||||
|
comment.attached_to = token_start;
|
||||||
|
}
|
||||||
|
self.processed = len;
|
||||||
|
}
|
||||||
|
self.saw_newline = false;
|
||||||
|
}
|
||||||
|
|
||||||
fn add_comment(&mut self, comment: Comment) {
|
fn add_comment(&mut self, comment: Comment) {
|
||||||
// The comments array is an ordered vec, only add the comment if it's not added before,
|
// The comments array is an ordered vec, only add the comment if it's not added before,
|
||||||
// to avoid situations where the parser needs to rewind and tries to reinsert the comment.
|
// to avoid situations where the parser needs to rewind and tries to reinsert the comment.
|
||||||
|
|
@ -33,10 +69,118 @@ impl TriviaBuilder {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let mut comment = comment;
|
||||||
|
// This newly added comment may be preceded by a newline.
|
||||||
|
comment.preceded_by_newline = self.saw_newline;
|
||||||
|
if comment.is_line() {
|
||||||
|
// A line comment is always followed by a newline. This is never set in `handle_newline`.
|
||||||
|
comment.followed_by_newline = true;
|
||||||
|
// A line comment is trailing when it is not preceded by a newline.
|
||||||
|
if !self.saw_newline {
|
||||||
|
self.processed = self.comments.len() + 1; // +1 to include this comment.
|
||||||
|
}
|
||||||
|
self.saw_newline = true;
|
||||||
|
}
|
||||||
|
|
||||||
self.comments.push(comment);
|
self.comments.push(comment);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub fn add_irregular_whitespace(&mut self, start: u32, end: u32) {
|
#[cfg(test)]
|
||||||
self.irregular_whitespaces.push(Span::new(start, end));
|
mod test {
|
||||||
|
use crate::Parser;
|
||||||
|
use oxc_allocator::Allocator;
|
||||||
|
use oxc_ast::{Comment, CommentKind, CommentPosition};
|
||||||
|
use oxc_span::{SourceType, Span};
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn comment_attachments() {
|
||||||
|
let allocator = Allocator::default();
|
||||||
|
let source_type = SourceType::default();
|
||||||
|
let source_text = "
|
||||||
|
/* Leading 1 */
|
||||||
|
// Leading 2
|
||||||
|
/* Leading 3 */ token /* Trailing 1 */ // Trailing 2
|
||||||
|
// Leading of EOF token
|
||||||
|
";
|
||||||
|
let ret = Parser::new(&allocator, source_text, source_type).parse();
|
||||||
|
let comments = ret.trivias.comments().copied().collect::<Vec<_>>();
|
||||||
|
let expected = [
|
||||||
|
Comment {
|
||||||
|
span: Span::new(11, 22),
|
||||||
|
kind: CommentKind::Block,
|
||||||
|
position: CommentPosition::Leading,
|
||||||
|
attached_to: 70,
|
||||||
|
preceded_by_newline: true,
|
||||||
|
followed_by_newline: true,
|
||||||
|
},
|
||||||
|
Comment {
|
||||||
|
span: Span::new(35, 45),
|
||||||
|
kind: CommentKind::Line,
|
||||||
|
position: CommentPosition::Leading,
|
||||||
|
attached_to: 70,
|
||||||
|
preceded_by_newline: true,
|
||||||
|
followed_by_newline: true,
|
||||||
|
},
|
||||||
|
Comment {
|
||||||
|
span: Span::new(56, 67),
|
||||||
|
kind: CommentKind::Block,
|
||||||
|
position: CommentPosition::Leading,
|
||||||
|
attached_to: 70,
|
||||||
|
preceded_by_newline: true,
|
||||||
|
followed_by_newline: false,
|
||||||
|
},
|
||||||
|
Comment {
|
||||||
|
span: Span::new(78, 90),
|
||||||
|
kind: CommentKind::Block,
|
||||||
|
position: CommentPosition::Trailing,
|
||||||
|
attached_to: 0,
|
||||||
|
preceded_by_newline: false,
|
||||||
|
followed_by_newline: false,
|
||||||
|
},
|
||||||
|
Comment {
|
||||||
|
span: Span::new(95, 106),
|
||||||
|
kind: CommentKind::Line,
|
||||||
|
position: CommentPosition::Trailing,
|
||||||
|
attached_to: 0,
|
||||||
|
preceded_by_newline: false,
|
||||||
|
followed_by_newline: true,
|
||||||
|
},
|
||||||
|
Comment {
|
||||||
|
span: Span::new(117, 138),
|
||||||
|
kind: CommentKind::Line,
|
||||||
|
position: CommentPosition::Leading,
|
||||||
|
attached_to: 147,
|
||||||
|
preceded_by_newline: true,
|
||||||
|
followed_by_newline: true,
|
||||||
|
},
|
||||||
|
];
|
||||||
|
|
||||||
|
assert_eq!(comments.len(), expected.len());
|
||||||
|
for (comment, expected) in comments.iter().copied().zip(expected) {
|
||||||
|
assert_eq!(comment, expected, "{}", comment.real_span().source_text(source_text));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn comment_attachments2() {
|
||||||
|
let allocator = Allocator::default();
|
||||||
|
let source_type = SourceType::default();
|
||||||
|
let source_text = "#!/usr/bin/env node
|
||||||
|
/* Leading 1 */
|
||||||
|
token
|
||||||
|
";
|
||||||
|
let ret = Parser::new(&allocator, source_text, source_type).parse();
|
||||||
|
let comments = ret.trivias.comments().copied().collect::<Vec<_>>();
|
||||||
|
let expected = vec![Comment {
|
||||||
|
span: Span::new(22, 33),
|
||||||
|
kind: CommentKind::Block,
|
||||||
|
position: CommentPosition::Leading,
|
||||||
|
attached_to: 36,
|
||||||
|
preceded_by_newline: false, // hashbang comment always ends in a newline
|
||||||
|
followed_by_newline: true,
|
||||||
|
}];
|
||||||
|
assert_eq!(comments, expected);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,7 @@ static NOT_REGULAR_WHITESPACE_OR_LINE_BREAK_TABLE: SafeByteMatchTable =
|
||||||
impl<'a> Lexer<'a> {
|
impl<'a> Lexer<'a> {
|
||||||
pub(super) fn line_break_handler(&mut self) -> Kind {
|
pub(super) fn line_break_handler(&mut self) -> Kind {
|
||||||
self.token.is_on_new_line = true;
|
self.token.is_on_new_line = true;
|
||||||
|
self.trivia_builder.handle_newline();
|
||||||
|
|
||||||
// Indentation is common after a line break.
|
// Indentation is common after a line break.
|
||||||
// Consume it, along with any further line breaks.
|
// Consume it, along with any further line breaks.
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue