fix: comments gen regression (#5003)

try to fix: https://github.com/rolldown/rolldown/issues/2013
1. Before we only considering the ast is untouched, but considering the
scenario.
```js

const a = /*__PURE__*/ test(),
//    ^^^              ^^^^^^ is removed during transform
	b = a();

```
Then according to the previous algorithm, `PURE` will attach to `b =
a()`
2. Now, we try to attach comments as much as possible unless the
comments are separated by comments, for the case above, `PURE` will not
be attached to `a()` since the content between `b = a()` and `/*
__PURE__*/` is not all whitespace.
3. we added back `MoveMap`, for the special case 
```js
/*__NODE_SIDE_EFFECTS__*/ export const c = 100;
// ^^^^^^^^^^^^^^^^^^^^^         should be attached to first declarator,
//                        ^^^^^^ are not whitespace

```
This commit is contained in:
IWANABETHATGUY 2024-08-20 18:57:12 +08:00 committed by GitHub
parent cd9cf5efd8
commit b7db235065
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 113 additions and 34 deletions

1
Cargo.lock generated
View file

@ -1504,6 +1504,7 @@ dependencies = [
"oxc_span",
"oxc_syntax",
"pico-args",
"rustc-hash",
]
[[package]]

View file

@ -22,6 +22,20 @@ impl Comment {
let span = Span::new(start, end);
Self { kind, span }
}
pub fn real_span_end(&self) -> u32 {
match self.kind {
CommentKind::SingleLine => self.span.end,
// length of `*/`
CommentKind::MultiLine => self.span.end + 2,
}
}
pub fn real_span_start(&self) -> u32 {
match self.kind {
CommentKind::SingleLine | CommentKind::MultiLine => self.span.start - 2,
}
}
}
#[derive(Debug, Clone, Copy, Eq, PartialEq)]

View file

@ -28,10 +28,11 @@ oxc_sourcemap = { workspace = true }
oxc_mangler = { workspace = true }
oxc_index = { workspace = true }
bitflags = { workspace = true }
nonmax = { workspace = true }
once_cell = { workspace = true }
daachorse = { workspace = true }
bitflags = { workspace = true }
nonmax = { workspace = true }
once_cell = { workspace = true }
daachorse = { workspace = true }
rustc-hash = { workspace = true }
[dev-dependencies]
oxc_parser = { workspace = true }

View file

@ -53,7 +53,19 @@ impl<'a> Codegen<'a> {
if !self.comment_options.preserve_annotate_comments {
return vec![];
}
self.get_leading_comments(self.latest_consumed_comment_end, node_start)
let mut latest_comment_start = node_start;
let mut ret = self
.get_leading_comments(self.latest_consumed_comment_end, node_start)
.rev()
// each comment should be separated by whitespaces
.take_while(|comment| {
let comment_end = comment.real_span_end();
let range_content =
&self.source_text[comment_end as usize..latest_comment_start as usize];
let all_whitespace = range_content.chars().all(char::is_whitespace);
latest_comment_start = comment.real_span_start();
all_whitespace
})
.filter_map(|comment| {
let source_code = self.source_text;
let comment_content =
@ -68,7 +80,9 @@ impl<'a> Codegen<'a> {
}
None
})
.collect::<Vec<_>>()
.collect::<Vec<_>>();
ret.reverse();
ret
}
pub(crate) fn print_comment(&mut self, comment: AnnotationComment) {
@ -85,11 +99,12 @@ impl<'a> Codegen<'a> {
// in this example, `Object.getOwnPropertyNames(Symbol)` and `Object.getOwnPropertyNames(Symbol).filter()`, `Object.getOwnPropertyNames(Symbol).filter().map()`
// share the same leading comment. since they both are call expr and has same span start, we need to avoid print the same comment multiple times.
let comment_span = comment.span();
if self.latest_consumed_comment_end >= comment_span.end {
let real_span_end = comment.comment.real_span_end();
if self.latest_consumed_comment_end >= real_span_end {
return;
}
let real_comment_end = match comment.kind() {
self.update_last_consumed_comment_end(real_span_end);
match comment.kind() {
CommentKind::SingleLine => {
self.print_str("//");
self.print_range_of_source_code(
@ -97,7 +112,6 @@ impl<'a> Codegen<'a> {
);
self.print_soft_newline();
self.print_indent();
comment_span.end
}
CommentKind::MultiLine => {
self.print_str("/*");
@ -106,10 +120,8 @@ impl<'a> Codegen<'a> {
);
self.print_str("*/");
self.print_soft_space();
comment_span.end + 2
}
};
self.update_last_consumed_comment_end(real_comment_end);
}
// FIXME: esbuild function `restoreExprStartFlags`
self.start_of_default_export = self.code_len();
}
@ -118,25 +130,26 @@ impl<'a> Codegen<'a> {
if !self.comment_options.preserve_annotate_comments {
return;
}
let annotation_kind_set = AnnotationKind::empty();
let mut annotation_kind_set = AnnotationKind::empty();
if let Some(comments) = self.try_take_moved_comment(node_start) {
self.print_comments(&comments, &mut annotation_kind_set);
}
let leading_annotate_comments = self.get_leading_annotate_comments(node_start);
self.print_comments(&leading_annotate_comments, annotation_kind_set);
self.print_comments(&leading_annotate_comments, &mut annotation_kind_set);
}
#[inline]
pub(crate) fn print_comments(
&mut self,
leading_annotate_comment: &Vec<AnnotationComment>,
mut annotation_kind_set: AnnotationKind,
annotation_kind_set: &mut AnnotationKind,
) {
for &comment in leading_annotate_comment {
let kind = comment.annotation_kind();
if !annotation_kind_set.intersects(kind) {
if !annotation_kind_set.contains(kind) {
annotation_kind_set.insert(kind);
self.print_comment(comment);
}
self.update_last_consumed_comment_end(comment.span().end);
}
}

View file

@ -155,10 +155,6 @@ impl<'a> Gen for Statement<'a> {
p.print_semicolon_after_statement();
}
}
if p.comment_options.preserve_annotate_comments {
p.update_last_consumed_comment_end(self.span().end);
}
}
}
@ -592,9 +588,17 @@ impl<'a> Gen for VariableDeclaration<'a> {
}
if p.comment_options.preserve_annotate_comments
&& !matches!(self.kind, VariableDeclarationKind::Const)
&& matches!(self.kind, VariableDeclarationKind::Const)
{
p.update_last_consumed_comment_end(self.span.start);
if let Some(declarator) = self.declarations.first() {
if let Some(ref init) = declarator.init {
let leading_annotate_comments =
p.get_leading_annotate_comments(self.span.start);
if !leading_annotate_comments.is_empty() {
p.move_comments(init.span().start, leading_annotate_comments);
}
}
}
}
p.print_str(match self.kind {
VariableDeclarationKind::Const => "const",
@ -864,9 +868,17 @@ impl<'a> Gen for ExportNamedDeclaration<'a> {
p.gen_comments(self.span.start);
}
Some(Declaration::VariableDeclaration(var_decl))
if !matches!(var_decl.kind, VariableDeclarationKind::Const) =>
if matches!(var_decl.kind, VariableDeclarationKind::Const) =>
{
p.update_last_consumed_comment_end(self.span.start);
if let Some(declarator) = var_decl.declarations.first() {
if let Some(ref init) = declarator.init {
let leading_annotate_comments =
p.get_leading_annotate_comments(self.span.start);
if !leading_annotate_comments.is_empty() {
p.move_comments(init.span().start, leading_annotate_comments);
}
}
}
}
_ => {}
};
@ -1062,9 +1074,6 @@ impl<'a> GenExpr for Expression<'a> {
Self::TSNonNullExpression(e) => e.gen_expr(p, precedence, ctx),
Self::TSInstantiationExpression(e) => e.gen_expr(p, precedence, ctx),
}
if p.comment_options.preserve_annotate_comments {
p.update_last_consumed_comment_end(self.span().end);
}
}
}
@ -1409,7 +1418,7 @@ impl<'a> GenExpr for CallExpression<'a> {
wrap = true;
}
p.wrap(wrap, |p| {
p.print_comments(&annotate_comments, AnnotationKind::empty());
p.print_comments(&annotate_comments, &mut AnnotationKind::empty());
p.add_source_mapping(self.span.start);
self.callee.gen_expr(p, Precedence::Postfix, Context::empty());
if self.optional {
@ -2078,7 +2087,7 @@ impl<'a> GenExpr for NewExpression<'a> {
wrap = true;
}
p.wrap(wrap, |p| {
p.print_comments(&annotate_comment, AnnotationKind::empty());
p.print_comments(&annotate_comment, &mut AnnotationKind::empty());
p.print_space_before_identifier();
p.add_source_mapping(self.span.start);
p.print_str("new ");

View file

@ -10,7 +10,7 @@ mod gen;
mod operator;
mod sourcemap_builder;
use std::{borrow::Cow, ops::Range};
use std::{borrow::Cow, collections::hash_map::Entry, ops::Range};
use oxc_ast::{
ast::{BindingIdentifier, BlockStatement, Expression, IdentifierReference, Program, Statement},
@ -23,6 +23,7 @@ use oxc_syntax::{
operator::{BinaryOperator, UnaryOperator, UpdateOperator},
precedence::Precedence,
};
use rustc_hash::FxHashMap;
use crate::{
binary_expr_visitor::BinaryExpressionVisitor, operator::Operator,
@ -33,6 +34,8 @@ pub use crate::{
gen::{Gen, GenExpr},
};
use self::annotation_comment::AnnotationComment;
/// Code generator without whitespace removal.
pub type CodeGenerator<'a> = Codegen<'a>;
@ -95,7 +98,13 @@ pub struct Codegen<'a> {
sourcemap_builder: Option<SourcemapBuilder>,
latest_consumed_comment_end: u32,
/// The key of map is the node start position,
/// the first element of value is the start of the comment
/// the second element of value includes the end of the comment and comment kind.
move_comment_map: MoveCommentMap,
}
pub(crate) type MoveCommentMap = FxHashMap<u32, Vec<AnnotationComment>>;
impl<'a> Default for Codegen<'a> {
fn default() -> Self {
@ -140,6 +149,7 @@ impl<'a> Codegen<'a> {
quote: b'"',
sourcemap_builder: None,
latest_consumed_comment_end: 0,
move_comment_map: MoveCommentMap::default(),
}
}
@ -516,7 +526,38 @@ impl<'a> Codegen<'a> {
self.code.extend_from_slice(self.source_text[range].as_bytes());
}
fn get_leading_comments(&self, start: u32, end: u32) -> impl Iterator<Item = &'_ Comment> + '_ {
fn get_leading_comments(
&self,
start: u32,
end: u32,
) -> impl DoubleEndedIterator<Item = &'_ Comment> + '_ {
self.trivias.comments_range(start..end)
}
/// In some scenario, we want to move the comment that should be codegened to another position.
/// ```js
/// /* @__NO_SIDE_EFFECTS__ */ export const a = function() {
///
/// }, b = 10000;
///
/// ```
/// should generate such output:
/// ```js
/// export const /* @__NO_SIDE_EFFECTS__ */ a = function() {
///
/// }, b = 10000;
/// ```
fn move_comments(&mut self, position: u32, full_comment_infos: Vec<AnnotationComment>) {
match self.move_comment_map.entry(position) {
Entry::Occupied(mut occ) => {
occ.get_mut().extend(full_comment_infos);
}
Entry::Vacant(vac) => {
vac.insert(full_comment_infos);
}
}
}
fn try_take_moved_comment(&mut self, node_start: u32) -> Option<Vec<AnnotationComment>> {
self.move_comment_map.remove(&node_start)
}
}