refactor(codegen): simplify printing annotation comments (#6027)

Simplify printing annotation comments, reusing the architecture of printing JSDoc.
This commit is contained in:
Dunqing 2024-09-25 06:27:18 +00:00
parent f866781f49
commit fe696f0dec
5 changed files with 111 additions and 279 deletions

View file

@ -1,161 +0,0 @@
use daachorse::DoubleArrayAhoCorasick;
use once_cell::sync::Lazy;
use oxc_ast::{Comment, CommentKind};
use oxc_span::Span;
use crate::Codegen;
/// Matcher for the annotation markers recognised inside comments.
///
/// The pattern order is significant: `get_leading_annotate_comments` maps the value of a
/// match to an annotation kind (`0 | 1` are `NO_SIDE_EFFECTS`, `2 | 3` are `PURE`).
static MATCHER: Lazy<DoubleArrayAhoCorasick<usize>> = Lazy::new(|| {
    // A fixed-size array is enough here; `vec!` would only add a heap allocation.
    let patterns = ["#__NO_SIDE_EFFECTS__", "@__NO_SIDE_EFFECTS__", "@__PURE__", "#__PURE__"];
    DoubleArrayAhoCorasick::new(patterns)
        .expect("the annotation patterns are a fixed, valid pattern set")
});
bitflags::bitflags! {
/// The kind of annotation a comment carries.
///
/// In theory this should be an enum, but bitflags make it easy to merge several kinds into
/// one bitset, which codegen uses to print each kind of annotation comment only once.
#[derive(Debug, Default, Clone, Copy)]
pub(crate) struct AnnotationKind: u8 {
/// A `__NO_SIDE_EFFECTS__` annotation.
const NO_SIDE_EFFECTS = 1 << 0;
/// A `__PURE__` annotation.
const PURE = 1 << 1;
}
}
/// A comment paired with the kind of annotation that was found in it.
#[derive(Debug, Clone, Copy)]
pub struct AnnotationComment {
/// The annotation kind matched in the comment text.
pub(crate) annotation_kind: AnnotationKind,
/// The underlying comment.
pub(crate) comment: Comment,
}
impl AnnotationComment {
/// Returns the kind of annotation found in the comment.
pub fn annotation_kind(&self) -> AnnotationKind {
self.annotation_kind
}
/// Returns the span of the underlying comment.
pub fn span(&self) -> Span {
self.comment.span
}
/// Returns the kind (line or block) of the underlying comment.
pub fn kind(&self) -> CommentKind {
self.comment.kind
}
}
/// Builds an [`AnnotationComment`] from a `(comment, annotation kind)` pair.
impl From<(Comment, AnnotationKind)> for AnnotationComment {
    fn from((comment, annotation_kind): (Comment, AnnotationKind)) -> Self {
        Self { annotation_kind, comment }
    }
}
impl<'a> Codegen<'a> {
pub(crate) fn get_leading_annotate_comments(
&mut self,
node_start: u32,
) -> Vec<AnnotationComment> {
if !self.preserve_annotate_comments() {
return vec![];
}
let mut latest_comment_start = node_start;
let source_text = self.source_text.unwrap_or_default();
let mut ret = self
.get_leading_comments(self.latest_consumed_comment_end, node_start)
.rev()
// each comment should be separated by whitespaces
.take_while(|comment| {
let comment_end = comment.real_span_end();
let range_content =
&source_text[comment_end as usize..latest_comment_start as usize];
let all_whitespace = range_content.chars().all(char::is_whitespace);
latest_comment_start = comment.real_span_start();
all_whitespace
})
.filter_map(|comment| {
let comment_content =
&source_text[comment.span.start as usize..comment.span.end as usize];
if let Some(m) = MATCHER.find_iter(&comment_content).next() {
let annotation_kind = match m.value() {
0 | 1 => AnnotationKind::NO_SIDE_EFFECTS,
2 | 3 => AnnotationKind::PURE,
_ => unreachable!(),
};
return Some((*comment, annotation_kind).into());
}
None
})
.collect::<Vec<_>>();
ret.reverse();
ret
}
/// Prints a single annotation comment, unless a comment ending at or after it has already
/// been consumed.
///
/// Line comments are followed by a soft newline and the current indentation; block comments
/// are followed by a soft space.
pub(crate) fn print_comment(&mut self, comment: AnnotationComment) {
    // ```js
    // /*#__PURE__*/
    // Object.getOwnPropertyNames(Symbol)
    // // ios10.x Object.getOwnPropertyNames(Symbol) can enumerate 'arguments' and 'caller'
    // // but accessing them on Symbol leads to TypeError because Symbol is a strict mode
    // // function
    //  .filter(key => key !== 'arguments' && key !== 'caller')
    //  .map(key => (Symbol)[key])
    //  .filter(isSymbol),
    // ```
    // In this example `Object.getOwnPropertyNames(Symbol)`,
    // `Object.getOwnPropertyNames(Symbol).filter()` and
    // `Object.getOwnPropertyNames(Symbol).filter().map()` share the same leading comment.
    // They are all call expressions with the same span start, so the comment must not be
    // printed more than once.
    let real_span_end = comment.comment.real_span_end();
    if self.latest_consumed_comment_end >= real_span_end {
        return;
    }
    self.update_last_consumed_comment_end(real_span_end);

    let span = comment.span();
    let content = span.start as usize..span.end as usize;
    let (opener, is_block) = match comment.kind() {
        CommentKind::Line => ("//", false),
        CommentKind::Block => ("/*", true),
    };
    self.print_str(opener);
    self.print_range_of_source_code(content);
    if is_block {
        self.print_str("*/");
        self.print_soft_space();
    } else {
        self.print_soft_newline();
        self.print_indent();
    }
    // FIXME: esbuild function `restoreExprStartFlags`
    self.start_of_default_export = self.code_len();
}
/// Prints the annotation comments for the node starting at `node_start`.
///
/// Comments that were moved to this position are printed first, followed by the node's own
/// leading annotation comments. Both passes share one kind set, so each annotation kind is
/// printed at most once per call. Does nothing when annotation comments are not preserved.
pub(crate) fn gen_comments(&mut self, node_start: u32) {
    if !self.preserve_annotate_comments() {
        return;
    }
    let mut printed_kinds = AnnotationKind::empty();
    if let Some(moved) = self.try_take_moved_comment(node_start) {
        self.print_comments(&moved, &mut printed_kinds);
    }
    // Must be computed after the moved comments were printed: printing advances
    // `latest_consumed_comment_end`, which the lookup below reads.
    let leading = self.get_leading_annotate_comments(node_start);
    self.print_comments(&leading, &mut printed_kinds);
}
/// Prints every comment whose annotation kind has not been printed yet.
///
/// `annotation_kind_set` holds the kinds already printed; the kind of each printed comment
/// is added to it, so later comments of the same kind are skipped.
///
/// Takes a slice rather than `&Vec<_>`; existing callers passing `&vec` keep working through
/// deref coercion.
#[inline]
pub(crate) fn print_comments(
    &mut self,
    leading_annotate_comment: &[AnnotationComment],
    annotation_kind_set: &mut AnnotationKind,
) {
    for &comment in leading_annotate_comment {
        let kind = comment.annotation_kind();
        if !annotation_kind_set.contains(kind) {
            annotation_kind_set.insert(kind);
            self.print_comment(comment);
        }
    }
}
/// Advances `latest_consumed_comment_end` to `end`; the stored value never moves backwards.
#[inline]
pub fn update_last_consumed_comment_end(&mut self, end: u32) {
    if end > self.latest_consumed_comment_end {
        self.latest_consumed_comment_end = end;
    }
}
}

View file

@ -1,3 +1,5 @@
use daachorse::DoubleArrayAhoCorasick;
use once_cell::sync::Lazy;
use rustc_hash::FxHashMap;
use oxc_ast::{Comment, CommentKind, Trivias};
@ -5,23 +7,35 @@ use oxc_syntax::identifier::is_line_terminator;
use crate::Codegen;
/// Matcher for the annotation markers (`__PURE__` and `__NO_SIDE_EFFECTS__`, each with a `#`
/// or `@` prefix) that may appear inside comments.
pub static ANNOTATION_MATCHER: Lazy<DoubleArrayAhoCorasick<usize>> = Lazy::new(|| {
    // A fixed-size array is enough here; `vec!` would only add a heap allocation.
    let patterns = ["#__NO_SIDE_EFFECTS__", "@__NO_SIDE_EFFECTS__", "@__PURE__", "#__PURE__"];
    DoubleArrayAhoCorasick::new(patterns)
        .expect("the annotation patterns are a fixed, valid pattern set")
});
/// Comments grouped by the position they are attached to (`Comment::attached_to`).
pub type CommentsMap = FxHashMap</* attached_to */ u32, Vec<Comment>>;
impl<'a> Codegen<'a> {
pub(crate) fn build_leading_comments(&mut self, source_text: &str, trivias: &Trivias) {
let mut leading_comments: CommentsMap = FxHashMap::default();
for comment in trivias
.comments()
.copied()
.filter(|comment| Self::should_keep_comment(comment, source_text))
{
leading_comments.entry(comment.attached_to).or_default().push(comment);
}
self.leading_comments = leading_comments;
/// Whether annotation comments should be printed: the `preserve_annotate_comments` comment
/// option must be enabled and minification must be off.
pub fn preserve_annotate_comments(&self) -> bool {
self.comment_options.preserve_annotate_comments && !self.options.minify
}
fn should_keep_comment(comment: &Comment, source_text: &str) -> bool {
comment.is_jsdoc(source_text)
/// Adds every comment in `trivias` to `self.comments`, keyed by the position the comment is
/// attached to.
pub(crate) fn build_comments(&mut self, trivias: &Trivias) {
    for comment in trivias.comments() {
        let attached = self.comments.entry(comment.attached_to).or_default();
        attached.push(*comment);
    }
}
/// Returns `true` if at least one comment attached to `start` contains an annotation marker.
///
/// Always returns `false` when no source text is set or no comments are attached to `start`.
pub fn has_annotation_comments(&self, start: u32) -> bool {
    let Some(source_text) = self.source_text else {
        return false;
    };
    match self.comments.get(&start) {
        Some(attached) => {
            attached.iter().any(|comment| Self::is_annotation_comments(comment, source_text))
        }
        None => false,
    }
}
/// Whether to keep leading comments.
fn is_leading_comments(comment: &Comment, source_text: &str) -> bool {
(comment.is_jsdoc(source_text) || (comment.is_line() && Self::is_annotation_comments(comment, source_text)))
&& comment.preceded_by_newline
// webpack comment `/*****/`
&& !comment.span.source_text(source_text).chars().all(|c| c == '*')
@ -32,7 +46,13 @@ impl<'a> Codegen<'a> {
return;
}
let Some(source_text) = self.source_text else { return };
let Some(comments) = self.leading_comments.remove(&start) else { return };
let Some(comments) = self.comments.remove(&start) else {
return;
};
let (comments, unused_comments): (Vec<_>, Vec<_>) = comments
.into_iter()
.partition(|comment| Self::is_leading_comments(comment, source_text));
if comments.first().is_some_and(|c| c.preceded_by_newline) {
// Skip printing newline if this comment is already on a newline.
@ -73,5 +93,34 @@ impl<'a> Codegen<'a> {
self.print_hard_newline();
self.print_indent();
}
if !unused_comments.is_empty() {
self.comments.insert(start, unused_comments);
}
}
/// Returns `true` if the comment's text contains any of the annotation markers.
fn is_annotation_comments(comment: &Comment, source_text: &str) -> bool {
    let comment_content = comment.span.source_text(source_text);
    // Only existence matters, so stop at the first match instead of counting all of them.
    ANNOTATION_MATCHER.find_iter(comment_content).next().is_some()
}
/// Prints the annotation comments attached to `node_start`, each followed by a hard space.
///
/// If `start_of_annotation_comment` is set, it is taken (and cleared) and the comments
/// attached to that position are printed instead. Does nothing when annotation comments are
/// not preserved or no source text is set.
pub(crate) fn print_annotation_comments(&mut self, node_start: u32) {
    if !self.preserve_annotate_comments() {
        return;
    }
    // If annotation comments are waiting to be moved here, print those instead.
    let start = self.start_of_annotation_comment.take().unwrap_or(node_start);
    let Some(source_text) = self.source_text else { return };
    // NOTE(review): all comments attached to `start` are removed from the map here; the ones
    // that are not annotation comments are skipped and never put back.
    let Some(attached) = self.comments.remove(&start) else { return };
    let annotations =
        attached.into_iter().filter(|comment| Self::is_annotation_comments(comment, source_text));
    for comment in annotations {
        self.print_str(comment.real_span().source_text(source_text));
        self.print_hard_space();
    }
}
}

View file

@ -11,7 +11,6 @@ use oxc_syntax::{
};
use crate::{
annotation_comment::AnnotationKind,
binary_expr_visitor::{BinaryExpressionVisitor, Binaryish, BinaryishOperator},
Codegen, Context, Operator,
};
@ -559,19 +558,15 @@ impl<'a> Gen for VariableDeclaration<'a> {
p.print_str("declare ");
}
if p.comment_options.preserve_annotate_comments
if p.preserve_annotate_comments()
&& p.start_of_annotation_comment.is_none()
&& matches!(self.kind, VariableDeclarationKind::Const)
&& matches!(self.declarations.first(), Some(VariableDeclarator { init: Some(init), .. }) if init.is_function())
&& p.has_annotation_comments(self.span.start)
{
if let Some(declarator) = self.declarations.first() {
if let Some(ref init) = declarator.init {
let leading_annotate_comments =
p.get_leading_annotate_comments(self.span.start);
if !leading_annotate_comments.is_empty() {
p.move_comments(init.span().start, leading_annotate_comments);
}
}
}
p.start_of_annotation_comment = Some(self.span.start);
}
p.print_str(match self.kind {
VariableDeclarationKind::Const => "const",
VariableDeclarationKind::Let => "let",
@ -604,6 +599,7 @@ impl<'a> Gen for VariableDeclarator<'a> {
p.print_soft_space();
p.print_equal();
p.print_soft_space();
p.print_annotation_comments(self.span.start);
init.print_expr(p, Precedence::Comma, ctx);
}
}
@ -613,7 +609,7 @@ impl<'a> Gen for Function<'a> {
fn gen(&self, p: &mut Codegen, ctx: Context) {
let n = p.code_len();
let wrap = self.is_expression() && (p.start_of_stmt == n || p.start_of_default_export == n);
p.gen_comments(self.span.start);
p.print_annotation_comments(self.span.start);
p.wrap(wrap, |p| {
p.print_space_before_identifier();
p.add_source_mapping(self.span.start);
@ -829,22 +825,18 @@ impl<'a> Gen for ExportNamedDeclaration<'a> {
p.add_source_mapping(self.span.start);
p.print_indent();
if p.comment_options.preserve_annotate_comments {
if p.preserve_annotate_comments() {
match &self.declaration {
Some(Declaration::FunctionDeclaration(_)) => {
p.gen_comments(self.span.start);
p.print_annotation_comments(self.span.start);
}
Some(Declaration::VariableDeclaration(var_decl))
if matches!(var_decl.kind, VariableDeclarationKind::Const) =>
{
if let Some(declarator) = var_decl.declarations.first() {
if let Some(ref init) = declarator.init {
let leading_annotate_comments =
p.get_leading_annotate_comments(self.span.start);
if !leading_annotate_comments.is_empty() {
p.move_comments(init.span().start, leading_annotate_comments);
}
}
if matches!(var_decl.declarations.first(), Some(VariableDeclarator { init: Some(init), .. }) if init.is_function())
&& p.has_annotation_comments(self.span.start)
{
p.start_of_annotation_comment = Some(self.span.start);
}
}
_ => {}
@ -1374,13 +1366,17 @@ impl<'a> GenExpr for PrivateFieldExpression<'a> {
impl<'a> GenExpr for CallExpression<'a> {
fn gen_expr(&self, p: &mut Codegen, precedence: Precedence, ctx: Context) {
let is_export_default = p.start_of_default_export == p.code_len();
let mut wrap = precedence >= Precedence::New || ctx.intersects(Context::FORBID_CALL);
let annotate_comments = p.get_leading_annotate_comments(self.span.start);
if !annotate_comments.is_empty() && precedence >= Precedence::Postfix {
if p.has_annotation_comments(self.span.start) && precedence >= Precedence::Postfix {
wrap = true;
}
p.wrap(wrap, |p| {
p.print_comments(&annotate_comments, &mut AnnotationKind::empty());
p.print_annotation_comments(self.span.start);
if is_export_default {
p.start_of_default_export = p.code_len();
}
p.add_source_mapping(self.span.start);
self.callee.print_expr(p, Precedence::Postfix, Context::empty());
if self.optional {
@ -1593,7 +1589,7 @@ impl<'a> Gen for PropertyKey<'a> {
impl<'a> GenExpr for ArrowFunctionExpression<'a> {
fn gen_expr(&self, p: &mut Codegen, precedence: Precedence, ctx: Context) {
p.wrap(precedence >= Precedence::Assign, |p| {
p.gen_comments(self.span.start);
p.print_annotation_comments(self.span.start);
if self.r#async {
p.add_source_mapping(self.span.start);
p.print_str("async");
@ -2028,12 +2024,11 @@ impl<'a> GenExpr for ChainExpression<'a> {
impl<'a> GenExpr for NewExpression<'a> {
fn gen_expr(&self, p: &mut Codegen, precedence: Precedence, ctx: Context) {
let mut wrap = precedence >= self.precedence();
let annotate_comment = p.get_leading_annotate_comments(self.span.start);
if !annotate_comment.is_empty() && precedence >= Precedence::Postfix {
if p.has_annotation_comments(self.span.start) && precedence >= Precedence::Postfix {
wrap = true;
}
p.wrap(wrap, |p| {
p.print_comments(&annotate_comment, &mut AnnotationKind::empty());
p.print_annotation_comments(self.span.start);
p.print_space_before_identifier();
p.add_source_mapping(self.span.start);
p.print_str("new ");

View file

@ -3,7 +3,6 @@
//! Code adapted from
//! * [esbuild](https://github.com/evanw/esbuild/blob/main/internal/js_printer/js_printer.go)
mod annotation_comment;
mod binary_expr_visitor;
mod comment;
mod context;
@ -11,11 +10,11 @@ mod gen;
mod operator;
mod sourcemap_builder;
use std::{borrow::Cow, collections::hash_map::Entry, ops::Range};
use std::borrow::Cow;
use oxc_ast::{
ast::{BindingIdentifier, BlockStatement, Expression, IdentifierReference, Program, Statement},
Comment, Trivias,
Trivias,
};
use oxc_mangler::Mangler;
use oxc_span::Span;
@ -24,11 +23,10 @@ use oxc_syntax::{
operator::{BinaryOperator, UnaryOperator, UpdateOperator},
precedence::Precedence,
};
use rustc_hash::FxHashMap;
use crate::{
annotation_comment::AnnotationComment, binary_expr_visitor::BinaryExpressionVisitor,
comment::CommentsMap, operator::Operator, sourcemap_builder::SourcemapBuilder,
binary_expr_visitor::BinaryExpressionVisitor, comment::CommentsMap, operator::Operator,
sourcemap_builder::SourcemapBuilder,
};
pub use crate::{
context::Context,
@ -75,7 +73,21 @@ pub struct Codegen<'a> {
source_text: Option<&'a str>,
trivias: Trivias,
leading_comments: CommentsMap,
comments: CommentsMap,
/// Start of the comment that needs to be moved to before the VariableDeclarator
///
/// For example:
/// ```js
/// /* @__NO_SIDE_EFFECTS__ */ export const a = function() {
/// }, b = 10000;
/// ```
/// Should be generated as:
/// ```js
/// export const /* @__NO_SIDE_EFFECTS__ */ a = function() {
/// }, b = 10000;
/// ```
start_of_annotation_comment: Option<u32>,
mangler: Option<Mangler>,
@ -106,15 +118,7 @@ pub struct Codegen<'a> {
// Builders
sourcemap_builder: Option<SourcemapBuilder>,
latest_consumed_comment_end: u32,
/// The key of map is the node start position,
/// the first element of value is the start of the comment
/// the second element of value includes the end of the comment and comment kind.
move_comment_map: MoveCommentMap,
}
pub(crate) type MoveCommentMap = FxHashMap<u32, Vec<AnnotationComment>>;
impl<'a> Default for Codegen<'a> {
fn default() -> Self {
@ -143,7 +147,8 @@ impl<'a> Codegen<'a> {
comment_options: CommentOptions::default(),
source_text: None,
trivias: Trivias::default(),
leading_comments: CommentsMap::default(),
comments: CommentsMap::default(),
start_of_annotation_comment: None,
mangler: None,
code: vec![],
needs_semicolon: false,
@ -159,8 +164,6 @@ impl<'a> Codegen<'a> {
indent: 0,
quote: b'"',
sourcemap_builder: None,
latest_consumed_comment_end: 0,
move_comment_map: MoveCommentMap::default(),
}
}
@ -202,9 +205,9 @@ impl<'a> Codegen<'a> {
trivias: Trivias,
options: CommentOptions,
) -> Self {
self.build_leading_comments(source_text, &trivias);
self.trivias = trivias;
self.comment_options = options;
self.build_comments(&trivias);
self.trivias = trivias;
self.with_source_text(source_text)
}
@ -560,57 +563,3 @@ impl<'a> Codegen<'a> {
}
}
}
// Comment related
impl<'a> Codegen<'a> {
/// Whether annotation comments should be printed: the `preserve_annotate_comments` comment
/// option must be enabled and minification must be off.
fn preserve_annotate_comments(&self) -> bool {
self.comment_options.preserve_annotate_comments && !self.options.minify
}
/// Appends the given byte range of the source text to the output.
///
/// This exists to sidestep the borrow checker: printing a slice of the source would
/// otherwise need an immutable borrow of the source text while calling [Self::print_str],
/// which borrows `self` mutably.
///
/// # Panics
/// If `self.source_text` isn't set.
fn print_range_of_source_code(&mut self, range: Range<usize>) {
    let source_text = self.source_text.expect("expect `Codegen::source_text` to be set.");
    let snippet = &source_text[range];
    self.code.extend_from_slice(snippet.as_bytes());
}
/// Returns the comments that `self.trivias.comments_range` yields for `start..end`, as a
/// double-ended iterator so callers can walk them backwards.
fn get_leading_comments(
&self,
start: u32,
end: u32,
) -> impl DoubleEndedIterator<Item = &'_ Comment> + '_ {
self.trivias.comments_range(start..end)
}
/// In some scenario, we want to move the comment that should be codegened to another position.
/// ```js
/// /* @__NO_SIDE_EFFECTS__ */ export const a = function() {
///
/// }, b = 10000;
///
/// ```
/// should generate such output:
/// ```js
/// export const /* @__NO_SIDE_EFFECTS__ */ a = function() {
///
/// }, b = 10000;
/// ```
fn move_comments(&mut self, position: u32, full_comment_infos: Vec<AnnotationComment>) {
match self.move_comment_map.entry(position) {
Entry::Occupied(mut occ) => {
occ.get_mut().extend(full_comment_infos);
}
Entry::Vacant(vac) => {
vac.insert(full_comment_infos);
}
}
}
/// Removes and returns the comments that were registered for `node_start` in
/// `move_comment_map`, or `None` if there are none.
fn try_take_moved_comment(&mut self, node_start: u32) -> Option<Vec<AnnotationComment>> {
self.move_comment_map.remove(&node_start)
}
}

View file

@ -241,7 +241,7 @@ const defineSSRCustomElement = /* @__NO_SIDE_EFFECTS__ */ (
};
----------
const defineSSRCustomElement = /* #__NO_SIDE_EFFECTS__ */ (options, extraOptions) => {
const defineSSRCustomElement = /* #__NO_SIDE_EFFECTS__ */ /* @__NO_SIDE_EFFECTS__ */ (options, extraOptions) => {
return /* @__PURE__ */ defineCustomElement(options, extraOptions, hydrate);
};
@ -253,5 +253,5 @@ const defineSSRCustomElement = () => {
----------
const defineSSRCustomElement = () => {
return /* @__PURE__ */ /* @__NO_SIDE_EFFECTS__ */ defineCustomElement(options, extraOptions, hydrate);
return /* @__PURE__ */ /* @__NO_SIDE_EFFECTS__ */ /* #__NO_SIDE_EFFECTS__ */ defineCustomElement(options, extraOptions, hydrate);
};