From bcdbba39815e1e5b387c265d901e357b8f931957 Mon Sep 17 00:00:00 2001 From: Boshen <1430279+Boshen@users.noreply.github.com> Date: Wed, 18 Sep 2024 07:58:22 +0000 Subject: [PATCH] feat(codegen): print jsdoc comments that are attached to statements and class elements (#5845) I am unable to print all comments correctly. Comments have way too much semantic meaning in JavaScript. This PR reduces the scope to only print jsdoc comments that are attached to statements and class elements, in order to get isolated declarations shipped. --- crates/oxc_ast/src/trivia.rs | 4 + crates/oxc_codegen/src/comment.rs | 74 +++++++++++ crates/oxc_codegen/src/gen.rs | 7 + crates/oxc_codegen/src/lib.rs | 9 +- crates/oxc_codegen/tests/integration/jsdoc.rs | 78 +++++++++++ crates/oxc_codegen/tests/integration/main.rs | 1 + .../tests/integration/snapshots/jsodc.snap | 121 ++++++++++++++++++ crates/oxc_parser/src/lexer/trivia_builder.rs | 91 ++++++++++--- 8 files changed, 363 insertions(+), 22 deletions(-) create mode 100644 crates/oxc_codegen/src/comment.rs create mode 100644 crates/oxc_codegen/tests/integration/jsdoc.rs create mode 100644 crates/oxc_codegen/tests/integration/snapshots/jsodc.snap diff --git a/crates/oxc_ast/src/trivia.rs b/crates/oxc_ast/src/trivia.rs index f9ffb8f6d..e00f2c7e3 100644 --- a/crates/oxc_ast/src/trivia.rs +++ b/crates/oxc_ast/src/trivia.rs @@ -104,6 +104,10 @@ impl Comment { pub fn real_span_start(&self) -> u32 { self.span.start - 2 } + + pub fn is_jsdoc(&self, source_text: &str) -> bool { + self.is_leading() && self.is_block() && self.span.source_text(source_text).starts_with('*') + } } /// Sorted set of unique trivia comments, in ascending order by starting position. 
diff --git a/crates/oxc_codegen/src/comment.rs b/crates/oxc_codegen/src/comment.rs new file mode 100644 index 000000000..2a8de5ad7 --- /dev/null +++ b/crates/oxc_codegen/src/comment.rs @@ -0,0 +1,74 @@ +use oxc_syntax::identifier::is_line_terminator; +use rustc_hash::FxHashMap; + +use oxc_ast::{Comment, CommentKind, Trivias}; + +use crate::Codegen; + +pub type CommentsMap = FxHashMap<u32, Vec<Comment>>; + +impl<'a> Codegen<'a> { + pub(crate) fn build_leading_comments(&mut self, source_text: &str, trivias: &Trivias) { + let mut leading_comments: CommentsMap = FxHashMap::default(); + for comment in trivias + .comments() + .copied() + .filter(|comment| Self::should_keep_comment(comment, source_text)) + { + leading_comments.entry(comment.attached_to).or_default().push(comment); + } + self.leading_comments = leading_comments; + } + + fn should_keep_comment(comment: &Comment, source_text: &str) -> bool { + comment.is_jsdoc(source_text) + && comment.preceded_by_newline + // webpack comment `/*****/` + && !comment.span.source_text(source_text).chars().all(|c| c == '*') + } + + pub(crate) fn print_leading_comments(&mut self, start: u32) { + if self.options.minify { + return; + } + let Some(source_text) = self.source_text else { return }; + let Some(comments) = self.leading_comments.remove(&start) else { return }; + + let first = comments.first().unwrap(); + if first.preceded_by_newline { + // Skip printing newline if this comment is already on a newline. + if self.peek_nth(0).is_some_and(|c| c != '\n' && c != '\t') { + self.print_char(b'\n'); + self.print_indent(); + } + } + + for comment in &comments { + let s = comment.real_span().source_text(source_text); + match comment.kind { + CommentKind::Line => { + self.print_str(s); + } + CommentKind::Block => { + // Print block comments with our own indentation.
+ let lines = s.split(is_line_terminator); + for line in lines { + if !line.starts_with("/*") { + self.print_indent(); + } + self.print_str(line.trim_start()); + if !line.ends_with("*/") { + self.print_hard_newline(); + } + } + } + } + } + + let last = comments.last().unwrap(); + if last.is_line() || last.followed_by_newline { + self.print_hard_newline(); + self.print_indent(); + } + } +} diff --git a/crates/oxc_codegen/src/gen.rs b/crates/oxc_codegen/src/gen.rs index 8f16f6497..2c625d460 100644 --- a/crates/oxc_codegen/src/gen.rs +++ b/crates/oxc_codegen/src/gen.rs @@ -73,6 +73,7 @@ impl<'a> Gen for Directive<'a> { impl<'a> Gen for Statement<'a> { fn gen(&self, p: &mut Codegen, ctx: Context) { + p.print_leading_comments(self.span().start); match self { Self::BlockStatement(stmt) => stmt.print(p, ctx), Self::BreakStatement(stmt) => stmt.print(p, ctx), @@ -468,6 +469,7 @@ impl<'a> Gen for ReturnStatement<'a> { fn gen(&self, p: &mut Codegen, _ctx: Context) { p.add_source_mapping(self.span.start); p.print_indent(); + p.print_space_before_identifier(); p.print_str("return"); if let Some(arg) = &self.argument { p.print_hard_space(); @@ -2204,22 +2206,27 @@ impl<'a> Gen for ClassElement<'a> { fn gen(&self, p: &mut Codegen, ctx: Context) { match self { Self::StaticBlock(elem) => { + p.print_leading_comments(elem.span.start); elem.print(p, ctx); p.print_soft_newline(); } Self::MethodDefinition(elem) => { + p.print_leading_comments(elem.span.start); elem.print(p, ctx); p.print_soft_newline(); } Self::PropertyDefinition(elem) => { + p.print_leading_comments(elem.span.start); elem.print(p, ctx); p.print_semicolon_after_statement(); } Self::AccessorProperty(elem) => { + p.print_leading_comments(elem.span.start); elem.print(p, ctx); p.print_semicolon_after_statement(); } Self::TSIndexSignature(elem) => { + p.print_leading_comments(elem.span.start); elem.print(p, ctx); p.print_semicolon_after_statement(); } diff --git a/crates/oxc_codegen/src/lib.rs 
b/crates/oxc_codegen/src/lib.rs index f529d44fc..3d03a6479 100644 --- a/crates/oxc_codegen/src/lib.rs +++ b/crates/oxc_codegen/src/lib.rs @@ -5,6 +5,7 @@ mod annotation_comment; mod binary_expr_visitor; +mod comment; mod context; mod gen; mod operator; @@ -25,10 +26,9 @@ use oxc_syntax::{ }; use rustc_hash::FxHashMap; -use self::annotation_comment::AnnotationComment; use crate::{ - binary_expr_visitor::BinaryExpressionVisitor, operator::Operator, - sourcemap_builder::SourcemapBuilder, + annotation_comment::AnnotationComment, binary_expr_visitor::BinaryExpressionVisitor, + comment::CommentsMap, operator::Operator, sourcemap_builder::SourcemapBuilder, }; pub use crate::{ context::Context, @@ -75,6 +75,7 @@ pub struct Codegen<'a> { source_text: Option<&'a str>, trivias: Trivias, + leading_comments: CommentsMap, mangler: Option<Mangler>, @@ -142,6 +143,7 @@ impl<'a> Codegen<'a> { comment_options: CommentOptions::default(), source_text: None, trivias: Trivias::default(), + leading_comments: CommentsMap::default(), mangler: None, code: vec![], needs_semicolon: false, @@ -200,6 +202,7 @@ impl<'a> Codegen<'a> { trivias: Trivias, options: CommentOptions, ) -> Self { + self.build_leading_comments(source_text, &trivias); self.trivias = trivias; self.comment_options = options; self.with_source_text(source_text) diff --git a/crates/oxc_codegen/tests/integration/jsdoc.rs b/crates/oxc_codegen/tests/integration/jsdoc.rs new file mode 100644 index 000000000..961915158 --- /dev/null +++ b/crates/oxc_codegen/tests/integration/jsdoc.rs @@ -0,0 +1,78 @@ +use crate::snapshot; + +#[test] +fn comment() { + let cases = vec![ + r" +/** This is a description of the foo function. */ +function foo() { +} + +/** + * Represents a book. + * @constructor + * @param {string} title - The title of the book. + * @param {string} author - The author of the book. + */ +function Book(title, author) { +} + +/** Class representing a point. */ +class Point { + /** + * Create a point.
+ * @param {number} x - The x value. + * @param {number} y - The y value. + */ + constructor(x, y) { + } + + /** + * Get the x value. + * @return {number} The x value. + */ + getX() { + } + + /** + * Get the y value. + * @return {number} The y value. + */ + getY() { + } + + /** + * Convert a string containing two comma-separated numbers into a point. + * @param {string} str - The string containing two comma-separated numbers. + * @return {Point} A Point object. + */ + static fromString(str) { + } +} + +/** Class representing a point. */ +const Point = class { +} + +/** + * Shirt module. + * @module my/shirt + */ + +/** Button the shirt. */ +exports.button = function() { +}; + +/** Unbutton the shirt. */ +exports.unbutton = function() { +}; + +this.Book = function(title) { + /** The title of the book. */ + this.title = title; +} + ", + ]; + + snapshot("jsodc", &cases); +} diff --git a/crates/oxc_codegen/tests/integration/main.rs b/crates/oxc_codegen/tests/integration/main.rs index 14d4cbfd5..31327f9a4 100644 --- a/crates/oxc_codegen/tests/integration/main.rs +++ b/crates/oxc_codegen/tests/integration/main.rs @@ -1,5 +1,6 @@ #![allow(clippy::missing_panics_doc)] pub mod esbuild; +pub mod jsdoc; pub mod pure_comments; pub mod tester; pub mod ts; diff --git a/crates/oxc_codegen/tests/integration/snapshots/jsodc.snap b/crates/oxc_codegen/tests/integration/snapshots/jsodc.snap new file mode 100644 index 000000000..f1b3b72d9 --- /dev/null +++ b/crates/oxc_codegen/tests/integration/snapshots/jsodc.snap @@ -0,0 +1,121 @@ +--- +source: crates/oxc_codegen/tests/integration/main.rs +--- +########## 0 + +/** This is a description of the foo function. */ +function foo() { +} + +/** + * Represents a book. + * @constructor + * @param {string} title - The title of the book. + * @param {string} author - The author of the book. + */ +function Book(title, author) { +} + +/** Class representing a point. */ +class Point { + /** + * Create a point. + * @param {number} x - The x value. 
+ * @param {number} y - The y value. + */ + constructor(x, y) { + } + + /** + * Get the x value. + * @return {number} The x value. + */ + getX() { + } + + /** + * Get the y value. + * @return {number} The y value. + */ + getY() { + } + + /** + * Convert a string containing two comma-separated numbers into a point. + * @param {string} str - The string containing two comma-separated numbers. + * @return {Point} A Point object. + */ + static fromString(str) { + } +} + +/** Class representing a point. */ +const Point = class { +} + +/** + * Shirt module. + * @module my/shirt + */ + +/** Button the shirt. */ +exports.button = function() { +}; + +/** Unbutton the shirt. */ +exports.unbutton = function() { +}; + +this.Book = function(title) { + /** The title of the book. */ + this.title = title; +} + +---------- +/** This is a description of the foo function. */ +function foo() {} +/** +* Represents a book. +* @constructor +* @param {string} title - The title of the book. +* @param {string} author - The author of the book. +*/ +function Book(title, author) {} +/** Class representing a point. */ +class Point { + /** + * Create a point. + * @param {number} x - The x value. + * @param {number} y - The y value. + */ + constructor(x, y) {} + /** + * Get the x value. + * @return {number} The x value. + */ + getX() {} + /** + * Get the y value. + * @return {number} The y value. + */ + getY() {} + /** + * Convert a string containing two comma-separated numbers into a point. + * @param {string} str - The string containing two comma-separated numbers. + * @return {Point} A Point object. + */ + static fromString(str) {} +} +/** Class representing a point. */ +const Point = class {}; +/** +* Shirt module. +* @module my/shirt +*//** Button the shirt. */ +exports.button = function() {}; +/** Unbutton the shirt. */ +exports.unbutton = function() {}; +this.Book = function(title) { +/** The title of the book. 
*/ + this.title = title; +}; diff --git a/crates/oxc_parser/src/lexer/trivia_builder.rs b/crates/oxc_parser/src/lexer/trivia_builder.rs index 1033b8d70..315f8c500 100644 --- a/crates/oxc_parser/src/lexer/trivia_builder.rs +++ b/crates/oxc_parser/src/lexer/trivia_builder.rs @@ -1,7 +1,7 @@ use oxc_ast::{Comment, CommentKind, CommentPosition, Trivias}; use oxc_span::Span; -#[derive(Debug, Default)] +#[derive(Debug)] pub struct TriviaBuilder { // This is a set of unique comments. Duplicated // comments could be generated in case of rewind; they are @@ -18,6 +18,12 @@ pub struct TriviaBuilder { saw_newline: bool, } +impl Default for TriviaBuilder { + fn default() -> Self { + Self { comments: vec![], irregular_whitespaces: vec![], processed: 0, saw_newline: true } + } +} + impl TriviaBuilder { pub fn build(self) -> Trivias { Trivias::new(self.comments.into_boxed_slice(), self.irregular_whitespaces) @@ -44,6 +50,9 @@ impl TriviaBuilder { let len = self.comments.len(); if self.processed < len { self.comments[len - 1].followed_by_newline = true; + if !self.saw_newline { + self.processed = self.comments.len(); + } } self.saw_newline = true; } @@ -51,7 +60,7 @@ impl TriviaBuilder { pub fn handle_token(&mut self, token_start: u32) { let len = self.comments.len(); if self.processed < len { - // All unprocess preceding comments are leading comments attached to this token start. + // All unprocessed preceding comments are leading comments attached to this token start. for comment in &mut self.comments[self.processed..] 
{ comment.position = CommentPosition::Leading; comment.attached_to = token_start; @@ -94,18 +103,22 @@ mod test { use oxc_ast::{Comment, CommentKind, CommentPosition}; use oxc_span::{SourceType, Span}; - #[test] - fn comment_attachments() { + fn get_comments(source_text: &str) -> Vec<Comment> { let allocator = Allocator::default(); let source_type = SourceType::default(); + let ret = Parser::new(&allocator, source_text, source_type).parse(); + ret.trivias.comments().copied().collect::<Vec<_>>() + } + + #[test] + fn comment_attachments() { let source_text = " /* Leading 1 */ // Leading 2 /* Leading 3 */ token /* Trailing 1 */ // Trailing 2 // Leading of EOF token "; - let ret = Parser::new(&allocator, source_text, source_type).parse(); - let comments = ret.trivias.comments().copied().collect::<Vec<_>>(); + let comments = get_comments(source_text); let expected = [ Comment { span: Span::new(11, 22), @@ -165,22 +178,62 @@ mod test { #[test] fn comment_attachments2() { - let allocator = Allocator::default(); - let source_type = SourceType::default(); let source_text = "#!/usr/bin/env node /* Leading 1 */ -token +token /* Trailing 1 */ "; - let ret = Parser::new(&allocator, source_text, source_type).parse(); - let comments = ret.trivias.comments().copied().collect::<Vec<_>>(); - let expected = vec![Comment { - span: Span::new(22, 33), - kind: CommentKind::Block, - position: CommentPosition::Leading, - attached_to: 36, - preceded_by_newline: true, - followed_by_newline: true, - }]; + let comments = get_comments(source_text); + let expected = vec![ + Comment { + span: Span::new(22, 33), + kind: CommentKind::Block, + position: CommentPosition::Leading, + attached_to: 36, + preceded_by_newline: true, + followed_by_newline: true, + }, + Comment { + span: Span::new(44, 56), + kind: CommentKind::Block, + position: CommentPosition::Trailing, + attached_to: 0, + preceded_by_newline: false, + followed_by_newline: true, + }, + ]; + assert_eq!(comments, expected); + } + + #[test] + fn comment_attachments3() { +
let source_text = " +/** + * A + **/ +/** + * B + **/ + token + "; + let comments = get_comments(source_text); + let expected = vec![ + Comment { + span: Span::new(3, 12), + kind: CommentKind::Block, + position: CommentPosition::Leading, + attached_to: 30, + preceded_by_newline: true, + followed_by_newline: true, + }, + Comment { + span: Span::new(17, 26), + kind: CommentKind::Block, + position: CommentPosition::Leading, + attached_to: 30, + preceded_by_newline: true, + followed_by_newline: true, + }, + ]; assert_eq!(comments, expected); } }