From 42dcadfccfc3ad3aa3c9e08cd574fb60ea9911b4 Mon Sep 17 00:00:00 2001 From: Boshen <1430279+Boshen@users.noreply.github.com> Date: Wed, 18 Sep 2024 05:41:44 +0000 Subject: [PATCH] fix(parser): hashbang comment should not keep the end newline char (#5844) Previously, the hashbang's value included the trailing newline: ``` "hashbang": { "type": "Hashbang", "start": 0, "end": 16, "value": "/usr/bin/node\n" }, ``` This change also makes the lexer emit a `\n` token, which makes comment position detection correct. --- crates/oxc_codegen/src/gen.rs | 1 + crates/oxc_codegen/src/lib.rs | 5 +++++ crates/oxc_parser/src/lexer/comment.rs | 3 ++- crates/oxc_parser/src/lexer/trivia_builder.rs | 2 +- crates/oxc_parser/src/lib.rs | 9 +++++++++ crates/oxc_prettier/src/format/mod.rs | 17 +++++++++++------ 6 files changed, 29 insertions(+), 8 deletions(-) diff --git a/crates/oxc_codegen/src/gen.rs b/crates/oxc_codegen/src/gen.rs index 8c309e532..8f16f6497 100644 --- a/crates/oxc_codegen/src/gen.rs +++ b/crates/oxc_codegen/src/gen.rs @@ -52,6 +52,7 @@ impl<'a> Gen for Hashbang<'a> { fn gen(&self, p: &mut Codegen, _ctx: Context) { p.print_str("#!"); p.print_str(self.value.as_str()); + p.print_hard_newline(); } } diff --git a/crates/oxc_codegen/src/lib.rs b/crates/oxc_codegen/src/lib.rs index eff84bf94..f529d44fc 100644 --- a/crates/oxc_codegen/src/lib.rs +++ b/crates/oxc_codegen/src/lib.rs @@ -281,6 +281,11 @@ impl<'a> Codegen<'a> { } } + #[inline] + fn print_hard_newline(&mut self) { + self.print_char(b'\n'); + } + #[inline] fn print_semicolon(&mut self) { self.print_char(b';'); diff --git a/crates/oxc_parser/src/lexer/comment.rs b/crates/oxc_parser/src/lexer/comment.rs index 4d813d054..f366fe8d5 100644 --- a/crates/oxc_parser/src/lexer/comment.rs +++ b/crates/oxc_parser/src/lexer/comment.rs @@ -176,10 +176,11 @@ impl<'a> Lexer<'a> { /// Section 12.5 Hashbang Comments pub(super) fn read_hashbang_comment(&mut self) -> Kind { - while let Some(c) = self.next_char().as_ref() { + while let Some(c) = 
self.peek_char().as_ref() { if is_line_terminator(*c) { break; } + self.consume_char(); } self.token.is_on_new_line = true; Kind::HashbangComment diff --git a/crates/oxc_parser/src/lexer/trivia_builder.rs b/crates/oxc_parser/src/lexer/trivia_builder.rs index 5bc783614..1033b8d70 100644 --- a/crates/oxc_parser/src/lexer/trivia_builder.rs +++ b/crates/oxc_parser/src/lexer/trivia_builder.rs @@ -178,7 +178,7 @@ token kind: CommentKind::Block, position: CommentPosition::Leading, attached_to: 36, - preceded_by_newline: false, // hashbang comment always end in newline + preceded_by_newline: true, followed_by_newline: true, }]; assert_eq!(comments, expected); diff --git a/crates/oxc_parser/src/lib.rs b/crates/oxc_parser/src/lib.rs index d433dae17..8dd4cf90e 100644 --- a/crates/oxc_parser/src/lib.rs +++ b/crates/oxc_parser/src/lib.rs @@ -523,6 +523,15 @@ mod test { } } + #[test] + fn hashbang() { + let allocator = Allocator::default(); + let source_type = SourceType::default(); + let source = "#!/usr/bin/node\n;"; + let ret = Parser::new(&allocator, source, source_type).parse(); + assert_eq!(ret.program.hashbang.unwrap().value.as_str(), "/usr/bin/node"); + } + #[test] fn unambiguous() { let allocator = Allocator::default(); diff --git a/crates/oxc_prettier/src/format/mod.rs b/crates/oxc_prettier/src/format/mod.rs index 222398c71..51b7839ff 100644 --- a/crates/oxc_prettier/src/format/mod.rs +++ b/crates/oxc_prettier/src/format/mod.rs @@ -30,7 +30,7 @@ use cow_utils::CowUtils; use oxc_allocator::{Box, Vec}; use oxc_ast::{ast::*, AstKind}; use oxc_span::GetSpan; -use oxc_syntax::identifier::is_identifier_name; +use oxc_syntax::identifier::{is_identifier_name, is_line_terminator}; use self::{array::Array, object::ObjectLike, template_literal::TemplateLiteralPrinter}; use crate::{ @@ -60,10 +60,6 @@ impl<'a> Format<'a> for Program<'a> { let mut parts = p.vec(); if let Some(hashbang) = &self.hashbang { parts.push(hashbang.format(p)); - let c = p.source_text[..hashbang.span.end as 
usize].chars().last().unwrap(); - if p.is_next_line_empty_after_index(hashbang.span.end - c.len_utf8() as u32) { - parts.extend(hardline!()); - } } if let Some(doc) = block::print_block_body( p, @@ -81,7 +77,16 @@ impl<'a> Format<'a> for Program<'a> { impl<'a> Format<'a> for Hashbang<'a> { fn format(&self, p: &mut Prettier<'a>) -> Doc<'a> { - Doc::Str(self.span.source_text(p.source_text)) + let mut parts = p.vec(); + parts.push(ss!(self.span.source_text(p.source_text))); + parts.extend(hardline!()); + // Preserve original newline + if let Some(c) = p.source_text[self.span.end as usize..].chars().nth(1) { + if is_line_terminator(c) { + parts.extend(hardline!()); + } + } + Doc::Array(parts) } }