mirror of
https://github.com/danbulant/oxc
synced 2026-05-24 20:32:10 +00:00
Try to fix https://github.com/rolldown/rolldown/issues/2013. 1. Previously we only considered the case where the AST is untouched, but consider this scenario: ```js const a = /*__PURE__*/ test(), // ^^^ ^^^^^^ is removed during transform b = a(); ``` According to the previous algorithm, `PURE` would then be attached to `b = a()`. 2. Now we try to attach comments whenever possible, unless the comment is separated from the node by something other than whitespace; in the case above, `PURE` will not be attached to `a()` because the content between `b = a()` and `/* __PURE__*/` is not all whitespace. 3. We added back `MoveMap` for this special case: ```js /*__NODE_SIDE_EFFECTS__*/ export const c = 100; // ^^^^^^^^^^^^^^^^^^^^^ should be attached to first declarator, // ^^^^^^ are not whitespace ```
563 lines
16 KiB
Rust
563 lines
16 KiB
Rust
//! Oxc Codegen
|
|
//!
|
|
//! Code adapted from
|
|
//! * [esbuild](https://github.com/evanw/esbuild/blob/main/internal/js_printer/js_printer.go)
|
|
|
|
mod annotation_comment;
|
|
mod binary_expr_visitor;
|
|
mod context;
|
|
mod gen;
|
|
mod operator;
|
|
mod sourcemap_builder;
|
|
|
|
use std::{borrow::Cow, collections::hash_map::Entry, ops::Range};
|
|
|
|
use oxc_ast::{
|
|
ast::{BindingIdentifier, BlockStatement, Expression, IdentifierReference, Program, Statement},
|
|
Comment, Trivias,
|
|
};
|
|
use oxc_mangler::Mangler;
|
|
use oxc_span::Span;
|
|
use oxc_syntax::{
|
|
identifier::is_identifier_part,
|
|
operator::{BinaryOperator, UnaryOperator, UpdateOperator},
|
|
precedence::Precedence,
|
|
};
|
|
use rustc_hash::FxHashMap;
|
|
|
|
use crate::{
|
|
binary_expr_visitor::BinaryExpressionVisitor, operator::Operator,
|
|
sourcemap_builder::SourcemapBuilder,
|
|
};
|
|
pub use crate::{
|
|
context::Context,
|
|
gen::{Gen, GenExpr},
|
|
};
|
|
|
|
use self::annotation_comment::AnnotationComment;
|
|
|
|
/// Code generator without whitespace removal.
///
/// This is a plain alias of [`Codegen`]; whitespace removal is controlled by
/// [`CodegenOptions::minify`], which is `false` in the default options.
|
|
pub type CodeGenerator<'a> = Codegen<'a>;
|
|
|
|
/// Formatting switches for the code generator.
///
/// Both flags are `false` in the derived [`Default`] value.
#[derive(Clone, Copy, Default)]
pub struct CodegenOptions {
    /// Quote with `'` instead of `"`.
    pub single_quote: bool,

    /// Remove optional whitespace (soft spaces, soft newlines, indentation).
    pub minify: bool,
}
|
|
|
|
/// Comment-related switches for the code generator.
///
/// The derived [`Default`] value leaves every switch off.
#[derive(Clone, Copy, Default)]
pub struct CommentOptions {
    /// Keep annotation comments such as `/* #__PURE__ */` and
    /// `/* #__NO_SIDE_EFFECTS__ */` in the output.
    pub preserve_annotate_comments: bool,
}
|
|
|
|
pub struct CodegenReturn {
|
|
pub source_text: String,
|
|
pub source_map: Option<oxc_sourcemap::SourceMap>,
|
|
}
|
|
|
|
pub struct Codegen<'a> {
|
|
options: CodegenOptions,
|
|
comment_options: CommentOptions,
|
|
|
|
source_text: &'a str,
|
|
|
|
trivias: Trivias,
|
|
|
|
mangler: Option<Mangler>,
|
|
|
|
/// Output Code
|
|
code: Vec<u8>,
|
|
|
|
// states
|
|
prev_op_end: usize,
|
|
prev_reg_exp_end: usize,
|
|
need_space_before_dot: usize,
|
|
print_next_indent_as_space: bool,
|
|
binary_expr_stack: Vec<BinaryExpressionVisitor<'a>>,
|
|
|
|
/// For avoiding `;` if the previous statement ends with `}`.
|
|
needs_semicolon: bool,
|
|
|
|
prev_op: Option<Operator>,
|
|
|
|
start_of_stmt: usize,
|
|
start_of_arrow_expr: usize,
|
|
start_of_default_export: usize,
|
|
|
|
/// Track the current indentation level
|
|
indent: u32,
|
|
|
|
/// Fast path for [CodegenOptions::single_quote]
|
|
quote: u8,
|
|
|
|
// Builders
|
|
sourcemap_builder: Option<SourcemapBuilder>,
|
|
|
|
latest_consumed_comment_end: u32,
|
|
|
|
/// The key of map is the node start position,
|
|
/// the first element of value is the start of the comment
|
|
/// the second element of value includes the end of the comment and comment kind.
|
|
move_comment_map: MoveCommentMap,
|
|
}
|
|
/// Maps a node start position to the annotation comments that were moved to that node.
pub(crate) type MoveCommentMap = FxHashMap<u32, Vec<AnnotationComment>>;
|
|
|
|
impl<'a> Default for Codegen<'a> {
|
|
fn default() -> Self {
|
|
Self::new()
|
|
}
|
|
}
|
|
|
|
impl<'a> From<Codegen<'a>> for String {
|
|
fn from(mut val: Codegen<'a>) -> Self {
|
|
val.into_source_text()
|
|
}
|
|
}
|
|
|
|
impl<'a> From<Codegen<'a>> for Cow<'a, str> {
    /// Consumes the generator and returns its output as an owned `Cow`.
    fn from(mut val: Codegen<'a>) -> Self {
        let text = val.into_source_text();
        Self::Owned(text)
    }
}
|
|
|
|
// Public APIs
|
|
impl<'a> Codegen<'a> {
|
|
#[must_use]
|
|
pub fn new() -> Self {
|
|
Self {
|
|
options: CodegenOptions::default(),
|
|
comment_options: CommentOptions::default(),
|
|
source_text: "",
|
|
trivias: Trivias::default(),
|
|
mangler: None,
|
|
code: vec![],
|
|
needs_semicolon: false,
|
|
need_space_before_dot: 0,
|
|
print_next_indent_as_space: false,
|
|
binary_expr_stack: Vec::with_capacity(5),
|
|
prev_op_end: 0,
|
|
prev_reg_exp_end: 0,
|
|
prev_op: None,
|
|
start_of_stmt: 0,
|
|
start_of_arrow_expr: 0,
|
|
start_of_default_export: 0,
|
|
indent: 0,
|
|
quote: b'"',
|
|
sourcemap_builder: None,
|
|
latest_consumed_comment_end: 0,
|
|
move_comment_map: MoveCommentMap::default(),
|
|
}
|
|
}
|
|
|
|
/// Initialize the output code buffer to reduce memory reallocation.
|
|
/// Minification will reduce by at least half of the original size.
|
|
#[must_use]
|
|
pub fn with_capacity(mut self, source_text_len: usize) -> Self {
|
|
let capacity = if self.options.minify { source_text_len / 2 } else { source_text_len };
|
|
self.code = Vec::with_capacity(capacity);
|
|
self
|
|
}
|
|
|
|
#[must_use]
|
|
pub fn with_options(mut self, options: CodegenOptions) -> Self {
|
|
self.options = options;
|
|
self.quote = if options.single_quote { b'\'' } else { b'"' };
|
|
self
|
|
}
|
|
|
|
#[must_use]
|
|
pub fn enable_comment(
|
|
mut self,
|
|
source_text: &'a str,
|
|
trivias: Trivias,
|
|
options: CommentOptions,
|
|
) -> Self {
|
|
self.source_text = source_text;
|
|
self.trivias = trivias;
|
|
self.comment_options = options;
|
|
self
|
|
}
|
|
|
|
#[must_use]
|
|
pub fn enable_source_map(mut self, source_name: &str, source_text: &str) -> Self {
|
|
let mut sourcemap_builder = SourcemapBuilder::default();
|
|
sourcemap_builder.with_name_and_source(source_name, source_text);
|
|
self.sourcemap_builder = Some(sourcemap_builder);
|
|
self
|
|
}
|
|
|
|
#[must_use]
|
|
pub fn with_mangler(mut self, mangler: Option<Mangler>) -> Self {
|
|
self.mangler = mangler;
|
|
self
|
|
}
|
|
|
|
#[must_use]
|
|
pub fn build(mut self, program: &Program<'_>) -> CodegenReturn {
|
|
program.gen(&mut self, Context::default());
|
|
let source_text = self.into_source_text();
|
|
let source_map = self.sourcemap_builder.map(SourcemapBuilder::into_sourcemap);
|
|
CodegenReturn { source_text, source_map }
|
|
}
|
|
|
|
#[must_use]
|
|
pub fn into_source_text(&mut self) -> String {
|
|
// SAFETY: criteria of `from_utf8_unchecked` are met.
|
|
|
|
unsafe { String::from_utf8_unchecked(std::mem::take(&mut self.code)) }
|
|
}
|
|
|
|
/// Push a single character into the buffer
|
|
#[inline]
|
|
pub fn print_char(&mut self, ch: u8) {
|
|
self.code.push(ch);
|
|
}
|
|
|
|
/// Push str into the buffer
|
|
#[inline]
|
|
pub fn print_str(&mut self, s: &str) {
|
|
self.code.extend(s.as_bytes());
|
|
}
|
|
}
|
|
|
|
// Private APIs
|
|
impl<'a> Codegen<'a> {
|
|
fn code(&self) -> &Vec<u8> {
|
|
&self.code
|
|
}
|
|
|
|
fn code_len(&self) -> usize {
|
|
self.code().len()
|
|
}
|
|
|
|
#[inline]
|
|
fn print_soft_space(&mut self) {
|
|
if !self.options.minify {
|
|
self.print_char(b' ');
|
|
}
|
|
}
|
|
|
|
#[inline]
|
|
pub fn print_hard_space(&mut self) {
|
|
self.print_char(b' ');
|
|
}
|
|
|
|
#[inline]
|
|
fn print_soft_newline(&mut self) {
|
|
if !self.options.minify {
|
|
self.print_char(b'\n');
|
|
}
|
|
}
|
|
|
|
#[inline]
|
|
fn print_semicolon(&mut self) {
|
|
self.print_char(b';');
|
|
}
|
|
|
|
#[inline]
|
|
fn print_comma(&mut self) {
|
|
self.print_char(b',');
|
|
}
|
|
|
|
#[inline]
|
|
fn print_space_before_identifier(&mut self) {
|
|
if self
|
|
.peek_nth(0)
|
|
.is_some_and(|ch| is_identifier_part(ch) || self.prev_reg_exp_end == self.code.len())
|
|
{
|
|
self.print_hard_space();
|
|
}
|
|
}
|
|
|
|
#[inline]
|
|
fn peek_nth(&self, n: usize) -> Option<char> {
|
|
// SAFETY: criteria of `from_utf8_unchecked` are met.
|
|
unsafe { std::str::from_utf8_unchecked(self.code()) }.chars().nth_back(n)
|
|
}
|
|
|
|
#[inline]
|
|
fn indent(&mut self) {
|
|
if !self.options.minify {
|
|
self.indent += 1;
|
|
}
|
|
}
|
|
|
|
#[inline]
|
|
fn dedent(&mut self) {
|
|
if !self.options.minify {
|
|
self.indent -= 1;
|
|
}
|
|
}
|
|
|
|
#[inline]
|
|
fn print_indent(&mut self) {
|
|
if self.options.minify {
|
|
return;
|
|
}
|
|
if self.print_next_indent_as_space {
|
|
self.print_hard_space();
|
|
self.print_next_indent_as_space = false;
|
|
return;
|
|
}
|
|
self.code.extend(std::iter::repeat(b'\t').take(self.indent as usize));
|
|
}
|
|
|
|
#[inline]
|
|
fn print_semicolon_after_statement(&mut self) {
|
|
if self.options.minify {
|
|
self.needs_semicolon = true;
|
|
} else {
|
|
self.print_str(";\n");
|
|
}
|
|
}
|
|
|
|
#[inline]
|
|
fn print_semicolon_if_needed(&mut self) {
|
|
if self.needs_semicolon {
|
|
self.print_semicolon();
|
|
self.needs_semicolon = false;
|
|
}
|
|
}
|
|
|
|
#[inline]
|
|
fn print_ellipsis(&mut self) {
|
|
self.print_str("...");
|
|
}
|
|
|
|
#[inline]
|
|
pub fn print_colon(&mut self) {
|
|
self.print_char(b':');
|
|
}
|
|
|
|
#[inline]
|
|
fn print_equal(&mut self) {
|
|
self.print_char(b'=');
|
|
}
|
|
|
|
/// Prints every item followed by a comma — including the last one.
fn print_sequence<T: Gen>(&mut self, items: &[T], ctx: Context) {
    items.iter().for_each(|entry| {
        entry.gen(self, ctx);
        self.print_comma();
    });
}
|
|
|
|
fn print_curly_braces<F: FnOnce(&mut Self)>(&mut self, span: Span, single_line: bool, op: F) {
|
|
self.add_source_mapping(span.start);
|
|
self.print_char(b'{');
|
|
if !single_line {
|
|
self.print_soft_newline();
|
|
self.indent();
|
|
}
|
|
op(self);
|
|
if !single_line {
|
|
self.dedent();
|
|
self.print_indent();
|
|
}
|
|
self.add_source_mapping(span.end);
|
|
self.print_char(b'}');
|
|
}
|
|
|
|
fn print_block_start(&mut self, position: u32) {
|
|
self.add_source_mapping(position);
|
|
self.print_char(b'{');
|
|
self.print_soft_newline();
|
|
self.indent();
|
|
}
|
|
|
|
fn print_block_end(&mut self, position: u32) {
|
|
self.dedent();
|
|
self.print_indent();
|
|
self.add_source_mapping(position);
|
|
self.print_char(b'}');
|
|
}
|
|
|
|
fn print_body(&mut self, stmt: &Statement<'_>, need_space: bool, ctx: Context) {
|
|
match stmt {
|
|
Statement::BlockStatement(stmt) => {
|
|
self.print_soft_space();
|
|
self.print_block_statement(stmt, ctx);
|
|
self.print_soft_newline();
|
|
}
|
|
Statement::EmptyStatement(_) => {
|
|
self.print_semicolon();
|
|
self.print_soft_newline();
|
|
}
|
|
stmt => {
|
|
if need_space && self.options.minify {
|
|
self.print_hard_space();
|
|
}
|
|
self.print_next_indent_as_space = true;
|
|
stmt.gen(self, ctx);
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Prints a block statement inside curly braces.
///
/// An empty block is printed on a single line. A pending semicolon is
/// flushed before each inner statement, and no semicolon is left pending
/// after the closing `}`.
fn print_block_statement(&mut self, stmt: &BlockStatement<'_>, ctx: Context) {
    let single_line = stmt.body.is_empty();
    self.print_curly_braces(stmt.span, single_line, |printer| {
        for inner in &stmt.body {
            printer.print_semicolon_if_needed();
            inner.gen(printer, ctx);
        }
    });
    self.needs_semicolon = false;
}
|
|
|
|
/// Prints `items` separated by a comma and a soft space.
fn print_list<T: Gen>(&mut self, items: &[T], ctx: Context) {
    let mut remaining = items.iter();
    // The first item has no separator in front of it.
    if let Some(first) = remaining.next() {
        first.gen(self, ctx);
    }
    for item in remaining {
        self.print_comma();
        self.print_soft_space();
        item.gen(self, ctx);
    }
}
|
|
|
|
#[inline]
|
|
pub fn print_expression(&mut self, expr: &Expression<'_>) {
|
|
expr.gen_expr(self, Precedence::Lowest, Context::empty());
|
|
}
|
|
|
|
/// Prints `items` as expressions at `precedence`, separated by a comma and
/// a soft space.
fn print_expressions<T: GenExpr>(&mut self, items: &[T], precedence: Precedence, ctx: Context) {
    let Some((head, tail)) = items.split_first() else { return };
    head.gen_expr(self, precedence, ctx);
    for item in tail {
        self.print_comma();
        self.print_soft_space();
        item.gen_expr(self, precedence, ctx);
    }
}
|
|
|
|
/// Returns the name to print for `reference`.
///
/// The mangled name is used when a mangler is set, the reference has a
/// reference id and the mangler knows a name for it; otherwise the
/// original name is returned.
fn get_identifier_reference_name(&self, reference: &IdentifierReference<'a>) -> &'a str {
    let mangled = match (&self.mangler, reference.reference_id.get()) {
        (Some(mangler), Some(reference_id)) => mangler.get_reference_name(reference_id),
        _ => None,
    };
    match mangled {
        Some(name) => {
            // SAFETY: Hack the lifetime to be part of the allocator.
            // NOTE(review): this extends the borrow to `'a`; whether the mangler's
            // names really live that long is not visible here — confirm.
            let name: &'a str = unsafe { std::mem::transmute_copy(&name) };
            name
        }
        None => reference.name.as_str(),
    }
}
|
|
|
|
/// Returns the name to print for `ident`.
///
/// The mangled name is used when a mangler is set and the identifier has a
/// symbol id; otherwise the original name is returned.
fn get_binding_identifier_name(&self, ident: &BindingIdentifier<'a>) -> &'a str {
    match (&self.mangler, ident.symbol_id.get()) {
        (Some(mangler), Some(symbol_id)) => {
            let name = mangler.get_symbol_name(symbol_id);
            // SAFETY: Hack the lifetime to be part of the allocator.
            // NOTE(review): this extends the borrow to `'a`; whether the mangler's
            // names really live that long is not visible here — confirm.
            let name: &'a str = unsafe { std::mem::transmute_copy(&name) };
            name
        }
        _ => ident.name.as_str(),
    }
}
|
|
|
|
fn print_space_before_operator(&mut self, next: Operator) {
|
|
if self.prev_op_end != self.code.len() {
|
|
return;
|
|
}
|
|
let Some(prev) = self.prev_op else { return };
|
|
// "+ + y" => "+ +y"
|
|
// "+ ++ y" => "+ ++y"
|
|
// "x + + y" => "x+ +y"
|
|
// "x ++ + y" => "x+++y"
|
|
// "x + ++ y" => "x+ ++y"
|
|
// "-- >" => "-- >"
|
|
// "< ! --" => "<! --"
|
|
let bin_op_add = Operator::Binary(BinaryOperator::Addition);
|
|
let bin_op_sub = Operator::Binary(BinaryOperator::Subtraction);
|
|
let un_op_pos = Operator::Unary(UnaryOperator::UnaryPlus);
|
|
let un_op_pre_inc = Operator::Update(UpdateOperator::Increment);
|
|
let un_op_neg = Operator::Unary(UnaryOperator::UnaryNegation);
|
|
let un_op_pre_dec = Operator::Update(UpdateOperator::Decrement);
|
|
let un_op_post_dec = Operator::Update(UpdateOperator::Decrement);
|
|
let bin_op_gt = Operator::Binary(BinaryOperator::GreaterThan);
|
|
let un_op_not = Operator::Unary(UnaryOperator::LogicalNot);
|
|
if ((prev == bin_op_add || prev == un_op_pos)
|
|
&& (next == bin_op_add || next == un_op_pos || next == un_op_pre_inc))
|
|
|| ((prev == bin_op_sub || prev == un_op_neg)
|
|
&& (next == bin_op_sub || next == un_op_neg || next == un_op_pre_dec))
|
|
|| (prev == un_op_post_dec && next == bin_op_gt)
|
|
|| (prev == un_op_not && next == un_op_pre_dec && self.peek_nth(1) == Some('<'))
|
|
{
|
|
self.print_hard_space();
|
|
}
|
|
}
|
|
|
|
#[inline]
|
|
fn wrap<F: FnMut(&mut Self)>(&mut self, wrap: bool, mut f: F) {
|
|
if wrap {
|
|
self.print_char(b'(');
|
|
}
|
|
f(self);
|
|
if wrap {
|
|
self.print_char(b')');
|
|
}
|
|
}
|
|
|
|
#[inline]
|
|
fn wrap_quote<F: FnMut(&mut Self, u8)>(&mut self, mut f: F) {
|
|
self.print_char(self.quote);
|
|
f(self, self.quote);
|
|
self.print_char(self.quote);
|
|
}
|
|
|
|
fn add_source_mapping(&mut self, position: u32) {
|
|
if let Some(sourcemap_builder) = self.sourcemap_builder.as_mut() {
|
|
sourcemap_builder.add_source_mapping(&self.code, position, None);
|
|
}
|
|
}
|
|
|
|
/// Records a named mapping from the current output position to `span`;
/// a no-op when source maps are disabled.
fn add_source_mapping_for_name(&mut self, span: Span, name: &str) {
    let Some(builder) = self.sourcemap_builder.as_mut() else { return };
    builder.add_source_mapping_for_name(&self.code, span, name);
}
|
|
}
|
|
|
|
// Comment related
|
|
impl<'a> Codegen<'a> {
|
|
/// Avoid issue related to rustc borrow checker .
|
|
/// Since if you want to print a range of source code, you need to borrow the source code
|
|
/// as immutable first, and call the [Self::print_str] which is a mutable borrow.
|
|
fn print_range_of_source_code(&mut self, range: Range<usize>) {
|
|
self.code.extend_from_slice(self.source_text[range].as_bytes());
|
|
}
|
|
|
|
fn get_leading_comments(
|
|
&self,
|
|
start: u32,
|
|
end: u32,
|
|
) -> impl DoubleEndedIterator<Item = &'_ Comment> + '_ {
|
|
self.trivias.comments_range(start..end)
|
|
}
|
|
/// In some scenario, we want to move the comment that should be codegened to another position.
|
|
/// ```js
|
|
/// /* @__NO_SIDE_EFFECTS__ */ export const a = function() {
|
|
///
|
|
/// }, b = 10000;
|
|
///
|
|
/// ```
|
|
/// should generate such output:
|
|
/// ```js
|
|
/// export const /* @__NO_SIDE_EFFECTS__ */ a = function() {
|
|
///
|
|
/// }, b = 10000;
|
|
/// ```
|
|
fn move_comments(&mut self, position: u32, full_comment_infos: Vec<AnnotationComment>) {
|
|
match self.move_comment_map.entry(position) {
|
|
Entry::Occupied(mut occ) => {
|
|
occ.get_mut().extend(full_comment_infos);
|
|
}
|
|
Entry::Vacant(vac) => {
|
|
vac.insert(full_comment_infos);
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Removes and returns the comments registered for `node_start`, if any.
fn try_take_moved_comment(&mut self, node_start: u32) -> Option<Vec<AnnotationComment>> {
    let moved = &mut self.move_comment_map;
    moved.remove(&node_start)
}
|
|
}
|