//! Oxc Codegen //! //! Code adapted from //! * [esbuild](https://github.com/evanw/esbuild/blob/main/internal/js_printer/js_printer.go) mod binary_expr_visitor; mod code_buffer; mod comment; mod context; mod gen; mod operator; mod sourcemap_builder; use std::{borrow::Cow, path::PathBuf}; use oxc_ast::ast::{ BindingIdentifier, BlockStatement, Expression, IdentifierReference, Program, Statement, }; use oxc_mangler::Mangler; use oxc_span::{GetSpan, Span}; use oxc_syntax::{ identifier::{is_identifier_part, is_identifier_part_ascii}, operator::{BinaryOperator, UnaryOperator, UpdateOperator}, precedence::Precedence, }; use crate::{ binary_expr_visitor::BinaryExpressionVisitor, code_buffer::CodeBuffer, comment::CommentsMap, operator::Operator, sourcemap_builder::SourcemapBuilder, }; pub use crate::{ context::Context, gen::{Gen, GenExpr}, }; /// Code generator without whitespace removal. pub type CodeGenerator<'a> = Codegen<'a>; #[derive(Debug, Clone)] pub struct CodegenOptions { /// Use single quotes instead of double quotes. /// /// Default is `false`. pub single_quote: bool, /// Remove whitespace. /// /// Default is `false`. pub minify: bool, /// Print comments? /// /// Default is `true`. pub comments: bool, /// Print annotation comments, e.g. `/* #__PURE__ */` and `/* #__NO_SIDE_EFFECTS__ */`. /// /// Only takes into effect when `comments` is false. /// /// Default is `false`. pub annotation_comments: bool, pub source_map_path: Option, } impl Default for CodegenOptions { fn default() -> Self { Self { single_quote: false, minify: false, comments: true, annotation_comments: false, source_map_path: None, } } } impl CodegenOptions { fn print_annotation_comments(&self) -> bool { !self.minify && (self.comments || self.annotation_comments) } } /// Output from [`Codegen::build`] pub struct CodegenReturn { /// The generated source code. pub code: String, /// The source map from the input source code to the generated source code. /// /// You must set [`CodegenOptions::source_map_path`] for this to be [`Some`]. pub map: Option, } pub struct Codegen<'a> { pub(crate) options: CodegenOptions, /// Original source code of the AST source_text: &'a str, comments: CommentsMap, mangler: Option, /// Output Code code: CodeBuffer, // states prev_op_end: usize, prev_reg_exp_end: usize, need_space_before_dot: usize, print_next_indent_as_space: bool, binary_expr_stack: Vec>, /// For avoiding `;` if the previous statement ends with `}`. needs_semicolon: bool, prev_op: Option, start_of_stmt: usize, start_of_arrow_expr: usize, start_of_default_export: usize, /// Start of comment that needs to be moved to the before VariableDeclarator /// /// For example: /// ```js /// /* @__NO_SIDE_EFFECTS__ */ export const a = function() { /// }, b = 10000; /// ``` /// Should be generated as: /// ```js /// export const /* @__NO_SIDE_EFFECTS__ */ a = function() { /// }, b = 10000; /// ``` start_of_annotation_comment: Option, /// Track the current indentation level indent: u32, /// Fast path for [CodegenOptions::single_quote] quote: u8, // Builders sourcemap_builder: Option, } impl<'a> Default for Codegen<'a> { fn default() -> Self { Self::new() } } impl<'a> From> for String { fn from(val: Codegen<'a>) -> Self { val.into_source_text() } } impl<'a> From> for Cow<'a, str> { fn from(val: Codegen<'a>) -> Self { Cow::Owned(val.into_source_text()) } } // Public APIs impl<'a> Codegen<'a> { #[must_use] pub fn new() -> Self { Self { options: CodegenOptions::default(), source_text: "", comments: CommentsMap::default(), start_of_annotation_comment: None, mangler: None, code: CodeBuffer::default(), needs_semicolon: false, need_space_before_dot: 0, print_next_indent_as_space: false, binary_expr_stack: Vec::with_capacity(5), prev_op_end: 0, prev_reg_exp_end: 0, prev_op: None, start_of_stmt: 0, start_of_arrow_expr: 0, start_of_default_export: 0, indent: 0, quote: b'"', sourcemap_builder: None, } } #[must_use] pub fn with_options(mut self, options: CodegenOptions) -> Self { self.quote = if options.single_quote { b'\'' } else { b'"' }; self.options = options; self } #[must_use] pub fn with_mangler(mut self, mangler: Option) -> Self { self.mangler = mangler; self } #[must_use] pub fn build(mut self, program: &Program<'a>) -> CodegenReturn { self.quote = if self.options.single_quote { b'\'' } else { b'"' }; self.source_text = program.source_text; self.code.reserve(program.source_text.len()); if self.options.print_annotation_comments() { self.build_comments(&program.comments); } if let Some(path) = &self.options.source_map_path { self.sourcemap_builder = Some(SourcemapBuilder::new(path, program.source_text)); } program.print(&mut self, Context::default()); let code = self.code.into_string(); let map = self.sourcemap_builder.map(SourcemapBuilder::into_sourcemap); CodegenReturn { code, map } } #[must_use] pub fn into_source_text(self) -> String { self.code.into_string() } /// Push a single ASCII byte into the buffer. /// /// # Panics /// Panics if `byte` is not an ASCII byte (`0 - 0x7F`). #[inline] pub fn print_ascii_byte(&mut self, byte: u8) { self.code.print_ascii_byte(byte); } /// Push str into the buffer #[inline] pub fn print_str(&mut self, s: &str) { self.code.print_str(s); } #[inline] pub fn print_expression(&mut self, expr: &Expression<'_>) { expr.print_expr(self, Precedence::Lowest, Context::empty()); } } // Private APIs impl<'a> Codegen<'a> { fn code(&self) -> &CodeBuffer { &self.code } fn code_len(&self) -> usize { self.code().len() } #[inline] fn print_soft_space(&mut self) { if !self.options.minify { self.print_ascii_byte(b' '); } } #[inline] fn print_hard_space(&mut self) { self.print_ascii_byte(b' '); } #[inline] fn print_soft_newline(&mut self) { if !self.options.minify { self.print_ascii_byte(b'\n'); } } #[inline] fn print_hard_newline(&mut self) { self.print_ascii_byte(b'\n'); } #[inline] fn print_semicolon(&mut self) { self.print_ascii_byte(b';'); } #[inline] fn print_comma(&mut self) { self.print_ascii_byte(b','); } #[inline] fn print_space_before_identifier(&mut self) { let Some(byte) = self.last_byte() else { return }; if self.prev_reg_exp_end != self.code.len() { let is_identifier = if byte.is_ascii() { // Fast path for ASCII (very common case) is_identifier_part_ascii(byte as char) } else { is_identifier_part(self.last_char().unwrap()) }; if !is_identifier { return; } } self.print_hard_space(); } #[inline] fn last_byte(&self) -> Option { self.code.last_byte() } #[inline] fn last_char(&self) -> Option { self.code.last_char() } #[inline] fn indent(&mut self) { if !self.options.minify { self.indent += 1; } } #[inline] fn dedent(&mut self) { if !self.options.minify { self.indent -= 1; } } #[inline] fn print_indent(&mut self) { if self.options.minify { return; } if self.print_next_indent_as_space { self.print_hard_space(); self.print_next_indent_as_space = false; return; } // SAFETY: this iterator only yields tabs, which are always valid ASCII characters. unsafe { self.code.print_bytes_unchecked(std::iter::repeat(b'\t').take(self.indent as usize)); } } #[inline] fn print_semicolon_after_statement(&mut self) { if self.options.minify { self.needs_semicolon = true; } else { self.print_str(";\n"); } } #[inline] fn print_semicolon_if_needed(&mut self) { if self.needs_semicolon { self.print_semicolon(); self.needs_semicolon = false; } } #[inline] fn print_ellipsis(&mut self) { self.print_str("..."); } #[inline] fn print_colon(&mut self) { self.print_ascii_byte(b':'); } #[inline] fn print_equal(&mut self) { self.print_ascii_byte(b'='); } fn print_sequence(&mut self, items: &[T], ctx: Context) { for item in items { item.print(self, ctx); self.print_comma(); } } fn print_curly_braces(&mut self, span: Span, single_line: bool, op: F) { self.add_source_mapping(span.start); self.print_ascii_byte(b'{'); if !single_line { self.print_soft_newline(); self.indent(); } op(self); if !single_line { self.dedent(); self.print_indent(); } self.add_source_mapping(span.end); self.print_ascii_byte(b'}'); } fn print_block_start(&mut self, position: u32) { self.add_source_mapping(position); self.print_ascii_byte(b'{'); self.print_soft_newline(); self.indent(); } fn print_block_end(&mut self, position: u32) { self.dedent(); self.print_indent(); self.add_source_mapping(position); self.print_ascii_byte(b'}'); } fn print_body(&mut self, stmt: &Statement<'_>, need_space: bool, ctx: Context) { match stmt { Statement::BlockStatement(stmt) => { self.print_soft_space(); self.print_block_statement(stmt, ctx); self.print_soft_newline(); } Statement::EmptyStatement(_) => { self.print_semicolon(); self.print_soft_newline(); } stmt => { if need_space && self.options.minify { self.print_hard_space(); } self.print_next_indent_as_space = true; stmt.print(self, ctx); } } } fn print_block_statement(&mut self, stmt: &BlockStatement<'_>, ctx: Context) { self.print_curly_braces(stmt.span, stmt.body.is_empty(), |p| { for stmt in &stmt.body { p.print_semicolon_if_needed(); stmt.print(p, ctx); } }); self.needs_semicolon = false; } // We tried optimizing this to move the `index != 0` check out of the loop: // ``` // let mut iter = items.iter(); // let Some(item) = iter.next() else { return }; // item.print(self, ctx); // for item in iter { // self.print_comma(); // self.print_soft_space(); // item.print(self, ctx); // } // ``` // But it turned out this was actually a bit slower. // fn print_list(&mut self, items: &[T], ctx: Context) { for (index, item) in items.iter().enumerate() { if index != 0 { self.print_comma(); self.print_soft_space(); } item.print(self, ctx); } } fn print_list_with_comments(&mut self, items: &[T], ctx: Context) { for (index, item) in items.iter().enumerate() { if index != 0 { self.print_comma(); } if self.has_non_annotation_comment(item.span().start) { self.print_expr_comments(item.span().start); self.print_indent(); } else { self.print_soft_newline(); self.print_indent(); } item.print(self, ctx); } } fn print_expressions(&mut self, items: &[T], precedence: Precedence, ctx: Context) { for (index, item) in items.iter().enumerate() { if index != 0 { self.print_comma(); self.print_soft_space(); } item.print_expr(self, precedence, ctx); } } fn get_identifier_reference_name(&self, reference: &IdentifierReference<'a>) -> &'a str { if let Some(mangler) = &self.mangler { if let Some(reference_id) = reference.reference_id.get() { if let Some(name) = mangler.get_reference_name(reference_id) { // SAFETY: Hack the lifetime to be part of the allocator. return unsafe { std::mem::transmute_copy(&name) }; } } } reference.name.as_str() } fn get_binding_identifier_name(&self, ident: &BindingIdentifier<'a>) -> &'a str { if let Some(mangler) = &self.mangler { if let Some(symbol_id) = ident.symbol_id.get() { let name = mangler.get_symbol_name(symbol_id); // SAFETY: Hack the lifetime to be part of the allocator. return unsafe { std::mem::transmute_copy(&name) }; } } ident.name.as_str() } fn print_space_before_operator(&mut self, next: Operator) { if self.prev_op_end != self.code.len() { return; } let Some(prev) = self.prev_op else { return }; // "+ + y" => "+ +y" // "+ ++ y" => "+ ++y" // "x + + y" => "x+ +y" // "x ++ + y" => "x+++y" // "x + ++ y" => "x+ ++y" // "-- >" => "-- >" // "< ! --" => "(&mut self, wrap: bool, mut f: F) { if wrap { self.print_ascii_byte(b'('); } f(self); if wrap { self.print_ascii_byte(b')'); } } #[inline] fn wrap_quote(&mut self, mut f: F) { self.print_ascii_byte(self.quote); f(self, self.quote); self.print_ascii_byte(self.quote); } fn add_source_mapping(&mut self, position: u32) { if let Some(sourcemap_builder) = self.sourcemap_builder.as_mut() { sourcemap_builder.add_source_mapping(self.code.as_bytes(), position, None); } } fn add_source_mapping_for_name(&mut self, span: Span, name: &str) { if let Some(sourcemap_builder) = self.sourcemap_builder.as_mut() { sourcemap_builder.add_source_mapping_for_name(self.code.as_bytes(), span, name); } } }