feat(codegen): move minifying printer to codegen crate (#985)

This commit is contained in:
Boshen 2023-10-12 14:56:30 +08:00 committed by GitHub
parent f28d96c378
commit 809f050a5f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 562 additions and 608 deletions

2
.github/labeler.yml vendored
View file

@ -10,7 +10,7 @@ A-minifier: 'crates/oxc_minifier/**/*'
A-parser: 'crates/oxc_parser/**/*'
A-printer: 'crates/oxc_minifier/src/printer/**/*'
A-codegen: 'crates/oxc_codegen/**/*'
A-semantic: 'crates/oxc_semantic/**/*'

4
Cargo.lock generated
View file

@ -1517,10 +1517,13 @@ dependencies = [
name = "oxc_codegen"
version = "0.2.0"
dependencies = [
"bitflags 2.4.0",
"num-bigint",
"oxc_allocator",
"oxc_ast",
"oxc_parser",
"oxc_span",
"oxc_syntax",
]
[[package]]
@ -1659,6 +1662,7 @@ dependencies = [
"num-traits",
"oxc_allocator",
"oxc_ast",
"oxc_codegen",
"oxc_index",
"oxc_parser",
"oxc_semantic",

View file

@ -19,6 +19,10 @@ doctest = false
oxc_ast = { workspace = true }
oxc_span = { workspace = true }
oxc_allocator = { workspace = true }
oxc_syntax = { workspace = true }
bitflags = { workspace = true }
num-bigint = { workspace = true }
[dev-dependencies]
oxc_parser = { workspace = true }

View file

@ -4,9 +4,32 @@
//!
//! * whitespace removal
//! * sourcemaps
//!
//! Code adapted from
//! * [esbuild](https://github.com/evanw/esbuild/blob/main/internal/js_printer/js_printer.go)
mod context;
mod gen;
mod operator;
use std::str::from_utf8_unchecked;
#[allow(clippy::wildcard_imports)]
use oxc_ast::ast::*;
use oxc_span::Atom;
use oxc_syntax::{
identifier::is_identifier_part,
operator::{BinaryOperator, UnaryOperator, UpdateOperator},
precedence::Precedence,
symbol::SymbolId,
};
use self::{
context::Context,
gen::{Gen, GenExpr},
operator::Operator,
};
// use crate::mangler::Mangler;
/// Options accepted by `Codegen::new`.
///
/// Currently a unit struct: it carries no configurable fields.
#[derive(Debug, Default, Clone, Copy)]
pub struct CodegenOptions;
@ -15,8 +38,30 @@ pub struct Codegen<const MINIFY: bool> {
#[allow(unused)]
options: CodegenOptions,
// mangler: Option<Mangler>,
/// Output Code
code: Vec<u8>,
// states
prev_op_end: usize,
prev_reg_exp_end: usize,
need_space_before_dot: usize,
/// For avoiding `;` if the previous statement ends with `}`.
needs_semicolon: bool,
prev_op: Option<Operator>,
start_of_stmt: usize,
start_of_arrow_expr: usize,
start_of_default_export: usize,
}
/// Separator emitted by `print_sequence` after each generated item.
#[derive(Debug, Clone, Copy)]
pub enum Separator {
/// Emit `,` after each item.
Comma,
/// Emit `;` after each item.
Semicolon,
/// Emit nothing after each item.
None,
}
impl<const MINIFY: bool> Codegen<MINIFY> {
@ -24,14 +69,206 @@ impl<const MINIFY: bool> Codegen<MINIFY> {
// Initialize the output code buffer to reduce memory reallocation.
// Minification will reduce by at least half of the original size.
let capacity = if MINIFY { source_len / 2 } else { source_len };
Self { options, code: Vec::with_capacity(capacity) }
Self {
options,
// mangler: None,
code: Vec::with_capacity(capacity),
needs_semicolon: false,
need_space_before_dot: 0,
prev_op_end: 0,
prev_reg_exp_end: 0,
prev_op: None,
start_of_stmt: 0,
start_of_arrow_expr: 0,
start_of_default_export: 0,
}
}
pub fn build(self, _program: &Program<'_>) -> String {
// pub fn with_mangler(&mut self, mangler: Mangler) {
// self.mangler = Some(mangler);
// }
/// Generates code for `program` with a default `Context` and returns the output as a
/// `String`, consuming the generator.
pub fn build(mut self, program: &Program<'_>) -> String {
    let ctx = Context::default();
    program.gen(&mut self, ctx);
    self.into_code()
}
/// Consumes the generator and converts the accumulated output bytes into a `String`.
fn into_code(self) -> String {
// SAFETY: no UTF-8 validation is performed here. This relies on every byte written
// through `print` / `print_str` forming valid UTF-8 — TODO confirm all writers uphold this.
unsafe { String::from_utf8_unchecked(self.code) }
}
/// Borrows the output buffer written so far.
fn code(&self) -> &Vec<u8> {
&self.code
}
/// Number of bytes written to the output buffer so far.
fn code_len(&self) -> usize {
    self.code.len()
}
/// Push a single byte onto the end of the output buffer.
fn print(&mut self, ch: u8) {
self.code.push(ch);
}
/// Append a byte slice to the end of the output buffer.
fn print_str(&mut self, s: &[u8]) {
self.code.extend_from_slice(s);
}
/// Writes a `;` unconditionally.
fn print_semicolon(&mut self) {
self.print(b';');
}
/// Writes a `,`.
fn print_comma(&mut self) {
self.print(b',');
}
/// Writes one space before `next` when printing it directly after the previous operator
/// would fuse the two into a different token sequence.
///
/// Nothing is written unless a previous operator is recorded and it ended exactly at the
/// current end of the output buffer.
fn print_space_before_operator(&mut self, next: Operator) {
    let prev = match self.prev_op {
        Some(op) if self.prev_op_end == self.code.len() => op,
        _ => return,
    };

    let plus = Operator::Binary(BinaryOperator::Addition);
    let minus = Operator::Binary(BinaryOperator::Subtraction);
    let greater_than = Operator::Binary(BinaryOperator::GreaterThan);
    let unary_plus = Operator::Unary(UnaryOperator::UnaryPlus);
    let unary_minus = Operator::Unary(UnaryOperator::UnaryNegation);
    let logical_not = Operator::Unary(UnaryOperator::LogicalNot);
    let increment = Operator::Update(UpdateOperator::Increment);
    // NOTE(review): prefix and postfix `--` are the same `Operator` value here, so the
    // checks below cannot tell them apart.
    let decrement = Operator::Update(UpdateOperator::Decrement);

    // "+ + y" => "+ +y"
    // "+ ++ y" => "+ ++y"
    // "x + + y" => "x+ +y"
    // "x ++ + y" => "x+++y"
    // "x + ++ y" => "x+ ++y"
    let after_plus = (prev == plus || prev == unary_plus)
        && (next == plus || next == unary_plus || next == increment);
    let after_minus = (prev == minus || prev == unary_minus)
        && (next == minus || next == unary_minus || next == decrement);
    // "-- >" => "-- >"
    let decrement_then_gt = prev == decrement && next == greater_than;
    // "< ! --" => "<! --"
    let lt_not_then_decrement =
        prev == logical_not && next == decrement && self.peek_nth(1) == Some('<');

    if after_plus || after_minus || decrement_then_gt || lt_not_then_decrement {
        self.print(b' ');
    }
}
/// Writes a space when the output is non-empty and either its last character is an
/// identifier part or `prev_reg_exp_end` equals the current output length.
fn print_space_before_identifier(&mut self) {
    let Some(last) = self.peek_nth(0) else { return };
    if is_identifier_part(last) || self.prev_reg_exp_end == self.code.len() {
        self.print(b' ');
    }
}
/// Returns the `n`-th character counting back from the end of the output (`n == 0` is the
/// last character), or `None` when the output has fewer than `n + 1` characters.
fn peek_nth(&self, n: usize) -> Option<char> {
    // SAFETY: not validated here; assumes the buffer only ever holds valid UTF-8
    // — TODO confirm every writer upholds this.
    let text = unsafe { from_utf8_unchecked(self.code()) };
    text.chars().rev().nth(n)
}
/// Marks a `;` as pending instead of writing it immediately; the pending semicolon is
/// written by `print_semicolon_if_needed`.
fn print_semicolon_after_statement(&mut self) {
self.needs_semicolon = true;
}
/// Writes the pending `;` (if one is pending) and clears the pending flag.
fn print_semicolon_if_needed(&mut self) {
    // `take` returns the old flag value and leaves `false` behind.
    if std::mem::take(&mut self.needs_semicolon) {
        self.print_semicolon();
    }
}
/// Writes `...`.
fn print_ellipsis(&mut self) {
self.print_str(b"...");
}
/// Writes a `:`.
fn print_colon(&mut self) {
self.print(b':');
}
/// Writes a `=`.
fn print_equal(&mut self) {
self.print(b'=');
}
/// Generates every item of `items` in order, writing `separator` after each item —
/// including the last one.
///
/// * `items` - the nodes to generate.
/// * `separator` - what to write after each item (`Separator::None` writes nothing).
/// * `ctx` - the context passed unchanged to each item's `gen`.
fn print_sequence<T: Gen<MINIFY>>(&mut self, items: &[T], separator: Separator, ctx: Context) {
    for item in items {
        item.gen(self, ctx);
        match separator {
            Separator::Semicolon => self.print_semicolon(),
            Separator::Comma => self.print_comma(),
            Separator::None => {}
        }
    }
}
// fn print_body(&mut self, stmt: &Statement<'_>, ctx: Context) {
// if let Statement::BlockStatement(block) = stmt {
// self.print_block1(block, ctx);
// } else {
// stmt.gen(self, ctx);
// }
// }
/// Writes a block statement as `{` + its statements + `}`.
///
/// A pending semicolon is flushed before each statement; any semicolon still pending after
/// the last statement is dropped before the closing `}` is written.
fn print_block1(&mut self, stmt: &BlockStatement<'_>, ctx: Context) {
    self.print(b'{');
    for statement in &stmt.body {
        // Flush the `;` owed by the previous statement, if any.
        if self.needs_semicolon {
            self.print(b';');
            self.needs_semicolon = false;
        }
        statement.gen(self, ctx);
    }
    // Discard the `;` owed by the last statement rather than writing it.
    self.needs_semicolon = false;
    self.print(b'}');
}
/// Writes `{`, then `items` via `print_sequence` with the given `separator`, then `}`.
fn print_block<T: Gen<MINIFY>>(&mut self, items: &[T], separator: Separator, ctx: Context) {
self.print(b'{');
self.print_sequence(items, separator, ctx);
self.print(b'}');
}
/// Generates `items` in order with a `,` between consecutive items (no trailing comma).
/// Writes nothing for an empty slice.
fn print_list<T: Gen<MINIFY>>(&mut self, items: &[T], ctx: Context) {
    let Some((head, tail)) = items.split_first() else { return };
    head.gen(self, ctx);
    for item in tail {
        self.print_comma();
        item.gen(self, ctx);
    }
}
/// Generates each expression in `items` at the given `precedence`, with a `,` between
/// consecutive expressions (no trailing comma). Writes nothing for an empty slice.
fn print_expressions<T: GenExpr<MINIFY>>(
    &mut self,
    items: &[T],
    precedence: Precedence,
    ctx: Context,
) {
    let Some((head, tail)) = items.split_first() else { return };
    head.gen_expr(self, precedence, ctx);
    for expr in tail {
        self.print_comma();
        expr.gen_expr(self, precedence, ctx);
    }
}
/// Writes the name of a symbol.
///
/// Currently always writes `fallback`: the mangler lookup below is commented out, so
/// `_symbol_id` is unused.
fn print_symbol(&mut self, _symbol_id: Option<SymbolId>, fallback: &Atom) {
// if let Some(mangler) = &self.mangler {
// if let Some(symbol_id) = symbol_id {
// let name = mangler.get_symbol_name(symbol_id);
// self.print_str(name.clone().as_bytes());
// return;
// }
// }
self.print_str(fallback.as_bytes());
}
/// Runs `f` once, surrounding whatever it writes with `(` and `)` when `wrap` is true.
fn wrap<F: FnMut(&mut Self)>(&mut self, wrap: bool, mut f: F) {
    if !wrap {
        f(self);
        return;
    }
    self.print(b'(');
    f(self);
    self.print(b')');
}
}

View file

@ -23,6 +23,8 @@ oxc_ast = { workspace = true }
oxc_semantic = { workspace = true }
oxc_syntax = { workspace = true }
oxc_index = { workspace = true }
oxc_codegen = { workspace = true }
bitflags = { workspace = true }
num-bigint = { workspace = true }
itertools = { workspace = true }

View file

@ -2,28 +2,28 @@
mod compressor;
mod mangler;
mod printer;
use oxc_allocator::Allocator;
use oxc_codegen::Codegen;
use oxc_parser::Parser;
use oxc_span::SourceType;
pub use crate::{
compressor::{CompressOptions, Compressor},
mangler::ManglerBuilder,
printer::{Printer, PrinterOptions},
};
pub use oxc_codegen::CodegenOptions;
#[derive(Debug, Clone, Copy)]
pub struct MinifierOptions {
pub mangle: bool,
pub compress: CompressOptions,
pub print: PrinterOptions,
pub codegen: CodegenOptions,
}
impl Default for MinifierOptions {
fn default() -> Self {
Self { mangle: true, compress: CompressOptions::default(), print: PrinterOptions }
Self { mangle: true, compress: CompressOptions::default(), codegen: CodegenOptions }
}
}
@ -45,11 +45,11 @@ impl<'a> Minifier<'a> {
let program = allocator.alloc(ret.program);
Compressor::new(&allocator, self.options.compress).build(program);
let mut printer = Printer::new(self.source_text.len(), self.options.print);
if self.options.mangle {
let mangler = ManglerBuilder.build(program);
printer.with_mangler(mangler);
}
printer.build(program)
let codegen = Codegen::<true>::new(self.source_text.len(), self.options.codegen);
// if self.options.mangle {
// let mangler = ManglerBuilder.build(program);
// printer.with_mangler(mangler);
// }
codegen.build(program)
}
}

View file

@ -1,269 +0,0 @@
//! Printer with whitespace minification
//! code adapted from [esbuild](https://github.com/evanw/esbuild/blob/main/internal/js_printer/js_printer.go)
#![allow(unused)]
mod context;
mod gen;
mod operator;
use std::{rc::Rc, str::from_utf8_unchecked};
#[allow(clippy::wildcard_imports)]
use oxc_ast::ast::*;
use oxc_ast::precedence;
use oxc_semantic::{SymbolId, SymbolTable};
use oxc_span::{Atom, Span};
use oxc_syntax::{
identifier::is_identifier_part,
operator::{
AssignmentOperator, BinaryOperator, LogicalOperator, UnaryOperator, UpdateOperator,
},
precedence::Precedence,
};
use self::{
context::Context,
gen::{Gen, GenExpr},
operator::Operator,
};
use crate::mangler::Mangler;
/// Options accepted by `Printer::new`.
///
/// Currently a unit struct: it carries no configurable fields.
#[derive(Debug, Default, Clone, Copy)]
pub struct PrinterOptions;
/// Minifying printer: accumulates generated code as bytes plus the state needed to decide
/// where spaces and semicolons are required.
pub struct Printer {
options: PrinterOptions,
/// When set, `print_symbol` writes the mangled name for symbols that have a symbol id.
mangler: Option<Mangler>,
/// Output Code
code: Vec<u8>,
// states
/// Compared against `code.len()` by `print_space_before_operator`.
prev_op_end: usize,
/// Compared against `code.len()` by `print_space_before_identifier`.
prev_reg_exp_end: usize,
// NOTE(review): not read by any method in this file's `impl`; presumably used by `gen`.
need_space_before_dot: usize,
/// For avoiding `;` if the previous statement ends with `}`.
needs_semicolon: bool,
/// Last operator recorded; read by `print_space_before_operator`.
prev_op: Option<Operator>,
// NOTE(review): the three `start_of_*` fields are not read in this file's `impl`;
// presumably buffer offsets maintained by `gen` — confirm there.
start_of_stmt: usize,
start_of_arrow_expr: usize,
start_of_default_export: usize,
}
/// Separator emitted by `print_sequence` after each generated item.
#[derive(Debug, Clone, Copy)]
pub enum Separator {
/// Emit `,` after each item.
Comma,
/// Emit `;` after each item.
Semicolon,
/// Emit nothing after each item.
None,
}
/// Codegen interface for pretty print or minification
impl Printer {
    /// Creates a printer with all state zeroed, no mangler, and an output buffer pre-sized
    /// to half of `source_len`.
    pub fn new(source_len: usize, options: PrinterOptions) -> Self {
        // Initialize the output code buffer to reduce memory reallocation.
        // Minification will reduce by at least half the original size,
        // so in fact no reallocation should happen at all.
        let capacity = source_len / 2;
        Self {
            options,
            mangler: None,
            code: Vec::with_capacity(capacity),
            needs_semicolon: false,
            need_space_before_dot: 0,
            prev_op_end: 0,
            prev_reg_exp_end: 0,
            prev_op: None,
            start_of_stmt: 0,
            start_of_arrow_expr: 0,
            start_of_default_export: 0,
        }
    }

    /// Installs a mangler; afterwards `print_symbol` writes mangled names for symbols that
    /// carry a symbol id.
    pub fn with_mangler(&mut self, mangler: Mangler) {
        self.mangler = Some(mangler);
    }

    /// Generates code for `program` with a default `Context` and returns the output as a
    /// `String`, consuming the printer.
    pub fn build(mut self, program: &Program<'_>) -> String {
        program.gen(&mut self, Context::default());
        self.into_code()
    }

    /// Consumes the printer and converts the accumulated output bytes into a `String`.
    fn into_code(self) -> String {
        // SAFETY: no UTF-8 validation is performed here. This relies on every byte written
        // through `print` / `print_str` forming valid UTF-8 — TODO confirm all writers
        // uphold this.
        unsafe { String::from_utf8_unchecked(self.code) }
    }

    /// Borrows the output buffer written so far.
    fn code(&self) -> &Vec<u8> {
        &self.code
    }

    /// Number of bytes written to the output buffer so far.
    fn code_len(&self) -> usize {
        self.code().len()
    }

    /// Push a single byte onto the end of the output buffer.
    fn print(&mut self, ch: u8) {
        self.code.push(ch);
    }

    /// Append a byte slice to the end of the output buffer.
    fn print_str(&mut self, s: &[u8]) {
        self.code.extend_from_slice(s);
    }

    /// Writes a `;` unconditionally.
    fn print_semicolon(&mut self) {
        self.print(b';');
    }

    /// Writes a `,`.
    fn print_comma(&mut self) {
        self.print(b',');
    }

    /// Writes one space before `next` when printing it directly after the previous operator
    /// would fuse the two into a different token sequence.
    ///
    /// Nothing is written unless the previous operator ended exactly at the current end of
    /// the output buffer and a previous operator is recorded.
    fn print_space_before_operator(&mut self, next: Operator) {
        if self.prev_op_end != self.code.len() {
            return;
        }
        let Some(prev) = self.prev_op else { return };
        // "+ + y" => "+ +y"
        // "+ ++ y" => "+ ++y"
        // "x + + y" => "x+ +y"
        // "x ++ + y" => "x+++y"
        // "x + ++ y" => "x+ ++y"
        // "-- >" => "-- >"
        // "< ! --" => "<! --"
        let bin_op_add = Operator::Binary(BinaryOperator::Addition);
        let bin_op_sub = Operator::Binary(BinaryOperator::Subtraction);
        let un_op_pos = Operator::Unary(UnaryOperator::UnaryPlus);
        let un_op_pre_inc = Operator::Update(UpdateOperator::Increment);
        let un_op_neg = Operator::Unary(UnaryOperator::UnaryNegation);
        // NOTE(review): the prefix and postfix decrement values below are identical, so the
        // checks that use them cannot tell `--x` from `x--`.
        let un_op_pre_dec = Operator::Update(UpdateOperator::Decrement);
        let un_op_post_dec = Operator::Update(UpdateOperator::Decrement);
        let bin_op_gt = Operator::Binary(BinaryOperator::GreaterThan);
        let un_op_not = Operator::Unary(UnaryOperator::LogicalNot);
        if ((prev == bin_op_add || prev == un_op_pos)
            && (next == bin_op_add || next == un_op_pos || next == un_op_pre_inc))
            || ((prev == bin_op_sub || prev == un_op_neg)
                && (next == bin_op_sub || next == un_op_neg || next == un_op_pre_dec))
            || (prev == un_op_post_dec && next == bin_op_gt)
            || (prev == un_op_not && next == un_op_pre_dec && self.peek_nth(1) == Some('<'))
        {
            self.print(b' ');
        }
    }

    /// Writes a space when the output is non-empty and either its last character is an
    /// identifier part or `prev_reg_exp_end` equals the current output length.
    fn print_space_before_identifier(&mut self) {
        if self
            .peek_nth(0)
            .is_some_and(|ch| is_identifier_part(ch) || self.prev_reg_exp_end == self.code.len())
        {
            self.print(b' ');
        }
    }

    /// Returns the `n`-th character counting back from the end of the output (`n == 0` is
    /// the last character), or `None` when the output has fewer than `n + 1` characters.
    fn peek_nth(&self, n: usize) -> Option<char> {
        // SAFETY: not validated here; assumes the buffer only ever holds valid UTF-8
        // — TODO confirm every writer upholds this.
        unsafe { from_utf8_unchecked(self.code()) }.chars().nth_back(n)
    }

    /// Marks a `;` as pending instead of writing it immediately; the pending semicolon is
    /// written by `print_semicolon_if_needed`.
    fn print_semicolon_after_statement(&mut self) {
        self.needs_semicolon = true;
    }

    /// Writes the pending `;` (if one is pending) and clears the pending flag.
    fn print_semicolon_if_needed(&mut self) {
        if self.needs_semicolon {
            self.print_semicolon();
            self.needs_semicolon = false;
        }
    }

    /// Writes `...`.
    fn print_ellipsis(&mut self) {
        self.print_str(b"...");
    }

    /// Writes a `:`.
    fn print_colon(&mut self) {
        self.print(b':');
    }

    /// Writes a `=`.
    fn print_equal(&mut self) {
        self.print(b'=');
    }

    /// Generates every item of `items` in order, writing `separator` after each item —
    /// including the last one.
    fn print_sequence<T: Gen>(&mut self, items: &[T], separator: Separator, ctx: Context) {
        for item in items {
            item.gen(self, ctx);
            match separator {
                Separator::Semicolon => self.print_semicolon(),
                Separator::Comma => self.print(b','),
                Separator::None => {}
            }
        }
    }

    /// Generates a statement used as a body: block statements go through `print_block1`,
    /// anything else through its own `gen`.
    fn print_body(&mut self, stmt: &Statement<'_>, ctx: Context) {
        if let Statement::BlockStatement(block) = stmt {
            self.print_block1(block, ctx);
        } else {
            stmt.gen(self, ctx);
        }
    }

    /// Writes a block statement as `{` + its statements + `}`.
    ///
    /// A pending semicolon is flushed before each statement; any semicolon still pending
    /// after the last statement is dropped before the closing `}` is written.
    fn print_block1(&mut self, stmt: &BlockStatement<'_>, ctx: Context) {
        self.print(b'{');
        for item in &stmt.body {
            self.print_semicolon_if_needed();
            item.gen(self, ctx);
        }
        self.needs_semicolon = false;
        self.print(b'}');
    }

    /// Writes `{`, then `items` via `print_sequence` with the given `separator`, then `}`.
    fn print_block<T: Gen>(&mut self, items: &[T], separator: Separator, ctx: Context) {
        self.print(b'{');
        self.print_sequence(items, separator, ctx);
        self.print(b'}');
    }

    /// Generates `items` in order with a `,` between consecutive items (no trailing comma).
    fn print_list<T: Gen>(&mut self, items: &[T], ctx: Context) {
        for (index, item) in items.iter().enumerate() {
            if index != 0 {
                self.print_comma();
            }
            item.gen(self, ctx);
        }
    }

    /// Generates each expression in `items` at the given `precedence`, with a `,` between
    /// consecutive expressions (no trailing comma).
    fn print_expressions<T: GenExpr>(&mut self, items: &[T], precedence: Precedence, ctx: Context) {
        for (index, item) in items.iter().enumerate() {
            if index != 0 {
                self.print_comma();
            }
            item.gen_expr(self, precedence, ctx);
        }
    }

    /// Writes the name of a symbol: the mangled name when both a mangler and a symbol id
    /// are present, otherwise `fallback`.
    fn print_symbol(&mut self, symbol_id: Option<SymbolId>, fallback: &Atom) {
        if let Some(mangler) = &self.mangler {
            if let Some(symbol_id) = symbol_id {
                let name = mangler.get_symbol_name(symbol_id);
                // The clone is deliberate: presumably `name` borrows from `self.mangler`,
                // which would conflict with the `&mut self` needed by `print_str`.
                self.print_str(name.clone().as_bytes());
                return;
            }
        }
        self.print_str(fallback.as_bytes());
    }

    /// Runs `f` once, surrounding whatever it writes with `(` and `)` when `wrap` is true.
    fn wrap<F: FnMut(&mut Self)>(&mut self, wrap: bool, mut f: F) {
        if wrap {
            self.print(b'(');
        }
        f(self);
        if wrap {
            self.print(b')');
        }
    }
}

View file

@ -6,7 +6,7 @@ mod oxc;
mod tdewolff;
mod terser;
use oxc_minifier::{CompressOptions, Minifier, MinifierOptions, PrinterOptions};
use oxc_minifier::{CodegenOptions, CompressOptions, Minifier, MinifierOptions};
use oxc_span::SourceType;
pub(crate) fn test(source_text: &str, expected: &str) {
@ -36,7 +36,7 @@ pub(crate) fn test_without_compress_booleans(source_text: &str, expected: &str)
let source_type = SourceType::default();
let compress_options = CompressOptions { booleans: false, ..CompressOptions::default() };
let options =
MinifierOptions { mangle: false, compress: compress_options, print: PrinterOptions };
MinifierOptions { mangle: false, compress: compress_options, codegen: CodegenOptions };
let minified = Minifier::new(source_text, source_type, options).build();
assert_eq!(expected, minified, "for source {source_text}");
}

View file

@ -1,7 +1,7 @@
use oxc_allocator::Allocator;
#[allow(clippy::wildcard_imports)]
use oxc_ast::ast::*;
use oxc_minifier::{CompressOptions, Minifier, MinifierOptions, PrinterOptions};
use oxc_minifier::{CodegenOptions, CompressOptions, Minifier, MinifierOptions};
use oxc_parser::Parser;
use oxc_span::{SourceType, Span};
use walkdir::WalkDir;
@ -68,7 +68,7 @@ impl TestCase {
let options = MinifierOptions {
mangle: false,
compress: self.compress_options,
print: PrinterOptions,
codegen: CodegenOptions,
};
let minified_source_text = Minifier::new(self.input.as_ref(), source_type, options).build();
assert_eq!(

View file

@ -1,27 +1,3 @@
minifier_test262 Summary:
AST Parsed : 44738/44738 (100.00%)
Positive Passed: 44714/44738 (99.95%)
Expect to Parse: "built-ins/Atomics/wait/bigint/waiterlist-order-of-operations-is-fifo.js"
Expect to Parse: "built-ins/Atomics/wait/waiterlist-order-of-operations-is-fifo.js"
Expect to Parse: "built-ins/Math/max/S15.8.2.11_A2.js"
Expect to Parse: "built-ins/Math/min/S15.8.2.12_A2.js"
Expect to Parse: "built-ins/Number/prototype/toString/S15.7.4.2_A4_T01.js"
Expect to Parse: "built-ins/Number/prototype/toString/S15.7.4.2_A4_T02.js"
Expect to Parse: "built-ins/Number/prototype/toString/S15.7.4.2_A4_T03.js"
Expect to Parse: "built-ins/Number/prototype/toString/S15.7.4.2_A4_T04.js"
Expect to Parse: "built-ins/Number/prototype/toString/S15.7.4.2_A4_T05.js"
Expect to Parse: "built-ins/Number/prototype/valueOf/S15.7.4.4_A2_T01.js"
Expect to Parse: "built-ins/Number/prototype/valueOf/S15.7.4.4_A2_T02.js"
Expect to Parse: "built-ins/Number/prototype/valueOf/S15.7.4.4_A2_T03.js"
Expect to Parse: "built-ins/Number/prototype/valueOf/S15.7.4.4_A2_T04.js"
Expect to Parse: "built-ins/Number/prototype/valueOf/S15.7.4.4_A2_T05.js"
Expect to Parse: "built-ins/encodeURI/S15.1.3.3_A2.4_T1.js"
Expect to Parse: "built-ins/encodeURI/S15.1.3.3_A2.4_T2.js"
Expect to Parse: "built-ins/encodeURIComponent/S15.1.3.4_A2.4_T1.js"
Expect to Parse: "built-ins/encodeURIComponent/S15.1.3.4_A2.4_T2.js"
Expect to Parse: "language/module-code/top-level-await/await-expr-reject-throws.js"
Expect to Parse: "language/statements/try/S12.14_A1.js"
Expect to Parse: "language/statements/try/S12.14_A13_T2.js"
Expect to Parse: "language/statements/try/S12.14_A13_T3.js"
Expect to Parse: "language/statements/variable/S12.2_A6_T2.js"
Expect to Parse: "staging/Intl402/Temporal/old/non-iso-calendars.js"
Positive Passed: 44738/44738 (100.00%)