From 7ea99f40ac6fb546b2324ebc4e234d44ca899bd5 Mon Sep 17 00:00:00 2001 From: sapphi-red <49056869+sapphi-red@users.noreply.github.com> Date: Fri, 31 Jan 2025 04:13:40 +0000 Subject: [PATCH] feat(minifier): compress array of string literals to `'str1,str2'.split(',')` (#8786) Ported `["str1", "str2", ...]` => `"str1 str2".split(" ")` compression from closure compiler with some tweaks. --- .../peephole/substitute_alternate_syntax.rs | 108 ++++++++++++++++-- tasks/minsize/minsize.snap | 14 +-- 2 files changed, 104 insertions(+), 18 deletions(-) diff --git a/crates/oxc_minifier/src/peephole/substitute_alternate_syntax.rs b/crates/oxc_minifier/src/peephole/substitute_alternate_syntax.rs index e93af24b4..22648e72b 100644 --- a/crates/oxc_minifier/src/peephole/substitute_alternate_syntax.rs +++ b/crates/oxc_minifier/src/peephole/substitute_alternate_syntax.rs @@ -907,6 +907,7 @@ impl<'a> LatePeepholeOptimizations { if let Some(folded_expr) = match expr { Expression::BooleanLiteral(_) => Self::try_compress_boolean(expr, ctx), + Expression::ArrayExpression(_) => Self::try_compress_array_expression(expr, ctx), _ => None, } { *expr = folded_expr; @@ -939,6 +940,78 @@ impl<'a> LatePeepholeOptimizations { Some(ctx.ast.expression_unary(lit.span, UnaryOperator::LogicalNot, num)) } + /// Transforms long array expression with string literals to `"str1,str2".split(',')` + fn try_compress_array_expression( + expr: &mut Expression<'a>, + ctx: Ctx<'a, '_>, + ) -> Option> { + // this threshold is chosen by hand by checking the minsize output + const THRESHOLD: usize = 40; + + let Expression::ArrayExpression(array) = expr else { unreachable!() }; + + let is_all_string = array + .elements + .iter() + .all(|element| element.as_expression().is_some_and(Expression::is_string_literal)); + if !is_all_string { + return None; + } + + let element_count = array.elements.len(); + // replace with `.split` only when the saved size is great enough + // because using `.split` in some places and not in 
others may cause gzipped size to be bigger + let can_save = element_count * 2 > ".split('.')".len() + THRESHOLD; + if !can_save { + return None; + } + + let strings = array.elements.iter().map(|element| { + let Expression::StringLiteral(str) = element.to_expression() else { unreachable!() }; + str.value.as_str() + }); + let delimiter = Self::pick_delimiter(&strings)?; + + let concatenated_string = strings.collect::>().join(delimiter); + + // "str1,str2".split(',') + Some(ctx.ast.expression_call( + expr.span(), + Expression::StaticMemberExpression(ctx.ast.alloc_static_member_expression( + expr.span(), + ctx.ast.expression_string_literal( + expr.span(), + ctx.ast.atom(&concatenated_string), + None, + ), + ctx.ast.identifier_name(expr.span(), "split"), + false, + )), + Option::::None, + ctx.ast.vec1(Argument::from(ctx.ast.expression_string_literal( + expr.span(), + ctx.ast.atom(delimiter), + None, + ))), + false, + )) + } + + fn pick_delimiter<'s>( + strings: &(impl Iterator + Clone), + ) -> Option<&'static str> { + // These delimiters are chars that appear a lot in the program + // therefore probably have a small Huffman encoding. 
+ const DELIMITERS: [&str; 5] = [".", ",", "(", ")", " "]; + + let is_all_length_1 = strings.clone().all(|s| s.len() == 1); + if is_all_length_1 { + return Some(""); + } + + DELIMITERS.into_iter().find(|&delimiter| strings.clone().all(|s| !s.contains(delimiter))) + } + pub fn substitute_catch_clause(&mut self, catch: &mut CatchClause<'a>, ctx: Ctx<'a, '_>) { if self.target >= ESTarget::ES2019 { if let Some(param) = &catch.param { @@ -1233,20 +1306,33 @@ mod test { } #[test] - #[ignore] fn test_string_array_splitting() { - test_same("var x=['1','2','3','4']"); - test_same("var x=['1','2','3','4','5']"); - test("var x=['1','2','3','4','5','6']", "var x='123456'.split('')"); - test("var x=['1','2','3','4','5','00']", "var x='1 2 3 4 5 00'.split(' ')"); - test("var x=['1','2','3','4','5','6','7']", "var x='1234567'.split('')"); - test("var x=['1','2','3','4','5','6','00']", "var x='1 2 3 4 5 6 00'.split(' ')"); - test("var x=[' ,',',',',',',',',',',']", "var x=' ,;,;,;,;,;,'.split(';')"); - test("var x=[',,',' ',',',',',',',',']", "var x=',,; ;,;,;,;,'.split(';')"); - test("var x=['a,',' ',',',',',',',',']", "var x='a,; ;,;,;,;,'.split(';')"); + const REPEAT: usize = 20; + let additional_args = ",'1'".repeat(REPEAT); + let test_with_longer_args = + |source_text_partial: &str, expected_partial: &str, delimiter: &str| { + let expected = &format!( + "var x='{expected_partial}{}'.split('{delimiter}')", + format!("{delimiter}1").repeat(REPEAT) + ); + test(&format!("var x=[{source_text_partial}{additional_args}]"), expected); + }; + let test_same_with_longer_args = |source_text_partial: &str| { + test_same(&format!("var x=[{source_text_partial}{additional_args}]")); + }; + + test_same_with_longer_args("'1','2','3','4'"); + test_same_with_longer_args("'1','2','3','4','5'"); + test_with_longer_args("'1','2','3','4','5','6'", "123456", ""); + test_with_longer_args("'1','2','3','4','5','00'", "1.2.3.4.5.00", "."); + test_with_longer_args("'1','2','3','4','5','6','7'", "1234567", 
""); + test_with_longer_args("'1','2','3','4','5','6','00'", "1.2.3.4.5.6.00", "."); + test_with_longer_args("'.,',',',',',',',',',','", ".,(,(,(,(,(,", "("); + test_with_longer_args("',,','.',',',',',',',','", ",,(.(,(,(,(,", "("); + test_with_longer_args("'a,','.',',',',',',',','", "a,(.(,(,(,(,", "("); // all possible delimiters used, leave it alone - test_same("var x=[',', ' ', ';', '{', '}']"); + test_same_with_longer_args("'.', ',', '(', ')', ' '"); } #[test] diff --git a/tasks/minsize/minsize.snap b/tasks/minsize/minsize.snap index 47ed2f450..59e8edb36 100644 --- a/tasks/minsize/minsize.snap +++ b/tasks/minsize/minsize.snap @@ -9,19 +9,19 @@ Original | minified | minified | gzip | gzip | Fixture 342.15 kB | 117.69 kB | 118.14 kB | 43.55 kB | 44.37 kB | vue.js -544.10 kB | 71.49 kB | 72.48 kB | 25.89 kB | 26.20 kB | lodash.js +544.10 kB | 71.44 kB | 72.48 kB | 25.87 kB | 26.20 kB | lodash.js 555.77 kB | 271.48 kB | 270.13 kB | 88.38 kB | 90.80 kB | d3.js -1.01 MB | 457.63 kB | 458.89 kB | 123.53 kB | 126.71 kB | bundle.min.js +1.01 MB | 441.51 kB | 458.89 kB | 122.54 kB | 126.71 kB | bundle.min.js -1.25 MB | 650.59 kB | 646.76 kB | 161.11 kB | 163.73 kB | three.js +1.25 MB | 650.46 kB | 646.76 kB | 161 kB | 163.73 kB | three.js -2.14 MB | 718.83 kB | 724.14 kB | 162.15 kB | 181.07 kB | victory.js +2.14 MB | 718.76 kB | 724.14 kB | 162.15 kB | 181.07 kB | victory.js -3.20 MB | 1.01 MB | 1.01 MB | 324.36 kB | 331.56 kB | echarts.js +3.20 MB | 1.01 MB | 1.01 MB | 324.35 kB | 331.56 kB | echarts.js -6.69 MB | 2.30 MB | 2.31 MB | 469.42 kB | 488.28 kB | antd.js +6.69 MB | 2.30 MB | 2.31 MB | 469.30 kB | 488.28 kB | antd.js -10.95 MB | 3.37 MB | 3.49 MB | 864.74 kB | 915.50 kB | typescript.js +10.95 MB | 3.37 MB | 3.49 MB | 864.70 kB | 915.50 kB | typescript.js