feat(minifier): compress array of string literals to 'str1,str2'.split(',') (#8786)

Ported the `["str1", "str2", ...]` => `"str1 str2".split(" ")` compression from Closure Compiler, with some tweaks.
This commit is contained in:
sapphi-red 2025-01-31 04:13:40 +00:00
parent 2eac9c0370
commit 7ea99f40ac
No known key found for this signature in database
GPG key ID: 67631A259A77AC6C
2 changed files with 104 additions and 18 deletions

View file

@ -907,6 +907,7 @@ impl<'a> LatePeepholeOptimizations {
if let Some(folded_expr) = match expr {
Expression::BooleanLiteral(_) => Self::try_compress_boolean(expr, ctx),
Expression::ArrayExpression(_) => Self::try_compress_array_expression(expr, ctx),
_ => None,
} {
*expr = folded_expr;
@ -939,6 +940,78 @@ impl<'a> LatePeepholeOptimizations {
Some(ctx.ast.expression_unary(lit.span, UnaryOperator::LogicalNot, num))
}
/// Rewrites a long array literal that consists solely of string literals
/// into a `"str1,str2".split(',')` call expression.
///
/// Returns `None` (leaving the array untouched) when the array is too short
/// for the rewrite to pay off, when any element is not a string literal, or
/// when no usable delimiter can be found for the strings.
///
/// # Panics
///
/// Hits `unreachable!()` if `expr` is not an `Expression::ArrayExpression`;
/// the caller is expected to have matched on that variant already.
fn try_compress_array_expression(
    expr: &mut Expression<'a>,
    ctx: Ctx<'a, '_>,
) -> Option<Expression<'a>> {
    // Hand-tuned value, picked by inspecting the minsize output.
    const THRESHOLD: usize = 40;

    // Every node of the replacement reuses the span of the original array.
    let span = expr.span();
    let Expression::ArrayExpression(array) = expr else { unreachable!() };

    // Only switch to `.split` when the saving is large enough: using `.split`
    // in some places but not in others can make the gzipped output bigger.
    let minimum_saving = ".split('.')".len() + THRESHOLD;
    if array.elements.len() * 2 <= minimum_saving {
        return None;
    }

    // Holes, spreads and non-string expressions all disqualify the array.
    let has_non_string = array
        .elements
        .iter()
        .any(|element| !element.as_expression().is_some_and(Expression::is_string_literal));
    if has_non_string {
        return None;
    }

    // Lazy, cloneable view over the literal values; `pick_delimiter` walks it
    // several times before it is finally collected for joining.
    let literals = array.elements.iter().map(|element| match element.to_expression() {
        Expression::StringLiteral(literal) => literal.value.as_str(),
        _ => unreachable!(),
    });
    let delimiter = Self::pick_delimiter(&literals)?;
    let joined = literals.collect::<std::vec::Vec<_>>().join(delimiter);

    // `"str1,str2"`
    let receiver = ctx.ast.expression_string_literal(span, ctx.ast.atom(&joined), None);
    // `"str1,str2".split`
    let callee = Expression::StaticMemberExpression(ctx.ast.alloc_static_member_expression(
        span,
        receiver,
        ctx.ast.identifier_name(span, "split"),
        false,
    ));
    // `','`
    let separator = ctx.ast.expression_string_literal(span, ctx.ast.atom(delimiter), None);

    // `"str1,str2".split(',')`
    Some(ctx.ast.expression_call(
        span,
        callee,
        Option::<TSTypeParameterInstantiation>::None,
        ctx.ast.vec1(Argument::from(separator)),
        false,
    ))
}
/// Picks the separator used to join `strings` into a single literal.
///
/// Returns `Some("")` when every string is exactly one byte long (this also
/// covers an empty iterator), otherwise the first candidate delimiter that
/// occurs in none of the strings, or `None` when every candidate is used by
/// at least one string.
fn pick_delimiter<'s>(
    strings: &(impl Iterator<Item = &'s str> + Clone),
) -> Option<&'static str> {
    // These delimiters are chars that appear a lot in programs and therefore
    // probably have a short Huffman encoding.
    const DELIMITERS: [&str; 5] = [".", ",", "(", ")", " "];

    // One-byte strings need no separator at all.
    if strings.clone().all(|s| s.len() == 1) {
        return Some("");
    }

    // Candidates are tried in declaration order; the first one that does not
    // collide with any string wins.
    for candidate in DELIMITERS {
        let collides = strings.clone().any(|s| s.contains(candidate));
        if !collides {
            return Some(candidate);
        }
    }
    None
}
pub fn substitute_catch_clause(&mut self, catch: &mut CatchClause<'a>, ctx: Ctx<'a, '_>) {
if self.target >= ESTarget::ES2019 {
if let Some(param) = &catch.param {
@ -1233,20 +1306,33 @@ mod test {
}
// Exercises the rewrite of string-literal arrays into `"...".split(...)`
// calls, including arrays that must be left untouched.
//
// NOTE(review): this listing appears to interleave the pre-change and
// post-change lines of the diff. The `#[ignore]` attribute, the short-array
// expectations and the `;`-delimited expectations below do not match the
// delimiter list and size threshold of `try_compress_array_expression` as
// shown in this commit — confirm which lines belong to the final file.
#[test]
#[ignore]
fn test_string_array_splitting() {
test_same("var x=['1','2','3','4']");
test_same("var x=['1','2','3','4','5']");
test("var x=['1','2','3','4','5','6']", "var x='123456'.split('')");
test("var x=['1','2','3','4','5','00']", "var x='1 2 3 4 5 00'.split(' ')");
test("var x=['1','2','3','4','5','6','7']", "var x='1234567'.split('')");
test("var x=['1','2','3','4','5','6','00']", "var x='1 2 3 4 5 6 00'.split(' ')");
test("var x=[' ,',',',',',',',',',',']", "var x=' ,;,;,;,;,;,'.split(';')");
test("var x=[',,',' ',',',',',',',',']", "var x=',,; ;,;,;,;,'.split(';')");
test("var x=['a,',' ',',',',',',',',']", "var x='a,; ;,;,;,;,'.split(';')");
// Number of extra `'1'` elements appended to every array built by the
// helpers below, so the arrays are long enough to reach the size threshold.
const REPEAT: usize = 20;
let additional_args = ",'1'".repeat(REPEAT);
// Builds `var x=[<source>,'1',...]` and expects it to become
// `var x='<expected><delimiter>1...'.split('<delimiter>')`.
let test_with_longer_args =
|source_text_partial: &str, expected_partial: &str, delimiter: &str| {
let expected = &format!(
"var x='{expected_partial}{}'.split('{delimiter}')",
format!("{delimiter}1").repeat(REPEAT)
);
test(&format!("var x=[{source_text_partial}{additional_args}]"), expected);
};
// Builds the same padded array but passes it to `test_same` (presumably
// asserting the output is unchanged — verify against the helper).
let test_same_with_longer_args = |source_text_partial: &str| {
test_same(&format!("var x=[{source_text_partial}{additional_args}]"));
};
// 4 + 20 and 5 + 20 elements are kept as arrays; 6 + 20 and more are rewritten.
test_same_with_longer_args("'1','2','3','4'");
test_same_with_longer_args("'1','2','3','4','5'");
test_with_longer_args("'1','2','3','4','5','6'", "123456", "");
test_with_longer_args("'1','2','3','4','5','00'", "1.2.3.4.5.00", ".");
test_with_longer_args("'1','2','3','4','5','6','7'", "1234567", "");
test_with_longer_args("'1','2','3','4','5','6','00'", "1.2.3.4.5.6.00", ".");
// Strings containing `.` and `,` force the next candidate delimiter, `(`.
test_with_longer_args("'.,',',',',',',',',',','", ".,(,(,(,(,(,", "(");
test_with_longer_args("',,','.',',',',',',',','", ",,(.(,(,(,(,", "(");
test_with_longer_args("'a,','.',',',',',',',','", "a,(.(,(,(,(,", "(");
// all possible delimiters used, leave it alone
test_same("var x=[',', ' ', ';', '{', '}']");
test_same_with_longer_args("'.', ',', '(', ')', ' '");
}
#[test]

View file

@ -9,19 +9,19 @@ Original | minified | minified | gzip | gzip | Fixture
342.15 kB | 117.69 kB | 118.14 kB | 43.55 kB | 44.37 kB | vue.js
544.10 kB | 71.49 kB | 72.48 kB | 25.89 kB | 26.20 kB | lodash.js
544.10 kB | 71.44 kB | 72.48 kB | 25.87 kB | 26.20 kB | lodash.js
555.77 kB | 271.48 kB | 270.13 kB | 88.38 kB | 90.80 kB | d3.js
1.01 MB | 457.63 kB | 458.89 kB | 123.53 kB | 126.71 kB | bundle.min.js
1.01 MB | 441.51 kB | 458.89 kB | 122.54 kB | 126.71 kB | bundle.min.js
1.25 MB | 650.59 kB | 646.76 kB | 161.11 kB | 163.73 kB | three.js
1.25 MB | 650.46 kB | 646.76 kB | 161 kB | 163.73 kB | three.js
2.14 MB | 718.83 kB | 724.14 kB | 162.15 kB | 181.07 kB | victory.js
2.14 MB | 718.76 kB | 724.14 kB | 162.15 kB | 181.07 kB | victory.js
3.20 MB | 1.01 MB | 1.01 MB | 324.36 kB | 331.56 kB | echarts.js
3.20 MB | 1.01 MB | 1.01 MB | 324.35 kB | 331.56 kB | echarts.js
6.69 MB | 2.30 MB | 2.31 MB | 469.42 kB | 488.28 kB | antd.js
6.69 MB | 2.30 MB | 2.31 MB | 469.30 kB | 488.28 kB | antd.js
10.95 MB | 3.37 MB | 3.49 MB | 864.74 kB | 915.50 kB | typescript.js
10.95 MB | 3.37 MB | 3.49 MB | 864.70 kB | 915.50 kB | typescript.js