From 2d984f04018854f279f9cb938a1346fb7bee941e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=98=BF=E8=89=AF=E4=BB=94?= <32487868+cijiugechu@users.noreply.github.com> Date: Mon, 10 Jul 2023 23:11:28 +0800 Subject: [PATCH] feat(minifier): fold bigint number comparison (#536) --- crates/oxc_hir/src/hir_util.rs | 93 ++++++++++++++-- crates/oxc_minifier/src/compressor/fold.rs | 102 ++++++++++++++++-- .../tests/closure/fold_constants.rs | 56 ++++++++-- 3 files changed, 231 insertions(+), 20 deletions(-) diff --git a/crates/oxc_hir/src/hir_util.rs b/crates/oxc_hir/src/hir_util.rs index 8a9596789..bb11c5af1 100644 --- a/crates/oxc_hir/src/hir_util.rs +++ b/crates/oxc_hir/src/hir_util.rs @@ -1,5 +1,7 @@ use std::borrow::Cow; +use num_bigint::BigInt; +use num_traits::{One, Zero}; use oxc_semantic::ReferenceFlag; use oxc_syntax::operator::{AssignmentOperator, LogicalOperator, UnaryOperator}; @@ -227,6 +229,41 @@ impl NumberValue { } } +pub fn is_exact_int64(num: f64) -> bool { + num.fract() == 0.0 +} + +/// port from [closure compiler](https://github.com/google/closure-compiler/blob/master/src/com/google/javascript/jscomp/NodeUtil.java#L540) +pub fn get_string_bigint_value(raw_string: &str) -> Option { + if raw_string.contains('\u{000b}') { + // vertical tab is not always whitespace + return None; + } + + let s = raw_string.trim(); + + if s.is_empty() { + return Some(BigInt::zero()); + } + + if s.len() > 2 && s.starts_with('0') { + let radix: u32 = match s.chars().nth(1) { + Some('x' | 'X') => 16, + Some('o' | 'O') => 8, + Some('b' | 'B') => 2, + _ => 0, + }; + + if radix == 0 { + return None; + } + + return BigInt::parse_bytes(s[2..].as_bytes(), radix); + } + + return BigInt::parse_bytes(s.as_bytes(), 10); +} + /// port from [closure compiler](https://github.com/google/closure-compiler/blob/a4c880032fba961f7a6c06ef99daa3641810bfdd/src/com/google/javascript/jscomp/NodeUtil.java#L348) /// Gets the value of a node as a Number, or None if it cannot be converted. 
/// This method does not consider whether `expr` may have side effects. @@ -277,6 +314,45 @@ pub fn get_number_value(expr: &Expression) -> Option { } } +#[allow(clippy::cast_possible_truncation)] +pub fn get_bigint_value(expr: &Expression) -> Option { + match expr { + Expression::NumberLiteral(number_literal) => { + let value = number_literal.value; + if value.abs() < 2_f64.powi(53) && is_exact_int64(value) { + Some(BigInt::from(value as i64)) + } else { + None + } + } + Expression::BigintLiteral(bigint_literal) => Some(bigint_literal.value.clone()), + Expression::BooleanLiteral(bool_literal) => { + if bool_literal.value { + Some(BigInt::one()) + } else { + Some(BigInt::zero()) + } + } + Expression::UnaryExpression(unary_expr) => match unary_expr.operator { + UnaryOperator::LogicalNot => get_boolean_value(expr) + .map(|boolean| if boolean { BigInt::one() } else { BigInt::zero() }), + UnaryOperator::UnaryNegation => { + get_bigint_value(&unary_expr.argument).map(std::ops::Neg::neg) + } + UnaryOperator::BitwiseNot => { + get_bigint_value(&unary_expr.argument).map(std::ops::Not::not) + } + UnaryOperator::UnaryPlus => get_bigint_value(&unary_expr.argument), + _ => None, + }, + Expression::StringLiteral(string_literal) => get_string_bigint_value(&string_literal.value), + Expression::TemplateLiteral(_) => { + get_string_value(expr).and_then(|value| get_string_bigint_value(&value)) + } + _ => None, + } +} + /// port from [closure compiler](https://github.com/google/closure-compiler/blob/a4c880032fba961f7a6c06ef99daa3641810bfdd/src/com/google/javascript/jscomp/AbstractPeepholeOptimization.java#L104-L114) /// Returns the number value of the node if it has one and it cannot have side effects. pub fn get_side_free_number_value(expr: &Expression) -> Option { @@ -285,10 +361,17 @@ pub fn get_side_free_number_value(expr: &Expression) -> Option { // and there are only a very few cases where we can compute a number value, but there could // also be side effects. e.g. 
`void doSomething()` has value NaN, regardless of the behavior // of `doSomething()` - if value.is_some() && !expr.may_have_side_effects() { - return value; - } - None + if value.is_some() && expr.may_have_side_effects() { None } else { value } +} + +/// port from [closure compiler](https://github.com/google/closure-compiler/blob/master/src/com/google/javascript/jscomp/AbstractPeepholeOptimization.java#L121) +pub fn get_side_free_bigint_value(expr: &Expression) -> Option { + let value = get_bigint_value(expr); + // Calculating the bigint value, if any, is likely to be faster than calculating side effects, + // and there are only a very few cases where we can compute a bigint value, but there could + // also be side effects. Note: the number variant's `void doSomething()` example does not apply + // here, because `get_bigint_value` returns `None` for unary operators other than `!`, `-`, `~`, `+`. + if value.is_some() && expr.may_have_side_effects() { None } else { value } } /// port from [closure compiler](https://github.com/google/closure-compiler/blob/a4c880032fba961f7a6c06ef99daa3641810bfdd/src/com/google/javascript/jscomp/NodeUtil.java#L109) @@ -296,8 +379,6 @@ pub fn get_side_free_number_value(expr: &Expression) -> Option { /// such value can be determined by static analysis. /// This method does not consider whether the node may have side-effects. 
pub fn get_boolean_value(expr: &Expression) -> Option { - use num_traits::Zero; - match expr { Expression::RegExpLiteral(_) | Expression::ArrayExpression(_) diff --git a/crates/oxc_minifier/src/compressor/fold.rs b/crates/oxc_minifier/src/compressor/fold.rs index 93eb44218..909a104a4 100644 --- a/crates/oxc_minifier/src/compressor/fold.rs +++ b/crates/oxc_minifier/src/compressor/fold.rs @@ -4,11 +4,12 @@ use std::{cmp::Ordering, mem, ops::Not}; +use num_bigint::BigInt; #[allow(clippy::wildcard_imports)] use oxc_hir::hir::*; use oxc_hir::hir_util::{ - get_boolean_value, get_number_value, get_side_free_number_value, get_side_free_string_value, - IsLiteralValue, MayHaveSideEffects, NumberValue, + get_boolean_value, get_number_value, get_side_free_bigint_value, get_side_free_number_value, + get_side_free_string_value, is_exact_int64, IsLiteralValue, MayHaveSideEffects, NumberValue, }; use oxc_span::{Atom, GetSpan, Span}; use oxc_syntax::{ @@ -35,9 +36,67 @@ impl Tri { } } + pub fn xor(self, other: Self) -> Self { + self.for_int(-self.value() * other.value()) + } + + pub fn for_int(self, int: i8) -> Self { + match int { + -1 => Self::False, + 1 => Self::True, + _ => Self::Unknown, + } + } + pub fn for_boolean(boolean: bool) -> Self { if boolean { Self::True } else { Self::False } } + + pub fn value(self) -> i8 { + match self { + Self::True => 1, + Self::False => -1, + Self::Unknown => 0, + } + } +} + +/// ported from [closure compiler](https://github.com/google/closure-compiler/blob/master/src/com/google/javascript/jscomp/PeepholeFoldConstants.java#L1250) +#[allow(clippy::cast_possible_truncation)] +fn bigint_less_than_number( + bigint_value: &BigInt, + number_value: &NumberValue, + invert: Tri, + will_negative: bool, +) -> Tri { + // if invert is false, then the number is on the right in tryAbstractRelationalComparison + // if it's true, then the number is on the left + match number_value { + NumberValue::NaN => Tri::for_boolean(will_negative), + 
NumberValue::PositiveInfinity => Tri::True.xor(invert), + NumberValue::NegativeInfinity => Tri::False.xor(invert), + NumberValue::Number(num) => { + if let Some(Ordering::Equal | Ordering::Greater) = + num.abs().partial_cmp(&2_f64.powi(53)) + { + Tri::Unknown + } else { + let number_as_bigint = BigInt::from(*num as i64); + + match bigint_value.cmp(&number_as_bigint) { + Ordering::Less => Tri::True.xor(invert), + Ordering::Greater => Tri::False.xor(invert), + Ordering::Equal => { + if is_exact_int64(*num) { + Tri::False + } else { + Tri::for_boolean(num.is_sign_positive()).xor(invert) + } + } + } + } + } + } } /// JavaScript Language Type @@ -209,6 +268,15 @@ impl<'a> Compressor<'a> { return Tri::True; } + if matches!(left, Ty::BigInt) || matches!(right, Ty::BigInt) { + let left_bigint = get_side_free_bigint_value(left_expr); + let right_bigint = get_side_free_bigint_value(right_expr); + + if let Some(l_big) = left_bigint && let Some(r_big) = right_bigint { + return Tri::for_boolean(l_big.eq(&r_big)); + } + } + return Tri::False; } Tri::Unknown @@ -248,15 +316,33 @@ impl<'a> Compressor<'a> { } } - // try comparing as Numbers. + let left_bigint = get_side_free_bigint_value(left_expr); + let right_bigint = get_side_free_bigint_value(right_expr); + let left_num = get_side_free_number_value(left_expr); let right_num = get_side_free_number_value(right_expr); - if let Some(left_num) = left_num && let Some(right_num) = right_num { - match (left_num, right_num) { - (NumberValue::NaN, _) | (_, NumberValue::NaN) => return Tri::for_boolean(will_negative), - (NumberValue::Number(left_num), NumberValue::Number(right_num)) => return Tri::for_boolean(left_num < right_num), - _ => {} + + match (left_bigint, right_bigint, left_num, right_num) { + // Next, try to evaluate based on the value of the node. Try comparing as BigInts first. + (Some(l_big), Some(r_big), _, _) => { + return Tri::for_boolean(l_big < r_big); } + // try comparing as Numbers. 
+ (_, _, Some(l_num), Some(r_num)) => match (l_num, r_num) { + (NumberValue::NaN, _) | (_, NumberValue::NaN) => { + return Tri::for_boolean(will_negative); + } + (NumberValue::Number(l), NumberValue::Number(r)) => return Tri::for_boolean(l < r), + _ => {} + }, + // Finally, try comparisons between BigInt and Number. + (Some(l_big), _, _, Some(r_num)) => { + return bigint_less_than_number(&l_big, &r_num, Tri::False, will_negative); + } + (_, Some(r_big), Some(l_num), _) => { + return bigint_less_than_number(&r_big, &l_num, Tri::True, will_negative); + } + _ => {} } Tri::Unknown diff --git a/crates/oxc_minifier/tests/closure/fold_constants.rs b/crates/oxc_minifier/tests/closure/fold_constants.rs index f12422358..a58bbfeb5 100644 --- a/crates/oxc_minifier/tests/closure/fold_constants.rs +++ b/crates/oxc_minifier/tests/closure/fold_constants.rs @@ -1,6 +1,6 @@ //! -use crate::{test, test_same, test_without_compress_booleans}; +use crate::{test, test_same, test_without_compress_booleans as test_wcb}; #[test] fn undefined_comparison1() { @@ -153,6 +153,50 @@ fn test_string_string_comparison() { test_same("''+x===''+x"); // potentially foldable } +#[test] +fn test_bigint_number_comparison() { + test_wcb("1n < 2", "true"); + test_wcb("1n > 2", "false"); + test_wcb("1n == 1", "true"); + test_wcb("1n == 2", "false"); + + // comparing with decimals is allowed + test_wcb("1n < 1.1", "true"); + test_wcb("1n < 1.9", "true"); + test_wcb("1n < 0.9", "false"); + test_wcb("-1n < -1.1", "false"); + test_wcb("-1n < -1.9", "false"); + test_wcb("-1n < -0.9", "true"); + test_wcb("1n > 1.1", "false"); + test_wcb("1n > 0.9", "true"); + test_wcb("-1n > -1.1", "true"); + test_wcb("-1n > -0.9", "false"); + + // Don't fold unsafely large numbers because there might be floating-point error + let max_safe_int = 9_007_199_254_740_991_i64; + let neg_max_safe_int = -9_007_199_254_740_991_i64; + let max_safe_float = 9_007_199_254_740_991_f64; + let neg_max_safe_float = -9_007_199_254_740_991_f64; + 
test_wcb(&format!("0n > {max_safe_int}"), "false"); + test_wcb(&format!("0n < {max_safe_int}"), "true"); + test_wcb(&format!("0n > {neg_max_safe_int}"), "true"); + test_wcb(&format!("0n < {neg_max_safe_int}"), "false"); + test_wcb(&format!("0n > {max_safe_float}"), "false"); + test_wcb(&format!("0n < {max_safe_float}"), "true"); + test_wcb(&format!("0n > {neg_max_safe_float}"), "true"); + test_wcb(&format!("0n < {neg_max_safe_float}"), "false"); + + // comparing with Infinity is allowed + test_wcb("1n < Infinity", "true"); + test_wcb("1n > Infinity", "false"); + test_wcb("1n < -Infinity", "false"); + test_wcb("1n > -Infinity", "true"); + + // null is interpreted as 0 when comparing with bigint + test_wcb("1n < null", "false"); + test_wcb("1n > null", "true"); +} + #[test] fn js_typeof() { test("x = typeof 1", "x='number'"); @@ -215,7 +259,7 @@ fn unary_ops() { fn unary_with_big_int() { test("-(1n)", "-1n"); test("- -1n", "1n"); - test_without_compress_booleans("!1n", "false"); + test_wcb("!1n", "false"); test("~0n", "-1n"); } @@ -252,8 +296,8 @@ fn test_fold_logical_op() { test("a = b ? x && true : c", "a=b?x&&!0:c"); // folded, but not here. - test_without_compress_booleans("a = x || false ? b : c", "a=x||false?b:c"); - test_without_compress_booleans("a = x && true ? b : c", "a=x&&true?b:c"); + test_wcb("a = x || false ? b : c", "a=x||false?b:c"); + test_wcb("a = x && true ? b : c", "a=x&&true?b:c"); test("x = foo() || true || bar()", "x=foo()||!0"); test("x = foo() || true && bar()", "x=foo()||bar()"); @@ -291,8 +335,8 @@ fn test_fold_logical_op() { // An example would be if foo() is 1 (truthy) and bar() is 0 (falsey): // (1 && true) || 0 == true // 1 || 0 == 1, but true =/= 1 - test_without_compress_booleans("x=foo()&&true||bar()", "x=foo()&&true||bar()"); - test_without_compress_booleans("foo()&&true||bar()", "foo()&&true||bar()"); + test_wcb("x=foo()&&true||bar()", "x=foo()&&true||bar()"); + test_wcb("foo()&&true||bar()", "foo()&&true||bar()"); } #[test]