fix(regular_expression): Prevent panic on too large number (#5282)

Partially close #5257

Use `checked_(mul|add)` to prevent panic.
This commit is contained in:
leaysgur 2024-08-28 01:31:54 +00:00
parent d71f0ed2d0
commit cffce11620
2 changed files with 42 additions and 20 deletions

View file

@ -141,6 +141,9 @@ mod test {
("a{,", ParserOptions::default().with_unicode_mode()),
("x{9007199254740992}", ParserOptions::default()),
("x{9007199254740991,9007199254740992}", ParserOptions::default()),
("x{99999999999999999999999999999999999999999999999999}", ParserOptions::default()),
(r"\99999999999999999999999999999999999999999999999999", ParserOptions::default()),
(r"\u{FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF}", ParserOptions::default().with_unicode_mode()),
("(?=a", ParserOptions::default()),
("(?<!a", ParserOptions::default()),
(r"\c0", ParserOptions::default().with_unicode_mode()),

View file

@ -459,7 +459,7 @@ impl<'a> PatternParser<'a> {
let checkpoint = self.reader.checkpoint();
// DecimalEscape: \1 means indexed reference
if let Some(index) = self.consume_decimal_escape() {
if let Some(index) = self.consume_decimal_escape()? {
if self.state.unicode_mode {
// [SS:EE] AtomEscape :: DecimalEscape
// It is a Syntax Error if the CapturingGroupNumber of DecimalEscape is strictly greater than CountLeftCapturingParensWithin(the Pattern containing AtomEscape).
@ -1582,7 +1582,7 @@ impl<'a> PatternParser<'a> {
let span_start = self.reader.offset();
let checkpoint = self.reader.checkpoint();
if self.reader.eat('{') {
if let Some(min) = self.consume_decimal_digits() {
if let Some(min) = self.consume_decimal_digits()? {
if self.reader.eat('}') {
if MAX_QUANTIFIER < min {
return Err(OxcDiagnostic::error(
@ -1608,7 +1608,7 @@ impl<'a> PatternParser<'a> {
return Ok(Some(((min, None), is_greedy(&mut self.reader))));
}
if let Some(max) = self.consume_decimal_digits() {
if let Some(max) = self.consume_decimal_digits()? {
if self.reader.eat('}') {
if max < min {
// [SS:EE] QuantifierPrefix :: { DecimalDigits , DecimalDigits }
@ -1645,20 +1645,20 @@ impl<'a> PatternParser<'a> {
// DecimalEscape ::
// NonZeroDigit DecimalDigits[~Sep][opt] [lookahead ∉ DecimalDigit]
// ```
fn consume_decimal_escape(&mut self) -> Option<u32> {
fn consume_decimal_escape(&mut self) -> Result<Option<u32>> {
let checkpoint = self.reader.checkpoint();
if let Some(index) = self.consume_decimal_digits() {
if let Some(index) = self.consume_decimal_digits()? {
// \0 is CharacterEscape, not DecimalEscape
if index != 0 {
#[allow(clippy::cast_possible_truncation)]
return Some(index as u32);
return Ok(Some(index as u32));
}
self.reader.rewind(checkpoint);
}
None
Ok(None)
}
// ```
@ -1668,23 +1668,33 @@ impl<'a> PatternParser<'a> {
// [+Sep] DecimalDigits[+Sep] NumericLiteralSeparator DecimalDigit
// ```
// ([Sep] is disabled for `QuantifierPrefix` and `DecimalEscape`, skip it)
fn consume_decimal_digits(&mut self) -> Option<u64> {
fn consume_decimal_digits(&mut self) -> Result<Option<u64>> {
let span_start = self.reader.offset();
let checkpoint = self.reader.checkpoint();
let mut value = 0;
let mut value: u64 = 0;
while let Some(cp) = self.reader.peek().filter(|&cp| unicode::is_decimal_digit(cp)) {
// `- '0' as u32`: convert code point to digit
#[allow(clippy::cast_lossless)]
let d = (cp - '0' as u32) as u64;
value = (10 * value) + d;
self.reader.advance();
// To prevent panic on overflow cases like `\999999999999999999999`, `a{999999999999999999999}`
if let Some(v) = value.checked_mul(10).and_then(|v| v.checked_add(d)) {
value = v;
self.reader.advance();
} else {
return Err(OxcDiagnostic::error(
"Invalid regular expression: Number is too large in decimal digits",
)
.with_label(self.span_factory.create(span_start, self.reader.offset())));
}
}
if self.reader.checkpoint() != checkpoint {
return Some(value);
return Ok(Some(value));
}
None
Ok(None)
}
// ```
@ -2014,7 +2024,7 @@ impl<'a> PatternParser<'a> {
if self.reader.eat('{') {
if let Some(hex_digits) =
self.consume_hex_digits().filter(|&cp| unicode::is_valid_unicode(cp))
self.consume_hex_digits()?.filter(|&cp| unicode::is_valid_unicode(cp))
{
if self.reader.eat('}') {
return Ok(Some(hex_digits));
@ -2150,20 +2160,29 @@ impl<'a> PatternParser<'a> {
Some(cp)
}
fn consume_hex_digits(&mut self) -> Option<u32> {
fn consume_hex_digits(&mut self) -> Result<Option<u32>> {
let span_start = self.reader.offset();
let checkpoint = self.reader.checkpoint();
let mut value = 0;
let mut value: u32 = 0;
while let Some(hex) = self.reader.peek().and_then(unicode::map_hex_digit) {
value = (16 * value) + hex;
self.reader.advance();
// To prevent panic on overflow cases like `\u{FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF}`
if let Some(v) = value.checked_mul(16).and_then(|v| v.checked_add(hex)) {
value = v;
self.reader.advance();
} else {
return Err(OxcDiagnostic::error(
"Invalid regular expression: Number is too large in hex digits",
)
.with_label(self.span_factory.create(span_start, self.reader.offset())));
}
}
if self.reader.checkpoint() != checkpoint {
return Some(value);
return Ok(Some(value));
}
None
Ok(None)
}
fn consume_fixed_hex_digits(&mut self, len: usize) -> Option<u32> {