mirror of
https://github.com/danbulant/oxc
synced 2026-05-19 12:19:15 +00:00
fix(regular_expression): Keep LegacyOctalEscape raw digits for to_string (#5692)
Fixes #5690. Changes: (1) update the `CharacterKind` enum, replacing `Octal` with `Octal1`, `Octal2` and `Octal3`; (2) stylistic refactoring of the `impl Display` code.
This commit is contained in:
parent
9e9435f03b
commit
304ce25446
6 changed files with 149 additions and 117 deletions
|
|
@ -174,10 +174,13 @@ pub enum CharacterKind {
|
|||
HexadecimalEscape = 1,
|
||||
Identifier = 2,
|
||||
Null = 3,
|
||||
Octal = 4,
|
||||
SingleEscape = 5,
|
||||
Symbol = 6,
|
||||
UnicodeEscape = 7,
|
||||
// To distinguish leading 0 cases like `\00` and `\000`
|
||||
Octal1 = 4,
|
||||
Octal2 = 5,
|
||||
Octal3 = 6,
|
||||
SingleEscape = 7,
|
||||
Symbol = 8,
|
||||
UnicodeEscape = 9,
|
||||
}
|
||||
|
||||
/// Character class.
|
||||
|
|
|
|||
|
|
@ -691,12 +691,21 @@ impl<'a> PatternParser<'a> {
|
|||
}));
|
||||
}
|
||||
|
||||
// e.g. \18
|
||||
// e.g. \1, \00, \000
|
||||
if !self.state.unicode_mode {
|
||||
if let Some(cp) = self.consume_legacy_octal_escape_sequence() {
|
||||
let span_end = self.reader.offset();
|
||||
// Keep original digits for `to_string()`
|
||||
// Otherwise `\0022` (octal `\002` + symbol `2`) would be printed as `\22` (octal `\22`)
|
||||
let digits = span_end - span_start - 1; // -1 for '\'
|
||||
|
||||
return Ok(Some(ast::Character {
|
||||
span: self.span_factory.create(span_start, self.reader.offset()),
|
||||
kind: ast::CharacterKind::Octal,
|
||||
span: self.span_factory.create(span_start, span_end),
|
||||
kind: (match digits {
|
||||
3 => ast::CharacterKind::Octal3,
|
||||
2 => ast::CharacterKind::Octal2,
|
||||
_ => ast::CharacterKind::Octal1,
|
||||
}),
|
||||
value: cp,
|
||||
}));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -16,24 +16,23 @@ impl<'a> Display for RegularExpression<'a> {
|
|||
impl Display for Flags {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
let mut flags = String::with_capacity(8);
|
||||
macro_rules! if_true_append {
|
||||
($flag:ident, $char:literal) => {
|
||||
if self.$flag {
|
||||
flags.push($char);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
// write flags in the order they are described in the `MDN`
|
||||
// <https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_expressions#advanced_searching_with_flags>
|
||||
if_true_append!(has_indices, 'd');
|
||||
if_true_append!(global, 'g');
|
||||
if_true_append!(ignore_case, 'i');
|
||||
if_true_append!(multiline, 'm');
|
||||
if_true_append!(dot_all, 's');
|
||||
if_true_append!(unicode, 'u');
|
||||
if_true_append!(unicode_sets, 'v');
|
||||
if_true_append!(sticky, 'y');
|
||||
for (v, ch) in [
|
||||
(self.has_indices, 'd'),
|
||||
(self.global, 'g'),
|
||||
(self.ignore_case, 'i'),
|
||||
(self.multiline, 'm'),
|
||||
(self.dot_all, 's'),
|
||||
(self.unicode, 'u'),
|
||||
(self.unicode_sets, 'v'),
|
||||
(self.sticky, 'y'),
|
||||
] {
|
||||
if v {
|
||||
flags.push(ch);
|
||||
}
|
||||
}
|
||||
|
||||
write!(f, "{flags}")
|
||||
}
|
||||
|
|
@ -60,14 +59,17 @@ impl<'a> Display for Alternative<'a> {
|
|||
None
|
||||
}
|
||||
}
|
||||
|
||||
write_join_with(f, "", &self.body, |iter| {
|
||||
let next = iter.next()?;
|
||||
let Some(next) = as_character(next) else { return Some(next.to_string()) };
|
||||
|
||||
let peek = iter.peek().and_then(|it| as_character(it));
|
||||
let (result, eat) = character_to_string(next, peek);
|
||||
if eat {
|
||||
_ = iter.next();
|
||||
iter.next();
|
||||
}
|
||||
|
||||
Some(result)
|
||||
})
|
||||
}
|
||||
|
|
@ -208,25 +210,30 @@ impl<'a> Display for CharacterClass<'a> {
|
|||
None
|
||||
}
|
||||
}
|
||||
|
||||
write!(f, "[")?;
|
||||
|
||||
if !self.body.is_empty() {
|
||||
if self.negative {
|
||||
write!(f, "^")?;
|
||||
}
|
||||
|
||||
let sep = match self.kind {
|
||||
CharacterClassContentsKind::Union => "",
|
||||
CharacterClassContentsKind::Subtraction => "--",
|
||||
CharacterClassContentsKind::Intersection => "&&",
|
||||
};
|
||||
|
||||
write_join_with(f, sep, &self.body, |iter| {
|
||||
let next = iter.next()?;
|
||||
let Some(next) = as_character(next) else { return Some(next.to_string()) };
|
||||
|
||||
let peek = iter.peek().and_then(|it| as_character(it));
|
||||
let (result, eat) = character_to_string(next, peek);
|
||||
if eat {
|
||||
_ = iter.next();
|
||||
iter.next();
|
||||
}
|
||||
|
||||
Some(result)
|
||||
})?;
|
||||
}
|
||||
|
|
@ -270,12 +277,14 @@ impl<'a> Display for ClassString<'a> {
|
|||
|
||||
impl<'a> Display for CapturingGroup<'a> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
let body = &self.body;
|
||||
write!(f, "(")?;
|
||||
|
||||
if let Some(name) = &self.name {
|
||||
write!(f, "(?<{name}>{body})")
|
||||
} else {
|
||||
write!(f, "({body})")
|
||||
write!(f, "?<{name}>")?;
|
||||
}
|
||||
write!(f, "{}", &self.body)?;
|
||||
|
||||
write!(f, ")")
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -299,12 +308,14 @@ impl<'a> Display for IgnoreGroup<'a> {
|
|||
}
|
||||
|
||||
write!(f, "(?")?;
|
||||
|
||||
if let Some(enabling) = &self.enabling_modifiers {
|
||||
write_flags(f, '\0', enabling)?;
|
||||
}
|
||||
if let Some(disabling) = &self.disabling_modifiers {
|
||||
write_flags(f, '-', disabling)?;
|
||||
}
|
||||
|
||||
write!(f, ":{})", self.body)
|
||||
}
|
||||
}
|
||||
|
|
@ -321,6 +332,88 @@ impl<'a> Display for NamedReference<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
// ---
|
||||
|
||||
fn character_to_string(
|
||||
this: &Character,
|
||||
peek: Option<&Character>,
|
||||
) -> (/* result */ String, /* true if peek should be consumed */ bool) {
|
||||
let cp = this.value;
|
||||
|
||||
if matches!(this.kind, CharacterKind::Symbol | CharacterKind::UnicodeEscape) {
|
||||
// Trail only
|
||||
if is_trail_surrogate(cp) {
|
||||
return (format!(r"\u{cp:X}"), false);
|
||||
}
|
||||
|
||||
if is_lead_surrogate(cp) {
|
||||
if let Some(peek) = peek.filter(|peek| is_trail_surrogate(peek.value)) {
|
||||
// Lead+Trail
|
||||
let cp = combine_surrogate_pair(cp, peek.value);
|
||||
let ch = char::from_u32(cp).expect("Invalid surrogate pair `Character`!");
|
||||
return (format!("{ch}"), true);
|
||||
}
|
||||
|
||||
// Lead only
|
||||
return (format!(r"\u{cp:X}"), false);
|
||||
}
|
||||
}
|
||||
|
||||
let ch = char::from_u32(cp).expect("Invalid `Character`!");
|
||||
let result = match this.kind {
|
||||
// Not a surrogate, like BMP, or all units in unicode mode
|
||||
CharacterKind::Symbol => format!("{ch}"),
|
||||
CharacterKind::ControlLetter => match ch {
|
||||
'\n' => r"\cJ".to_string(),
|
||||
'\r' => r"\cM".to_string(),
|
||||
'\t' => r"\cI".to_string(),
|
||||
_ => format!(r"\c{ch}"),
|
||||
},
|
||||
CharacterKind::Identifier => {
|
||||
format!(r"\{ch}")
|
||||
}
|
||||
CharacterKind::SingleEscape => match ch {
|
||||
'\n' => String::from(r"\n"),
|
||||
'\r' => String::from(r"\r"),
|
||||
'\t' => String::from(r"\t"),
|
||||
'\u{b}' => String::from(r"\v"),
|
||||
'\u{c}' => String::from(r"\f"),
|
||||
'\u{8}' => String::from(r"\b"),
|
||||
'\u{2D}' => String::from(r"\-"),
|
||||
_ => format!(r"\{ch}"),
|
||||
},
|
||||
CharacterKind::Null => String::from(r"\0"),
|
||||
CharacterKind::UnicodeEscape => {
|
||||
let hex = &format!("{cp:04X}");
|
||||
if hex.len() <= 4 {
|
||||
format!(r"\u{hex}")
|
||||
} else {
|
||||
format!(r"\u{{{hex}}}")
|
||||
}
|
||||
}
|
||||
CharacterKind::HexadecimalEscape => {
|
||||
let hex = &format!("{cp:02X}");
|
||||
format!(r"\x{hex}")
|
||||
}
|
||||
CharacterKind::Octal1 => {
|
||||
let octal = format!("{cp:o}");
|
||||
format!(r"\{octal}")
|
||||
}
|
||||
CharacterKind::Octal2 => {
|
||||
let octal = format!("{cp:02o}");
|
||||
format!(r"\{octal}")
|
||||
}
|
||||
CharacterKind::Octal3 => {
|
||||
let octal = format!("{cp:03o}");
|
||||
format!(r"\{octal}")
|
||||
}
|
||||
};
|
||||
|
||||
(result, false)
|
||||
}
|
||||
|
||||
// ---
|
||||
|
||||
fn write_join<S, I, E>(f: &mut fmt::Formatter<'_>, sep: S, items: I) -> fmt::Result
|
||||
where
|
||||
S: AsRef<str>,
|
||||
|
|
@ -351,78 +444,9 @@ where
|
|||
Ok(())
|
||||
}
|
||||
|
||||
fn character_to_string(
|
||||
this: &Character,
|
||||
peek: Option<&Character>,
|
||||
) -> (/* result */ String, /* true if peek should be consumed */ bool) {
|
||||
let cp = this.value;
|
||||
|
||||
if matches!(this.kind, CharacterKind::Symbol | CharacterKind::UnicodeEscape) {
|
||||
// Trail only
|
||||
if is_trail_surrogate(cp) {
|
||||
return (format!(r"\u{cp:X}"), false);
|
||||
}
|
||||
|
||||
if is_lead_surrogate(cp) {
|
||||
if let Some(peek) = peek.filter(|peek| is_trail_surrogate(peek.value)) {
|
||||
// Lead+Trail
|
||||
let cp = combine_surrogate_pair(cp, peek.value);
|
||||
let ch = char::from_u32(cp).expect("Invalid surrogate pair `Character`!");
|
||||
return (format!("{ch}"), true);
|
||||
}
|
||||
|
||||
// Lead only
|
||||
return (format!(r"\u{cp:X}"), false);
|
||||
}
|
||||
}
|
||||
|
||||
let ch = char::from_u32(cp).expect("Invalid `Character`!");
|
||||
let result = match this.kind {
|
||||
CharacterKind::ControlLetter => match ch {
|
||||
'\n' => r"\cJ".to_string(),
|
||||
'\r' => r"\cM".to_string(),
|
||||
'\t' => r"\cI".to_string(),
|
||||
_ => format!(r"\c{ch}"),
|
||||
},
|
||||
CharacterKind::Identifier => {
|
||||
format!(r"\{ch}")
|
||||
}
|
||||
// Not a surrogate, like BMP, or all units in unicode mode
|
||||
CharacterKind::Symbol => format!("{ch}"),
|
||||
CharacterKind::Null => String::from(r"\0"),
|
||||
CharacterKind::UnicodeEscape => {
|
||||
let hex = &format!("{cp:04X}");
|
||||
if hex.len() <= 4 {
|
||||
format!(r"\u{hex}")
|
||||
} else {
|
||||
format!(r"\u{{{hex}}}")
|
||||
}
|
||||
}
|
||||
CharacterKind::HexadecimalEscape => {
|
||||
let hex = &format!("{cp:02X}");
|
||||
format!(r"\x{hex}")
|
||||
}
|
||||
CharacterKind::Octal => {
|
||||
let octal = format!("{cp:o}");
|
||||
format!(r"\{octal}")
|
||||
}
|
||||
CharacterKind::SingleEscape => match ch {
|
||||
'\n' => String::from(r"\n"),
|
||||
'\r' => String::from(r"\r"),
|
||||
'\t' => String::from(r"\t"),
|
||||
'\u{b}' => String::from(r"\v"),
|
||||
'\u{c}' => String::from(r"\f"),
|
||||
'\u{8}' => String::from(r"\b"),
|
||||
'\u{2D}' => String::from(r"\-"),
|
||||
_ => format!(r"\{ch}"),
|
||||
},
|
||||
};
|
||||
|
||||
(result, false)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use crate::{Parser, ParserOptions};
|
||||
use oxc_allocator::Allocator;
|
||||
|
||||
type Case<'a> = (
|
||||
|
|
@ -505,23 +529,24 @@ mod test {
|
|||
(r"/\5/", None),
|
||||
(r"/\6/", None),
|
||||
(r"/\7/", None),
|
||||
// Remove leading zeroes --
|
||||
(r"/\00/", Some(r"/\0/")),
|
||||
(r"/\07/", Some(r"/\7/")),
|
||||
// --
|
||||
(r"/\00/", None),
|
||||
(r"/\07/", None),
|
||||
(r"/\30/", None),
|
||||
(r"/\37/", None),
|
||||
(r"/\40/", None),
|
||||
(r"/\47/", None),
|
||||
(r"/\70/", None),
|
||||
(r"/\77/", None),
|
||||
// Remove leading zeroes --
|
||||
(r"/\000/", Some(r"/\0/")),
|
||||
(r"/\007/", Some(r"/\7/")),
|
||||
(r"/\070/", Some(r"/\70/")),
|
||||
// --
|
||||
(r"/\000/", None),
|
||||
(r"/\007/", None),
|
||||
(r"/\070/", None),
|
||||
(r"/\300/", None),
|
||||
(r"/\307/", None),
|
||||
(r"/\370/", None),
|
||||
(r"/\377/", None),
|
||||
(r"/\0111/", None),
|
||||
(r"/\0022/", None),
|
||||
(r"/\0003/", None),
|
||||
(r"/(.)\1/", None),
|
||||
// Identity escape from: <https://github.com/tc39/test262/blob/d62fa93c8f9ce5e687c0bbaa5d2b59670ab2ff60/test/annexB/language/literals/regexp/identity-escape.js>
|
||||
(r"/\C/", None),
|
||||
|
|
@ -553,7 +578,6 @@ mod test {
|
|||
];
|
||||
|
||||
fn test_display(allocator: &Allocator, (source, expect): &Case) {
|
||||
use crate::{Parser, ParserOptions};
|
||||
let expect = expect.unwrap_or(source);
|
||||
let actual = Parser::new(allocator, source, ParserOptions::default()).parse().unwrap();
|
||||
assert_eq!(expect, actual.to_string());
|
||||
|
|
|
|||
|
|
@ -171,7 +171,9 @@ impl<'alloc> CloneIn<'alloc> for CharacterKind {
|
|||
Self::HexadecimalEscape => CharacterKind::HexadecimalEscape,
|
||||
Self::Identifier => CharacterKind::Identifier,
|
||||
Self::Null => CharacterKind::Null,
|
||||
Self::Octal => CharacterKind::Octal,
|
||||
Self::Octal1 => CharacterKind::Octal1,
|
||||
Self::Octal2 => CharacterKind::Octal2,
|
||||
Self::Octal3 => CharacterKind::Octal3,
|
||||
Self::SingleEscape => CharacterKind::SingleEscape,
|
||||
Self::Symbol => CharacterKind::Symbol,
|
||||
Self::UnicodeEscape => CharacterKind::UnicodeEscape,
|
||||
|
|
|
|||
|
|
@ -2,11 +2,8 @@ commit: d62fa93c
|
|||
|
||||
parser_test262 Summary:
|
||||
AST Parsed : 43765/43765 (100.00%)
|
||||
Positive Passed: 43764/43765 (100.00%)
|
||||
Positive Passed: 43765/43765 (100.00%)
|
||||
Negative Passed: 4237/4237 (100.00%)
|
||||
Expect to Parse: tasks/coverage/test262/test/annexB/language/literals/regexp/legacy-octal-escape.js
|
||||
|
||||
× Regular Expression mismatch: \03 \3
|
||||
|
||||
× '0'-prefixed octal literals and octal escape sequences are deprecated
|
||||
╭─[test262/test/annexB/language/expressions/template-literal/legacy-octal-escape-sequence-strict.js:19:4]
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@ commit: d62fa93c
|
|||
|
||||
semantic_test262 Summary:
|
||||
AST Parsed : 43765/43765 (100.00%)
|
||||
Positive Passed: 43564/43765 (99.54%)
|
||||
Positive Passed: 43565/43765 (99.54%)
|
||||
tasks/coverage/test262/test/annexB/language/function-code/if-decl-else-decl-a-func-block-scoping.js
|
||||
semantic error: Symbol scope ID mismatch:
|
||||
after transform: SymbolId(3): ScopeId(4294967294)
|
||||
|
|
@ -1119,9 +1119,6 @@ semantic error: Symbol scope ID mismatch:
|
|||
after transform: SymbolId(0): ScopeId(4294967294)
|
||||
rebuilt : SymbolId(0): ScopeId(4294967294)
|
||||
|
||||
tasks/coverage/test262/test/annexB/language/literals/regexp/legacy-octal-escape.js
|
||||
semantic error: Regular Expression mismatch: \03 \3
|
||||
|
||||
tasks/coverage/test262/test/language/module-code/eval-rqstd-once.js
|
||||
semantic error: Bindings mismatch:
|
||||
after transform: ScopeId(0): ["dflt1", "dflt2", "dflt3", "global", "ns1", "ns3"]
|
||||
|
|
|
|||
Loading…
Reference in a new issue