mirror of
https://github.com/danbulant/oxc
synced 2026-05-19 04:08:41 +00:00
fix(regular_expression): Report more MayContainStrings error in (nested)class (#5661)
Fixes #5632
This commit is contained in:
parent
febb29f7f5
commit
0511d55aa8
8 changed files with 113 additions and 59 deletions
|
|
@ -1507,6 +1507,7 @@ const _: () = {
|
|||
assert!(offset_of!(CharacterClass, span) == 0usize);
|
||||
assert!(offset_of!(CharacterClass, negative) == 8usize);
|
||||
assert!(offset_of!(CharacterClass, kind) == 9usize);
|
||||
assert!(offset_of!(CharacterClass, strings) == 10usize);
|
||||
assert!(offset_of!(CharacterClass, body) == 16usize);
|
||||
|
||||
assert!(size_of::<CharacterClassContentsKind>() == 1usize);
|
||||
|
|
@ -3061,6 +3062,7 @@ const _: () = {
|
|||
assert!(offset_of!(CharacterClass, span) == 0usize);
|
||||
assert!(offset_of!(CharacterClass, negative) == 8usize);
|
||||
assert!(offset_of!(CharacterClass, kind) == 9usize);
|
||||
assert!(offset_of!(CharacterClass, strings) == 10usize);
|
||||
assert!(offset_of!(CharacterClass, body) == 12usize);
|
||||
|
||||
assert!(size_of::<CharacterClassContentsKind>() == 1usize);
|
||||
|
|
|
|||
|
|
@ -238,6 +238,7 @@ pub struct CharacterClass<'a> {
|
|||
pub span: Span,
|
||||
pub negative: bool,
|
||||
pub kind: CharacterClassContentsKind,
|
||||
pub strings: bool,
|
||||
pub body: Vec<'a, CharacterClassContents<'a>>,
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -221,6 +221,7 @@ mod test {
|
|||
(r"\P{Basic_Emoji}", ParserOptions::default().with_unicode_sets_mode(), true),
|
||||
(r"[^\p{Basic_Emoji}]", ParserOptions::default().with_unicode_sets_mode(), true),
|
||||
(r"[[^\p{Basic_Emoji}]]", ParserOptions::default().with_unicode_sets_mode(), true),
|
||||
(r"[^\q{}]", ParserOptions::default().with_unicode_sets_mode(), true),
|
||||
(r"[[^\q{}]]", ParserOptions::default().with_unicode_sets_mode(), true),
|
||||
(r"[[^\q{ng}]]", ParserOptions::default().with_unicode_sets_mode(), true),
|
||||
(r"[[^\q{a|}]]", ParserOptions::default().with_unicode_sets_mode(), true),
|
||||
|
|
@ -237,6 +238,12 @@ mod test {
|
|||
(r"[[^\q{ng}--\q{o|k}]]", ParserOptions::default().with_unicode_sets_mode(), true),
|
||||
(r"[[^\q{o|k}--\q{ng}]]", ParserOptions::default().with_unicode_sets_mode(), false),
|
||||
(r"[[z-a]]", ParserOptions::default().with_unicode_sets_mode(), true),
|
||||
(r"[[[[[^[[[[\q{ng}]]]]]]]]]", ParserOptions::default().with_unicode_sets_mode(), true),
|
||||
(
|
||||
r"[^[[[[[[[[[[[[[[[[\q{ng}]]]]]]]]]]]]]]]]]",
|
||||
ParserOptions::default().with_unicode_sets_mode(),
|
||||
true,
|
||||
),
|
||||
] {
|
||||
assert_eq!(
|
||||
PatternParser::new(&allocator, source_text, *options).parse().is_err(),
|
||||
|
|
|
|||
|
|
@ -727,20 +727,11 @@ impl<'a> PatternParser<'a> {
|
|||
let (kind, body) = self.parse_class_contents()?;
|
||||
|
||||
if self.reader.eat(']') {
|
||||
let strings = body.iter().any(PatternParser::may_contain_strings_in_class_contents);
|
||||
|
||||
// [SS:EE] CharacterClass :: [^ ClassContents ]
|
||||
// It is a Syntax Error if MayContainStrings of the ClassContents is true.
|
||||
if negative
|
||||
&& body.iter().any(|item| match item {
|
||||
// MayContainStrings is true
|
||||
// - if ClassContents contains UnicodePropertyValueExpression
|
||||
// - and the UnicodePropertyValueExpression is LoneUnicodePropertyNameOrValue
|
||||
// - and it is binary property of strings(can be true only with `UnicodeSetsMode`)
|
||||
ast::CharacterClassContents::UnicodePropertyEscape(
|
||||
unicode_property_escape,
|
||||
) => unicode_property_escape.strings,
|
||||
_ => false,
|
||||
})
|
||||
{
|
||||
if negative && strings {
|
||||
return Err(diagnostics::invalid_character_class(
|
||||
self.span_factory.create(span_start, self.reader.offset()),
|
||||
));
|
||||
|
|
@ -750,6 +741,7 @@ impl<'a> PatternParser<'a> {
|
|||
span: self.span_factory.create(span_start, self.reader.offset()),
|
||||
negative,
|
||||
kind,
|
||||
strings,
|
||||
body,
|
||||
}));
|
||||
}
|
||||
|
|
@ -1267,55 +1259,37 @@ impl<'a> PatternParser<'a> {
|
|||
let (kind, body) = self.parse_class_contents()?;
|
||||
|
||||
if self.reader.eat(']') {
|
||||
let strings = match kind {
|
||||
// MayContainStrings is true
|
||||
// - if ClassContents is ClassUnion
|
||||
// - && ClassUnion has ClassOperands
|
||||
// - && at least 1 ClassOperand has MayContainStrings: true
|
||||
ast::CharacterClassContentsKind::Union => {
|
||||
body.iter().any(PatternParser::may_contain_strings_in_class_contents)
|
||||
}
|
||||
// MayContainStrings is true
|
||||
// - if ClassContents is ClassIntersection
|
||||
// - && ClassIntersection has ClassOperands
|
||||
// - && all ClassOperands have MayContainStrings: true
|
||||
ast::CharacterClassContentsKind::Intersection => {
|
||||
body.iter().all(PatternParser::may_contain_strings_in_class_contents)
|
||||
}
|
||||
// MayContainStrings is true
|
||||
// - if ClassContents is ClassSubtraction
|
||||
// - && ClassSubtraction has ClassOperands
|
||||
// - && the first ClassOperand has MayContainStrings: true
|
||||
ast::CharacterClassContentsKind::Subtraction => body
|
||||
.iter()
|
||||
.next()
|
||||
.map_or(false, PatternParser::may_contain_strings_in_class_contents),
|
||||
};
|
||||
|
||||
// [SS:EE] NestedClass :: [^ ClassContents ]
|
||||
// It is a Syntax Error if MayContainStrings of the ClassContents is true.
|
||||
if negative {
|
||||
let may_contain_strings = |item: &ast::CharacterClassContents| match item {
|
||||
// MayContainStrings is true
|
||||
// - if ClassContents contains UnicodePropertyValueExpression
|
||||
// - && UnicodePropertyValueExpression is LoneUnicodePropertyNameOrValue
|
||||
// - && it is binary property of strings(can be true only with `UnicodeSetsMode`)
|
||||
ast::CharacterClassContents::UnicodePropertyEscape(
|
||||
unicode_property_escape,
|
||||
) => unicode_property_escape.strings,
|
||||
// MayContainStrings is true
|
||||
// - if ClassStringDisjunction is [empty]
|
||||
// - || if ClassStringDisjunction contains ClassString
|
||||
// - && ClassString is [empty]
|
||||
// - || ClassString contains 2 more ClassSetCharacters
|
||||
ast::CharacterClassContents::ClassStringDisjunction(
|
||||
class_string_disjunction,
|
||||
) => class_string_disjunction.strings,
|
||||
_ => false,
|
||||
};
|
||||
|
||||
if match kind {
|
||||
// MayContainStrings is true
|
||||
// - if ClassContents is ClassUnion
|
||||
// - && ClassUnion has ClassOperands
|
||||
// - && at least 1 ClassOperand has MayContainStrings: true
|
||||
ast::CharacterClassContentsKind::Union => {
|
||||
body.iter().any(may_contain_strings)
|
||||
}
|
||||
// MayContainStrings is true
|
||||
// - if ClassContents is ClassIntersection
|
||||
// - && ClassIntersection has ClassOperands
|
||||
// - && all ClassOperands have MayContainStrings: true
|
||||
ast::CharacterClassContentsKind::Intersection => {
|
||||
body.iter().all(may_contain_strings)
|
||||
}
|
||||
// MayContainStrings is true
|
||||
// - if ClassContents is ClassSubtraction
|
||||
// - && ClassSubtraction has ClassOperands
|
||||
// - && the first ClassOperand has MayContainStrings: true
|
||||
ast::CharacterClassContentsKind::Subtraction => {
|
||||
body.iter().next().map_or(false, may_contain_strings)
|
||||
}
|
||||
} {
|
||||
return Err(diagnostics::character_class_contents_invalid_operands(
|
||||
self.span_factory.create(span_start, self.reader.offset()),
|
||||
));
|
||||
}
|
||||
if negative && strings {
|
||||
return Err(diagnostics::character_class_contents_invalid_operands(
|
||||
self.span_factory.create(span_start, self.reader.offset()),
|
||||
));
|
||||
}
|
||||
|
||||
return Ok(Some(ast::CharacterClassContents::NestedCharacterClass(Box::new_in(
|
||||
|
|
@ -1323,6 +1297,7 @@ impl<'a> PatternParser<'a> {
|
|||
span: self.span_factory.create(span_start, self.reader.offset()),
|
||||
negative,
|
||||
kind,
|
||||
strings,
|
||||
body,
|
||||
},
|
||||
self.allocator,
|
||||
|
|
@ -2185,4 +2160,30 @@ impl<'a> PatternParser<'a> {
|
|||
|
||||
Some(value)
|
||||
}
|
||||
|
||||
// ---
|
||||
|
||||
fn may_contain_strings_in_class_contents(item: &ast::CharacterClassContents) -> bool {
|
||||
match item {
|
||||
// MayContainStrings is true
|
||||
// - if ClassContents contains UnicodePropertyValueExpression
|
||||
// - && UnicodePropertyValueExpression is LoneUnicodePropertyNameOrValue
|
||||
// - && it is binary property of strings(can be true only with `UnicodeSetsMode`)
|
||||
ast::CharacterClassContents::UnicodePropertyEscape(unicode_property_escape) => {
|
||||
unicode_property_escape.strings
|
||||
}
|
||||
// MayContainStrings is true
|
||||
// - if ClassStringDisjunction is [empty]
|
||||
// - || if ClassStringDisjunction contains ClassString
|
||||
// - && ClassString is [empty]
|
||||
// - || ClassString contains 2 more ClassSetCharacters
|
||||
ast::CharacterClassContents::ClassStringDisjunction(class_string_disjunction) => {
|
||||
class_string_disjunction.strings
|
||||
}
|
||||
// MayContainStrings is true
|
||||
// - if NestedClass has MayContainStrings: true
|
||||
ast::CharacterClassContents::NestedCharacterClass(nested_class) => nested_class.strings,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -230,6 +230,7 @@ impl<'old_alloc, 'new_alloc> CloneIn<'new_alloc> for CharacterClass<'old_alloc>
|
|||
span: CloneIn::clone_in(&self.span, allocator),
|
||||
negative: CloneIn::clone_in(&self.negative, allocator),
|
||||
kind: CloneIn::clone_in(&self.kind, allocator),
|
||||
strings: CloneIn::clone_in(&self.strings, allocator),
|
||||
body: CloneIn::clone_in(&self.body, allocator),
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -179,6 +179,7 @@ impl<'a> ContentEq for CharacterClass<'a> {
|
|||
fn content_eq(&self, other: &Self) -> bool {
|
||||
ContentEq::content_eq(&self.negative, &other.negative)
|
||||
&& ContentEq::content_eq(&self.kind, &other.kind)
|
||||
&& ContentEq::content_eq(&self.strings, &other.strings)
|
||||
&& ContentEq::content_eq(&self.body, &other.body)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -144,6 +144,7 @@ impl<'a> ContentHash for CharacterClass<'a> {
|
|||
fn content_hash<H: Hasher>(&self, state: &mut H) {
|
||||
ContentHash::content_hash(&self.negative, state);
|
||||
ContentHash::content_hash(&self.kind, state);
|
||||
ContentHash::content_hash(&self.strings, state);
|
||||
ContentHash::content_hash(&self.body, state);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -10957,6 +10957,46 @@ Expect to Parse: tasks/coverage/typescript/tests/cases/conformance/salsa/private
|
|||
43 │ /[^\p{Emoji}[[\p{RGI_Emoji}]]][^\p{Emoji}--[[\p{RGI_Emoji}]]][^\p{Emoji}&&[[\p{RGI_Emoji}]]]/v,
|
||||
╰────
|
||||
|
||||
× Invalid regular expression: Invalid character class with strings unicode property
|
||||
╭─[typescript/tests/cases/compiler/regularExpressionScanning.ts:43:3]
|
||||
42 │ /[^\p{RGI_Emoji}\q{foo}][^\p{RGI_Emoji}--\q{foo}][^\p{RGI_Emoji}&&\q{foo}]/v,
|
||||
43 │ /[^\p{Emoji}[[\p{RGI_Emoji}]]][^\p{Emoji}--[[\p{RGI_Emoji}]]][^\p{Emoji}&&[[\p{RGI_Emoji}]]]/v,
|
||||
· ─────────────────────────────
|
||||
44 │ /[^[[\p{RGI_Emoji}]]\p{Emoji}][^[[\p{RGI_Emoji}]]--\p{Emoji}][^[[\p{RGI_Emoji}]]&&\p{Emoji}]/v,
|
||||
╰────
|
||||
|
||||
× Invalid regular expression: Invalid character class with strings unicode property
|
||||
╭─[typescript/tests/cases/compiler/regularExpressionScanning.ts:44:3]
|
||||
43 │ /[^\p{Emoji}[[\p{RGI_Emoji}]]][^\p{Emoji}--[[\p{RGI_Emoji}]]][^\p{Emoji}&&[[\p{RGI_Emoji}]]]/v,
|
||||
44 │ /[^[[\p{RGI_Emoji}]]\p{Emoji}][^[[\p{RGI_Emoji}]]--\p{Emoji}][^[[\p{RGI_Emoji}]]&&\p{Emoji}]/v,
|
||||
· ─────────────────────────────
|
||||
45 │ /[^[[\p{RGI_Emoji}]]\q{foo}][^[[\p{RGI_Emoji}]]--\q{foo}][^[[\p{RGI_Emoji}]]&&\q{foo}]/v,
|
||||
╰────
|
||||
|
||||
× Invalid regular expression: Invalid character class with strings unicode property
|
||||
╭─[typescript/tests/cases/compiler/regularExpressionScanning.ts:45:3]
|
||||
44 │ /[^[[\p{RGI_Emoji}]]\p{Emoji}][^[[\p{RGI_Emoji}]]--\p{Emoji}][^[[\p{RGI_Emoji}]]&&\p{Emoji}]/v,
|
||||
45 │ /[^[[\p{RGI_Emoji}]]\q{foo}][^[[\p{RGI_Emoji}]]--\q{foo}][^[[\p{RGI_Emoji}]]&&\q{foo}]/v,
|
||||
· ───────────────────────────
|
||||
46 │ /[^\q{foo|bar|baz}--\q{foo}--\q{bar}--\q{baz}][^\p{L}--\q{foo}--[\q{bar}]--[^[\q{baz}]]]/v,
|
||||
╰────
|
||||
|
||||
× Invalid regular expression: Invalid character class with strings unicode property
|
||||
╭─[typescript/tests/cases/compiler/regularExpressionScanning.ts:46:3]
|
||||
45 │ /[^[[\p{RGI_Emoji}]]\q{foo}][^[[\p{RGI_Emoji}]]--\q{foo}][^[[\p{RGI_Emoji}]]&&\q{foo}]/v,
|
||||
46 │ /[^\q{foo|bar|baz}--\q{foo}--\q{bar}--\q{baz}][^\p{L}--\q{foo}--[\q{bar}]--[^[\q{baz}]]]/v,
|
||||
· ─────────────────────────────────────────────
|
||||
47 │ /[^[[\q{foo|bar|baz}]]--\q{foo}--\q{bar}--\q{baz}][^[^[^\p{L}]]--\q{foo}--[\q{bar}]--[^[\q{baz}]]]/v,
|
||||
╰────
|
||||
|
||||
× Invalid regular expression: Invalid character class with strings unicode property
|
||||
╭─[typescript/tests/cases/compiler/regularExpressionScanning.ts:47:3]
|
||||
46 │ /[^\q{foo|bar|baz}--\q{foo}--\q{bar}--\q{baz}][^\p{L}--\q{foo}--[\q{bar}]--[^[\q{baz}]]]/v,
|
||||
47 │ /[^[[\q{foo|bar|baz}]]--\q{foo}--\q{bar}--\q{baz}][^[^[^\p{L}]]--\q{foo}--[\q{bar}]--[^[\q{baz}]]]/v,
|
||||
· ─────────────────────────────────────────────────
|
||||
48 │ ];
|
||||
╰────
|
||||
|
||||
× The 'u' and 'v' regular expression flags cannot be enabled at the same time
|
||||
╭─[typescript/tests/cases/compiler/regularExpressionScanning.ts:3:2]
|
||||
2 │ // Flags
|
||||
|
|
|
|||
Loading…
Reference in a new issue