mirror of
https://github.com/danbulant/oxc
synced 2026-05-22 21:58:36 +00:00
refactor(regular_expression): Improve AST docs with refactoring may_contain_strings (#5665)
Follow up #5661
This commit is contained in:
parent
c6bbf94f4c
commit
2da42efb6f
6 changed files with 46 additions and 44 deletions
|
|
@ -1506,8 +1506,8 @@ const _: () = {
|
||||||
assert!(align_of::<CharacterClass>() == 8usize);
|
assert!(align_of::<CharacterClass>() == 8usize);
|
||||||
assert!(offset_of!(CharacterClass, span) == 0usize);
|
assert!(offset_of!(CharacterClass, span) == 0usize);
|
||||||
assert!(offset_of!(CharacterClass, negative) == 8usize);
|
assert!(offset_of!(CharacterClass, negative) == 8usize);
|
||||||
assert!(offset_of!(CharacterClass, kind) == 9usize);
|
assert!(offset_of!(CharacterClass, strings) == 9usize);
|
||||||
assert!(offset_of!(CharacterClass, strings) == 10usize);
|
assert!(offset_of!(CharacterClass, kind) == 10usize);
|
||||||
assert!(offset_of!(CharacterClass, body) == 16usize);
|
assert!(offset_of!(CharacterClass, body) == 16usize);
|
||||||
|
|
||||||
assert!(size_of::<CharacterClassContentsKind>() == 1usize);
|
assert!(size_of::<CharacterClassContentsKind>() == 1usize);
|
||||||
|
|
@ -3061,8 +3061,8 @@ const _: () = {
|
||||||
assert!(align_of::<CharacterClass>() == 4usize);
|
assert!(align_of::<CharacterClass>() == 4usize);
|
||||||
assert!(offset_of!(CharacterClass, span) == 0usize);
|
assert!(offset_of!(CharacterClass, span) == 0usize);
|
||||||
assert!(offset_of!(CharacterClass, negative) == 8usize);
|
assert!(offset_of!(CharacterClass, negative) == 8usize);
|
||||||
assert!(offset_of!(CharacterClass, kind) == 9usize);
|
assert!(offset_of!(CharacterClass, strings) == 9usize);
|
||||||
assert!(offset_of!(CharacterClass, strings) == 10usize);
|
assert!(offset_of!(CharacterClass, kind) == 10usize);
|
||||||
assert!(offset_of!(CharacterClass, body) == 12usize);
|
assert!(offset_of!(CharacterClass, body) == 12usize);
|
||||||
|
|
||||||
assert!(size_of::<CharacterClassContentsKind>() == 1usize);
|
assert!(size_of::<CharacterClassContentsKind>() == 1usize);
|
||||||
|
|
|
||||||
|
|
@ -213,7 +213,7 @@ pub enum CharacterClassEscapeKind {
|
||||||
pub struct UnicodePropertyEscape<'a> {
|
pub struct UnicodePropertyEscape<'a> {
|
||||||
pub span: Span,
|
pub span: Span,
|
||||||
pub negative: bool,
|
pub negative: bool,
|
||||||
/// `true` if `UnicodeSetsMode` and `name` matched unicode property of strings.
|
/// `true` if in `UnicodeSetsMode` and `name` matches a Unicode property of strings.
|
||||||
pub strings: bool,
|
pub strings: bool,
|
||||||
pub name: Atom<'a>,
|
pub name: Atom<'a>,
|
||||||
pub value: Option<Atom<'a>>,
|
pub value: Option<Atom<'a>>,
|
||||||
|
|
@ -237,8 +237,11 @@ pub struct Dot {
|
||||||
pub struct CharacterClass<'a> {
|
pub struct CharacterClass<'a> {
|
||||||
pub span: Span,
|
pub span: Span,
|
||||||
pub negative: bool,
|
pub negative: bool,
|
||||||
pub kind: CharacterClassContentsKind,
|
/// `true` if:
|
||||||
|
/// - `body` contains a [`UnicodePropertyEscape`], nested [`CharacterClass`] or [`ClassStringDisjunction`] whose `strings` is `true`
|
||||||
|
/// - and the rule selected by `kind` is satisfied (union: any operand; intersection: all operands; subtraction: the first operand)
|
||||||
pub strings: bool,
|
pub strings: bool,
|
||||||
|
pub kind: CharacterClassContentsKind,
|
||||||
pub body: Vec<'a, CharacterClassContents<'a>>,
|
pub body: Vec<'a, CharacterClassContents<'a>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -288,7 +291,7 @@ pub struct CharacterClassRange {
|
||||||
#[cfg_attr(feature = "serialize", derive(Serialize, Tsify))]
|
#[cfg_attr(feature = "serialize", derive(Serialize, Tsify))]
|
||||||
pub struct ClassStringDisjunction<'a> {
|
pub struct ClassStringDisjunction<'a> {
|
||||||
pub span: Span,
|
pub span: Span,
|
||||||
/// `true` if body is empty or contain [`ClassString`] which `strings` is `true`
|
/// `true` if body is empty or contains a [`ClassString`] whose `strings` is `true`.
|
||||||
pub strings: bool,
|
pub strings: bool,
|
||||||
pub body: Vec<'a, ClassString<'a>>,
|
pub body: Vec<'a, ClassString<'a>>,
|
||||||
}
|
}
|
||||||
|
|
@ -313,6 +316,7 @@ pub struct ClassString<'a> {
|
||||||
#[cfg_attr(feature = "serialize", derive(Serialize, Tsify))]
|
#[cfg_attr(feature = "serialize", derive(Serialize, Tsify))]
|
||||||
pub struct CapturingGroup<'a> {
|
pub struct CapturingGroup<'a> {
|
||||||
pub span: Span,
|
pub span: Span,
|
||||||
|
/// Group name to be referenced by [`NamedReference`].
|
||||||
pub name: Option<Atom<'a>>,
|
pub name: Option<Atom<'a>>,
|
||||||
pub body: Disjunction<'a>,
|
pub body: Disjunction<'a>,
|
||||||
}
|
}
|
||||||
|
|
@ -330,6 +334,8 @@ pub struct IgnoreGroup<'a> {
|
||||||
pub body: Disjunction<'a>,
|
pub body: Disjunction<'a>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Pattern modifiers in [`IgnoreGroup`].
|
||||||
|
/// e.g. `(?i:...)`, `(?-s:...)`
|
||||||
#[ast]
|
#[ast]
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
#[generate_derive(CloneIn, ContentEq, ContentHash)]
|
#[generate_derive(CloneIn, ContentEq, ContentHash)]
|
||||||
|
|
|
||||||
|
|
@ -727,7 +727,7 @@ impl<'a> PatternParser<'a> {
|
||||||
let (kind, body) = self.parse_class_contents()?;
|
let (kind, body) = self.parse_class_contents()?;
|
||||||
|
|
||||||
if self.reader.eat(']') {
|
if self.reader.eat(']') {
|
||||||
let strings = body.iter().any(PatternParser::may_contain_strings_in_class_contents);
|
let strings = PatternParser::may_contain_strings_in_class_contents(&kind, &body);
|
||||||
|
|
||||||
// [SS:EE] CharacterClass :: [^ ClassContents ]
|
// [SS:EE] CharacterClass :: [^ ClassContents ]
|
||||||
// It is a Syntax Error if MayContainStrings of the ClassContents is true.
|
// It is a Syntax Error if MayContainStrings of the ClassContents is true.
|
||||||
|
|
@ -1259,30 +1259,7 @@ impl<'a> PatternParser<'a> {
|
||||||
let (kind, body) = self.parse_class_contents()?;
|
let (kind, body) = self.parse_class_contents()?;
|
||||||
|
|
||||||
if self.reader.eat(']') {
|
if self.reader.eat(']') {
|
||||||
let strings = match kind {
|
let strings = PatternParser::may_contain_strings_in_class_contents(&kind, &body);
|
||||||
// MayContainStrings is true
|
|
||||||
// - if ClassContents is ClassUnion
|
|
||||||
// - && ClassUnion has ClassOperands
|
|
||||||
// - && at least 1 ClassOperand has MayContainStrings: true
|
|
||||||
ast::CharacterClassContentsKind::Union => {
|
|
||||||
body.iter().any(PatternParser::may_contain_strings_in_class_contents)
|
|
||||||
}
|
|
||||||
// MayContainStrings is true
|
|
||||||
// - if ClassContents is ClassIntersection
|
|
||||||
// - && ClassIntersection has ClassOperands
|
|
||||||
// - && all ClassOperands have MayContainStrings: true
|
|
||||||
ast::CharacterClassContentsKind::Intersection => {
|
|
||||||
body.iter().all(PatternParser::may_contain_strings_in_class_contents)
|
|
||||||
}
|
|
||||||
// MayContainStrings is true
|
|
||||||
// - if ClassContents is ClassSubtraction
|
|
||||||
// - && ClassSubtraction has ClassOperands
|
|
||||||
// - && the first ClassOperand has MayContainStrings: true
|
|
||||||
ast::CharacterClassContentsKind::Subtraction => body
|
|
||||||
.iter()
|
|
||||||
.next()
|
|
||||||
.map_or(false, PatternParser::may_contain_strings_in_class_contents),
|
|
||||||
};
|
|
||||||
|
|
||||||
// [SS:EE] NestedClass :: [^ ClassContents ]
|
// [SS:EE] NestedClass :: [^ ClassContents ]
|
||||||
// It is a Syntax Error if MayContainStrings of the ClassContents is true.
|
// It is a Syntax Error if MayContainStrings of the ClassContents is true.
|
||||||
|
|
@ -2163,27 +2140,46 @@ impl<'a> PatternParser<'a> {
|
||||||
|
|
||||||
// ---
|
// ---
|
||||||
|
|
||||||
fn may_contain_strings_in_class_contents(item: &ast::CharacterClassContents) -> bool {
|
fn may_contain_strings_in_class_contents(
|
||||||
match item {
|
kind: &ast::CharacterClassContentsKind,
|
||||||
|
body: &Vec<'a, ast::CharacterClassContents<'a>>,
|
||||||
|
) -> bool {
|
||||||
|
let may_contain_strings = |item: &ast::CharacterClassContents<'a>| match item {
|
||||||
// MayContainStrings is true
|
// MayContainStrings is true
|
||||||
// - if ClassContents contains UnicodePropertyValueExpression
|
// - if ClassContents contains UnicodePropertyValueExpression
|
||||||
// - && UnicodePropertyValueExpression is LoneUnicodePropertyNameOrValue
|
// - && UnicodePropertyValueExpression is LoneUnicodePropertyNameOrValue
|
||||||
// - && it is a binary property of strings (can be true only with `UnicodeSetsMode`)
|
// - && it is a binary property of strings (can be true only with `UnicodeSetsMode`)
|
||||||
ast::CharacterClassContents::UnicodePropertyEscape(unicode_property_escape) => {
|
ast::CharacterClassContents::UnicodePropertyEscape(item) => item.strings,
|
||||||
unicode_property_escape.strings
|
|
||||||
}
|
|
||||||
// MayContainStrings is true
|
// MayContainStrings is true
|
||||||
// - if ClassStringDisjunction is [empty]
|
// - if ClassStringDisjunction is [empty]
|
||||||
// - || if ClassStringDisjunction contains ClassString
|
// - || if ClassStringDisjunction contains ClassString
|
||||||
// - && ClassString is [empty]
|
// - && ClassString is [empty]
|
||||||
// - || ClassString contains 2 or more ClassSetCharacters
|
// - || ClassString contains 2 or more ClassSetCharacters
|
||||||
ast::CharacterClassContents::ClassStringDisjunction(class_string_disjunction) => {
|
ast::CharacterClassContents::ClassStringDisjunction(item) => item.strings,
|
||||||
class_string_disjunction.strings
|
|
||||||
}
|
|
||||||
// MayContainStrings is true
|
// MayContainStrings is true
|
||||||
// - if NestedClass has MayContainStrings: true
|
// - if NestedClass has MayContainStrings: true
|
||||||
ast::CharacterClassContents::NestedCharacterClass(nested_class) => nested_class.strings,
|
ast::CharacterClassContents::NestedCharacterClass(item) => item.strings,
|
||||||
_ => false,
|
_ => false,
|
||||||
|
};
|
||||||
|
|
||||||
|
match kind {
|
||||||
|
// MayContainStrings is true
|
||||||
|
// - if ClassContents is ClassUnion
|
||||||
|
// - && ClassUnion has ClassOperands
|
||||||
|
// - && at least 1 ClassOperand has MayContainStrings: true
|
||||||
|
ast::CharacterClassContentsKind::Union => body.iter().any(may_contain_strings),
|
||||||
|
// MayContainStrings is true
|
||||||
|
// - if ClassContents is ClassIntersection
|
||||||
|
// - && ClassIntersection has ClassOperands
|
||||||
|
// - && all ClassOperands have MayContainStrings: true
|
||||||
|
ast::CharacterClassContentsKind::Intersection => body.iter().all(may_contain_strings),
|
||||||
|
// MayContainStrings is true
|
||||||
|
// - if ClassContents is ClassSubtraction
|
||||||
|
// - && ClassSubtraction has ClassOperands
|
||||||
|
// - && the first ClassOperand has MayContainStrings: true
|
||||||
|
ast::CharacterClassContentsKind::Subtraction => {
|
||||||
|
body.iter().next().map_or(false, may_contain_strings)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -229,8 +229,8 @@ impl<'old_alloc, 'new_alloc> CloneIn<'new_alloc> for CharacterClass<'old_alloc>
|
||||||
CharacterClass {
|
CharacterClass {
|
||||||
span: CloneIn::clone_in(&self.span, allocator),
|
span: CloneIn::clone_in(&self.span, allocator),
|
||||||
negative: CloneIn::clone_in(&self.negative, allocator),
|
negative: CloneIn::clone_in(&self.negative, allocator),
|
||||||
kind: CloneIn::clone_in(&self.kind, allocator),
|
|
||||||
strings: CloneIn::clone_in(&self.strings, allocator),
|
strings: CloneIn::clone_in(&self.strings, allocator),
|
||||||
|
kind: CloneIn::clone_in(&self.kind, allocator),
|
||||||
body: CloneIn::clone_in(&self.body, allocator),
|
body: CloneIn::clone_in(&self.body, allocator),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -178,8 +178,8 @@ impl ContentEq for Dot {
|
||||||
impl<'a> ContentEq for CharacterClass<'a> {
|
impl<'a> ContentEq for CharacterClass<'a> {
|
||||||
fn content_eq(&self, other: &Self) -> bool {
|
fn content_eq(&self, other: &Self) -> bool {
|
||||||
ContentEq::content_eq(&self.negative, &other.negative)
|
ContentEq::content_eq(&self.negative, &other.negative)
|
||||||
&& ContentEq::content_eq(&self.kind, &other.kind)
|
|
||||||
&& ContentEq::content_eq(&self.strings, &other.strings)
|
&& ContentEq::content_eq(&self.strings, &other.strings)
|
||||||
|
&& ContentEq::content_eq(&self.kind, &other.kind)
|
||||||
&& ContentEq::content_eq(&self.body, &other.body)
|
&& ContentEq::content_eq(&self.body, &other.body)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -143,8 +143,8 @@ impl ContentHash for Dot {
|
||||||
impl<'a> ContentHash for CharacterClass<'a> {
|
impl<'a> ContentHash for CharacterClass<'a> {
|
||||||
fn content_hash<H: Hasher>(&self, state: &mut H) {
|
fn content_hash<H: Hasher>(&self, state: &mut H) {
|
||||||
ContentHash::content_hash(&self.negative, state);
|
ContentHash::content_hash(&self.negative, state);
|
||||||
ContentHash::content_hash(&self.kind, state);
|
|
||||||
ContentHash::content_hash(&self.strings, state);
|
ContentHash::content_hash(&self.strings, state);
|
||||||
|
ContentHash::content_hash(&self.kind, state);
|
||||||
ContentHash::content_hash(&self.body, state);
|
ContentHash::content_hash(&self.body, state);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue