refactor(regular_expression): Improve AST docs with refactoring may_contain_strings (#5665)

Follow up #5661
This commit is contained in:
leaysgur 2024-09-10 07:32:28 +00:00
parent c6bbf94f4c
commit 2da42efb6f
6 changed files with 46 additions and 44 deletions

View file

@ -1506,8 +1506,8 @@ const _: () = {
assert!(align_of::<CharacterClass>() == 8usize); assert!(align_of::<CharacterClass>() == 8usize);
assert!(offset_of!(CharacterClass, span) == 0usize); assert!(offset_of!(CharacterClass, span) == 0usize);
assert!(offset_of!(CharacterClass, negative) == 8usize); assert!(offset_of!(CharacterClass, negative) == 8usize);
assert!(offset_of!(CharacterClass, kind) == 9usize); assert!(offset_of!(CharacterClass, strings) == 9usize);
assert!(offset_of!(CharacterClass, strings) == 10usize); assert!(offset_of!(CharacterClass, kind) == 10usize);
assert!(offset_of!(CharacterClass, body) == 16usize); assert!(offset_of!(CharacterClass, body) == 16usize);
assert!(size_of::<CharacterClassContentsKind>() == 1usize); assert!(size_of::<CharacterClassContentsKind>() == 1usize);
@ -3061,8 +3061,8 @@ const _: () = {
assert!(align_of::<CharacterClass>() == 4usize); assert!(align_of::<CharacterClass>() == 4usize);
assert!(offset_of!(CharacterClass, span) == 0usize); assert!(offset_of!(CharacterClass, span) == 0usize);
assert!(offset_of!(CharacterClass, negative) == 8usize); assert!(offset_of!(CharacterClass, negative) == 8usize);
assert!(offset_of!(CharacterClass, kind) == 9usize); assert!(offset_of!(CharacterClass, strings) == 9usize);
assert!(offset_of!(CharacterClass, strings) == 10usize); assert!(offset_of!(CharacterClass, kind) == 10usize);
assert!(offset_of!(CharacterClass, body) == 12usize); assert!(offset_of!(CharacterClass, body) == 12usize);
assert!(size_of::<CharacterClassContentsKind>() == 1usize); assert!(size_of::<CharacterClassContentsKind>() == 1usize);

View file

@ -213,7 +213,7 @@ pub enum CharacterClassEscapeKind {
pub struct UnicodePropertyEscape<'a> { pub struct UnicodePropertyEscape<'a> {
pub span: Span, pub span: Span,
pub negative: bool, pub negative: bool,
/// `true` if `UnicodeSetsMode` and `name` matched unicode property of strings. /// `true` if `UnicodeSetsMode` and `name` matches unicode property of strings.
pub strings: bool, pub strings: bool,
pub name: Atom<'a>, pub name: Atom<'a>,
pub value: Option<Atom<'a>>, pub value: Option<Atom<'a>>,
@ -237,8 +237,11 @@ pub struct Dot {
pub struct CharacterClass<'a> { pub struct CharacterClass<'a> {
pub span: Span, pub span: Span,
pub negative: bool, pub negative: bool,
pub kind: CharacterClassContentsKind, /// `true` if:
/// - `body` contains [`UnicodePropertyEscape`], nested [`CharacterClass`] or [`ClassStringDisjunction`] whose `strings` is `true`
/// - and those operands satisfy the rule selected by `kind` (union: any operand, intersection: all operands, subtraction: the first operand)
pub strings: bool, pub strings: bool,
pub kind: CharacterClassContentsKind,
pub body: Vec<'a, CharacterClassContents<'a>>, pub body: Vec<'a, CharacterClassContents<'a>>,
} }
@ -288,7 +291,7 @@ pub struct CharacterClassRange {
#[cfg_attr(feature = "serialize", derive(Serialize, Tsify))] #[cfg_attr(feature = "serialize", derive(Serialize, Tsify))]
pub struct ClassStringDisjunction<'a> { pub struct ClassStringDisjunction<'a> {
pub span: Span, pub span: Span,
/// `true` if body is empty or contain [`ClassString`] which `strings` is `true` /// `true` if body is empty or contains [`ClassString`] which `strings` is `true`.
pub strings: bool, pub strings: bool,
pub body: Vec<'a, ClassString<'a>>, pub body: Vec<'a, ClassString<'a>>,
} }
@ -313,6 +316,7 @@ pub struct ClassString<'a> {
#[cfg_attr(feature = "serialize", derive(Serialize, Tsify))] #[cfg_attr(feature = "serialize", derive(Serialize, Tsify))]
pub struct CapturingGroup<'a> { pub struct CapturingGroup<'a> {
pub span: Span, pub span: Span,
/// Group name to be referenced by [`NamedReference`].
pub name: Option<Atom<'a>>, pub name: Option<Atom<'a>>,
pub body: Disjunction<'a>, pub body: Disjunction<'a>,
} }
@ -330,6 +334,8 @@ pub struct IgnoreGroup<'a> {
pub body: Disjunction<'a>, pub body: Disjunction<'a>,
} }
/// Pattern modifiers in [`IgnoreGroup`].
/// e.g. `(?i:...)`, `(?-s:...)`
#[ast] #[ast]
#[derive(Debug)] #[derive(Debug)]
#[generate_derive(CloneIn, ContentEq, ContentHash)] #[generate_derive(CloneIn, ContentEq, ContentHash)]

View file

@ -727,7 +727,7 @@ impl<'a> PatternParser<'a> {
let (kind, body) = self.parse_class_contents()?; let (kind, body) = self.parse_class_contents()?;
if self.reader.eat(']') { if self.reader.eat(']') {
let strings = body.iter().any(PatternParser::may_contain_strings_in_class_contents); let strings = PatternParser::may_contain_strings_in_class_contents(&kind, &body);
// [SS:EE] CharacterClass :: [^ ClassContents ] // [SS:EE] CharacterClass :: [^ ClassContents ]
// It is a Syntax Error if MayContainStrings of the ClassContents is true. // It is a Syntax Error if MayContainStrings of the ClassContents is true.
@ -1259,30 +1259,7 @@ impl<'a> PatternParser<'a> {
let (kind, body) = self.parse_class_contents()?; let (kind, body) = self.parse_class_contents()?;
if self.reader.eat(']') { if self.reader.eat(']') {
let strings = match kind { let strings = PatternParser::may_contain_strings_in_class_contents(&kind, &body);
// MayContainStrings is true
// - if ClassContents is ClassUnion
// - && ClassUnion has ClassOperands
// - && at least 1 ClassOperand has MayContainStrings: true
ast::CharacterClassContentsKind::Union => {
body.iter().any(PatternParser::may_contain_strings_in_class_contents)
}
// MayContainStrings is true
// - if ClassContents is ClassIntersection
// - && ClassIntersection has ClassOperands
// - && all ClassOperands have MayContainStrings: true
ast::CharacterClassContentsKind::Intersection => {
body.iter().all(PatternParser::may_contain_strings_in_class_contents)
}
// MayContainStrings is true
// - if ClassContents is ClassSubtraction
// - && ClassSubtraction has ClassOperands
// - && the first ClassOperand has MayContainStrings: true
ast::CharacterClassContentsKind::Subtraction => body
.iter()
.next()
.map_or(false, PatternParser::may_contain_strings_in_class_contents),
};
// [SS:EE] NestedClass :: [^ ClassContents ] // [SS:EE] NestedClass :: [^ ClassContents ]
// It is a Syntax Error if MayContainStrings of the ClassContents is true. // It is a Syntax Error if MayContainStrings of the ClassContents is true.
@ -2163,27 +2140,46 @@ impl<'a> PatternParser<'a> {
// --- // ---
fn may_contain_strings_in_class_contents(item: &ast::CharacterClassContents) -> bool { fn may_contain_strings_in_class_contents(
match item { kind: &ast::CharacterClassContentsKind,
body: &Vec<'a, ast::CharacterClassContents<'a>>,
) -> bool {
let may_contain_strings = |item: &ast::CharacterClassContents<'a>| match item {
// MayContainStrings is true // MayContainStrings is true
// - if ClassContents contains UnicodePropertyValueExpression // - if ClassContents contains UnicodePropertyValueExpression
// - && UnicodePropertyValueExpression is LoneUnicodePropertyNameOrValue // - && UnicodePropertyValueExpression is LoneUnicodePropertyNameOrValue
// - && it is binary property of strings(can be true only with `UnicodeSetsMode`) // - && it is binary property of strings(can be true only with `UnicodeSetsMode`)
ast::CharacterClassContents::UnicodePropertyEscape(unicode_property_escape) => { ast::CharacterClassContents::UnicodePropertyEscape(item) => item.strings,
unicode_property_escape.strings
}
// MayContainStrings is true // MayContainStrings is true
// - if ClassStringDisjunction is [empty] // - if ClassStringDisjunction is [empty]
// - || if ClassStringDisjunction contains ClassString // - || if ClassStringDisjunction contains ClassString
// - && ClassString is [empty] // - && ClassString is [empty]
// - || ClassString contains 2 more ClassSetCharacters // - || ClassString contains 2 more ClassSetCharacters
ast::CharacterClassContents::ClassStringDisjunction(class_string_disjunction) => { ast::CharacterClassContents::ClassStringDisjunction(item) => item.strings,
class_string_disjunction.strings
}
// MayContainStrings is true // MayContainStrings is true
// - if NestedClass has MayContainStrings: true // - if NestedClass has MayContainStrings: true
ast::CharacterClassContents::NestedCharacterClass(nested_class) => nested_class.strings, ast::CharacterClassContents::NestedCharacterClass(item) => item.strings,
_ => false, _ => false,
};
match kind {
// MayContainStrings is true
// - if ClassContents is ClassUnion
// - && ClassUnion has ClassOperands
// - && at least 1 ClassOperand has MayContainStrings: true
ast::CharacterClassContentsKind::Union => body.iter().any(may_contain_strings),
// MayContainStrings is true
// - if ClassContents is ClassIntersection
// - && ClassIntersection has ClassOperands
// - && all ClassOperands have MayContainStrings: true
ast::CharacterClassContentsKind::Intersection => body.iter().all(may_contain_strings),
// MayContainStrings is true
// - if ClassContents is ClassSubtraction
// - && ClassSubtraction has ClassOperands
// - && the first ClassOperand has MayContainStrings: true
ast::CharacterClassContentsKind::Subtraction => {
body.iter().next().map_or(false, may_contain_strings)
}
} }
} }
} }

View file

@ -229,8 +229,8 @@ impl<'old_alloc, 'new_alloc> CloneIn<'new_alloc> for CharacterClass<'old_alloc>
CharacterClass { CharacterClass {
span: CloneIn::clone_in(&self.span, allocator), span: CloneIn::clone_in(&self.span, allocator),
negative: CloneIn::clone_in(&self.negative, allocator), negative: CloneIn::clone_in(&self.negative, allocator),
kind: CloneIn::clone_in(&self.kind, allocator),
strings: CloneIn::clone_in(&self.strings, allocator), strings: CloneIn::clone_in(&self.strings, allocator),
kind: CloneIn::clone_in(&self.kind, allocator),
body: CloneIn::clone_in(&self.body, allocator), body: CloneIn::clone_in(&self.body, allocator),
} }
} }

View file

@ -178,8 +178,8 @@ impl ContentEq for Dot {
impl<'a> ContentEq for CharacterClass<'a> { impl<'a> ContentEq for CharacterClass<'a> {
fn content_eq(&self, other: &Self) -> bool { fn content_eq(&self, other: &Self) -> bool {
ContentEq::content_eq(&self.negative, &other.negative) ContentEq::content_eq(&self.negative, &other.negative)
&& ContentEq::content_eq(&self.kind, &other.kind)
&& ContentEq::content_eq(&self.strings, &other.strings) && ContentEq::content_eq(&self.strings, &other.strings)
&& ContentEq::content_eq(&self.kind, &other.kind)
&& ContentEq::content_eq(&self.body, &other.body) && ContentEq::content_eq(&self.body, &other.body)
} }
} }

View file

@ -143,8 +143,8 @@ impl ContentHash for Dot {
impl<'a> ContentHash for CharacterClass<'a> { impl<'a> ContentHash for CharacterClass<'a> {
fn content_hash<H: Hasher>(&self, state: &mut H) { fn content_hash<H: Hasher>(&self, state: &mut H) {
ContentHash::content_hash(&self.negative, state); ContentHash::content_hash(&self.negative, state);
ContentHash::content_hash(&self.kind, state);
ContentHash::content_hash(&self.strings, state); ContentHash::content_hash(&self.strings, state);
ContentHash::content_hash(&self.kind, state);
ContentHash::content_hash(&self.body, state); ContentHash::content_hash(&self.body, state);
} }
} }