refactor(regular_expression): Improve AST docs with refactoring may_contain_strings (#5665)

Follow up #5661
This commit is contained in:
leaysgur 2024-09-10 07:32:28 +00:00
parent c6bbf94f4c
commit 2da42efb6f
6 changed files with 46 additions and 44 deletions

View file

@ -1506,8 +1506,8 @@ const _: () = {
assert!(align_of::<CharacterClass>() == 8usize);
assert!(offset_of!(CharacterClass, span) == 0usize);
assert!(offset_of!(CharacterClass, negative) == 8usize);
assert!(offset_of!(CharacterClass, kind) == 9usize);
assert!(offset_of!(CharacterClass, strings) == 10usize);
assert!(offset_of!(CharacterClass, strings) == 9usize);
assert!(offset_of!(CharacterClass, kind) == 10usize);
assert!(offset_of!(CharacterClass, body) == 16usize);
assert!(size_of::<CharacterClassContentsKind>() == 1usize);
@ -3061,8 +3061,8 @@ const _: () = {
assert!(align_of::<CharacterClass>() == 4usize);
assert!(offset_of!(CharacterClass, span) == 0usize);
assert!(offset_of!(CharacterClass, negative) == 8usize);
assert!(offset_of!(CharacterClass, kind) == 9usize);
assert!(offset_of!(CharacterClass, strings) == 10usize);
assert!(offset_of!(CharacterClass, strings) == 9usize);
assert!(offset_of!(CharacterClass, kind) == 10usize);
assert!(offset_of!(CharacterClass, body) == 12usize);
assert!(size_of::<CharacterClassContentsKind>() == 1usize);

View file

@ -213,7 +213,7 @@ pub enum CharacterClassEscapeKind {
pub struct UnicodePropertyEscape<'a> {
pub span: Span,
pub negative: bool,
/// `true` if `UnicodeSetsMode` and `name` matched unicode property of strings.
/// `true` if in `UnicodeSetsMode` and `name` matches a Unicode property of strings.
pub strings: bool,
pub name: Atom<'a>,
pub value: Option<Atom<'a>>,
@ -237,8 +237,11 @@ pub struct Dot {
pub struct CharacterClass<'a> {
pub span: Span,
pub negative: bool,
pub kind: CharacterClassContentsKind,
/// `true` if:
/// - `body` contains a [`UnicodePropertyEscape`], nested [`CharacterClass`] or [`ClassStringDisjunction`] whose `strings` is `true`
/// - and those operands satisfy the rule selected by `kind` (union: any operand, intersection: every operand, subtraction: the first operand)
pub strings: bool,
pub kind: CharacterClassContentsKind,
pub body: Vec<'a, CharacterClassContents<'a>>,
}
@ -288,7 +291,7 @@ pub struct CharacterClassRange {
#[cfg_attr(feature = "serialize", derive(Serialize, Tsify))]
pub struct ClassStringDisjunction<'a> {
pub span: Span,
/// `true` if body is empty or contain [`ClassString`] which `strings` is `true`
/// `true` if `body` is empty or contains a [`ClassString`] whose `strings` is `true`.
pub strings: bool,
pub body: Vec<'a, ClassString<'a>>,
}
@ -313,6 +316,7 @@ pub struct ClassString<'a> {
#[cfg_attr(feature = "serialize", derive(Serialize, Tsify))]
pub struct CapturingGroup<'a> {
pub span: Span,
/// Group name to be referenced by [`NamedReference`].
pub name: Option<Atom<'a>>,
pub body: Disjunction<'a>,
}
@ -330,6 +334,8 @@ pub struct IgnoreGroup<'a> {
pub body: Disjunction<'a>,
}
/// Pattern modifiers in [`IgnoreGroup`].
/// e.g. `(?i:...)`, `(?-s:...)`
#[ast]
#[derive(Debug)]
#[generate_derive(CloneIn, ContentEq, ContentHash)]

View file

@ -727,7 +727,7 @@ impl<'a> PatternParser<'a> {
let (kind, body) = self.parse_class_contents()?;
if self.reader.eat(']') {
let strings = body.iter().any(PatternParser::may_contain_strings_in_class_contents);
let strings = PatternParser::may_contain_strings_in_class_contents(&kind, &body);
// [SS:EE] CharacterClass :: [^ ClassContents ]
// It is a Syntax Error if MayContainStrings of the ClassContents is true.
@ -1259,30 +1259,7 @@ impl<'a> PatternParser<'a> {
let (kind, body) = self.parse_class_contents()?;
if self.reader.eat(']') {
let strings = match kind {
// MayContainStrings is true
// - if ClassContents is ClassUnion
// - && ClassUnion has ClassOperands
// - && at least 1 ClassOperand has MayContainStrings: true
ast::CharacterClassContentsKind::Union => {
body.iter().any(PatternParser::may_contain_strings_in_class_contents)
}
// MayContainStrings is true
// - if ClassContents is ClassIntersection
// - && ClassIntersection has ClassOperands
// - && all ClassOperands have MayContainStrings: true
ast::CharacterClassContentsKind::Intersection => {
body.iter().all(PatternParser::may_contain_strings_in_class_contents)
}
// MayContainStrings is true
// - if ClassContents is ClassSubtraction
// - && ClassSubtraction has ClassOperands
// - && the first ClassOperand has MayContainStrings: true
ast::CharacterClassContentsKind::Subtraction => body
.iter()
.next()
.map_or(false, PatternParser::may_contain_strings_in_class_contents),
};
let strings = PatternParser::may_contain_strings_in_class_contents(&kind, &body);
// [SS:EE] NestedClass :: [^ ClassContents ]
// It is a Syntax Error if MayContainStrings of the ClassContents is true.
@ -2163,27 +2140,46 @@ impl<'a> PatternParser<'a> {
// ---
fn may_contain_strings_in_class_contents(item: &ast::CharacterClassContents) -> bool {
match item {
fn may_contain_strings_in_class_contents(
kind: &ast::CharacterClassContentsKind,
body: &Vec<'a, ast::CharacterClassContents<'a>>,
) -> bool {
let may_contain_strings = |item: &ast::CharacterClassContents<'a>| match item {
// MayContainStrings is true
// - if ClassContents contains UnicodePropertyValueExpression
// - && UnicodePropertyValueExpression is LoneUnicodePropertyNameOrValue
// - && it is a binary property of strings (can be true only with `UnicodeSetsMode`)
ast::CharacterClassContents::UnicodePropertyEscape(unicode_property_escape) => {
unicode_property_escape.strings
}
ast::CharacterClassContents::UnicodePropertyEscape(item) => item.strings,
// MayContainStrings is true
// - if ClassStringDisjunction is [empty]
// - || if ClassStringDisjunction contains ClassString
// - && ClassString is [empty]
// - || ClassString contains 2 or more ClassSetCharacters
ast::CharacterClassContents::ClassStringDisjunction(class_string_disjunction) => {
class_string_disjunction.strings
}
ast::CharacterClassContents::ClassStringDisjunction(item) => item.strings,
// MayContainStrings is true
// - if NestedClass has MayContainStrings: true
ast::CharacterClassContents::NestedCharacterClass(nested_class) => nested_class.strings,
ast::CharacterClassContents::NestedCharacterClass(item) => item.strings,
_ => false,
};
match kind {
// MayContainStrings is true
// - if ClassContents is ClassUnion
// - && ClassUnion has ClassOperands
// - && at least 1 ClassOperand has MayContainStrings: true
ast::CharacterClassContentsKind::Union => body.iter().any(may_contain_strings),
// MayContainStrings is true
// - if ClassContents is ClassIntersection
// - && ClassIntersection has ClassOperands
// - && all ClassOperands have MayContainStrings: true
ast::CharacterClassContentsKind::Intersection => body.iter().all(may_contain_strings),
// MayContainStrings is true
// - if ClassContents is ClassSubtraction
// - && ClassSubtraction has ClassOperands
// - && the first ClassOperand has MayContainStrings: true
ast::CharacterClassContentsKind::Subtraction => {
body.iter().next().map_or(false, may_contain_strings)
}
}
}
}

View file

@ -229,8 +229,8 @@ impl<'old_alloc, 'new_alloc> CloneIn<'new_alloc> for CharacterClass<'old_alloc>
CharacterClass {
span: CloneIn::clone_in(&self.span, allocator),
negative: CloneIn::clone_in(&self.negative, allocator),
kind: CloneIn::clone_in(&self.kind, allocator),
strings: CloneIn::clone_in(&self.strings, allocator),
kind: CloneIn::clone_in(&self.kind, allocator),
body: CloneIn::clone_in(&self.body, allocator),
}
}

View file

@ -178,8 +178,8 @@ impl ContentEq for Dot {
impl<'a> ContentEq for CharacterClass<'a> {
fn content_eq(&self, other: &Self) -> bool {
ContentEq::content_eq(&self.negative, &other.negative)
&& ContentEq::content_eq(&self.kind, &other.kind)
&& ContentEq::content_eq(&self.strings, &other.strings)
&& ContentEq::content_eq(&self.kind, &other.kind)
&& ContentEq::content_eq(&self.body, &other.body)
}
}

View file

@ -143,8 +143,8 @@ impl ContentHash for Dot {
impl<'a> ContentHash for CharacterClass<'a> {
fn content_hash<H: Hasher>(&self, state: &mut H) {
ContentHash::content_hash(&self.negative, state);
ContentHash::content_hash(&self.kind, state);
ContentHash::content_hash(&self.strings, state);
ContentHash::content_hash(&self.kind, state);
ContentHash::content_hash(&self.body, state);
}
}