mirror of
https://github.com/danbulant/oxc
synced 2026-05-19 04:08:41 +00:00
feat(regular_expression): implement visitor pattern trait for regex AST (#6055)
- resolves https://github.com/oxc-project/oxc/issues/5977
- supersedes https://github.com/oxc-project/oxc/pull/5951

To facilitate easier traversal of the Regex AST, this PR defines a `Visit` trait with default implementations that will walk the entirety of the Regex AST. Methods in the `Visit` trait can be overridden with custom implementations to do things like analyzing only certain nodes in a regular expression, which will be useful for regex-related `oxc_linter` rules.

In the future, we should consider automatically generating this code as it is very repetitive, but for now a handwritten visitor is sufficient.
This commit is contained in:
parent
3da3845f24
commit
77647931e4
3 changed files with 418 additions and 0 deletions
28
crates/oxc_regular_expression/examples/visitor.rs
Normal file
28
crates/oxc_regular_expression/examples/visitor.rs
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
#![allow(clippy::print_stdout)]
|
||||
|
||||
use oxc_allocator::Allocator;
|
||||
use oxc_regular_expression::{
|
||||
visit::{RegExpAstKind, Visit},
|
||||
Parser, ParserOptions,
|
||||
};
|
||||
|
||||
/// Stateless example visitor; its `Visit` implementation logs each node it is given.
struct TestVisitor;
|
||||
|
||||
impl Visit<'_> for TestVisitor {
|
||||
fn enter_node(&mut self, kind: RegExpAstKind) {
|
||||
println!("enter_node: {kind:?}");
|
||||
}
|
||||
|
||||
fn leave_node(&mut self, kind: RegExpAstKind) {
|
||||
println!("leave_node: {kind:?}");
|
||||
}
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let source_text = r"/(https?:\/\/github\.com\/(([^\s]+)\/([^\s]+))\/([^\s]+\/)?(issues|pull)\/([0-9]+))|(([^\s]+)\/([^\s]+))?#([1-9][0-9]*)($|[\s\:\;\-\(\=])/";
|
||||
let allocator = Allocator::default();
|
||||
let parser = Parser::new(&allocator, source_text, ParserOptions::default());
|
||||
let pattern = parser.parse().unwrap().pattern;
|
||||
let mut visitor = TestVisitor;
|
||||
visitor.visit_pattern(&pattern);
|
||||
}
|
||||
|
|
@ -9,6 +9,7 @@ mod literal_parser;
|
|||
mod options;
|
||||
mod span;
|
||||
mod surrogate_pair;
|
||||
pub mod visit;
|
||||
|
||||
mod generated {
|
||||
mod derive_clone_in;
|
||||
|
|
|
|||
389
crates/oxc_regular_expression/src/visit.rs
Normal file
389
crates/oxc_regular_expression/src/visit.rs
Normal file
|
|
@ -0,0 +1,389 @@
|
|||
#![allow(unused_variables, clippy::wildcard_imports)]
|
||||
use walk::walk_pattern;
|
||||
|
||||
use crate::ast::{
|
||||
Alternative, BoundaryAssertion, CapturingGroup, Character, CharacterClass,
|
||||
CharacterClassContents, CharacterClassEscape, CharacterClassRange, ClassString,
|
||||
ClassStringDisjunction, Disjunction, Dot, IgnoreGroup, IndexedReference, LookAroundAssertion,
|
||||
NamedReference, Pattern, Quantifier, Term, UnicodePropertyEscape,
|
||||
};
|
||||
use walk::*;
|
||||
|
||||
#[derive(Copy, Clone, Debug)]
|
||||
pub enum RegExpAstKind<'a> {
|
||||
Pattern(&'a Pattern<'a>),
|
||||
Disjunction(&'a Disjunction<'a>),
|
||||
Alternative(&'a Alternative<'a>),
|
||||
Term(&'a Term<'a>),
|
||||
LookAroundAssertion(&'a LookAroundAssertion<'a>),
|
||||
Quantifier(&'a Quantifier<'a>),
|
||||
CapturingGroup(&'a CapturingGroup<'a>),
|
||||
IgnoreGroup(&'a IgnoreGroup<'a>),
|
||||
BoundaryAssertion(&'a BoundaryAssertion),
|
||||
Character(&'a Character),
|
||||
Dot(&'a Dot),
|
||||
CharacterClassEscape(&'a CharacterClassEscape),
|
||||
UnicodePropertyEscape(&'a UnicodePropertyEscape<'a>),
|
||||
CharacterClass(&'a CharacterClass<'a>),
|
||||
CharacterClassContents(&'a CharacterClassContents<'a>),
|
||||
CharacterClassRange(&'a CharacterClassRange),
|
||||
CharacterClassStringDisjunction(&'a ClassStringDisjunction<'a>),
|
||||
CharacterClassString(&'a ClassString<'a>),
|
||||
IndexedReference(&'a IndexedReference),
|
||||
NamedReference(&'a NamedReference<'a>),
|
||||
}
|
||||
|
||||
/// RegEx syntax tree traversal
|
||||
pub trait Visit<'a>: Sized {
|
||||
#[inline]
|
||||
fn enter_node(&mut self, kind: RegExpAstKind<'a>) {}
|
||||
#[inline]
|
||||
fn leave_node(&mut self, kind: RegExpAstKind<'a>) {}
|
||||
|
||||
#[inline]
|
||||
fn alloc<T>(&self, t: &T) -> &'a T {
|
||||
// SAFETY:
|
||||
// This should be safe as long as `src` is an reference from the allocator.
|
||||
// But honestly, I'm not really sure if this is safe.
|
||||
unsafe { std::mem::transmute(t) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn visit_pattern(&mut self, it: &Pattern<'a>) {
|
||||
walk_pattern(self, it);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn visit_disjunction(&mut self, it: &Disjunction<'a>) {
|
||||
walk_disjunction(self, it);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn visit_alternative(&mut self, it: &Alternative<'a>) {
|
||||
walk_alternative(self, it);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn visit_term(&mut self, it: &Term<'a>) {
|
||||
walk_term(self, it);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn visit_lookaround_assertion(&mut self, it: &LookAroundAssertion<'a>) {
|
||||
walk_lookaround_assertion(self, it);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn visit_quantifier(&mut self, it: &Quantifier<'a>) {
|
||||
walk_quantifier(self, it);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn visit_capturing_group(&mut self, it: &CapturingGroup<'a>) {
|
||||
walk_capturing_group(self, it);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn visit_ignore_group(&mut self, it: &IgnoreGroup<'a>) {
|
||||
walk_ignore_group(self, it);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn visit_boundary_assertion(&mut self, it: &BoundaryAssertion) {
|
||||
walk_boundary_assertion(self, it);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn visit_character(&mut self, it: &Character) {
|
||||
walk_character(self, it);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn visit_dot(&mut self, it: &Dot) {
|
||||
walk_dot(self, it);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn visit_character_class_escape(&mut self, it: &CharacterClassEscape) {
|
||||
walk_character_class_escape(self, it);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn visit_unicode_property_escape(&mut self, it: &UnicodePropertyEscape<'a>) {
|
||||
walk_unicode_property_escape(self, it);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn visit_character_class(&mut self, it: &CharacterClass<'a>) {
|
||||
walk_character_class(self, it);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn visit_character_class_contents(&mut self, it: &CharacterClassContents<'a>) {
|
||||
walk_character_class_contents(self, it);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn visit_character_class_range(&mut self, it: &CharacterClassRange) {
|
||||
walk_character_class_range(self, it);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn visit_character_class_string_disjunction(&mut self, it: &ClassStringDisjunction<'a>) {
|
||||
walk_character_class_string_disjunction(self, it);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn visit_character_class_string(&mut self, it: &ClassString<'a>) {
|
||||
walk_character_class_string(self, it);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn visit_indexed_reference(&mut self, it: &IndexedReference) {
|
||||
walk_indexed_reference(self, it);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn visit_named_reference(&mut self, it: &NamedReference<'a>) {
|
||||
walk_named_reference(self, it);
|
||||
}
|
||||
}
|
||||
|
||||
pub mod walk {
|
||||
use super::*;
|
||||
|
||||
#[inline]
|
||||
pub fn walk_pattern<'a, V: Visit<'a>>(visitor: &mut V, it: &Pattern<'a>) {
|
||||
let kind = RegExpAstKind::Pattern(visitor.alloc(it));
|
||||
visitor.enter_node(kind);
|
||||
visitor.visit_disjunction(&it.body);
|
||||
visitor.leave_node(kind);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn walk_disjunction<'a, V: Visit<'a>>(visitor: &mut V, it: &Disjunction<'a>) {
|
||||
let kind = RegExpAstKind::Disjunction(visitor.alloc(it));
|
||||
visitor.enter_node(kind);
|
||||
for alt in &it.body {
|
||||
visitor.visit_alternative(alt);
|
||||
}
|
||||
visitor.leave_node(kind);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn walk_alternative<'a, V: Visit<'a>>(visitor: &mut V, it: &Alternative<'a>) {
|
||||
let kind = RegExpAstKind::Alternative(visitor.alloc(it));
|
||||
visitor.enter_node(kind);
|
||||
for term in &it.body {
|
||||
visitor.visit_term(term);
|
||||
}
|
||||
visitor.leave_node(kind);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn walk_term<'a, V: Visit<'a>>(visitor: &mut V, it: &Term<'a>) {
|
||||
let kind = RegExpAstKind::Term(visitor.alloc(it));
|
||||
visitor.enter_node(kind);
|
||||
match it {
|
||||
Term::LookAroundAssertion(lookaround) => {
|
||||
visitor.visit_lookaround_assertion(lookaround);
|
||||
}
|
||||
Term::Quantifier(quant) => {
|
||||
visitor.visit_quantifier(quant);
|
||||
}
|
||||
Term::CapturingGroup(group) => {
|
||||
visitor.visit_capturing_group(group);
|
||||
}
|
||||
Term::IgnoreGroup(group) => {
|
||||
visitor.visit_ignore_group(group);
|
||||
}
|
||||
Term::BoundaryAssertion(boundary_assertion) => {
|
||||
visitor.visit_boundary_assertion(boundary_assertion);
|
||||
}
|
||||
Term::Character(character) => {
|
||||
visitor.visit_character(character);
|
||||
}
|
||||
Term::Dot(dot) => {
|
||||
visitor.visit_dot(dot);
|
||||
}
|
||||
Term::CharacterClassEscape(character_class_escape) => {
|
||||
visitor.visit_character_class_escape(character_class_escape);
|
||||
}
|
||||
Term::UnicodePropertyEscape(unicode_property_escape) => {
|
||||
visitor.visit_unicode_property_escape(unicode_property_escape);
|
||||
}
|
||||
Term::CharacterClass(character_class) => {
|
||||
visitor.visit_character_class(character_class);
|
||||
}
|
||||
Term::IndexedReference(indexed_reference) => {
|
||||
visitor.visit_indexed_reference(indexed_reference);
|
||||
}
|
||||
Term::NamedReference(named_reference) => {
|
||||
visitor.visit_named_reference(named_reference);
|
||||
}
|
||||
}
|
||||
visitor.leave_node(kind);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn walk_lookaround_assertion<'a, V: Visit<'a>>(
|
||||
visitor: &mut V,
|
||||
it: &LookAroundAssertion<'a>,
|
||||
) {
|
||||
let kind = RegExpAstKind::LookAroundAssertion(visitor.alloc(it));
|
||||
visitor.enter_node(kind);
|
||||
visitor.visit_disjunction(&it.body);
|
||||
visitor.leave_node(kind);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn walk_quantifier<'a, V: Visit<'a>>(visitor: &mut V, it: &Quantifier<'a>) {
|
||||
let kind = RegExpAstKind::Quantifier(visitor.alloc(it));
|
||||
visitor.enter_node(kind);
|
||||
visitor.visit_term(&it.body);
|
||||
visitor.leave_node(kind);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn walk_capturing_group<'a, V: Visit<'a>>(visitor: &mut V, it: &CapturingGroup<'a>) {
|
||||
let kind = RegExpAstKind::CapturingGroup(visitor.alloc(it));
|
||||
visitor.enter_node(kind);
|
||||
visitor.visit_disjunction(&it.body);
|
||||
visitor.leave_node(kind);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn walk_ignore_group<'a, V: Visit<'a>>(visitor: &mut V, it: &IgnoreGroup<'a>) {
|
||||
let kind = RegExpAstKind::IgnoreGroup(visitor.alloc(it));
|
||||
visitor.enter_node(kind);
|
||||
visitor.visit_disjunction(&it.body);
|
||||
visitor.leave_node(kind);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn walk_boundary_assertion<'a, V: Visit<'a>>(visitor: &mut V, it: &BoundaryAssertion) {
|
||||
let kind = RegExpAstKind::BoundaryAssertion(visitor.alloc(it));
|
||||
visitor.enter_node(kind);
|
||||
visitor.leave_node(kind);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn walk_character<'a, V: Visit<'a>>(visitor: &mut V, it: &Character) {
|
||||
let kind = RegExpAstKind::Character(visitor.alloc(it));
|
||||
visitor.enter_node(kind);
|
||||
visitor.leave_node(kind);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn walk_dot<'a, V: Visit<'a>>(visitor: &mut V, it: &Dot) {
|
||||
let kind = RegExpAstKind::Dot(visitor.alloc(it));
|
||||
visitor.enter_node(kind);
|
||||
visitor.leave_node(kind);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn walk_character_class_escape<'a, V: Visit<'a>>(
|
||||
visitor: &mut V,
|
||||
it: &CharacterClassEscape,
|
||||
) {
|
||||
let kind = RegExpAstKind::CharacterClassEscape(visitor.alloc(it));
|
||||
visitor.enter_node(kind);
|
||||
visitor.leave_node(kind);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn walk_unicode_property_escape<'a, V: Visit<'a>>(
|
||||
visitor: &mut V,
|
||||
it: &UnicodePropertyEscape<'a>,
|
||||
) {
|
||||
let kind = RegExpAstKind::UnicodePropertyEscape(visitor.alloc(it));
|
||||
visitor.enter_node(kind);
|
||||
visitor.leave_node(kind);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn walk_character_class<'a, V: Visit<'a>>(visitor: &mut V, it: &CharacterClass<'a>) {
|
||||
let kind = RegExpAstKind::CharacterClass(visitor.alloc(it));
|
||||
visitor.enter_node(kind);
|
||||
for content in &it.body {
|
||||
visitor.visit_character_class_contents(content);
|
||||
}
|
||||
visitor.leave_node(kind);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn walk_character_class_contents<'a, V: Visit<'a>>(
|
||||
visitor: &mut V,
|
||||
it: &CharacterClassContents<'a>,
|
||||
) {
|
||||
let kind = RegExpAstKind::CharacterClassContents(visitor.alloc(it));
|
||||
visitor.enter_node(kind);
|
||||
match it {
|
||||
CharacterClassContents::CharacterClassRange(character_class_range) => {
|
||||
visitor.visit_character_class_range(character_class_range);
|
||||
}
|
||||
CharacterClassContents::CharacterClassEscape(character_class_escape) => {
|
||||
visitor.visit_character_class_escape(character_class_escape);
|
||||
}
|
||||
CharacterClassContents::UnicodePropertyEscape(unicode_property_escape) => {
|
||||
visitor.visit_unicode_property_escape(unicode_property_escape);
|
||||
}
|
||||
CharacterClassContents::Character(character) => {
|
||||
visitor.visit_character(character);
|
||||
}
|
||||
CharacterClassContents::NestedCharacterClass(character_class) => {
|
||||
visitor.visit_character_class(character_class);
|
||||
}
|
||||
CharacterClassContents::ClassStringDisjunction(class_string_disjunction) => {
|
||||
visitor.visit_character_class_string_disjunction(class_string_disjunction);
|
||||
}
|
||||
}
|
||||
visitor.leave_node(kind);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn walk_character_class_range<'a, V: Visit<'a>>(visitor: &mut V, it: &CharacterClassRange) {
|
||||
let kind = RegExpAstKind::CharacterClassRange(visitor.alloc(it));
|
||||
visitor.enter_node(kind);
|
||||
visitor.visit_character(&it.min);
|
||||
visitor.visit_character(&it.max);
|
||||
visitor.leave_node(kind);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn walk_character_class_string_disjunction<'a, V: Visit<'a>>(
|
||||
visitor: &mut V,
|
||||
it: &ClassStringDisjunction<'a>,
|
||||
) {
|
||||
let kind = RegExpAstKind::CharacterClassStringDisjunction(visitor.alloc(it));
|
||||
visitor.enter_node(kind);
|
||||
for string in &it.body {
|
||||
visitor.visit_character_class_string(string);
|
||||
}
|
||||
visitor.leave_node(kind);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn walk_character_class_string<'a, V: Visit<'a>>(visitor: &mut V, it: &ClassString<'a>) {
|
||||
let kind = RegExpAstKind::CharacterClassString(visitor.alloc(it));
|
||||
visitor.enter_node(kind);
|
||||
for character in &it.body {
|
||||
visitor.visit_character(character);
|
||||
}
|
||||
visitor.leave_node(kind);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn walk_indexed_reference<'a, V: Visit<'a>>(visitor: &mut V, it: &IndexedReference) {
|
||||
let kind = RegExpAstKind::IndexedReference(visitor.alloc(it));
|
||||
visitor.enter_node(kind);
|
||||
visitor.leave_node(kind);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn walk_named_reference<'a, V: Visit<'a>>(visitor: &mut V, it: &NamedReference<'a>) {
|
||||
let kind = RegExpAstKind::NamedReference(visitor.alloc(it));
|
||||
visitor.enter_node(kind);
|
||||
visitor.leave_node(kind);
|
||||
}
|
||||
}
|
||||
Loading…
Reference in a new issue