mirror of
https://github.com/danbulant/oxc
synced 2026-05-25 04:42:10 +00:00
This PR does not upgrade rustc. Only changes are applied. We cannot upgrade to the lastet Rust version yet due to wasm-bindgen breaking some generated types. THere's also some elided lifetimes in `**/generated/**`, which requires modification to ast tools.
363 lines
9.9 KiB
Rust
363 lines
9.9 KiB
Rust
use oxc_allocator::{Box, CloneIn, GetAddress, Vec};
|
|
use oxc_ast_macros::ast;
|
|
use oxc_estree::ESTree;
|
|
use oxc_span::{cmp::ContentEq, hash::ContentHash, Atom, GetSpan, Span};
|
|
|
|
/// The root of the `PatternParser` result.
|
|
#[ast]
|
|
#[derive(Debug)]
|
|
#[generate_derive(CloneIn, ContentEq, ContentHash, ESTree)]
|
|
pub struct Pattern<'a> {
|
|
pub span: Span,
|
|
pub body: Disjunction<'a>,
|
|
}
|
|
|
|
/// Pile of [`Alternative`]s separated by `|`.
|
|
#[ast]
|
|
#[derive(Debug)]
|
|
#[generate_derive(CloneIn, ContentEq, ContentHash, ESTree)]
|
|
pub struct Disjunction<'a> {
|
|
pub span: Span,
|
|
pub body: Vec<'a, Alternative<'a>>,
|
|
}
|
|
|
|
/// Single unit of `|` separated alternatives.
|
|
#[ast]
|
|
#[derive(Debug)]
|
|
#[generate_derive(CloneIn, ContentEq, ContentHash, ESTree)]
|
|
pub struct Alternative<'a> {
|
|
pub span: Span,
|
|
pub body: Vec<'a, Term<'a>>,
|
|
}
|
|
|
|
/// Single unit of [`Alternative`], containing various kinds.
|
|
#[ast]
|
|
#[derive(Debug)]
|
|
#[generate_derive(CloneIn, ContentEq, ContentHash, ESTree)]
|
|
pub enum Term<'a> {
|
|
// Assertion, QuantifiableAssertion
|
|
BoundaryAssertion(Box<'a, BoundaryAssertion>) = 0,
|
|
LookAroundAssertion(Box<'a, LookAroundAssertion<'a>>) = 1,
|
|
// Quantifier
|
|
Quantifier(Box<'a, Quantifier<'a>>) = 2,
|
|
// Atom, ExtendedAtom
|
|
Character(Box<'a, Character>) = 3,
|
|
Dot(Dot) = 4,
|
|
CharacterClassEscape(Box<'a, CharacterClassEscape>) = 5,
|
|
UnicodePropertyEscape(Box<'a, UnicodePropertyEscape<'a>>) = 6,
|
|
CharacterClass(Box<'a, CharacterClass<'a>>) = 7,
|
|
CapturingGroup(Box<'a, CapturingGroup<'a>>) = 8,
|
|
IgnoreGroup(Box<'a, IgnoreGroup<'a>>) = 9,
|
|
IndexedReference(Box<'a, IndexedReference>) = 10,
|
|
NamedReference(Box<'a, NamedReference<'a>>) = 11,
|
|
}
|
|
|
|
impl GetSpan for Term<'_> {
|
|
#[inline]
|
|
fn span(&self) -> Span {
|
|
match self {
|
|
Term::BoundaryAssertion(it) => it.span,
|
|
Term::LookAroundAssertion(it) => it.span,
|
|
Term::Quantifier(it) => it.span,
|
|
Term::Character(it) => it.span,
|
|
Term::Dot(it) => it.span,
|
|
Term::CharacterClassEscape(it) => it.span,
|
|
Term::UnicodePropertyEscape(it) => it.span,
|
|
Term::CharacterClass(it) => it.span,
|
|
Term::CapturingGroup(it) => it.span,
|
|
Term::IgnoreGroup(it) => it.span,
|
|
Term::IndexedReference(it) => it.span,
|
|
Term::NamedReference(it) => it.span,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Simple form of assertion.
|
|
/// e.g. `^`, `$`, `\b`, `\B`
|
|
#[ast]
|
|
#[derive(Debug)]
|
|
#[generate_derive(CloneIn, ContentEq, ContentHash, ESTree)]
|
|
pub struct BoundaryAssertion {
|
|
pub span: Span,
|
|
pub kind: BoundaryAssertionKind,
|
|
}
|
|
|
|
#[ast]
|
|
#[derive(Debug, Clone, PartialEq)]
|
|
#[generate_derive(CloneIn, ContentEq, ContentHash, ESTree)]
|
|
pub enum BoundaryAssertionKind {
|
|
Start = 0,
|
|
End = 1,
|
|
Boundary = 2,
|
|
NegativeBoundary = 3,
|
|
}
|
|
|
|
/// Lookaround assertion.
|
|
/// e.g. `(?=...)`, `(?!...)`, `(?<=...)`, `(?<!...)`
|
|
#[ast]
|
|
#[derive(Debug)]
|
|
#[generate_derive(CloneIn, ContentEq, ContentHash, ESTree)]
|
|
pub struct LookAroundAssertion<'a> {
|
|
pub span: Span,
|
|
pub kind: LookAroundAssertionKind,
|
|
pub body: Disjunction<'a>,
|
|
}
|
|
|
|
#[ast]
|
|
#[derive(Debug, Clone, PartialEq)]
|
|
#[generate_derive(CloneIn, ContentEq, ContentHash, ESTree)]
|
|
pub enum LookAroundAssertionKind {
|
|
Lookahead = 0,
|
|
NegativeLookahead = 1,
|
|
Lookbehind = 2,
|
|
NegativeLookbehind = 3,
|
|
}
|
|
|
|
/// Quantifier holding a [`Term`] and its repetition count.
|
|
/// e.g. `a*`, `b+`, `c?`, `d{3}`, `e{4,}`, `f{5,6}`
|
|
#[ast]
|
|
#[derive(Debug)]
|
|
#[generate_derive(CloneIn, ContentEq, ContentHash, ESTree)]
|
|
pub struct Quantifier<'a> {
|
|
pub span: Span,
|
|
pub min: u64,
|
|
/// `None` means no upper bound.
|
|
pub max: Option<u64>,
|
|
pub greedy: bool,
|
|
pub body: Term<'a>,
|
|
}
|
|
|
|
/// Single character.
|
|
#[ast]
|
|
#[derive(Debug, Clone, Copy)]
|
|
#[generate_derive(CloneIn, ContentEq, ContentHash, ESTree)]
|
|
pub struct Character {
|
|
/// This will be invalid position when `UnicodeMode` is disabled and `value` is a surrogate pair.
|
|
pub span: Span,
|
|
pub kind: CharacterKind,
|
|
/// Unicode code point or UTF-16 code unit.
|
|
pub value: u32,
|
|
}
|
|
|
|
#[ast]
|
|
#[derive(Debug, Clone, Copy, PartialEq)]
|
|
#[generate_derive(CloneIn, ContentEq, ContentHash, ESTree)]
|
|
pub enum CharacterKind {
|
|
ControlLetter = 0,
|
|
HexadecimalEscape = 1,
|
|
Identifier = 2,
|
|
Null = 3,
|
|
// To distinguish leading 0 cases like `\00` and `\000`
|
|
Octal1 = 4,
|
|
Octal2 = 5,
|
|
Octal3 = 6,
|
|
SingleEscape = 7,
|
|
Symbol = 8,
|
|
UnicodeEscape = 9,
|
|
}
|
|
|
|
/// Character class.
|
|
/// e.g. `\d`, `\D`, `\s`, `\S`, `\w`, `\W`
|
|
#[ast]
|
|
#[derive(Debug)]
|
|
#[generate_derive(CloneIn, ContentEq, ContentHash, ESTree)]
|
|
pub struct CharacterClassEscape {
|
|
pub span: Span,
|
|
pub kind: CharacterClassEscapeKind,
|
|
}
|
|
|
|
#[ast]
|
|
#[derive(Debug, Clone, Copy, PartialEq)]
|
|
#[generate_derive(CloneIn, ContentEq, ContentHash, ESTree)]
|
|
pub enum CharacterClassEscapeKind {
|
|
D = 0,
|
|
NegativeD = 1,
|
|
S = 2,
|
|
NegativeS = 3,
|
|
W = 4,
|
|
NegativeW = 5,
|
|
}
|
|
|
|
/// Unicode property.
|
|
/// e.g. `\p{ASCII}`, `\P{ASCII}`, `\p{sc=Hiragana}`, `\P{sc=Hiragana}`
|
|
#[ast]
|
|
#[derive(Debug)]
|
|
#[generate_derive(CloneIn, ContentEq, ContentHash, ESTree)]
|
|
pub struct UnicodePropertyEscape<'a> {
|
|
pub span: Span,
|
|
pub negative: bool,
|
|
/// `true` if `UnicodeSetsMode` and `name` matches unicode property of strings.
|
|
pub strings: bool,
|
|
pub name: Atom<'a>,
|
|
pub value: Option<Atom<'a>>,
|
|
}
|
|
|
|
/// The `.`.
|
|
#[ast]
|
|
#[derive(Debug, Clone, Copy)]
|
|
#[generate_derive(CloneIn, ContentEq, ContentHash, ESTree)]
|
|
pub struct Dot {
|
|
pub span: Span,
|
|
}
|
|
|
|
/// Character class wrapped by `[]`.
|
|
/// e.g. `[a-z]`, `[^A-Z]`, `[abc]`, `[a&&b&&c]`, `[[a-z]--x--y]`
|
|
#[ast]
|
|
#[derive(Debug)]
|
|
#[generate_derive(CloneIn, ContentEq, ContentHash, ESTree)]
|
|
pub struct CharacterClass<'a> {
|
|
pub span: Span,
|
|
pub negative: bool,
|
|
/// `true` if:
|
|
/// - `body` contains [`UnicodePropertyEscape`], nested [`CharacterClass`] or [`ClassStringDisjunction`] which `strings` is `true`
|
|
/// - and matches each logic depends on `kind`
|
|
pub strings: bool,
|
|
pub kind: CharacterClassContentsKind,
|
|
pub body: Vec<'a, CharacterClassContents<'a>>,
|
|
}
|
|
|
|
#[ast]
|
|
#[derive(Debug, PartialEq)]
|
|
#[generate_derive(CloneIn, ContentEq, ContentHash, ESTree)]
|
|
pub enum CharacterClassContentsKind {
|
|
Union = 0,
|
|
/// `UnicodeSetsMode` only.
|
|
Intersection = 1,
|
|
/// `UnicodeSetsMode` only.
|
|
Subtraction = 2,
|
|
}
|
|
|
|
#[ast]
|
|
#[derive(Debug)]
|
|
#[generate_derive(CloneIn, ContentEq, ContentHash, GetAddress, ESTree)]
|
|
pub enum CharacterClassContents<'a> {
|
|
CharacterClassRange(Box<'a, CharacterClassRange>) = 0,
|
|
CharacterClassEscape(Box<'a, CharacterClassEscape>) = 1,
|
|
UnicodePropertyEscape(Box<'a, UnicodePropertyEscape<'a>>) = 2,
|
|
Character(Box<'a, Character>) = 3,
|
|
/// `UnicodeSetsMode` only
|
|
NestedCharacterClass(Box<'a, CharacterClass<'a>>) = 4,
|
|
/// `UnicodeSetsMode` only
|
|
ClassStringDisjunction(Box<'a, ClassStringDisjunction<'a>>) = 5,
|
|
}
|
|
|
|
impl GetSpan for CharacterClassContents<'_> {
|
|
#[inline]
|
|
fn span(&self) -> Span {
|
|
match self {
|
|
CharacterClassContents::CharacterClassRange(it) => it.span,
|
|
CharacterClassContents::CharacterClassEscape(it) => it.span,
|
|
CharacterClassContents::UnicodePropertyEscape(it) => it.span,
|
|
CharacterClassContents::Character(it) => it.span,
|
|
CharacterClassContents::NestedCharacterClass(it) => it.span,
|
|
CharacterClassContents::ClassStringDisjunction(it) => it.span,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// `-` separated range of characters.
|
|
/// e.g. `a-z`, `A-Z`, `0-9`
|
|
#[ast]
|
|
#[derive(Debug)]
|
|
#[generate_derive(CloneIn, ContentEq, ContentHash, ESTree)]
|
|
pub struct CharacterClassRange {
|
|
pub span: Span,
|
|
pub min: Character,
|
|
pub max: Character,
|
|
}
|
|
|
|
/// `|` separated string of characters wrapped by `\q{}`.
|
|
#[ast]
|
|
#[derive(Debug)]
|
|
#[generate_derive(CloneIn, ContentEq, ContentHash, ESTree)]
|
|
pub struct ClassStringDisjunction<'a> {
|
|
pub span: Span,
|
|
/// `true` if body is empty or contains [`ClassString`] which `strings` is `true`.
|
|
pub strings: bool,
|
|
pub body: Vec<'a, ClassString<'a>>,
|
|
}
|
|
|
|
/// Single unit of [`ClassStringDisjunction`].
|
|
#[ast]
|
|
#[derive(Debug)]
|
|
#[generate_derive(CloneIn, ContentEq, ContentHash, ESTree)]
|
|
pub struct ClassString<'a> {
|
|
pub span: Span,
|
|
/// `true` if body is empty or contain 2 more characters.
|
|
pub strings: bool,
|
|
pub body: Vec<'a, Character>,
|
|
}
|
|
|
|
/// Named or unnamed capturing group.
|
|
/// e.g. `(...)`, `(?<name>...)`
|
|
#[ast]
|
|
#[derive(Debug)]
|
|
#[generate_derive(CloneIn, ContentEq, ContentHash, ESTree)]
|
|
pub struct CapturingGroup<'a> {
|
|
pub span: Span,
|
|
/// Group name to be referenced by [`NamedReference`].
|
|
pub name: Option<Atom<'a>>,
|
|
pub body: Disjunction<'a>,
|
|
}
|
|
|
|
/// Pseudo-group for ignoring.
|
|
/// e.g. `(?:...)`
|
|
#[ast]
|
|
#[derive(Debug)]
|
|
#[generate_derive(CloneIn, ContentEq, ContentHash, ESTree)]
|
|
pub struct IgnoreGroup<'a> {
|
|
pub span: Span,
|
|
pub modifiers: Option<Modifiers>,
|
|
pub body: Disjunction<'a>,
|
|
}
|
|
|
|
/// Modifiers in [`IgnoreGroup`].
|
|
/// e.g. `i` in `(?i:...)`, `-s` in `(?-s:...)`
|
|
#[ast]
|
|
#[derive(Debug)]
|
|
#[generate_derive(CloneIn, ContentEq, ContentHash, ESTree)]
|
|
pub struct Modifiers {
|
|
pub span: Span,
|
|
pub enabling: Option<Modifier>,
|
|
pub disabling: Option<Modifier>,
|
|
}
|
|
|
|
/// Each part of modifier in [`Modifiers`].
|
|
#[ast]
|
|
#[derive(Debug)]
|
|
#[generate_derive(CloneIn, ContentEq, ContentHash, ESTree)]
|
|
pub struct Modifier {
|
|
pub ignore_case: bool,
|
|
pub multiline: bool,
|
|
pub sticky: bool,
|
|
}
|
|
|
|
/// Backreference by index.
|
|
/// e.g. `\1`, `\2`, `\3`
|
|
#[ast]
|
|
#[derive(Debug)]
|
|
#[generate_derive(CloneIn, ContentEq, ContentHash, ESTree)]
|
|
pub struct IndexedReference {
|
|
pub span: Span,
|
|
pub index: u32,
|
|
}
|
|
|
|
/// Backreference by name.
|
|
/// e.g. `\k<name>`
|
|
#[ast]
|
|
#[derive(Debug)]
|
|
#[generate_derive(CloneIn, ContentEq, ContentHash, ESTree)]
|
|
pub struct NamedReference<'a> {
|
|
pub span: Span,
|
|
pub name: Atom<'a>,
|
|
}
|
|
|
|
// See `oxc_ast/src/lib.rs` for the details
|
|
#[cfg(target_pointer_width = "64")]
|
|
#[test]
|
|
fn size_asserts() {
|
|
use std::mem::size_of;
|
|
|
|
assert!(size_of::<Term>() == 16);
|
|
assert!(size_of::<CharacterClassContents>() == 16);
|
|
}
|