mirror of
https://github.com/danbulant/oxc
synced 2026-05-24 20:32:10 +00:00
refactor(parser): remove TokenValue::RegExp from Token (#1926)
This PR is part of #1880. `Token` size is reduced from 48 to 40 bytes. To reconstruct the regex pattern and flags within the parser, the regex string is re-parsed from the end by reading all valid flags. To make this work cleanly, the lexer no longer recovers from an invalid regex.
This commit is contained in:
parent
73a5b7f7da
commit
08438e04ba
7 changed files with 95 additions and 54 deletions
|
|
@ -163,8 +163,13 @@ impl<'a> AstBuilder<'a> {
|
||||||
TemplateElementValue { raw, cooked }
|
TemplateElementValue { raw, cooked }
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn reg_exp_literal(&self, span: Span, pattern: Atom, flags: RegExpFlags) -> RegExpLiteral {
|
pub fn reg_exp_literal(
|
||||||
RegExpLiteral { span, value: EmptyObject, regex: RegExp { pattern, flags } }
|
&self,
|
||||||
|
span: Span,
|
||||||
|
pattern: &'a str,
|
||||||
|
flags: RegExpFlags,
|
||||||
|
) -> RegExpLiteral {
|
||||||
|
RegExpLiteral { span, value: EmptyObject, regex: RegExp { pattern: pattern.into(), flags } }
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn literal_string_expression(&self, literal: StringLiteral) -> Expression<'a> {
|
pub fn literal_string_expression(&self, literal: StringLiteral) -> Expression<'a> {
|
||||||
|
|
|
||||||
|
|
@ -185,8 +185,8 @@ impl<'a> Parser<'a> {
|
||||||
Kind::LParen => self.parse_parenthesized_expression(span),
|
Kind::LParen => self.parse_parenthesized_expression(span),
|
||||||
Kind::Slash | Kind::SlashEq => {
|
Kind::Slash | Kind::SlashEq => {
|
||||||
self.read_regex();
|
self.read_regex();
|
||||||
self.parse_literal_regexp()
|
let literal = self.parse_literal_regexp();
|
||||||
.map(|literal| self.ast.literal_regexp_expression(literal))
|
Ok(self.ast.literal_regexp_expression(literal))
|
||||||
}
|
}
|
||||||
// JSXElement, JSXFragment
|
// JSXElement, JSXFragment
|
||||||
Kind::LAngle if self.source_type.is_jsx() => self.parse_jsx_expression(),
|
Kind::LAngle if self.source_type.is_jsx() => self.parse_jsx_expression(),
|
||||||
|
|
@ -315,20 +315,28 @@ impl<'a> Parser<'a> {
|
||||||
Ok(self.ast.bigint_literal(self.end_span(span), value, base))
|
Ok(self.ast.bigint_literal(self.end_span(span), value, base))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn parse_literal_regexp(&mut self) -> Result<RegExpLiteral> {
|
pub(crate) fn parse_literal_regexp(&mut self) -> RegExpLiteral {
|
||||||
let span = self.start_span();
|
let span = self.start_span();
|
||||||
let r = match self.cur_kind() {
|
|
||||||
Kind::RegExp => self.cur_token().value.as_regex(),
|
// split out the flag part of `/regex/flag` by looking for `/` from the end
|
||||||
_ => return Err(self.unexpected()),
|
let regex_src = self.cur_src();
|
||||||
};
|
let mut flags = RegExpFlags::empty();
|
||||||
let pattern = Atom::from(r.pattern);
|
|
||||||
let flags = r.flags;
|
let mut split_index = None;
|
||||||
|
for (i, c) in regex_src.char_indices().rev() {
|
||||||
|
if let Ok(flag) = RegExpFlags::try_from(c) {
|
||||||
|
flags |= flag;
|
||||||
|
} else {
|
||||||
|
split_index.replace(i);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// `/` are omitted from the pattern
|
||||||
|
let pattern = split_index.map_or(regex_src, |i| regex_src.get(1..i).unwrap_or(""));
|
||||||
|
|
||||||
self.bump_any();
|
self.bump_any();
|
||||||
Ok(RegExpLiteral {
|
self.ast.reg_exp_literal(self.end_span(span), pattern, flags)
|
||||||
span: self.end_span(span),
|
|
||||||
value: EmptyObject {},
|
|
||||||
regex: RegExp { pattern, flags },
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn parse_literal_string(&mut self) -> Result<StringLiteral> {
|
pub(crate) fn parse_literal_string(&mut self) -> Result<StringLiteral> {
|
||||||
|
|
|
||||||
|
|
@ -24,7 +24,7 @@ use oxc_syntax::{
|
||||||
},
|
},
|
||||||
unicode_id_start::is_id_start_unicode,
|
unicode_id_start::is_id_start_unicode,
|
||||||
};
|
};
|
||||||
pub use token::{RegExp, Token, TokenValue};
|
pub use token::{Token, TokenValue};
|
||||||
|
|
||||||
pub use self::{kind::Kind, number::parse_big_int};
|
pub use self::{kind::Kind, number::parse_big_int};
|
||||||
use self::{
|
use self::{
|
||||||
|
|
@ -819,7 +819,6 @@ impl<'a> Lexer<'a> {
|
||||||
|
|
||||||
/// 12.9.5 Regular Expression Literals
|
/// 12.9.5 Regular Expression Literals
|
||||||
fn read_regex(&mut self) -> Kind {
|
fn read_regex(&mut self) -> Kind {
|
||||||
let start = self.current.token.start + 1; // +1 to exclude `/`
|
|
||||||
let mut in_escape = false;
|
let mut in_escape = false;
|
||||||
let mut in_character_class = false;
|
let mut in_character_class = false;
|
||||||
loop {
|
loop {
|
||||||
|
|
@ -848,40 +847,27 @@ impl<'a> Lexer<'a> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let end = self.offset() - 1; // -1 to exclude `/`
|
|
||||||
let pattern = &self.source[start as usize..end as usize];
|
|
||||||
|
|
||||||
let mut flags = RegExpFlags::empty();
|
let mut flags = RegExpFlags::empty();
|
||||||
|
|
||||||
while let Some(ch @ ('$' | '_' | 'a'..='z' | 'A'..='Z' | '0'..='9')) = self.peek() {
|
while let Some(ch @ ('$' | '_' | 'a'..='z' | 'A'..='Z' | '0'..='9')) = self.peek() {
|
||||||
self.current.chars.next();
|
self.current.chars.next();
|
||||||
if !ch.is_ascii_lowercase() {
|
if !ch.is_ascii_lowercase() {
|
||||||
self.error(diagnostics::RegExpFlag(ch, self.current_offset()));
|
self.error(diagnostics::RegExpFlag(ch, self.current_offset()));
|
||||||
continue;
|
return Kind::Undetermined;
|
||||||
}
|
}
|
||||||
let flag = match ch {
|
let flag = if let Ok(flag) = RegExpFlags::try_from(ch) {
|
||||||
'g' => RegExpFlags::G,
|
flag
|
||||||
'i' => RegExpFlags::I,
|
} else {
|
||||||
'm' => RegExpFlags::M,
|
self.error(diagnostics::RegExpFlag(ch, self.current_offset()));
|
||||||
's' => RegExpFlags::S,
|
return Kind::Undetermined;
|
||||||
'u' => RegExpFlags::U,
|
|
||||||
'y' => RegExpFlags::Y,
|
|
||||||
'd' => RegExpFlags::D,
|
|
||||||
'v' => RegExpFlags::V,
|
|
||||||
_ => {
|
|
||||||
self.error(diagnostics::RegExpFlag(ch, self.current_offset()));
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
if flags.contains(flag) {
|
if flags.contains(flag) {
|
||||||
self.error(diagnostics::RegExpFlagTwice(ch, self.current_offset()));
|
self.error(diagnostics::RegExpFlagTwice(ch, self.current_offset()));
|
||||||
continue;
|
return Kind::Undetermined;
|
||||||
}
|
}
|
||||||
flags |= flag;
|
flags |= flag;
|
||||||
}
|
}
|
||||||
|
|
||||||
self.current.token.value = TokenValue::RegExp(RegExp { pattern, flags });
|
|
||||||
|
|
||||||
Kind::RegExp
|
Kind::RegExp
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,5 @@
|
||||||
//! Token
|
//! Token
|
||||||
|
|
||||||
use oxc_ast::ast::RegExpFlags;
|
|
||||||
use oxc_span::Span;
|
use oxc_span::Span;
|
||||||
|
|
||||||
use super::kind::Kind;
|
use super::kind::Kind;
|
||||||
|
|
@ -29,7 +28,7 @@ pub struct Token<'a> {
|
||||||
mod size_asserts {
|
mod size_asserts {
|
||||||
use oxc_index::assert_eq_size;
|
use oxc_index::assert_eq_size;
|
||||||
|
|
||||||
assert_eq_size!(super::Token, [u8; 48]);
|
assert_eq_size!(super::Token, [u8; 40]);
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> Token<'a> {
|
impl<'a> Token<'a> {
|
||||||
|
|
@ -43,13 +42,6 @@ pub enum TokenValue<'a> {
|
||||||
None,
|
None,
|
||||||
Number(f64),
|
Number(f64),
|
||||||
String(&'a str),
|
String(&'a str),
|
||||||
RegExp(RegExp<'a>),
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Copy, Clone)]
|
|
||||||
pub struct RegExp<'a> {
|
|
||||||
pub pattern: &'a str,
|
|
||||||
pub flags: RegExpFlags,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> Default for TokenValue<'a> {
|
impl<'a> Default for TokenValue<'a> {
|
||||||
|
|
@ -66,13 +58,6 @@ impl<'a> TokenValue<'a> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn as_regex(&self) -> &RegExp<'a> {
|
|
||||||
match self {
|
|
||||||
Self::RegExp(regex) => regex,
|
|
||||||
_ => unreachable!("expected regex!"),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn get_string(&self) -> Option<&str> {
|
pub fn get_string(&self) -> Option<&str> {
|
||||||
match self {
|
match self {
|
||||||
Self::String(s) => Some(s),
|
Self::String(s) => Some(s),
|
||||||
|
|
|
||||||
|
|
@ -1141,6 +1141,12 @@ Expect to Parse: "typescript/types/const-type-parameters-babel-7/input.ts"
|
||||||
╭─[core/uncategorised/380/input.js:1:1]
|
╭─[core/uncategorised/380/input.js:1:1]
|
||||||
1 │ var x = /
|
1 │ var x = /
|
||||||
· ──
|
· ──
|
||||||
|
2 │ /
|
||||||
|
╰────
|
||||||
|
|
||||||
|
× Unexpected token
|
||||||
|
╭─[core/uncategorised/380/input.js:1:1]
|
||||||
|
1 │ var x = /
|
||||||
2 │ /
|
2 │ /
|
||||||
╰────
|
╰────
|
||||||
|
|
||||||
|
|
@ -1523,6 +1529,12 @@ Expect to Parse: "typescript/types/const-type-parameters-babel-7/input.ts"
|
||||||
╭─[core/uncategorised/441/input.js:1:1]
|
╭─[core/uncategorised/441/input.js:1:1]
|
||||||
1 │ /a\
|
1 │ /a\
|
||||||
· ────
|
· ────
|
||||||
|
2 │ /
|
||||||
|
╰────
|
||||||
|
|
||||||
|
× Unexpected token
|
||||||
|
╭─[core/uncategorised/441/input.js:1:1]
|
||||||
|
1 │ /a\
|
||||||
2 │ /
|
2 │ /
|
||||||
╰────
|
╰────
|
||||||
|
|
||||||
|
|
@ -7967,6 +7979,11 @@ Expect to Parse: "typescript/types/const-type-parameters-babel-7/input.ts"
|
||||||
2 │ /
|
2 │ /
|
||||||
╰────
|
╰────
|
||||||
|
|
||||||
|
× Unexpected token
|
||||||
|
╭─[esprima/invalid-syntax/migrated_0040/input.js:2:1]
|
||||||
|
2 │ /
|
||||||
|
╰────
|
||||||
|
|
||||||
× Invalid Unicode escape sequence
|
× Invalid Unicode escape sequence
|
||||||
╭─[esprima/invalid-syntax/migrated_0041/input.js:1:1]
|
╭─[esprima/invalid-syntax/migrated_0041/input.js:1:1]
|
||||||
1 │ var x = /[a-z]/\ux
|
1 │ var x = /[a-z]/\ux
|
||||||
|
|
@ -8141,6 +8158,11 @@ Expect to Parse: "typescript/types/const-type-parameters-babel-7/input.ts"
|
||||||
2 │ /
|
2 │ /
|
||||||
╰────
|
╰────
|
||||||
|
|
||||||
|
× Unexpected token
|
||||||
|
╭─[esprima/invalid-syntax/migrated_0062/input.js:2:1]
|
||||||
|
2 │ /
|
||||||
|
╰────
|
||||||
|
|
||||||
× Unterminated string
|
× Unterminated string
|
||||||
╭─[esprima/invalid-syntax/migrated_0063/input.js:1:1]
|
╭─[esprima/invalid-syntax/migrated_0063/input.js:1:1]
|
||||||
1 │ var x = "
|
1 │ var x = "
|
||||||
|
|
@ -8681,6 +8703,11 @@ Expect to Parse: "typescript/types/const-type-parameters-babel-7/input.ts"
|
||||||
2 │ /
|
2 │ /
|
||||||
╰────
|
╰────
|
||||||
|
|
||||||
|
× Unexpected token
|
||||||
|
╭─[esprima/invalid-syntax/migrated_0157/input.js:2:1]
|
||||||
|
2 │ /
|
||||||
|
╰────
|
||||||
|
|
||||||
× Unexpected token
|
× Unexpected token
|
||||||
╭─[esprima/invalid-syntax/migrated_0158/input.js:2:1]
|
╭─[esprima/invalid-syntax/migrated_0158/input.js:2:1]
|
||||||
2 │
|
2 │
|
||||||
|
|
|
||||||
|
|
@ -18251,6 +18251,11 @@ Expect Syntax Error: "language/import/import-attributes/json-named-bindings.js"
|
||||||
· ─
|
· ─
|
||||||
╰────
|
╰────
|
||||||
|
|
||||||
|
× Unexpected token
|
||||||
|
╭─[language/line-terminators/invalid-regexp-cr.js:18:1]
|
||||||
|
18 │ /
|
||||||
|
╰────
|
||||||
|
|
||||||
× Unterminated regular expression
|
× Unterminated regular expression
|
||||||
╭─[language/line-terminators/invalid-regexp-lf.js:16:1]
|
╭─[language/line-terminators/invalid-regexp-lf.js:16:1]
|
||||||
16 │
|
16 │
|
||||||
|
|
@ -18259,6 +18264,11 @@ Expect Syntax Error: "language/import/import-attributes/json-named-bindings.js"
|
||||||
18 │ /
|
18 │ /
|
||||||
╰────
|
╰────
|
||||||
|
|
||||||
|
× Unexpected token
|
||||||
|
╭─[language/line-terminators/invalid-regexp-lf.js:18:1]
|
||||||
|
18 │ /
|
||||||
|
╰────
|
||||||
|
|
||||||
× Unterminated regular expression
|
× Unterminated regular expression
|
||||||
╭─[language/line-terminators/invalid-regexp-ls.js:16:1]
|
╭─[language/line-terminators/invalid-regexp-ls.js:16:1]
|
||||||
16 │
|
16 │
|
||||||
|
|
@ -18266,6 +18276,11 @@ Expect Syntax Error: "language/import/import-attributes/json-named-bindings.js"
|
||||||
· ──
|
· ──
|
||||||
╰────
|
╰────
|
||||||
|
|
||||||
|
× Unexpected token
|
||||||
|
╭─[language/line-terminators/invalid-regexp-ls.js:17:1]
|
||||||
|
17 │ /
/
|
||||||
|
╰────
|
||||||
|
|
||||||
× Unterminated regular expression
|
× Unterminated regular expression
|
||||||
╭─[language/line-terminators/invalid-regexp-ps.js:16:1]
|
╭─[language/line-terminators/invalid-regexp-ps.js:16:1]
|
||||||
16 │
|
16 │
|
||||||
|
|
@ -18273,6 +18288,11 @@ Expect Syntax Error: "language/import/import-attributes/json-named-bindings.js"
|
||||||
· ──
|
· ──
|
||||||
╰────
|
╰────
|
||||||
|
|
||||||
|
× Unexpected token
|
||||||
|
╭─[language/line-terminators/invalid-regexp-ps.js:17:1]
|
||||||
|
17 │ /
/
|
||||||
|
╰────
|
||||||
|
|
||||||
× Unterminated string
|
× Unterminated string
|
||||||
╭─[language/line-terminators/invalid-string-cr.js:15:1]
|
╭─[language/line-terminators/invalid-string-cr.js:15:1]
|
||||||
15 │
|
15 │
|
||||||
|
|
@ -31537,6 +31557,11 @@ Expect Syntax Error: "language/import/import-attributes/json-named-bindings.js"
|
||||||
· ───────
|
· ───────
|
||||||
╰────
|
╰────
|
||||||
|
|
||||||
|
× Expected `}` but found `EOF`
|
||||||
|
╭─[language/statements/function/invalid-function-body-1.js:17:1]
|
||||||
|
17 │ function __func(){/ ABC}
|
||||||
|
╰────
|
||||||
|
|
||||||
× Unexpected token
|
× Unexpected token
|
||||||
╭─[language/statements/function/invalid-function-body-2.js:16:1]
|
╭─[language/statements/function/invalid-function-body-2.js:16:1]
|
||||||
16 │
|
16 │
|
||||||
|
|
|
||||||
|
|
@ -17136,6 +17136,11 @@ Expect to Parse: "conformance/salsa/plainJSRedeclare3.ts"
|
||||||
· ────────────
|
· ────────────
|
||||||
╰────
|
╰────
|
||||||
|
|
||||||
|
× Expected `)` but found `EOF`
|
||||||
|
╭─[conformance/parser/ecmascript5/RegularExpressions/parserRegularExpressionDivideAmbiguity4.ts:1:1]
|
||||||
|
1 │ foo(/notregexp);
|
||||||
|
╰────
|
||||||
|
|
||||||
× Expected a semicolon or an implicit semicolon after a statement, but found none
|
× Expected a semicolon or an implicit semicolon after a statement, but found none
|
||||||
╭─[conformance/parser/ecmascript5/RegularExpressions/parserRegularExpressionDivideAmbiguity7.ts:1:1]
|
╭─[conformance/parser/ecmascript5/RegularExpressions/parserRegularExpressionDivideAmbiguity7.ts:1:1]
|
||||||
1 │ (a/8
|
1 │ (a/8
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue