mirror of
https://github.com/danbulant/oxc
synced 2026-05-24 20:32:10 +00:00
refactor(parser): reduce work parsing regexps (#1999)
#1926 produced a small performance regression because when parsing a regexp, some work is repeated.
This commit is contained in:
parent
74dfa3be8b
commit
c7316856db
3 changed files with 23 additions and 29 deletions
|
|
@ -1,5 +1,6 @@
|
|||
//! Code related to navigating `Token`s from the lexer
|
||||
|
||||
use oxc_ast::ast::RegExpFlags;
|
||||
use oxc_diagnostics::Result;
|
||||
use oxc_span::Span;
|
||||
|
||||
|
|
@ -200,8 +201,10 @@ impl<'a> Parser<'a> {
|
|||
}
|
||||
|
||||
/// Tell lexer to read a regex
|
||||
pub(crate) fn read_regex(&mut self) {
|
||||
self.token = self.lexer.next_regex(self.cur_kind());
|
||||
pub(crate) fn read_regex(&mut self) -> (u32, RegExpFlags) {
|
||||
let (token, pattern_end, flags) = self.lexer.next_regex(self.cur_kind());
|
||||
self.token = token;
|
||||
(pattern_end, flags)
|
||||
}
|
||||
|
||||
/// Tell lexer to read a template substitution tail
|
||||
|
|
|
|||
|
|
@ -180,7 +180,6 @@ impl<'a> Parser<'a> {
|
|||
}
|
||||
Kind::LParen => self.parse_parenthesized_expression(span),
|
||||
Kind::Slash | Kind::SlashEq => {
|
||||
self.read_regex();
|
||||
let literal = self.parse_literal_regexp();
|
||||
Ok(self.ast.literal_regexp_expression(literal))
|
||||
}
|
||||
|
|
@ -320,22 +319,10 @@ impl<'a> Parser<'a> {
|
|||
pub(crate) fn parse_literal_regexp(&mut self) -> RegExpLiteral {
|
||||
let span = self.start_span();
|
||||
|
||||
// split out the flag part of `/regex/flag` by looking for `/` from the end
|
||||
let regex_src = self.cur_src();
|
||||
let mut flags = RegExpFlags::empty();
|
||||
|
||||
let mut split_index = None;
|
||||
for (i, c) in regex_src.char_indices().rev() {
|
||||
if let Ok(flag) = RegExpFlags::try_from(c) {
|
||||
flags |= flag;
|
||||
} else {
|
||||
split_index.replace(i);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// `/` are omitted from the pattern
|
||||
let pattern = split_index.map_or(regex_src, |i| regex_src.get(1..i).unwrap_or(""));
|
||||
// split out pattern
|
||||
let (pattern_end, flags) = self.read_regex();
|
||||
let pattern_start = self.cur_token().start + 1; // +1 to exclude `/`
|
||||
let pattern = &self.source_text[pattern_start as usize..pattern_end as usize];
|
||||
|
||||
self.bump_any();
|
||||
self.ast.reg_exp_literal(self.end_span(span), pattern, flags)
|
||||
|
|
|
|||
|
|
@ -192,16 +192,17 @@ impl<'a> Lexer<'a> {
|
|||
/// where a `RegularExpressionLiteral` is permitted
|
||||
/// Which means the parser needs to re-tokenize on `PrimaryExpression`,
|
||||
/// `RegularExpressionLiteral` only appears on the right hand side of `PrimaryExpression`
|
||||
pub fn next_regex(&mut self, kind: Kind) -> Token {
|
||||
pub fn next_regex(&mut self, kind: Kind) -> (Token, u32, RegExpFlags) {
|
||||
self.current.token.start = self.offset()
|
||||
- match kind {
|
||||
Kind::Slash => 1,
|
||||
Kind::SlashEq => 2,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let kind = self.read_regex();
|
||||
let (pattern_end, flags) = self.read_regex();
|
||||
self.lookahead.clear();
|
||||
self.finish_next(kind)
|
||||
let token = self.finish_next(Kind::RegExp);
|
||||
(token, pattern_end, flags)
|
||||
}
|
||||
|
||||
pub fn next_right_angle(&mut self) -> Token {
|
||||
|
|
@ -828,18 +829,20 @@ impl<'a> Lexer<'a> {
|
|||
}
|
||||
|
||||
/// 12.9.5 Regular Expression Literals
|
||||
fn read_regex(&mut self) -> Kind {
|
||||
fn read_regex(&mut self) -> (u32, RegExpFlags) {
|
||||
let mut in_escape = false;
|
||||
let mut in_character_class = false;
|
||||
loop {
|
||||
match self.current.chars.next() {
|
||||
None => {
|
||||
self.error(diagnostics::UnterminatedRegExp(self.unterminated_range()));
|
||||
return Kind::Undetermined;
|
||||
return (self.offset(), RegExpFlags::empty());
|
||||
}
|
||||
Some(c) if is_line_terminator(c) => {
|
||||
self.error(diagnostics::UnterminatedRegExp(self.unterminated_range()));
|
||||
return Kind::Undetermined;
|
||||
#[allow(clippy::cast_possible_truncation)]
|
||||
let pattern_end = self.offset() - c.len_utf8() as u32;
|
||||
return (pattern_end, RegExpFlags::empty());
|
||||
}
|
||||
Some(c) => {
|
||||
if in_escape {
|
||||
|
|
@ -857,28 +860,29 @@ impl<'a> Lexer<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
let pattern_end = self.offset() - 1; // -1 to exclude `/`
|
||||
let mut flags = RegExpFlags::empty();
|
||||
|
||||
while let Some(ch @ ('$' | '_' | 'a'..='z' | 'A'..='Z' | '0'..='9')) = self.peek() {
|
||||
self.current.chars.next();
|
||||
if !ch.is_ascii_lowercase() {
|
||||
self.error(diagnostics::RegExpFlag(ch, self.current_offset()));
|
||||
return Kind::Undetermined;
|
||||
break;
|
||||
}
|
||||
let flag = if let Ok(flag) = RegExpFlags::try_from(ch) {
|
||||
flag
|
||||
} else {
|
||||
self.error(diagnostics::RegExpFlag(ch, self.current_offset()));
|
||||
return Kind::Undetermined;
|
||||
break;
|
||||
};
|
||||
if flags.contains(flag) {
|
||||
self.error(diagnostics::RegExpFlagTwice(ch, self.current_offset()));
|
||||
return Kind::Undetermined;
|
||||
break;
|
||||
}
|
||||
flags |= flag;
|
||||
}
|
||||
|
||||
Kind::RegExp
|
||||
(pattern_end, flags)
|
||||
}
|
||||
|
||||
/// 12.8.6 Template Literal Lexical Components
|
||||
|
|
|
|||
Loading…
Reference in a new issue