refactor(parser): reduce work parsing regexps (#1999)

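#1926 introduced a small performance regression because some work was
repeated when parsing a regexp: the lexer validated the flags, and the
parser then re-scanned the token source to split out the pattern and flags.
The lexer now returns the pattern end offset and the parsed flags, so the
parser no longer repeats that work.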
This commit is contained in:
overlookmotel 2024-01-12 03:36:30 +00:00 committed by GitHub
parent 74dfa3be8b
commit c7316856db
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 23 additions and 29 deletions

View file

@ -1,5 +1,6 @@
//! Code related to navigating `Token`s from the lexer
use oxc_ast::ast::RegExpFlags;
use oxc_diagnostics::Result;
use oxc_span::Span;
@ -200,8 +201,10 @@ impl<'a> Parser<'a> {
}
/// Tell lexer to read a regex
pub(crate) fn read_regex(&mut self) {
self.token = self.lexer.next_regex(self.cur_kind());
pub(crate) fn read_regex(&mut self) -> (u32, RegExpFlags) {
let (token, pattern_end, flags) = self.lexer.next_regex(self.cur_kind());
self.token = token;
(pattern_end, flags)
}
/// Tell lexer to read a template substitution tail

View file

@ -180,7 +180,6 @@ impl<'a> Parser<'a> {
}
Kind::LParen => self.parse_parenthesized_expression(span),
Kind::Slash | Kind::SlashEq => {
self.read_regex();
let literal = self.parse_literal_regexp();
Ok(self.ast.literal_regexp_expression(literal))
}
@ -320,22 +319,10 @@ impl<'a> Parser<'a> {
pub(crate) fn parse_literal_regexp(&mut self) -> RegExpLiteral {
let span = self.start_span();
// split out the flag part of `/regex/flag` by looking for `/` from the end
let regex_src = self.cur_src();
let mut flags = RegExpFlags::empty();
let mut split_index = None;
for (i, c) in regex_src.char_indices().rev() {
if let Ok(flag) = RegExpFlags::try_from(c) {
flags |= flag;
} else {
split_index.replace(i);
break;
}
}
// `/` are omitted from the pattern
let pattern = split_index.map_or(regex_src, |i| regex_src.get(1..i).unwrap_or(""));
// split out pattern
let (pattern_end, flags) = self.read_regex();
let pattern_start = self.cur_token().start + 1; // +1 to exclude `/`
let pattern = &self.source_text[pattern_start as usize..pattern_end as usize];
self.bump_any();
self.ast.reg_exp_literal(self.end_span(span), pattern, flags)

View file

@ -192,16 +192,17 @@ impl<'a> Lexer<'a> {
/// where a `RegularExpressionLiteral` is permitted
/// Which means the parser needs to re-tokenize on `PrimaryExpression`,
/// `RegularExpressionLiteral` only appear on the right hand side of `PrimaryExpression`
pub fn next_regex(&mut self, kind: Kind) -> Token {
pub fn next_regex(&mut self, kind: Kind) -> (Token, u32, RegExpFlags) {
self.current.token.start = self.offset()
- match kind {
Kind::Slash => 1,
Kind::SlashEq => 2,
_ => unreachable!(),
};
let kind = self.read_regex();
let (pattern_end, flags) = self.read_regex();
self.lookahead.clear();
self.finish_next(kind)
let token = self.finish_next(Kind::RegExp);
(token, pattern_end, flags)
}
pub fn next_right_angle(&mut self) -> Token {
@ -828,18 +829,20 @@ impl<'a> Lexer<'a> {
}
/// 12.9.5 Regular Expression Literals
fn read_regex(&mut self) -> Kind {
fn read_regex(&mut self) -> (u32, RegExpFlags) {
let mut in_escape = false;
let mut in_character_class = false;
loop {
match self.current.chars.next() {
None => {
self.error(diagnostics::UnterminatedRegExp(self.unterminated_range()));
return Kind::Undetermined;
return (self.offset(), RegExpFlags::empty());
}
Some(c) if is_line_terminator(c) => {
self.error(diagnostics::UnterminatedRegExp(self.unterminated_range()));
return Kind::Undetermined;
#[allow(clippy::cast_possible_truncation)]
let pattern_end = self.offset() - c.len_utf8() as u32;
return (pattern_end, RegExpFlags::empty());
}
Some(c) => {
if in_escape {
@ -857,28 +860,29 @@ impl<'a> Lexer<'a> {
}
}
let pattern_end = self.offset() - 1; // -1 to exclude `/`
let mut flags = RegExpFlags::empty();
while let Some(ch @ ('$' | '_' | 'a'..='z' | 'A'..='Z' | '0'..='9')) = self.peek() {
self.current.chars.next();
if !ch.is_ascii_lowercase() {
self.error(diagnostics::RegExpFlag(ch, self.current_offset()));
return Kind::Undetermined;
break;
}
let flag = if let Ok(flag) = RegExpFlags::try_from(ch) {
flag
} else {
self.error(diagnostics::RegExpFlag(ch, self.current_offset()));
return Kind::Undetermined;
break;
};
if flags.contains(flag) {
self.error(diagnostics::RegExpFlagTwice(ch, self.current_offset()));
return Kind::Undetermined;
break;
}
flags |= flag;
}
Kind::RegExp
(pattern_end, flags)
}
/// 12.8.6 Template Literal Lexical Components