From aa91fde1d956c0bff21036d834a1901597248fc5 Mon Sep 17 00:00:00 2001 From: Boshen Date: Fri, 12 Jan 2024 18:56:36 +0800 Subject: [PATCH] refactor(parser): only allocate for escaped template strings (#2005) --- crates/oxc_parser/src/cursor.rs | 5 ++ crates/oxc_parser/src/js/expression.rs | 5 +- crates/oxc_parser/src/lexer/mod.rs | 79 ++++++++++++++++++-------- crates/oxc_parser/src/lexer/token.rs | 8 +-- 4 files changed, 68 insertions(+), 29 deletions(-) diff --git a/crates/oxc_parser/src/cursor.rs b/crates/oxc_parser/src/cursor.rs index 79511adcb..12ee4bdee 100644 --- a/crates/oxc_parser/src/cursor.rs +++ b/crates/oxc_parser/src/cursor.rs @@ -52,6 +52,11 @@ impl<'a> Parser<'a> { self.lexer.get_string(self.token) } + /// Get current template string + pub(crate) fn cur_template_string(&self) -> Option<&'a str> { + self.lexer.get_template_string(self.token) + } + /// Peek next token, returns EOF for final peek pub(crate) fn peek_token(&mut self) -> Token { self.lexer.lookahead(1) diff --git a/crates/oxc_parser/src/js/expression.rs b/crates/oxc_parser/src/js/expression.rs index ab8f069da..ff349f8a4 100644 --- a/crates/oxc_parser/src/js/expression.rs +++ b/crates/oxc_parser/src/js/expression.rs @@ -437,9 +437,10 @@ impl<'a> Parser<'a> { // `cooked = None` when template literal has invalid escape sequence // This is matched by `is_valid_escape_sequence` in `Lexer::read_template_literal` - let cooked = self.cur_token().escaped_string_id.map(|_| self.cur_string()); + let cooked = self.cur_template_string(); - let raw = &self.cur_src()[1..self.cur_src().len() - end_offset as usize]; + let cur_src = self.cur_src(); + let raw = &cur_src[1..cur_src.len() - end_offset as usize]; let raw = Atom::from(if cooked.is_some() && raw.contains('\r') { self.ast.new_str(raw.replace("\r\n", "\n").replace('\r', "\n").as_str()) } else { diff --git a/crates/oxc_parser/src/lexer/mod.rs b/crates/oxc_parser/src/lexer/mod.rs index 45da9a39c..215a3ff49 100644 --- 
a/crates/oxc_parser/src/lexer/mod.rs +++ b/crates/oxc_parser/src/lexer/mod.rs @@ -30,7 +30,7 @@ pub use self::{ number::{parse_big_int, parse_float, parse_int}, token::Token, }; -use self::{string_builder::AutoCow, token::EscapedStringId, trivia_builder::TriviaBuilder}; +use self::{string_builder::AutoCow, token::EscapedId, trivia_builder::TriviaBuilder}; use crate::{diagnostics, MAX_LEN}; #[derive(Debug, Clone)] @@ -69,6 +69,9 @@ pub struct Lexer<'a> { /// Data store for escaped strings, indexed by `Token.escaped_string_id` escaped_strings: Vec<&'a str>, + /// Data store for escaped templates, indexed by `Token.escaped_id` + /// `None` is saved when the string contains an invalid escape sequence. + escaped_templates: Vec<Option<&'a str>>, } #[allow(clippy::unused_self)] @@ -95,6 +98,7 @@ impl<'a> Lexer<'a> { context: LexerContext::Regular, trivia_builder: TriviaBuilder::default(), escaped_strings: vec![], + escaped_templates: vec![], } } @@ -313,33 +317,59 @@ impl<'a> Lexer<'a> { self.escaped_strings.push(s); let escaped_string_id = self.escaped_strings.len() as u32; // SAFETY: escaped_string_id is the length of `self.escaped_strings` after an item is pushed, which can never be 0 - let escaped_string_id = unsafe { EscapedStringId::new_unchecked(escaped_string_id) }; - self.current.token.escaped_string_id.replace(escaped_string_id); + let escaped_string_id = unsafe { EscapedId::new_unchecked(escaped_string_id) }; + self.current.token.escaped_id.replace(escaped_string_id); } pub(crate) fn get_string(&self, token: Token) -> &'a str { - if let Some(escaped_string_id) = token.escaped_string_id { - return self.escaped_strings[escaped_string_id.get() as usize - 1]; + if let Some(escaped_id) = token.escaped_id { + return self.escaped_strings[escaped_id.get() as usize - 1]; } let raw = &self.source[token.start as usize..token.end as usize]; match token.kind { - Kind::Str | Kind::NoSubstitutionTemplate => { - // omit surrounding quotes - &raw[1..raw.len() - 1] - } - 
Kind::TemplateHead => { - // omit leading "`${" - &raw[3..] - } - Kind::TemplateTail => { - // omit trailing "$`" - &raw[..raw.len() - 2] + Kind::Str => { + &raw[1..raw.len() - 1] // omit surrounding quotes } _ => raw, } } + /// Save the template if it is escaped + #[allow(clippy::cast_possible_truncation)] + fn save_template_string( + &mut self, + is_valid_escape_sequence: bool, + has_escape: bool, + s: &'a str, + ) { + if !has_escape { + return; + } + self.escaped_templates.push(is_valid_escape_sequence.then(|| s)); + let escaped_template_id = self.escaped_templates.len() as u32; + // SAFETY: escaped_template_id is the length of `self.escaped_templates` after an item is pushed, which can never be 0 + let escaped_template_id = unsafe { EscapedId::new_unchecked(escaped_template_id) }; + self.current.token.escaped_id.replace(escaped_template_id); + } + + pub(crate) fn get_template_string(&self, token: Token) -> Option<&'a str> { + if let Some(escaped_id) = token.escaped_id { + return self.escaped_templates[escaped_id.get() as usize - 1]; + } + + let raw = &self.source[token.start as usize..token.end as usize]; + Some(match token.kind { + Kind::NoSubstitutionTemplate | Kind::TemplateTail => { + &raw[1..raw.len() - 1] // omit surrounding quotes or leading "}" and trailing "`" + } + Kind::TemplateHead | Kind::TemplateMiddle => { + &raw[1..raw.len() - 2] // omit leading "`" or "}" and trailing "${" + } + _ => raw, + }) + } + /// Read each char and set the current token /// Whitespace and line terminators are skipped fn read_next_token(&mut self) -> Kind { @@ -867,16 +897,20 @@ impl<'a> Lexer<'a> { while let Some(c) = self.current.chars.next() { match c { '$' if self.peek() == Some('{') => { - if is_valid_escape_sequence { - self.save_string(true, builder.finish_without_push(self)); - } + self.save_template_string( + is_valid_escape_sequence, + builder.has_escape(), + builder.finish_without_push(self), + ); self.current.chars.next(); return substitute; } '`' => { - if 
is_valid_escape_sequence { - self.save_string(true, builder.finish_without_push(self)); - } + self.save_template_string( + is_valid_escape_sequence, + builder.has_escape(), + builder.finish_without_push(self), + ); return tail; } CR => { @@ -888,7 +922,6 @@ impl<'a> Lexer<'a> { '\\' => { let text = builder.get_mut_string_without_current_ascii_char(self); self.read_string_escape_sequence(text, true, &mut is_valid_escape_sequence); - if !is_valid_escape_sequence {} } _ => builder.push_matching(c), } diff --git a/crates/oxc_parser/src/lexer/token.rs b/crates/oxc_parser/src/lexer/token.rs index e68869262..a3fdae4f5 100644 --- a/crates/oxc_parser/src/lexer/token.rs +++ b/crates/oxc_parser/src/lexer/token.rs @@ -4,7 +4,7 @@ use oxc_span::Span; use super::kind::Kind; -pub type EscapedStringId = std::num::NonZeroU32; +pub type EscapedId = std::num::NonZeroU32; #[derive(Debug, Clone, Copy, Default)] pub struct Token { @@ -20,9 +20,9 @@ pub struct Token { /// Indicates the token is on a newline pub is_on_new_line: bool, - /// A index handle to `Lexer::escaped_strings` + /// An index handle to `Lexer::escaped_strings` or `Lexer::escaped_templates` /// See https://floooh.github.io/2018/06/17/handles-vs-pointers.html for some background reading - pub escaped_string_id: Option<EscapedStringId>, + pub escaped_id: Option<EscapedId>, } #[cfg(target_pointer_width = "64")] @@ -36,6 +36,6 @@ impl Token { } pub fn escaped(&self) -> bool { - self.escaped_string_id.is_some() + self.escaped_id.is_some() } }