mirror of
https://github.com/danbulant/oxc
synced 2026-05-25 12:51:57 +00:00
refactor(parser): only allocate for escaped template strings (#2005)
This commit is contained in:
parent
38f86b0cac
commit
aa91fde1d9
4 changed files with 68 additions and 29 deletions
|
|
@ -52,6 +52,11 @@ impl<'a> Parser<'a> {
|
||||||
self.lexer.get_string(self.token)
|
self.lexer.get_string(self.token)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Get current template string
|
||||||
|
pub(crate) fn cur_template_string(&self) -> Option<&'a str> {
|
||||||
|
self.lexer.get_template_string(self.token)
|
||||||
|
}
|
||||||
|
|
||||||
/// Peek next token, returns EOF for final peek
|
/// Peek next token, returns EOF for final peek
|
||||||
pub(crate) fn peek_token(&mut self) -> Token {
|
pub(crate) fn peek_token(&mut self) -> Token {
|
||||||
self.lexer.lookahead(1)
|
self.lexer.lookahead(1)
|
||||||
|
|
|
||||||
|
|
@ -437,9 +437,10 @@ impl<'a> Parser<'a> {
|
||||||
|
|
||||||
// `cooked = None` when template literal has invalid escape sequence
|
// `cooked = None` when template literal has invalid escape sequence
|
||||||
// This is matched by `is_valid_escape_sequence` in `Lexer::read_template_literal`
|
// This is matched by `is_valid_escape_sequence` in `Lexer::read_template_literal`
|
||||||
let cooked = self.cur_token().escaped_string_id.map(|_| self.cur_string());
|
let cooked = self.cur_template_string();
|
||||||
|
|
||||||
let raw = &self.cur_src()[1..self.cur_src().len() - end_offset as usize];
|
let cur_src = self.cur_src();
|
||||||
|
let raw = &cur_src[1..cur_src.len() - end_offset as usize];
|
||||||
let raw = Atom::from(if cooked.is_some() && raw.contains('\r') {
|
let raw = Atom::from(if cooked.is_some() && raw.contains('\r') {
|
||||||
self.ast.new_str(raw.replace("\r\n", "\n").replace('\r', "\n").as_str())
|
self.ast.new_str(raw.replace("\r\n", "\n").replace('\r', "\n").as_str())
|
||||||
} else {
|
} else {
|
||||||
|
|
|
||||||
|
|
@ -30,7 +30,7 @@ pub use self::{
|
||||||
number::{parse_big_int, parse_float, parse_int},
|
number::{parse_big_int, parse_float, parse_int},
|
||||||
token::Token,
|
token::Token,
|
||||||
};
|
};
|
||||||
use self::{string_builder::AutoCow, token::EscapedStringId, trivia_builder::TriviaBuilder};
|
use self::{string_builder::AutoCow, token::EscapedId, trivia_builder::TriviaBuilder};
|
||||||
use crate::{diagnostics, MAX_LEN};
|
use crate::{diagnostics, MAX_LEN};
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
|
|
@ -69,6 +69,9 @@ pub struct Lexer<'a> {
|
||||||
|
|
||||||
/// Data store for escaped strings, indexed by `Token.escaped_string_id`
|
/// Data store for escaped strings, indexed by `Token.escaped_id`
|
||||||
escaped_strings: Vec<&'a str>,
|
escaped_strings: Vec<&'a str>,
|
||||||
|
/// Data store for escaped templates, indexed by `Token.escaped_id`
|
||||||
|
/// `None` is saved when the string contains an invalid escape sequence.
|
||||||
|
escaped_templates: Vec<Option<&'a str>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[allow(clippy::unused_self)]
|
#[allow(clippy::unused_self)]
|
||||||
|
|
@ -95,6 +98,7 @@ impl<'a> Lexer<'a> {
|
||||||
context: LexerContext::Regular,
|
context: LexerContext::Regular,
|
||||||
trivia_builder: TriviaBuilder::default(),
|
trivia_builder: TriviaBuilder::default(),
|
||||||
escaped_strings: vec![],
|
escaped_strings: vec![],
|
||||||
|
escaped_templates: vec![],
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -313,33 +317,59 @@ impl<'a> Lexer<'a> {
|
||||||
self.escaped_strings.push(s);
|
self.escaped_strings.push(s);
|
||||||
let escaped_string_id = self.escaped_strings.len() as u32;
|
let escaped_string_id = self.escaped_strings.len() as u32;
|
||||||
// SAFETY: escaped_string_id is the length of `self.escaped_strings` after an item is pushed, which can never be 0
|
// SAFETY: escaped_string_id is the length of `self.escaped_strings` after an item is pushed, which can never be 0
|
||||||
let escaped_string_id = unsafe { EscapedStringId::new_unchecked(escaped_string_id) };
|
let escaped_string_id = unsafe { EscapedId::new_unchecked(escaped_string_id) };
|
||||||
self.current.token.escaped_string_id.replace(escaped_string_id);
|
self.current.token.escaped_id.replace(escaped_string_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn get_string(&self, token: Token) -> &'a str {
|
pub(crate) fn get_string(&self, token: Token) -> &'a str {
|
||||||
if let Some(escaped_string_id) = token.escaped_string_id {
|
if let Some(escaped_id) = token.escaped_id {
|
||||||
return self.escaped_strings[escaped_string_id.get() as usize - 1];
|
return self.escaped_strings[escaped_id.get() as usize - 1];
|
||||||
}
|
}
|
||||||
|
|
||||||
let raw = &self.source[token.start as usize..token.end as usize];
|
let raw = &self.source[token.start as usize..token.end as usize];
|
||||||
match token.kind {
|
match token.kind {
|
||||||
Kind::Str | Kind::NoSubstitutionTemplate => {
|
Kind::Str => {
|
||||||
// omit surrounding quotes
|
&raw[1..raw.len() - 1] // omit surrounding quotes
|
||||||
&raw[1..raw.len() - 1]
|
|
||||||
}
|
|
||||||
Kind::TemplateHead => {
|
|
||||||
// omit leading "`${"
|
|
||||||
&raw[3..]
|
|
||||||
}
|
|
||||||
Kind::TemplateTail => {
|
|
||||||
// omit trailing "$`"
|
|
||||||
&raw[..raw.len() - 2]
|
|
||||||
}
|
}
|
||||||
_ => raw,
|
_ => raw,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Save the cooked template string only if it contains an escape; `None` is stored when the escape sequence is invalid
|
||||||
|
#[allow(clippy::cast_possible_truncation)]
|
||||||
|
fn save_template_string(
|
||||||
|
&mut self,
|
||||||
|
is_valid_escape_sequence: bool,
|
||||||
|
has_escape: bool,
|
||||||
|
s: &'a str,
|
||||||
|
) {
|
||||||
|
if !has_escape {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
self.escaped_templates.push(is_valid_escape_sequence.then(|| s));
|
||||||
|
let escaped_template_id = self.escaped_templates.len() as u32;
|
||||||
|
// SAFETY: escaped_template_id is the length of `self.escaped_templates` after an item is pushed, which can never be 0
|
||||||
|
let escaped_template_id = unsafe { EscapedId::new_unchecked(escaped_template_id) };
|
||||||
|
self.current.token.escaped_id.replace(escaped_template_id);
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn get_template_string(&self, token: Token) -> Option<&'a str> {
|
||||||
|
if let Some(escaped_id) = token.escaped_id {
|
||||||
|
return self.escaped_templates[escaped_id.get() as usize - 1];
|
||||||
|
}
|
||||||
|
|
||||||
|
let raw = &self.source[token.start as usize..token.end as usize];
|
||||||
|
Some(match token.kind {
|
||||||
|
Kind::NoSubstitutionTemplate | Kind::TemplateTail => {
|
||||||
|
&raw[1..raw.len() - 1] // omit surrounding quotes or leading "}" and trailing "`"
|
||||||
|
}
|
||||||
|
Kind::TemplateHead | Kind::TemplateMiddle => {
|
||||||
|
&raw[1..raw.len() - 2] // omit leading "`" or "}" and trailing "${"
|
||||||
|
}
|
||||||
|
_ => raw,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
/// Read each char and set the current token
|
/// Read each char and set the current token
|
||||||
/// Whitespace and line terminators are skipped
|
/// Whitespace and line terminators are skipped
|
||||||
fn read_next_token(&mut self) -> Kind {
|
fn read_next_token(&mut self) -> Kind {
|
||||||
|
|
@ -867,16 +897,20 @@ impl<'a> Lexer<'a> {
|
||||||
while let Some(c) = self.current.chars.next() {
|
while let Some(c) = self.current.chars.next() {
|
||||||
match c {
|
match c {
|
||||||
'$' if self.peek() == Some('{') => {
|
'$' if self.peek() == Some('{') => {
|
||||||
if is_valid_escape_sequence {
|
self.save_template_string(
|
||||||
self.save_string(true, builder.finish_without_push(self));
|
is_valid_escape_sequence,
|
||||||
}
|
builder.has_escape(),
|
||||||
|
builder.finish_without_push(self),
|
||||||
|
);
|
||||||
self.current.chars.next();
|
self.current.chars.next();
|
||||||
return substitute;
|
return substitute;
|
||||||
}
|
}
|
||||||
'`' => {
|
'`' => {
|
||||||
if is_valid_escape_sequence {
|
self.save_template_string(
|
||||||
self.save_string(true, builder.finish_without_push(self));
|
is_valid_escape_sequence,
|
||||||
}
|
builder.has_escape(),
|
||||||
|
builder.finish_without_push(self),
|
||||||
|
);
|
||||||
return tail;
|
return tail;
|
||||||
}
|
}
|
||||||
CR => {
|
CR => {
|
||||||
|
|
@ -888,7 +922,6 @@ impl<'a> Lexer<'a> {
|
||||||
'\\' => {
|
'\\' => {
|
||||||
let text = builder.get_mut_string_without_current_ascii_char(self);
|
let text = builder.get_mut_string_without_current_ascii_char(self);
|
||||||
self.read_string_escape_sequence(text, true, &mut is_valid_escape_sequence);
|
self.read_string_escape_sequence(text, true, &mut is_valid_escape_sequence);
|
||||||
if !is_valid_escape_sequence {}
|
|
||||||
}
|
}
|
||||||
_ => builder.push_matching(c),
|
_ => builder.push_matching(c),
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,7 @@ use oxc_span::Span;
|
||||||
|
|
||||||
use super::kind::Kind;
|
use super::kind::Kind;
|
||||||
|
|
||||||
pub type EscapedStringId = std::num::NonZeroU32;
|
pub type EscapedId = std::num::NonZeroU32;
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, Default)]
|
#[derive(Debug, Clone, Copy, Default)]
|
||||||
pub struct Token {
|
pub struct Token {
|
||||||
|
|
@ -20,9 +20,9 @@ pub struct Token {
|
||||||
/// Indicates the token is on a newline
|
/// Indicates the token is on a newline
|
||||||
pub is_on_new_line: bool,
|
pub is_on_new_line: bool,
|
||||||
|
|
||||||
/// A index handle to `Lexer::escaped_strings`
|
/// An index handle to `Lexer::escaped_strings` or `Lexer::escaped_templates`
|
||||||
/// See https://floooh.github.io/2018/06/17/handles-vs-pointers.html for some background reading
|
/// See https://floooh.github.io/2018/06/17/handles-vs-pointers.html for some background reading
|
||||||
pub escaped_string_id: Option<EscapedStringId>,
|
pub escaped_id: Option<EscapedId>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(target_pointer_width = "64")]
|
#[cfg(target_pointer_width = "64")]
|
||||||
|
|
@ -36,6 +36,6 @@ impl Token {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn escaped(&self) -> bool {
|
pub fn escaped(&self) -> bool {
|
||||||
self.escaped_string_id.is_some()
|
self.escaped_id.is_some()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue