mirror of
https://github.com/danbulant/oxc
synced 2026-05-24 12:21:58 +00:00
perf(parser): reduce Token size from 16 to 12 bytes (#2010)
I also had to change how the string for private identifiers is built; otherwise, it would always be allocated.
This commit is contained in:
parent
6996948825
commit
1886a5b838
2 changed files with 31 additions and 37 deletions
|
|
@ -11,6 +11,7 @@ mod string_builder;
|
||||||
mod token;
|
mod token;
|
||||||
mod trivia_builder;
|
mod trivia_builder;
|
||||||
|
|
||||||
|
use rustc_hash::FxHashMap;
|
||||||
use std::{collections::VecDeque, str::Chars};
|
use std::{collections::VecDeque, str::Chars};
|
||||||
|
|
||||||
use oxc_allocator::{Allocator, String};
|
use oxc_allocator::{Allocator, String};
|
||||||
|
|
@ -30,7 +31,7 @@ pub use self::{
|
||||||
number::{parse_big_int, parse_float, parse_int},
|
number::{parse_big_int, parse_float, parse_int},
|
||||||
token::Token,
|
token::Token,
|
||||||
};
|
};
|
||||||
use self::{string_builder::AutoCow, token::EscapedId, trivia_builder::TriviaBuilder};
|
use self::{string_builder::AutoCow, trivia_builder::TriviaBuilder};
|
||||||
use crate::{diagnostics, MAX_LEN};
|
use crate::{diagnostics, MAX_LEN};
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
|
|
@ -67,11 +68,12 @@ pub struct Lexer<'a> {
|
||||||
|
|
||||||
pub(crate) trivia_builder: TriviaBuilder,
|
pub(crate) trivia_builder: TriviaBuilder,
|
||||||
|
|
||||||
/// Data store for escaped strings, indexed by `Token.escaped_string_id`
|
/// Data store for escaped strings, indexed by [Token::start] when [Token::escaped] is true
|
||||||
escaped_strings: Vec<&'a str>,
|
pub escaped_strings: FxHashMap<u32, &'a str>,
|
||||||
/// Data store for escaped templates, indexed by `Token.escaped_string_id`
|
|
||||||
|
/// Data store for escaped templates, indexed by [Token::start] when [Token::escaped] is true
|
||||||
/// `None` is saved when the string contains an invalid escape sequence.
|
/// `None` is saved when the string contains an invalid escape sequence.
|
||||||
escaped_templates: Vec<Option<&'a str>>,
|
pub escaped_templates: FxHashMap<u32, Option<&'a str>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[allow(clippy::unused_self)]
|
#[allow(clippy::unused_self)]
|
||||||
|
|
@ -97,8 +99,8 @@ impl<'a> Lexer<'a> {
|
||||||
lookahead: VecDeque::with_capacity(4), // 4 is the maximum lookahead for TypeScript
|
lookahead: VecDeque::with_capacity(4), // 4 is the maximum lookahead for TypeScript
|
||||||
context: LexerContext::Regular,
|
context: LexerContext::Regular,
|
||||||
trivia_builder: TriviaBuilder::default(),
|
trivia_builder: TriviaBuilder::default(),
|
||||||
escaped_strings: vec![],
|
escaped_strings: FxHashMap::default(),
|
||||||
escaped_templates: vec![],
|
escaped_templates: FxHashMap::default(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -309,21 +311,17 @@ impl<'a> Lexer<'a> {
|
||||||
/// Save the string if it is escaped
|
/// Save the string if it is escaped
|
||||||
/// This reduces the overall memory consumption while keeping the `Token` size small
|
/// This reduces the overall memory consumption while keeping the `Token` size small
|
||||||
/// Strings without escaped values can be retrieved as is from the token span
|
/// Strings without escaped values can be retrieved as is from the token span
|
||||||
#[allow(clippy::cast_possible_truncation)]
|
|
||||||
fn save_string(&mut self, has_escape: bool, s: &'a str) {
|
fn save_string(&mut self, has_escape: bool, s: &'a str) {
|
||||||
if !has_escape {
|
if !has_escape {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
self.escaped_strings.push(s);
|
self.escaped_strings.insert(self.current.token.start, s);
|
||||||
let escaped_string_id = self.escaped_strings.len() as u32;
|
self.current.token.escaped = true;
|
||||||
// SAFETY: escaped_string_id is the length of `self.escaped_strings` after an item is pushed, which can never be 0
|
|
||||||
let escaped_string_id = unsafe { EscapedId::new_unchecked(escaped_string_id) };
|
|
||||||
self.current.token.escaped_id.replace(escaped_string_id);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn get_string(&self, token: Token) -> &'a str {
|
pub(crate) fn get_string(&self, token: Token) -> &'a str {
|
||||||
if let Some(escaped_id) = token.escaped_id {
|
if token.escaped {
|
||||||
return self.escaped_strings[escaped_id.get() as usize - 1];
|
return self.escaped_strings[&token.start];
|
||||||
}
|
}
|
||||||
|
|
||||||
let raw = &self.source[token.start as usize..token.end as usize];
|
let raw = &self.source[token.start as usize..token.end as usize];
|
||||||
|
|
@ -331,12 +329,14 @@ impl<'a> Lexer<'a> {
|
||||||
Kind::Str => {
|
Kind::Str => {
|
||||||
&raw[1..raw.len() - 1] // omit surrounding quotes
|
&raw[1..raw.len() - 1] // omit surrounding quotes
|
||||||
}
|
}
|
||||||
|
Kind::PrivateIdentifier => {
|
||||||
|
&raw[1..] // omit leading `#`
|
||||||
|
}
|
||||||
_ => raw,
|
_ => raw,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Save the template if it is escaped
|
/// Save the template if it is escaped
|
||||||
#[allow(clippy::cast_possible_truncation)]
|
|
||||||
fn save_template_string(
|
fn save_template_string(
|
||||||
&mut self,
|
&mut self,
|
||||||
is_valid_escape_sequence: bool,
|
is_valid_escape_sequence: bool,
|
||||||
|
|
@ -346,18 +346,15 @@ impl<'a> Lexer<'a> {
|
||||||
if !has_escape {
|
if !has_escape {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
self.escaped_templates.push(is_valid_escape_sequence.then(|| s));
|
self.escaped_templates
|
||||||
let escaped_template_id = self.escaped_templates.len() as u32;
|
.insert(self.current.token.start, is_valid_escape_sequence.then(|| s));
|
||||||
// SAFETY: escaped_string_id is the length of `self.escaped_strings` after an item is pushed, which can never be 0
|
self.current.token.escaped = true;
|
||||||
let escaped_template_id = unsafe { EscapedId::new_unchecked(escaped_template_id) };
|
|
||||||
self.current.token.escaped_id.replace(escaped_template_id);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn get_template_string(&self, token: Token) -> Option<&'a str> {
|
pub(crate) fn get_template_string(&self, token: Token) -> Option<&'a str> {
|
||||||
if let Some(escaped_id) = token.escaped_id {
|
if token.escaped {
|
||||||
return self.escaped_templates[escaped_id.get() as usize - 1];
|
return self.escaped_templates[&token.start];
|
||||||
}
|
}
|
||||||
|
|
||||||
let raw = &self.source[token.start as usize..token.end as usize];
|
let raw = &self.source[token.start as usize..token.end as usize];
|
||||||
Some(match token.kind {
|
Some(match token.kind {
|
||||||
Kind::NoSubstitutionTemplate | Kind::TemplateTail => {
|
Kind::NoSubstitutionTemplate | Kind::TemplateTail => {
|
||||||
|
|
@ -580,7 +577,8 @@ impl<'a> Lexer<'a> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn private_identifier(&mut self, mut builder: AutoCow<'a>) -> Kind {
|
fn private_identifier(&mut self) -> Kind {
|
||||||
|
let mut builder = AutoCow::new(self);
|
||||||
let start = self.offset();
|
let start = self.offset();
|
||||||
match self.current.chars.next() {
|
match self.current.chars.next() {
|
||||||
Some(c) if is_identifier_start_all(c) => {
|
Some(c) if is_identifier_start_all(c) => {
|
||||||
|
|
@ -1365,16 +1363,13 @@ const QOT: ByteHandler = |lexer| {
|
||||||
|
|
||||||
// #
|
// #
|
||||||
const HAS: ByteHandler = |lexer| {
|
const HAS: ByteHandler = |lexer| {
|
||||||
let mut builder = AutoCow::new(lexer);
|
lexer.consume_char();
|
||||||
let c = lexer.consume_char();
|
|
||||||
builder.push_matching(c);
|
|
||||||
// HashbangComment ::
|
// HashbangComment ::
|
||||||
// `#!` SingleLineCommentChars?
|
// `#!` SingleLineCommentChars?
|
||||||
if lexer.current.token.start == 0 && lexer.next_eq('!') {
|
if lexer.current.token.start == 0 && lexer.next_eq('!') {
|
||||||
lexer.read_hashbang_comment()
|
lexer.read_hashbang_comment()
|
||||||
} else {
|
} else {
|
||||||
builder.get_mut_string_without_current_ascii_char(lexer);
|
lexer.private_identifier()
|
||||||
lexer.private_identifier(builder)
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -4,8 +4,6 @@ use oxc_span::Span;
|
||||||
|
|
||||||
use super::kind::Kind;
|
use super::kind::Kind;
|
||||||
|
|
||||||
pub type EscapedId = std::num::NonZeroU32;
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, Default)]
|
#[derive(Debug, Clone, Copy, Default)]
|
||||||
pub struct Token {
|
pub struct Token {
|
||||||
/// Token Kind
|
/// Token Kind
|
||||||
|
|
@ -20,14 +18,15 @@ pub struct Token {
|
||||||
/// Indicates the token is on a newline
|
/// Indicates the token is on a newline
|
||||||
pub is_on_new_line: bool,
|
pub is_on_new_line: bool,
|
||||||
|
|
||||||
/// A index handle to `Lexer::escaped_strings` or `Lexer::escaped_templates`
|
/// True if the identifier / string / template kinds has escaped strings.
|
||||||
/// See https://floooh.github.io/2018/06/17/handles-vs-pointers.html for some background reading
|
/// The escaped strings are saved in [Lexer::escaped_strings] and [Lexer::escaped_templates] by
|
||||||
pub escaped_id: Option<EscapedId>,
|
/// [Token::start]
|
||||||
|
pub escaped: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(target_pointer_width = "64")]
|
#[cfg(target_pointer_width = "64")]
|
||||||
mod size_asserts {
|
mod size_asserts {
|
||||||
oxc_index::assert_eq_size!(super::Token, [u8; 16]);
|
oxc_index::assert_eq_size!(super::Token, [u8; 12]);
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Token {
|
impl Token {
|
||||||
|
|
@ -36,6 +35,6 @@ impl Token {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn escaped(&self) -> bool {
|
pub fn escaped(&self) -> bool {
|
||||||
self.escaped_id.is_some()
|
self.escaped
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue