refactor(parser): reduce Token size from 32 to 16 bytes (#1962)

Part of #1880

`Token` size is reduced from 32 to 16 bytes by changing the previous
token value `Option<&'a str>` to a u32 index handle.

It would be nice if this handle could be eliminated entirely, because
in the normal case the string is simply
`&source_text[token.span.start..token.span.end]`

Unfortunately, JavaScript allows escaped characters to appear in
identifiers, strings and templates. These strings need to be unescaped
for equality checks, e.g. `"\a" === "a"`.

This leads us to add an `escaped_strings[]` vec for storing these
unescaped, allocated strings.

The performance regression from adding this vec should be minimal because
escaped strings are rare.

Background Reading:

* https://floooh.github.io/2018/06/17/handles-vs-pointers.html
This commit is contained in:
Boshen 2024-01-09 15:17:02 +08:00 committed by GitHub
parent 66e95a5968
commit 4706765d2a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 111 additions and 104 deletions

View file

@ -11,7 +11,7 @@ use crate::{
pub struct ParserCheckpoint<'a> { pub struct ParserCheckpoint<'a> {
lexer: LexerCheckpoint<'a>, lexer: LexerCheckpoint<'a>,
cur_token: Token<'a>, cur_token: Token,
prev_span_end: u32, prev_span_end: u32,
errors_pos: usize, errors_pos: usize,
} }
@ -29,8 +29,8 @@ impl<'a> Parser<'a> {
} }
/// Get current token /// Get current token
pub(crate) fn cur_token(&self) -> &Token<'a> { pub(crate) fn cur_token(&self) -> Token {
&self.token self.token
} }
/// Get current Kind /// Get current Kind
@ -47,12 +47,12 @@ impl<'a> Parser<'a> {
} }
/// Get current string /// Get current string
pub(crate) fn cur_string(&self) -> Option<&str> { pub(crate) fn cur_string(&self) -> &'a str {
self.cur_token().value.get_string() self.lexer.get_string(self.token)
} }
/// Peek next token, returns EOF for final peek /// Peek next token, returns EOF for final peek
pub(crate) fn peek_token(&mut self) -> &Token { pub(crate) fn peek_token(&mut self) -> Token {
self.lexer.lookahead(1) self.lexer.lookahead(1)
} }
@ -67,7 +67,7 @@ impl<'a> Parser<'a> {
} }
/// Peek nth token /// Peek nth token
pub(crate) fn nth(&mut self, n: u8) -> &Token { pub(crate) fn nth(&mut self, n: u8) -> Token {
if n == 0 { if n == 0 {
return self.cur_token(); return self.cur_token();
} }
@ -94,7 +94,7 @@ impl<'a> Parser<'a> {
/// whose code point sequence is the same as a `ReservedWord`. /// whose code point sequence is the same as a `ReservedWord`.
#[inline] #[inline]
fn test_escaped_keyword(&mut self, kind: Kind) { fn test_escaped_keyword(&mut self, kind: Kind) {
if self.cur_token().escaped && kind.is_all_keyword() { if self.cur_token().escaped() && kind.is_all_keyword() {
let span = self.cur_token().span(); let span = self.cur_token().span();
self.error(diagnostics::EscapedKeyword(span)); self.error(diagnostics::EscapedKeyword(span));
} }

View file

@ -17,8 +17,7 @@ use super::{
}; };
use crate::{ use crate::{
diagnostics, diagnostics,
lexer::{parse_big_int, parse_float, parse_int}, lexer::{parse_big_int, parse_float, parse_int, Kind},
lexer::{Kind, TokenValue},
list::SeparatedList, list::SeparatedList,
Context, Parser, Context, Parser,
}; };
@ -96,10 +95,7 @@ impl<'a> Parser<'a> {
pub(crate) fn parse_identifier_kind(&mut self, kind: Kind) -> (Span, Atom) { pub(crate) fn parse_identifier_kind(&mut self, kind: Kind) -> (Span, Atom) {
let span = self.start_span(); let span = self.start_span();
let name = match std::mem::take(&mut self.token.value) { let name = self.cur_string();
TokenValue::String(value) => value,
TokenValue::None => "",
};
self.bump_remap(kind); self.bump_remap(kind);
(self.end_span(span), Atom::from(name)) (self.end_span(span), Atom::from(name))
} }
@ -121,7 +117,7 @@ impl<'a> Parser<'a> {
/// # Panics /// # Panics
pub(crate) fn parse_private_identifier(&mut self) -> PrivateIdentifier { pub(crate) fn parse_private_identifier(&mut self) -> PrivateIdentifier {
let span = self.start_span(); let span = self.start_span();
let name = Atom::from(self.cur_string().unwrap()); let name = Atom::from(self.cur_string());
self.bump_any(); self.bump_any();
PrivateIdentifier { span: self.end_span(span), name } PrivateIdentifier { span: self.end_span(span), name }
} }
@ -349,9 +345,7 @@ impl<'a> Parser<'a> {
if !self.at(Kind::Str) { if !self.at(Kind::Str) {
return Err(self.unexpected()); return Err(self.unexpected());
} }
let TokenValue::String(value) = std::mem::take(&mut self.token.value) else { let value = self.cur_string();
unreachable!()
};
let span = self.start_span(); let span = self.start_span();
self.bump_any(); self.bump_any();
Ok(StringLiteral { span: self.end_span(span), value: value.into() }) Ok(StringLiteral { span: self.end_span(span), value: value.into() })
@ -454,8 +448,9 @@ impl<'a> Parser<'a> {
_ => unreachable!(), _ => unreachable!(),
}; };
// cooked = None when template literal has invalid escape sequence // `cooked = None` when template literal has invalid escape sequence
let cooked = self.cur_string().map(Atom::from); // This is matched by `is_valid_escape_sequence` in `Lexer::read_template_literal`
let cooked = self.cur_token().escaped_string_id.map(|_| self.cur_string());
let raw = &self.cur_src()[1..self.cur_src().len() - end_offset as usize]; let raw = &self.cur_src()[1..self.cur_src().len() - end_offset as usize];
let raw = Atom::from(if cooked.is_some() && raw.contains('\r') { let raw = Atom::from(if cooked.is_some() && raw.contains('\r') {
@ -475,7 +470,11 @@ impl<'a> Parser<'a> {
} }
let tail = matches!(cur_kind, Kind::TemplateTail | Kind::NoSubstitutionTemplate); let tail = matches!(cur_kind, Kind::TemplateTail | Kind::NoSubstitutionTemplate);
TemplateElement { span, tail, value: TemplateElementValue { raw, cooked } } TemplateElement {
span,
tail,
value: TemplateElementValue { raw, cooked: cooked.map(Atom::from) },
}
} }
/// Section 13.3 Meta Property /// Section 13.3 Meta Property

View file

@ -50,7 +50,7 @@ impl<'a> Parser<'a> {
} }
pub(crate) fn at_async_no_new_line(&mut self) -> bool { pub(crate) fn at_async_no_new_line(&mut self) -> bool {
self.at(Kind::Async) && !self.cur_token().escaped && !self.peek_token().is_on_new_line self.at(Kind::Async) && !self.cur_token().escaped() && !self.peek_token().is_on_new_line
} }
pub(crate) fn parse_function_body(&mut self) -> Result<Box<'a, FunctionBody<'a>>> { pub(crate) fn parse_function_body(&mut self) -> Result<Box<'a, FunctionBody<'a>>> {

View file

@ -127,7 +127,7 @@ impl<'a> Parser<'a> {
Kind::Const if !(self.ts_enabled() && self.is_at_enum_declaration()) => { Kind::Const if !(self.ts_enabled() && self.is_at_enum_declaration()) => {
self.parse_variable_statement(stmt_ctx) self.parse_variable_statement(stmt_ctx)
} }
Kind::Let if !self.cur_token().escaped => self.parse_let(stmt_ctx), Kind::Let if !self.cur_token().escaped() => self.parse_let(stmt_ctx),
Kind::Await Kind::Await
if self.peek_kind() == Kind::Using && self.nth_kind(2).is_binding_identifier() => if self.peek_kind() == Kind::Using && self.nth_kind(2).is_binding_identifier() =>
{ {
@ -276,7 +276,7 @@ impl<'a> Parser<'a> {
let is_let_of = self.at(Kind::Let) && self.peek_at(Kind::Of); let is_let_of = self.at(Kind::Let) && self.peek_at(Kind::Of);
let is_async_of = let is_async_of =
self.at(Kind::Async) && !self.cur_token().escaped && self.peek_at(Kind::Of); self.at(Kind::Async) && !self.cur_token().escaped() && self.peek_at(Kind::Of);
let expr_span = self.start_span(); let expr_span = self.start_span();
if self.at(Kind::RParen) { if self.at(Kind::RParen) {

View file

@ -360,14 +360,15 @@ impl<'a> Parser<'a> {
} }
// we are at a valid normal Ident or Keyword, let's keep on lexing for `-` // we are at a valid normal Ident or Keyword, let's keep on lexing for `-`
self.re_lex_jsx_identifier(); self.re_lex_jsx_identifier();
let name = Atom::from(self.cur_string().unwrap());
self.bump_any(); self.bump_any();
Ok(self.ast.jsx_identifier(self.end_span(span), name)) let span = self.end_span(span);
let name = span.source_text(self.source_text);
Ok(self.ast.jsx_identifier(span, name.into()))
} }
fn parse_jsx_text(&mut self) -> JSXText { fn parse_jsx_text(&mut self) -> JSXText {
let span = self.start_span(); let span = self.start_span();
let value = Atom::from(self.cur_string().unwrap()); let value = Atom::from(self.cur_string());
self.bump_any(); self.bump_any();
self.ast.jsx_text(self.end_span(span), value) self.ast.jsx_text(self.end_span(span), value)
} }

View file

@ -24,13 +24,13 @@ use oxc_syntax::{
}, },
unicode_id_start::is_id_start_unicode, unicode_id_start::is_id_start_unicode,
}; };
pub use token::{Token, TokenValue};
pub use self::{ pub use self::{
kind::Kind, kind::Kind,
number::{parse_big_int, parse_float, parse_int}, number::{parse_big_int, parse_float, parse_int},
token::Token,
}; };
use self::{string_builder::AutoCow, trivia_builder::TriviaBuilder}; use self::{string_builder::AutoCow, token::EscapedStringId, trivia_builder::TriviaBuilder};
use crate::{diagnostics, MAX_LEN}; use crate::{diagnostics, MAX_LEN};
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
@ -38,7 +38,7 @@ pub struct LexerCheckpoint<'a> {
/// Remaining chars to be tokenized /// Remaining chars to be tokenized
chars: Chars<'a>, chars: Chars<'a>,
token: Token<'a>, token: Token,
errors_pos: usize, errors_pos: usize,
} }
@ -66,6 +66,9 @@ pub struct Lexer<'a> {
context: LexerContext, context: LexerContext,
pub(crate) trivia_builder: TriviaBuilder, pub(crate) trivia_builder: TriviaBuilder,
/// Data store for escaped strings, indexed by `Token.escaped_string_id`
escaped_strings: Vec<&'a str>,
} }
#[allow(clippy::unused_self)] #[allow(clippy::unused_self)]
@ -91,6 +94,7 @@ impl<'a> Lexer<'a> {
lookahead: VecDeque::with_capacity(4), // 4 is the maximum lookahead for TypeScript lookahead: VecDeque::with_capacity(4), // 4 is the maximum lookahead for TypeScript
context: LexerContext::Regular, context: LexerContext::Regular,
trivia_builder: TriviaBuilder::default(), trivia_builder: TriviaBuilder::default(),
escaped_strings: vec![],
} }
} }
@ -117,12 +121,12 @@ impl<'a> Lexer<'a> {
} }
/// Find the nth lookahead token lazily /// Find the nth lookahead token lazily
pub fn lookahead(&mut self, n: u8) -> &Token<'a> { pub fn lookahead(&mut self, n: u8) -> Token {
let n = n as usize; let n = n as usize;
debug_assert!(n > 0); debug_assert!(n > 0);
if self.lookahead.len() > n - 1 { if self.lookahead.len() > n - 1 {
return &self.lookahead[n - 1].token; return self.lookahead[n - 1].token;
} }
let checkpoint = self.checkpoint(); let checkpoint = self.checkpoint();
@ -148,7 +152,7 @@ impl<'a> Lexer<'a> {
self.current = checkpoint; self.current = checkpoint;
&self.lookahead[n - 1].token self.lookahead[n - 1].token
} }
/// Set context /// Set context
@ -157,7 +161,7 @@ impl<'a> Lexer<'a> {
} }
/// Main entry point /// Main entry point
pub fn next_token(&mut self) -> Token<'a> { pub fn next_token(&mut self) -> Token {
if let Some(checkpoint) = self.lookahead.pop_front() { if let Some(checkpoint) = self.lookahead.pop_front() {
self.current.chars = checkpoint.chars; self.current.chars = checkpoint.chars;
self.current.errors_pos = checkpoint.errors_pos; self.current.errors_pos = checkpoint.errors_pos;
@ -167,13 +171,13 @@ impl<'a> Lexer<'a> {
self.finish_next(kind) self.finish_next(kind)
} }
pub fn next_jsx_child(&mut self) -> Token<'a> { pub fn next_jsx_child(&mut self) -> Token {
self.current.token.start = self.offset(); self.current.token.start = self.offset();
let kind = self.read_jsx_child(); let kind = self.read_jsx_child();
self.finish_next(kind) self.finish_next(kind)
} }
fn finish_next(&mut self, kind: Kind) -> Token<'a> { fn finish_next(&mut self, kind: Kind) -> Token {
self.current.token.kind = kind; self.current.token.kind = kind;
self.current.token.end = self.offset(); self.current.token.end = self.offset();
debug_assert!(self.current.token.start <= self.current.token.end); debug_assert!(self.current.token.start <= self.current.token.end);
@ -188,7 +192,7 @@ impl<'a> Lexer<'a> {
/// where a `RegularExpressionLiteral` is permitted /// where a `RegularExpressionLiteral` is permitted
/// Which means the parser needs to re-tokenize on `PrimaryExpression`, /// Which means the parser needs to re-tokenize on `PrimaryExpression`,
/// `RegularExpressionLiteral` only appear on the right hand side of `PrimaryExpression` /// `RegularExpressionLiteral` only appear on the right hand side of `PrimaryExpression`
pub fn next_regex(&mut self, kind: Kind) -> Token<'a> { pub fn next_regex(&mut self, kind: Kind) -> Token {
self.current.token.start = self.offset() self.current.token.start = self.offset()
- match kind { - match kind {
Kind::Slash => 1, Kind::Slash => 1,
@ -200,7 +204,7 @@ impl<'a> Lexer<'a> {
self.finish_next(kind) self.finish_next(kind)
} }
pub fn next_right_angle(&mut self) -> Token<'a> { pub fn next_right_angle(&mut self) -> Token {
let kind = self.read_right_angle(); let kind = self.read_right_angle();
self.lookahead.clear(); self.lookahead.clear();
self.finish_next(kind) self.finish_next(kind)
@ -208,7 +212,7 @@ impl<'a> Lexer<'a> {
/// Re-tokenize the current `}` token for `TemplateSubstitutionTail` /// Re-tokenize the current `}` token for `TemplateSubstitutionTail`
/// See Section 12, the parser needs to re-tokenize on `TemplateSubstitutionTail`, /// See Section 12, the parser needs to re-tokenize on `TemplateSubstitutionTail`,
pub fn next_template_substitution_tail(&mut self) -> Token<'a> { pub fn next_template_substitution_tail(&mut self) -> Token {
self.current.token.start = self.offset() - 1; self.current.token.start = self.offset() - 1;
let kind = self.read_template_literal(Kind::TemplateMiddle, Kind::TemplateTail); let kind = self.read_template_literal(Kind::TemplateMiddle, Kind::TemplateTail);
self.lookahead.clear(); self.lookahead.clear();
@ -216,14 +220,14 @@ impl<'a> Lexer<'a> {
} }
/// Expand the current token for `JSXIdentifier` /// Expand the current token for `JSXIdentifier`
pub fn next_jsx_identifier(&mut self, start_offset: u32) -> Token<'a> { pub fn next_jsx_identifier(&mut self, start_offset: u32) -> Token {
let kind = self.read_jsx_identifier(start_offset); let kind = self.read_jsx_identifier(start_offset);
self.lookahead.clear(); self.lookahead.clear();
self.finish_next(kind) self.finish_next(kind)
} }
/// Re-tokenize '<<' or '<=' or '<<=' to '<' /// Re-tokenize '<<' or '<=' or '<<=' to '<'
pub fn re_lex_as_typescript_l_angle(&mut self, kind: Kind) -> Token<'a> { pub fn re_lex_as_typescript_l_angle(&mut self, kind: Kind) -> Token {
let offset = match kind { let offset = match kind {
Kind::ShiftLeft | Kind::LtEq => 2, Kind::ShiftLeft | Kind::LtEq => 2,
Kind::ShiftLeftEq => 3, Kind::ShiftLeftEq => 3,
@ -297,6 +301,44 @@ impl<'a> Lexer<'a> {
} }
} }
/// Save the string if it is escaped
/// This reduces the overall memory consumption while keeping the `Token` size small
/// Strings without escaped values can be retrieved as is from the token span
#[allow(clippy::cast_possible_truncation)]
fn save_string(&mut self, has_escape: bool, s: &'a str) {
if !has_escape {
return;
}
self.escaped_strings.push(s);
let escaped_string_id = self.escaped_strings.len() as u32;
// SAFETY: escaped_string_id is the length of `self.escaped_strings` after an item is pushed, which can never be 0
let escaped_string_id = unsafe { EscapedStringId::new_unchecked(escaped_string_id) };
self.current.token.escaped_string_id.replace(escaped_string_id);
}
pub(crate) fn get_string(&self, token: Token) -> &'a str {
if let Some(escaped_string_id) = token.escaped_string_id {
return self.escaped_strings[escaped_string_id.get() as usize - 1];
}
let raw = &self.source[token.start as usize..token.end as usize];
match token.kind {
Kind::Str | Kind::NoSubstitutionTemplate => {
// omit surrounding quotes
&raw[1..raw.len() - 1]
}
Kind::TemplateHead => {
// omit leading "`${"
&raw[3..]
}
Kind::TemplateTail => {
// omit trailing "$`"
&raw[..raw.len() - 2]
}
_ => raw,
}
}
/// Read each char and set the current token /// Read each char and set the current token
/// Whitespace and line terminators are skipped /// Whitespace and line terminators are skipped
fn read_next_token(&mut self) -> Kind { fn read_next_token(&mut self) -> Kind {
@ -402,7 +444,7 @@ impl<'a> Lexer<'a> {
} }
/// Section 12.7.1 Identifier Names /// Section 12.7.1 Identifier Names
fn identifier_tail(&mut self, mut builder: AutoCow<'a>) -> (bool, &'a str) { fn identifier_tail(&mut self, mut builder: AutoCow<'a>) -> &'a str {
// ident tail // ident tail
while let Some(c) = self.peek() { while let Some(c) = self.peek() {
if !is_identifier_part(c) { if !is_identifier_part(c) {
@ -418,14 +460,13 @@ impl<'a> Lexer<'a> {
builder.push_matching(c); builder.push_matching(c);
} }
let has_escape = builder.has_escape(); let has_escape = builder.has_escape();
(has_escape, builder.finish(self)) let text = builder.finish(self);
self.save_string(has_escape, text);
text
} }
fn identifier_name(&mut self, builder: AutoCow<'a>) -> &'a str { fn identifier_name(&mut self, builder: AutoCow<'a>) -> &'a str {
let (has_escape, text) = self.identifier_tail(builder); self.identifier_tail(builder)
self.current.token.escaped = has_escape;
self.current.token.value = TokenValue::String(text);
text
} }
fn identifier_name_handler(&mut self) -> &'a str { fn identifier_name_handler(&mut self) -> &'a str {
@ -532,8 +573,7 @@ impl<'a> Lexer<'a> {
return Kind::Undetermined; return Kind::Undetermined;
} }
} }
let (_, name) = self.identifier_tail(builder); self.identifier_tail(builder);
self.current.token.value = TokenValue::String(name);
Kind::PrivateIdentifier Kind::PrivateIdentifier
} }
@ -765,8 +805,7 @@ impl<'a> Lexer<'a> {
} }
Some(c @ ('"' | '\'')) => { Some(c @ ('"' | '\'')) => {
if c == delimiter { if c == delimiter {
self.current.token.value = self.save_string(builder.has_escape(), builder.finish_without_push(self));
TokenValue::String(builder.finish_without_push(self));
return Kind::Str; return Kind::Str;
} }
builder.push_matching(c); builder.push_matching(c);
@ -850,16 +889,14 @@ impl<'a> Lexer<'a> {
match c { match c {
'$' if self.peek() == Some('{') => { '$' if self.peek() == Some('{') => {
if is_valid_escape_sequence { if is_valid_escape_sequence {
self.current.token.value = self.save_string(true, builder.finish_without_push(self));
TokenValue::String(builder.finish_without_push(self));
} }
self.current.chars.next(); self.current.chars.next();
return substitute; return substitute;
} }
'`' => { '`' => {
if is_valid_escape_sequence { if is_valid_escape_sequence {
self.current.token.value = self.save_string(true, builder.finish_without_push(self));
TokenValue::String(builder.finish_without_push(self));
} }
return tail; return tail;
} }
@ -872,6 +909,7 @@ impl<'a> Lexer<'a> {
'\\' => { '\\' => {
let text = builder.get_mut_string_without_current_ascii_char(self); let text = builder.get_mut_string_without_current_ascii_char(self);
self.read_string_escape_sequence(text, true, &mut is_valid_escape_sequence); self.read_string_escape_sequence(text, true, &mut is_valid_escape_sequence);
if !is_valid_escape_sequence {}
} }
_ => builder.push_matching(c), _ => builder.push_matching(c),
} }
@ -884,18 +922,13 @@ impl<'a> Lexer<'a> {
/// `IdentifierStart` /// `IdentifierStart`
/// `JSXIdentifier` `IdentifierPart` /// `JSXIdentifier` `IdentifierPart`
/// `JSXIdentifier` [no `WhiteSpace` or Comment here] - /// `JSXIdentifier` [no `WhiteSpace` or Comment here] -
fn read_jsx_identifier(&mut self, start_offset: u32) -> Kind { fn read_jsx_identifier(&mut self, _start_offset: u32) -> Kind {
let prev_str = &self.source[start_offset as usize..self.offset() as usize];
let mut builder = AutoCow::new(self);
while let Some(c) = self.peek() { while let Some(c) = self.peek() {
if c == '-' || is_identifier_start_all(c) { if c == '-' || is_identifier_start_all(c) {
self.current.chars.next(); self.current.chars.next();
builder.push_matching(c);
while let Some(c) = self.peek() { while let Some(c) = self.peek() {
if is_identifier_part(c) { if is_identifier_part(c) {
let c = self.current.chars.next().unwrap(); self.current.chars.next().unwrap();
builder.push_matching(c);
} else { } else {
break; break;
} }
@ -904,9 +937,6 @@ impl<'a> Lexer<'a> {
break; break;
} }
} }
let mut s = String::from_str_in(prev_str, self.allocator);
s.push_str(builder.finish(self));
self.current.token.value = TokenValue::String(s.into_bump_str());
Kind::Ident Kind::Ident
} }
@ -941,7 +971,6 @@ impl<'a> Lexer<'a> {
break; break;
} }
} }
self.current.token.value = TokenValue::String(builder.finish(self));
Kind::JSXText Kind::JSXText
} }
None => Kind::Eof, None => Kind::Eof,
@ -964,8 +993,7 @@ impl<'a> Lexer<'a> {
match self.current.chars.next() { match self.current.chars.next() {
Some(c @ ('"' | '\'')) => { Some(c @ ('"' | '\'')) => {
if c == delimiter { if c == delimiter {
self.current.token.value = self.save_string(builder.has_escape(), builder.finish_without_push(self));
TokenValue::String(builder.finish_without_push(self));
return Kind::Str; return Kind::Str;
} }
builder.push_matching(c); builder.push_matching(c);

View file

@ -33,14 +33,14 @@ impl<'a> AutoCow<'a> {
// and return the reference to it // and return the reference to it
pub fn get_mut_string_without_current_ascii_char<'b>( pub fn get_mut_string_without_current_ascii_char<'b>(
&'b mut self, &'b mut self,
lexer: &'_ Lexer<'a>, lexer: &Lexer<'a>,
) -> &'b mut String<'a> { ) -> &'b mut String<'a> {
self.force_allocation_without_current_ascii_char(lexer); self.force_allocation_without_current_ascii_char(lexer);
self.value.as_mut().unwrap() self.value.as_mut().unwrap()
} }
// Force allocation of a String, excluding the current ASCII character. // Force allocation of a String, excluding the current ASCII character.
pub fn force_allocation_without_current_ascii_char(&mut self, lexer: &'_ Lexer<'a>) { pub fn force_allocation_without_current_ascii_char(&mut self, lexer: &Lexer<'a>) {
if self.value.is_some() { if self.value.is_some() {
return; return;
} }

View file

@ -4,8 +4,10 @@ use oxc_span::Span;
use super::kind::Kind; use super::kind::Kind;
pub type EscapedStringId = std::num::NonZeroU32;
#[derive(Debug, Clone, Copy, Default)] #[derive(Debug, Clone, Copy, Default)]
pub struct Token<'a> { pub struct Token {
/// Token Kind /// Token Kind
pub kind: Kind, pub kind: Kind,
@ -18,40 +20,22 @@ pub struct Token<'a> {
/// Indicates the token is on a newline /// Indicates the token is on a newline
pub is_on_new_line: bool, pub is_on_new_line: bool,
/// Is the original string escaped? /// A index handle to `Lexer::escaped_strings`
pub escaped: bool, /// See https://floooh.github.io/2018/06/17/handles-vs-pointers.html for some background reading
pub escaped_string_id: Option<EscapedStringId>,
pub value: TokenValue<'a>,
} }
#[cfg(target_pointer_width = "64")] #[cfg(target_pointer_width = "64")]
mod size_asserts { mod size_asserts {
oxc_index::assert_eq_size!(super::Token, [u8; 32]); oxc_index::assert_eq_size!(super::Token, [u8; 16]);
} }
impl<'a> Token<'a> { impl Token {
pub fn span(&self) -> Span { pub fn span(&self) -> Span {
Span::new(self.start, self.end) Span::new(self.start, self.end)
} }
}
#[derive(Debug, Copy, Clone)] pub fn escaped(&self) -> bool {
pub enum TokenValue<'a> { self.escaped_string_id.is_some()
None,
String(&'a str),
}
impl<'a> Default for TokenValue<'a> {
fn default() -> Self {
Self::None
}
}
impl<'a> TokenValue<'a> {
pub fn get_string(&self) -> Option<&str> {
match self {
Self::String(s) => Some(s),
Self::None => None,
}
} }
} }

View file

@ -117,7 +117,7 @@ pub struct Parser<'a> {
errors: Vec<Error>, errors: Vec<Error>,
/// The current parsing token /// The current parsing token
token: Token<'a>, token: Token,
/// The end range of the previous token /// The end range of the previous token
prev_token_end: u32, prev_token_end: u32,

View file

@ -302,13 +302,8 @@ impl<'a> Parser<'a> {
return self.parse_ts_infer_type(); return self.parse_ts_infer_type();
} }
let mut operator = None; let operator =
if self.at(Kind::Str) { None } else { TSTypeOperator::from_src(self.cur_string()) };
if !self.at(Kind::Str) {
if let Some(atom) = self.cur_string() {
operator = TSTypeOperator::from_src(atom);
}
}
// test ts ts_type_operator // test ts ts_type_operator
// type B = keyof A; // type B = keyof A;