mirror of
https://github.com/danbulant/oxc
synced 2026-05-24 20:32:10 +00:00
refactor(parser): reduce Token size from 32 to 16 bytes (#1962)
Part of #1880. `Token` size is reduced from 32 to 16 bytes by changing the previous token value `Option<&'a str>` to a u32 index handle. It would be nice if this handle could be eliminated entirely, because the normal case for a string is always `&source_text[token.span.start..token.span.end]`. Unfortunately, JavaScript allows escaped characters to appear in identifiers, strings and templates. These strings need to be unescaped for equality checks, e.g. `"\a" === "a"`. This leads us to adding an `escaped_strings[]` vec for storing these unescaped and allocated strings. Performance regression from adding this vec should be minimal because escaped strings are rare. Background Reading: * https://floooh.github.io/2018/06/17/handles-vs-pointers.html
This commit is contained in:
parent
66e95a5968
commit
4706765d2a
10 changed files with 111 additions and 104 deletions
|
|
@ -11,7 +11,7 @@ use crate::{
|
||||||
|
|
||||||
pub struct ParserCheckpoint<'a> {
|
pub struct ParserCheckpoint<'a> {
|
||||||
lexer: LexerCheckpoint<'a>,
|
lexer: LexerCheckpoint<'a>,
|
||||||
cur_token: Token<'a>,
|
cur_token: Token,
|
||||||
prev_span_end: u32,
|
prev_span_end: u32,
|
||||||
errors_pos: usize,
|
errors_pos: usize,
|
||||||
}
|
}
|
||||||
|
|
@ -29,8 +29,8 @@ impl<'a> Parser<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get current token
|
/// Get current token
|
||||||
pub(crate) fn cur_token(&self) -> &Token<'a> {
|
pub(crate) fn cur_token(&self) -> Token {
|
||||||
&self.token
|
self.token
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get current Kind
|
/// Get current Kind
|
||||||
|
|
@ -47,12 +47,12 @@ impl<'a> Parser<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get current string
|
/// Get current string
|
||||||
pub(crate) fn cur_string(&self) -> Option<&str> {
|
pub(crate) fn cur_string(&self) -> &'a str {
|
||||||
self.cur_token().value.get_string()
|
self.lexer.get_string(self.token)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Peek next token, returns EOF for final peek
|
/// Peek next token, returns EOF for final peek
|
||||||
pub(crate) fn peek_token(&mut self) -> &Token {
|
pub(crate) fn peek_token(&mut self) -> Token {
|
||||||
self.lexer.lookahead(1)
|
self.lexer.lookahead(1)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -67,7 +67,7 @@ impl<'a> Parser<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Peek nth token
|
/// Peek nth token
|
||||||
pub(crate) fn nth(&mut self, n: u8) -> &Token {
|
pub(crate) fn nth(&mut self, n: u8) -> Token {
|
||||||
if n == 0 {
|
if n == 0 {
|
||||||
return self.cur_token();
|
return self.cur_token();
|
||||||
}
|
}
|
||||||
|
|
@ -94,7 +94,7 @@ impl<'a> Parser<'a> {
|
||||||
/// whose code point sequence is the same as a `ReservedWord`.
|
/// whose code point sequence is the same as a `ReservedWord`.
|
||||||
#[inline]
|
#[inline]
|
||||||
fn test_escaped_keyword(&mut self, kind: Kind) {
|
fn test_escaped_keyword(&mut self, kind: Kind) {
|
||||||
if self.cur_token().escaped && kind.is_all_keyword() {
|
if self.cur_token().escaped() && kind.is_all_keyword() {
|
||||||
let span = self.cur_token().span();
|
let span = self.cur_token().span();
|
||||||
self.error(diagnostics::EscapedKeyword(span));
|
self.error(diagnostics::EscapedKeyword(span));
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -17,8 +17,7 @@ use super::{
|
||||||
};
|
};
|
||||||
use crate::{
|
use crate::{
|
||||||
diagnostics,
|
diagnostics,
|
||||||
lexer::{parse_big_int, parse_float, parse_int},
|
lexer::{parse_big_int, parse_float, parse_int, Kind},
|
||||||
lexer::{Kind, TokenValue},
|
|
||||||
list::SeparatedList,
|
list::SeparatedList,
|
||||||
Context, Parser,
|
Context, Parser,
|
||||||
};
|
};
|
||||||
|
|
@ -96,10 +95,7 @@ impl<'a> Parser<'a> {
|
||||||
|
|
||||||
pub(crate) fn parse_identifier_kind(&mut self, kind: Kind) -> (Span, Atom) {
|
pub(crate) fn parse_identifier_kind(&mut self, kind: Kind) -> (Span, Atom) {
|
||||||
let span = self.start_span();
|
let span = self.start_span();
|
||||||
let name = match std::mem::take(&mut self.token.value) {
|
let name = self.cur_string();
|
||||||
TokenValue::String(value) => value,
|
|
||||||
TokenValue::None => "",
|
|
||||||
};
|
|
||||||
self.bump_remap(kind);
|
self.bump_remap(kind);
|
||||||
(self.end_span(span), Atom::from(name))
|
(self.end_span(span), Atom::from(name))
|
||||||
}
|
}
|
||||||
|
|
@ -121,7 +117,7 @@ impl<'a> Parser<'a> {
|
||||||
/// # Panics
|
/// # Panics
|
||||||
pub(crate) fn parse_private_identifier(&mut self) -> PrivateIdentifier {
|
pub(crate) fn parse_private_identifier(&mut self) -> PrivateIdentifier {
|
||||||
let span = self.start_span();
|
let span = self.start_span();
|
||||||
let name = Atom::from(self.cur_string().unwrap());
|
let name = Atom::from(self.cur_string());
|
||||||
self.bump_any();
|
self.bump_any();
|
||||||
PrivateIdentifier { span: self.end_span(span), name }
|
PrivateIdentifier { span: self.end_span(span), name }
|
||||||
}
|
}
|
||||||
|
|
@ -349,9 +345,7 @@ impl<'a> Parser<'a> {
|
||||||
if !self.at(Kind::Str) {
|
if !self.at(Kind::Str) {
|
||||||
return Err(self.unexpected());
|
return Err(self.unexpected());
|
||||||
}
|
}
|
||||||
let TokenValue::String(value) = std::mem::take(&mut self.token.value) else {
|
let value = self.cur_string();
|
||||||
unreachable!()
|
|
||||||
};
|
|
||||||
let span = self.start_span();
|
let span = self.start_span();
|
||||||
self.bump_any();
|
self.bump_any();
|
||||||
Ok(StringLiteral { span: self.end_span(span), value: value.into() })
|
Ok(StringLiteral { span: self.end_span(span), value: value.into() })
|
||||||
|
|
@ -454,8 +448,9 @@ impl<'a> Parser<'a> {
|
||||||
_ => unreachable!(),
|
_ => unreachable!(),
|
||||||
};
|
};
|
||||||
|
|
||||||
// cooked = None when template literal has invalid escape sequence
|
// `cooked = None` when template literal has invalid escape sequence
|
||||||
let cooked = self.cur_string().map(Atom::from);
|
// This is matched by `is_valid_escape_sequence` in `Lexer::read_template_literal`
|
||||||
|
let cooked = self.cur_token().escaped_string_id.map(|_| self.cur_string());
|
||||||
|
|
||||||
let raw = &self.cur_src()[1..self.cur_src().len() - end_offset as usize];
|
let raw = &self.cur_src()[1..self.cur_src().len() - end_offset as usize];
|
||||||
let raw = Atom::from(if cooked.is_some() && raw.contains('\r') {
|
let raw = Atom::from(if cooked.is_some() && raw.contains('\r') {
|
||||||
|
|
@ -475,7 +470,11 @@ impl<'a> Parser<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
let tail = matches!(cur_kind, Kind::TemplateTail | Kind::NoSubstitutionTemplate);
|
let tail = matches!(cur_kind, Kind::TemplateTail | Kind::NoSubstitutionTemplate);
|
||||||
TemplateElement { span, tail, value: TemplateElementValue { raw, cooked } }
|
TemplateElement {
|
||||||
|
span,
|
||||||
|
tail,
|
||||||
|
value: TemplateElementValue { raw, cooked: cooked.map(Atom::from) },
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Section 13.3 Meta Property
|
/// Section 13.3 Meta Property
|
||||||
|
|
|
||||||
|
|
@ -50,7 +50,7 @@ impl<'a> Parser<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn at_async_no_new_line(&mut self) -> bool {
|
pub(crate) fn at_async_no_new_line(&mut self) -> bool {
|
||||||
self.at(Kind::Async) && !self.cur_token().escaped && !self.peek_token().is_on_new_line
|
self.at(Kind::Async) && !self.cur_token().escaped() && !self.peek_token().is_on_new_line
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn parse_function_body(&mut self) -> Result<Box<'a, FunctionBody<'a>>> {
|
pub(crate) fn parse_function_body(&mut self) -> Result<Box<'a, FunctionBody<'a>>> {
|
||||||
|
|
|
||||||
|
|
@ -127,7 +127,7 @@ impl<'a> Parser<'a> {
|
||||||
Kind::Const if !(self.ts_enabled() && self.is_at_enum_declaration()) => {
|
Kind::Const if !(self.ts_enabled() && self.is_at_enum_declaration()) => {
|
||||||
self.parse_variable_statement(stmt_ctx)
|
self.parse_variable_statement(stmt_ctx)
|
||||||
}
|
}
|
||||||
Kind::Let if !self.cur_token().escaped => self.parse_let(stmt_ctx),
|
Kind::Let if !self.cur_token().escaped() => self.parse_let(stmt_ctx),
|
||||||
Kind::Await
|
Kind::Await
|
||||||
if self.peek_kind() == Kind::Using && self.nth_kind(2).is_binding_identifier() =>
|
if self.peek_kind() == Kind::Using && self.nth_kind(2).is_binding_identifier() =>
|
||||||
{
|
{
|
||||||
|
|
@ -276,7 +276,7 @@ impl<'a> Parser<'a> {
|
||||||
|
|
||||||
let is_let_of = self.at(Kind::Let) && self.peek_at(Kind::Of);
|
let is_let_of = self.at(Kind::Let) && self.peek_at(Kind::Of);
|
||||||
let is_async_of =
|
let is_async_of =
|
||||||
self.at(Kind::Async) && !self.cur_token().escaped && self.peek_at(Kind::Of);
|
self.at(Kind::Async) && !self.cur_token().escaped() && self.peek_at(Kind::Of);
|
||||||
let expr_span = self.start_span();
|
let expr_span = self.start_span();
|
||||||
|
|
||||||
if self.at(Kind::RParen) {
|
if self.at(Kind::RParen) {
|
||||||
|
|
|
||||||
|
|
@ -360,14 +360,15 @@ impl<'a> Parser<'a> {
|
||||||
}
|
}
|
||||||
// we are at a valid normal Ident or Keyword, let's keep on lexing for `-`
|
// we are at a valid normal Ident or Keyword, let's keep on lexing for `-`
|
||||||
self.re_lex_jsx_identifier();
|
self.re_lex_jsx_identifier();
|
||||||
let name = Atom::from(self.cur_string().unwrap());
|
|
||||||
self.bump_any();
|
self.bump_any();
|
||||||
Ok(self.ast.jsx_identifier(self.end_span(span), name))
|
let span = self.end_span(span);
|
||||||
|
let name = span.source_text(self.source_text);
|
||||||
|
Ok(self.ast.jsx_identifier(span, name.into()))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_jsx_text(&mut self) -> JSXText {
|
fn parse_jsx_text(&mut self) -> JSXText {
|
||||||
let span = self.start_span();
|
let span = self.start_span();
|
||||||
let value = Atom::from(self.cur_string().unwrap());
|
let value = Atom::from(self.cur_string());
|
||||||
self.bump_any();
|
self.bump_any();
|
||||||
self.ast.jsx_text(self.end_span(span), value)
|
self.ast.jsx_text(self.end_span(span), value)
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -24,13 +24,13 @@ use oxc_syntax::{
|
||||||
},
|
},
|
||||||
unicode_id_start::is_id_start_unicode,
|
unicode_id_start::is_id_start_unicode,
|
||||||
};
|
};
|
||||||
pub use token::{Token, TokenValue};
|
|
||||||
|
|
||||||
pub use self::{
|
pub use self::{
|
||||||
kind::Kind,
|
kind::Kind,
|
||||||
number::{parse_big_int, parse_float, parse_int},
|
number::{parse_big_int, parse_float, parse_int},
|
||||||
|
token::Token,
|
||||||
};
|
};
|
||||||
use self::{string_builder::AutoCow, trivia_builder::TriviaBuilder};
|
use self::{string_builder::AutoCow, token::EscapedStringId, trivia_builder::TriviaBuilder};
|
||||||
use crate::{diagnostics, MAX_LEN};
|
use crate::{diagnostics, MAX_LEN};
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
|
|
@ -38,7 +38,7 @@ pub struct LexerCheckpoint<'a> {
|
||||||
/// Remaining chars to be tokenized
|
/// Remaining chars to be tokenized
|
||||||
chars: Chars<'a>,
|
chars: Chars<'a>,
|
||||||
|
|
||||||
token: Token<'a>,
|
token: Token,
|
||||||
|
|
||||||
errors_pos: usize,
|
errors_pos: usize,
|
||||||
}
|
}
|
||||||
|
|
@ -66,6 +66,9 @@ pub struct Lexer<'a> {
|
||||||
context: LexerContext,
|
context: LexerContext,
|
||||||
|
|
||||||
pub(crate) trivia_builder: TriviaBuilder,
|
pub(crate) trivia_builder: TriviaBuilder,
|
||||||
|
|
||||||
|
/// Data store for escaped strings, indexed by `Token.escaped_string_id`
|
||||||
|
escaped_strings: Vec<&'a str>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[allow(clippy::unused_self)]
|
#[allow(clippy::unused_self)]
|
||||||
|
|
@ -91,6 +94,7 @@ impl<'a> Lexer<'a> {
|
||||||
lookahead: VecDeque::with_capacity(4), // 4 is the maximum lookahead for TypeScript
|
lookahead: VecDeque::with_capacity(4), // 4 is the maximum lookahead for TypeScript
|
||||||
context: LexerContext::Regular,
|
context: LexerContext::Regular,
|
||||||
trivia_builder: TriviaBuilder::default(),
|
trivia_builder: TriviaBuilder::default(),
|
||||||
|
escaped_strings: vec![],
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -117,12 +121,12 @@ impl<'a> Lexer<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Find the nth lookahead token lazily
|
/// Find the nth lookahead token lazily
|
||||||
pub fn lookahead(&mut self, n: u8) -> &Token<'a> {
|
pub fn lookahead(&mut self, n: u8) -> Token {
|
||||||
let n = n as usize;
|
let n = n as usize;
|
||||||
debug_assert!(n > 0);
|
debug_assert!(n > 0);
|
||||||
|
|
||||||
if self.lookahead.len() > n - 1 {
|
if self.lookahead.len() > n - 1 {
|
||||||
return &self.lookahead[n - 1].token;
|
return self.lookahead[n - 1].token;
|
||||||
}
|
}
|
||||||
|
|
||||||
let checkpoint = self.checkpoint();
|
let checkpoint = self.checkpoint();
|
||||||
|
|
@ -148,7 +152,7 @@ impl<'a> Lexer<'a> {
|
||||||
|
|
||||||
self.current = checkpoint;
|
self.current = checkpoint;
|
||||||
|
|
||||||
&self.lookahead[n - 1].token
|
self.lookahead[n - 1].token
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Set context
|
/// Set context
|
||||||
|
|
@ -157,7 +161,7 @@ impl<'a> Lexer<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Main entry point
|
/// Main entry point
|
||||||
pub fn next_token(&mut self) -> Token<'a> {
|
pub fn next_token(&mut self) -> Token {
|
||||||
if let Some(checkpoint) = self.lookahead.pop_front() {
|
if let Some(checkpoint) = self.lookahead.pop_front() {
|
||||||
self.current.chars = checkpoint.chars;
|
self.current.chars = checkpoint.chars;
|
||||||
self.current.errors_pos = checkpoint.errors_pos;
|
self.current.errors_pos = checkpoint.errors_pos;
|
||||||
|
|
@ -167,13 +171,13 @@ impl<'a> Lexer<'a> {
|
||||||
self.finish_next(kind)
|
self.finish_next(kind)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn next_jsx_child(&mut self) -> Token<'a> {
|
pub fn next_jsx_child(&mut self) -> Token {
|
||||||
self.current.token.start = self.offset();
|
self.current.token.start = self.offset();
|
||||||
let kind = self.read_jsx_child();
|
let kind = self.read_jsx_child();
|
||||||
self.finish_next(kind)
|
self.finish_next(kind)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn finish_next(&mut self, kind: Kind) -> Token<'a> {
|
fn finish_next(&mut self, kind: Kind) -> Token {
|
||||||
self.current.token.kind = kind;
|
self.current.token.kind = kind;
|
||||||
self.current.token.end = self.offset();
|
self.current.token.end = self.offset();
|
||||||
debug_assert!(self.current.token.start <= self.current.token.end);
|
debug_assert!(self.current.token.start <= self.current.token.end);
|
||||||
|
|
@ -188,7 +192,7 @@ impl<'a> Lexer<'a> {
|
||||||
/// where a `RegularExpressionLiteral` is permitted
|
/// where a `RegularExpressionLiteral` is permitted
|
||||||
/// Which means the parser needs to re-tokenize on `PrimaryExpression`,
|
/// Which means the parser needs to re-tokenize on `PrimaryExpression`,
|
||||||
/// `RegularExpressionLiteral` only appear on the right hand side of `PrimaryExpression`
|
/// `RegularExpressionLiteral` only appear on the right hand side of `PrimaryExpression`
|
||||||
pub fn next_regex(&mut self, kind: Kind) -> Token<'a> {
|
pub fn next_regex(&mut self, kind: Kind) -> Token {
|
||||||
self.current.token.start = self.offset()
|
self.current.token.start = self.offset()
|
||||||
- match kind {
|
- match kind {
|
||||||
Kind::Slash => 1,
|
Kind::Slash => 1,
|
||||||
|
|
@ -200,7 +204,7 @@ impl<'a> Lexer<'a> {
|
||||||
self.finish_next(kind)
|
self.finish_next(kind)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn next_right_angle(&mut self) -> Token<'a> {
|
pub fn next_right_angle(&mut self) -> Token {
|
||||||
let kind = self.read_right_angle();
|
let kind = self.read_right_angle();
|
||||||
self.lookahead.clear();
|
self.lookahead.clear();
|
||||||
self.finish_next(kind)
|
self.finish_next(kind)
|
||||||
|
|
@ -208,7 +212,7 @@ impl<'a> Lexer<'a> {
|
||||||
|
|
||||||
/// Re-tokenize the current `}` token for `TemplateSubstitutionTail`
|
/// Re-tokenize the current `}` token for `TemplateSubstitutionTail`
|
||||||
/// See Section 12, the parser needs to re-tokenize on `TemplateSubstitutionTail`,
|
/// See Section 12, the parser needs to re-tokenize on `TemplateSubstitutionTail`,
|
||||||
pub fn next_template_substitution_tail(&mut self) -> Token<'a> {
|
pub fn next_template_substitution_tail(&mut self) -> Token {
|
||||||
self.current.token.start = self.offset() - 1;
|
self.current.token.start = self.offset() - 1;
|
||||||
let kind = self.read_template_literal(Kind::TemplateMiddle, Kind::TemplateTail);
|
let kind = self.read_template_literal(Kind::TemplateMiddle, Kind::TemplateTail);
|
||||||
self.lookahead.clear();
|
self.lookahead.clear();
|
||||||
|
|
@ -216,14 +220,14 @@ impl<'a> Lexer<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Expand the current token for `JSXIdentifier`
|
/// Expand the current token for `JSXIdentifier`
|
||||||
pub fn next_jsx_identifier(&mut self, start_offset: u32) -> Token<'a> {
|
pub fn next_jsx_identifier(&mut self, start_offset: u32) -> Token {
|
||||||
let kind = self.read_jsx_identifier(start_offset);
|
let kind = self.read_jsx_identifier(start_offset);
|
||||||
self.lookahead.clear();
|
self.lookahead.clear();
|
||||||
self.finish_next(kind)
|
self.finish_next(kind)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Re-tokenize '<<' or '<=' or '<<=' to '<'
|
/// Re-tokenize '<<' or '<=' or '<<=' to '<'
|
||||||
pub fn re_lex_as_typescript_l_angle(&mut self, kind: Kind) -> Token<'a> {
|
pub fn re_lex_as_typescript_l_angle(&mut self, kind: Kind) -> Token {
|
||||||
let offset = match kind {
|
let offset = match kind {
|
||||||
Kind::ShiftLeft | Kind::LtEq => 2,
|
Kind::ShiftLeft | Kind::LtEq => 2,
|
||||||
Kind::ShiftLeftEq => 3,
|
Kind::ShiftLeftEq => 3,
|
||||||
|
|
@ -297,6 +301,44 @@ impl<'a> Lexer<'a> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Save the string if it is escaped
|
||||||
|
/// This reduces the overall memory consumption while keeping the `Token` size small
|
||||||
|
/// Strings without escaped values can be retrieved as is from the token span
|
||||||
|
#[allow(clippy::cast_possible_truncation)]
|
||||||
|
fn save_string(&mut self, has_escape: bool, s: &'a str) {
|
||||||
|
if !has_escape {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
self.escaped_strings.push(s);
|
||||||
|
let escaped_string_id = self.escaped_strings.len() as u32;
|
||||||
|
// SAFETY: escaped_string_id is the length of `self.escaped_strings` after an item is pushed, which can never be 0
|
||||||
|
let escaped_string_id = unsafe { EscapedStringId::new_unchecked(escaped_string_id) };
|
||||||
|
self.current.token.escaped_string_id.replace(escaped_string_id);
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn get_string(&self, token: Token) -> &'a str {
|
||||||
|
if let Some(escaped_string_id) = token.escaped_string_id {
|
||||||
|
return self.escaped_strings[escaped_string_id.get() as usize - 1];
|
||||||
|
}
|
||||||
|
|
||||||
|
let raw = &self.source[token.start as usize..token.end as usize];
|
||||||
|
match token.kind {
|
||||||
|
Kind::Str | Kind::NoSubstitutionTemplate => {
|
||||||
|
// omit surrounding quotes
|
||||||
|
&raw[1..raw.len() - 1]
|
||||||
|
}
|
||||||
|
Kind::TemplateHead => {
|
||||||
|
// omit leading "`${"
|
||||||
|
&raw[3..]
|
||||||
|
}
|
||||||
|
Kind::TemplateTail => {
|
||||||
|
// omit trailing "$`"
|
||||||
|
&raw[..raw.len() - 2]
|
||||||
|
}
|
||||||
|
_ => raw,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Read each char and set the current token
|
/// Read each char and set the current token
|
||||||
/// Whitespace and line terminators are skipped
|
/// Whitespace and line terminators are skipped
|
||||||
fn read_next_token(&mut self) -> Kind {
|
fn read_next_token(&mut self) -> Kind {
|
||||||
|
|
@ -402,7 +444,7 @@ impl<'a> Lexer<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Section 12.7.1 Identifier Names
|
/// Section 12.7.1 Identifier Names
|
||||||
fn identifier_tail(&mut self, mut builder: AutoCow<'a>) -> (bool, &'a str) {
|
fn identifier_tail(&mut self, mut builder: AutoCow<'a>) -> &'a str {
|
||||||
// ident tail
|
// ident tail
|
||||||
while let Some(c) = self.peek() {
|
while let Some(c) = self.peek() {
|
||||||
if !is_identifier_part(c) {
|
if !is_identifier_part(c) {
|
||||||
|
|
@ -418,14 +460,13 @@ impl<'a> Lexer<'a> {
|
||||||
builder.push_matching(c);
|
builder.push_matching(c);
|
||||||
}
|
}
|
||||||
let has_escape = builder.has_escape();
|
let has_escape = builder.has_escape();
|
||||||
(has_escape, builder.finish(self))
|
let text = builder.finish(self);
|
||||||
|
self.save_string(has_escape, text);
|
||||||
|
text
|
||||||
}
|
}
|
||||||
|
|
||||||
fn identifier_name(&mut self, builder: AutoCow<'a>) -> &'a str {
|
fn identifier_name(&mut self, builder: AutoCow<'a>) -> &'a str {
|
||||||
let (has_escape, text) = self.identifier_tail(builder);
|
self.identifier_tail(builder)
|
||||||
self.current.token.escaped = has_escape;
|
|
||||||
self.current.token.value = TokenValue::String(text);
|
|
||||||
text
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn identifier_name_handler(&mut self) -> &'a str {
|
fn identifier_name_handler(&mut self) -> &'a str {
|
||||||
|
|
@ -532,8 +573,7 @@ impl<'a> Lexer<'a> {
|
||||||
return Kind::Undetermined;
|
return Kind::Undetermined;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let (_, name) = self.identifier_tail(builder);
|
self.identifier_tail(builder);
|
||||||
self.current.token.value = TokenValue::String(name);
|
|
||||||
Kind::PrivateIdentifier
|
Kind::PrivateIdentifier
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -765,8 +805,7 @@ impl<'a> Lexer<'a> {
|
||||||
}
|
}
|
||||||
Some(c @ ('"' | '\'')) => {
|
Some(c @ ('"' | '\'')) => {
|
||||||
if c == delimiter {
|
if c == delimiter {
|
||||||
self.current.token.value =
|
self.save_string(builder.has_escape(), builder.finish_without_push(self));
|
||||||
TokenValue::String(builder.finish_without_push(self));
|
|
||||||
return Kind::Str;
|
return Kind::Str;
|
||||||
}
|
}
|
||||||
builder.push_matching(c);
|
builder.push_matching(c);
|
||||||
|
|
@ -850,16 +889,14 @@ impl<'a> Lexer<'a> {
|
||||||
match c {
|
match c {
|
||||||
'$' if self.peek() == Some('{') => {
|
'$' if self.peek() == Some('{') => {
|
||||||
if is_valid_escape_sequence {
|
if is_valid_escape_sequence {
|
||||||
self.current.token.value =
|
self.save_string(true, builder.finish_without_push(self));
|
||||||
TokenValue::String(builder.finish_without_push(self));
|
|
||||||
}
|
}
|
||||||
self.current.chars.next();
|
self.current.chars.next();
|
||||||
return substitute;
|
return substitute;
|
||||||
}
|
}
|
||||||
'`' => {
|
'`' => {
|
||||||
if is_valid_escape_sequence {
|
if is_valid_escape_sequence {
|
||||||
self.current.token.value =
|
self.save_string(true, builder.finish_without_push(self));
|
||||||
TokenValue::String(builder.finish_without_push(self));
|
|
||||||
}
|
}
|
||||||
return tail;
|
return tail;
|
||||||
}
|
}
|
||||||
|
|
@ -872,6 +909,7 @@ impl<'a> Lexer<'a> {
|
||||||
'\\' => {
|
'\\' => {
|
||||||
let text = builder.get_mut_string_without_current_ascii_char(self);
|
let text = builder.get_mut_string_without_current_ascii_char(self);
|
||||||
self.read_string_escape_sequence(text, true, &mut is_valid_escape_sequence);
|
self.read_string_escape_sequence(text, true, &mut is_valid_escape_sequence);
|
||||||
|
if !is_valid_escape_sequence {}
|
||||||
}
|
}
|
||||||
_ => builder.push_matching(c),
|
_ => builder.push_matching(c),
|
||||||
}
|
}
|
||||||
|
|
@ -884,18 +922,13 @@ impl<'a> Lexer<'a> {
|
||||||
/// `IdentifierStart`
|
/// `IdentifierStart`
|
||||||
/// `JSXIdentifier` `IdentifierPart`
|
/// `JSXIdentifier` `IdentifierPart`
|
||||||
/// `JSXIdentifier` [no `WhiteSpace` or Comment here] -
|
/// `JSXIdentifier` [no `WhiteSpace` or Comment here] -
|
||||||
fn read_jsx_identifier(&mut self, start_offset: u32) -> Kind {
|
fn read_jsx_identifier(&mut self, _start_offset: u32) -> Kind {
|
||||||
let prev_str = &self.source[start_offset as usize..self.offset() as usize];
|
|
||||||
|
|
||||||
let mut builder = AutoCow::new(self);
|
|
||||||
while let Some(c) = self.peek() {
|
while let Some(c) = self.peek() {
|
||||||
if c == '-' || is_identifier_start_all(c) {
|
if c == '-' || is_identifier_start_all(c) {
|
||||||
self.current.chars.next();
|
self.current.chars.next();
|
||||||
builder.push_matching(c);
|
|
||||||
while let Some(c) = self.peek() {
|
while let Some(c) = self.peek() {
|
||||||
if is_identifier_part(c) {
|
if is_identifier_part(c) {
|
||||||
let c = self.current.chars.next().unwrap();
|
self.current.chars.next().unwrap();
|
||||||
builder.push_matching(c);
|
|
||||||
} else {
|
} else {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
@ -904,9 +937,6 @@ impl<'a> Lexer<'a> {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let mut s = String::from_str_in(prev_str, self.allocator);
|
|
||||||
s.push_str(builder.finish(self));
|
|
||||||
self.current.token.value = TokenValue::String(s.into_bump_str());
|
|
||||||
Kind::Ident
|
Kind::Ident
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -941,7 +971,6 @@ impl<'a> Lexer<'a> {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
self.current.token.value = TokenValue::String(builder.finish(self));
|
|
||||||
Kind::JSXText
|
Kind::JSXText
|
||||||
}
|
}
|
||||||
None => Kind::Eof,
|
None => Kind::Eof,
|
||||||
|
|
@ -964,8 +993,7 @@ impl<'a> Lexer<'a> {
|
||||||
match self.current.chars.next() {
|
match self.current.chars.next() {
|
||||||
Some(c @ ('"' | '\'')) => {
|
Some(c @ ('"' | '\'')) => {
|
||||||
if c == delimiter {
|
if c == delimiter {
|
||||||
self.current.token.value =
|
self.save_string(builder.has_escape(), builder.finish_without_push(self));
|
||||||
TokenValue::String(builder.finish_without_push(self));
|
|
||||||
return Kind::Str;
|
return Kind::Str;
|
||||||
}
|
}
|
||||||
builder.push_matching(c);
|
builder.push_matching(c);
|
||||||
|
|
|
||||||
|
|
@ -33,14 +33,14 @@ impl<'a> AutoCow<'a> {
|
||||||
// and return the reference to it
|
// and return the reference to it
|
||||||
pub fn get_mut_string_without_current_ascii_char<'b>(
|
pub fn get_mut_string_without_current_ascii_char<'b>(
|
||||||
&'b mut self,
|
&'b mut self,
|
||||||
lexer: &'_ Lexer<'a>,
|
lexer: &Lexer<'a>,
|
||||||
) -> &'b mut String<'a> {
|
) -> &'b mut String<'a> {
|
||||||
self.force_allocation_without_current_ascii_char(lexer);
|
self.force_allocation_without_current_ascii_char(lexer);
|
||||||
self.value.as_mut().unwrap()
|
self.value.as_mut().unwrap()
|
||||||
}
|
}
|
||||||
|
|
||||||
// Force allocation of a String, excluding the current ASCII character.
|
// Force allocation of a String, excluding the current ASCII character.
|
||||||
pub fn force_allocation_without_current_ascii_char(&mut self, lexer: &'_ Lexer<'a>) {
|
pub fn force_allocation_without_current_ascii_char(&mut self, lexer: &Lexer<'a>) {
|
||||||
if self.value.is_some() {
|
if self.value.is_some() {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -4,8 +4,10 @@ use oxc_span::Span;
|
||||||
|
|
||||||
use super::kind::Kind;
|
use super::kind::Kind;
|
||||||
|
|
||||||
|
pub type EscapedStringId = std::num::NonZeroU32;
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, Default)]
|
#[derive(Debug, Clone, Copy, Default)]
|
||||||
pub struct Token<'a> {
|
pub struct Token {
|
||||||
/// Token Kind
|
/// Token Kind
|
||||||
pub kind: Kind,
|
pub kind: Kind,
|
||||||
|
|
||||||
|
|
@ -18,40 +20,22 @@ pub struct Token<'a> {
|
||||||
/// Indicates the token is on a newline
|
/// Indicates the token is on a newline
|
||||||
pub is_on_new_line: bool,
|
pub is_on_new_line: bool,
|
||||||
|
|
||||||
/// Is the original string escaped?
|
/// A index handle to `Lexer::escaped_strings`
|
||||||
pub escaped: bool,
|
/// See https://floooh.github.io/2018/06/17/handles-vs-pointers.html for some background reading
|
||||||
|
pub escaped_string_id: Option<EscapedStringId>,
|
||||||
pub value: TokenValue<'a>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(target_pointer_width = "64")]
|
#[cfg(target_pointer_width = "64")]
|
||||||
mod size_asserts {
|
mod size_asserts {
|
||||||
oxc_index::assert_eq_size!(super::Token, [u8; 32]);
|
oxc_index::assert_eq_size!(super::Token, [u8; 16]);
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> Token<'a> {
|
impl Token {
|
||||||
pub fn span(&self) -> Span {
|
pub fn span(&self) -> Span {
|
||||||
Span::new(self.start, self.end)
|
Span::new(self.start, self.end)
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Copy, Clone)]
|
pub fn escaped(&self) -> bool {
|
||||||
pub enum TokenValue<'a> {
|
self.escaped_string_id.is_some()
|
||||||
None,
|
|
||||||
String(&'a str),
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> Default for TokenValue<'a> {
|
|
||||||
fn default() -> Self {
|
|
||||||
Self::None
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> TokenValue<'a> {
|
|
||||||
pub fn get_string(&self) -> Option<&str> {
|
|
||||||
match self {
|
|
||||||
Self::String(s) => Some(s),
|
|
||||||
Self::None => None,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -117,7 +117,7 @@ pub struct Parser<'a> {
|
||||||
errors: Vec<Error>,
|
errors: Vec<Error>,
|
||||||
|
|
||||||
/// The current parsing token
|
/// The current parsing token
|
||||||
token: Token<'a>,
|
token: Token,
|
||||||
|
|
||||||
/// The end range of the previous token
|
/// The end range of the previous token
|
||||||
prev_token_end: u32,
|
prev_token_end: u32,
|
||||||
|
|
|
||||||
|
|
@ -302,13 +302,8 @@ impl<'a> Parser<'a> {
|
||||||
return self.parse_ts_infer_type();
|
return self.parse_ts_infer_type();
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut operator = None;
|
let operator =
|
||||||
|
if self.at(Kind::Str) { None } else { TSTypeOperator::from_src(self.cur_string()) };
|
||||||
if !self.at(Kind::Str) {
|
|
||||||
if let Some(atom) = self.cur_string() {
|
|
||||||
operator = TSTypeOperator::from_src(atom);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// test ts ts_type_operator
|
// test ts ts_type_operator
|
||||||
// type B = keyof A;
|
// type B = keyof A;
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue