refactor(lexer): change TokenValue::String(Atom) to TokenValue::String(&str) (#174)

This commit is contained in:
Boshen 2023-03-12 18:33:44 -07:00 committed by GitHub
parent fce6e1ec73
commit f36e3301fd
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 129 additions and 216 deletions

View file

@ -182,8 +182,8 @@ pub enum TSTypeOperator {
impl TSTypeOperator {
#[must_use]
pub fn from_src(src: &Atom) -> Option<Self> {
match src.as_str() {
pub fn from_src(src: &str) -> Option<Self> {
match src {
"keyof" => Some(Self::Keyof),
"unique" => Some(Self::Unique),
"readonly" => Some(Self::Readonly),

View file

@ -1,6 +1,6 @@
//! Code related to navigating `Token`s from the lexer
use oxc_ast::{context::Context, Atom, Span};
use oxc_ast::{context::Context, Span};
use oxc_diagnostics::Result;
use crate::lexer::{Kind, LexerCheckpoint, LexerContext, Token};
@ -8,7 +8,7 @@ use crate::{diagnostics, Parser};
pub struct ParserCheckpoint<'a> {
lexer: LexerCheckpoint<'a>,
cur_token: Token,
cur_token: Token<'a>,
prev_span_end: u32,
errors_pos: usize,
}
@ -46,10 +46,10 @@ impl<'a> Parser<'a> {
unsafe { self.source.get_unchecked(range.start as usize..range.end as usize) }
}
/// Get current atom
/// Get current string
#[must_use]
pub fn cur_atom(&self) -> Option<&Atom> {
self.cur_token().value.get_atom()
pub fn cur_string(&self) -> Option<&str> {
self.cur_token().value.get_string()
}
/// Peek next token, returns EOF for final peek

View file

@ -101,10 +101,10 @@ impl<'a> Parser<'a> {
let span = self.start_span();
let name = match std::mem::take(&mut self.token.value) {
TokenValue::String(value) => value,
_ => "".into(),
_ => "",
};
self.bump_remap(kind);
(self.end_span(span), name)
(self.end_span(span), Atom::from(name))
}
fn check_identifier(&mut self, span: Span, name: &Atom) {
@ -124,7 +124,7 @@ impl<'a> Parser<'a> {
/// # Panics
pub fn parse_private_identifier(&mut self) -> PrivateIdentifier {
let span = self.start_span();
let name = self.cur_atom().unwrap().clone();
let name = Atom::from(self.cur_string().unwrap());
self.bump_any();
PrivateIdentifier { span: self.end_span(span), name }
}
@ -318,7 +318,7 @@ impl<'a> Parser<'a> {
};
let span = self.start_span();
self.bump_any();
Ok(StringLiteral { span: self.end_span(span), value })
Ok(StringLiteral { span: self.end_span(span), value: value.into() })
}
/// Section Array Expression `https://tc39.es/ecma262/#prod-ArrayLiteral`
@ -408,7 +408,7 @@ impl<'a> Parser<'a> {
};
// cooked = None when template literal has invalid escape sequence
let cooked = self.cur_atom().map(Clone::clone);
let cooked = self.cur_string().map(Atom::from);
let raw = &self.cur_src()[1..self.cur_src().len() - end_offset as usize];
let raw = Atom::from(if cooked.is_some() && raw.contains('\r') {

View file

@ -3,7 +3,7 @@
#![allow(clippy::missing_errors_doc)]
use oxc_allocator::{Box, Vec};
use oxc_ast::{ast::*, Span};
use oxc_ast::{ast::*, Atom, Span};
use oxc_diagnostics::Result;
use crate::diagnostics;
@ -351,14 +351,14 @@ impl<'a> Parser<'a> {
}
// we are at a valid normal Ident or Keyword, let's keep on lexing for `-`
self.re_lex_jsx_identifier();
let name = self.cur_atom().unwrap().clone();
let name = Atom::from(self.cur_string().unwrap());
self.bump_any();
Ok(self.ast.jsx_identifier(self.end_span(span), name))
}
fn parse_jsx_text(&mut self) -> JSXText {
let span = self.start_span();
let value = self.cur_atom().unwrap().clone();
let value = Atom::from(self.cur_string().unwrap());
self.bump_any();
self.ast.jsx_text(self.end_span(span), value)
}

View file

@ -2,8 +2,6 @@
use std::fmt;
use oxc_ast::Atom;
#[derive(Debug, Default, Clone, Copy, Eq, PartialEq)]
#[non_exhaustive]
pub enum Kind {
@ -192,91 +190,6 @@ pub enum Kind {
#[allow(clippy::enum_glob_use)]
use self::Kind::*;
static KW_IS: Atom = Atom::new_inline("is");
static KW_AS: Atom = Atom::new_inline("as");
static KW_DO: Atom = Atom::new_inline("do");
static KW_IF: Atom = Atom::new_inline("if");
static KW_IN: Atom = Atom::new_inline("in");
static KW_OF: Atom = Atom::new_inline("of");
static KW_ANY: Atom = Atom::new_inline("any");
static KW_FOR: Atom = Atom::new_inline("for");
static KW_GET: Atom = Atom::new_inline("get");
static KW_LET: Atom = Atom::new_inline("let");
static KW_NEW: Atom = Atom::new_inline("new");
static KW_OUT: Atom = Atom::new_inline("out");
static KW_SET: Atom = Atom::new_inline("set");
static KW_TRY: Atom = Atom::new_inline("try");
static KW_VAR: Atom = Atom::new_inline("var");
static KW_CASE: Atom = Atom::new_inline("case");
static KW_ELSE: Atom = Atom::new_inline("else");
static KW_ENUM: Atom = Atom::new_inline("enum");
static KW_FROM: Atom = Atom::new_inline("from");
static KW_META: Atom = Atom::new_inline("meta");
static KW_NULL: Atom = Atom::new_inline("null");
static KW_THIS: Atom = Atom::new_inline("this");
static KW_TRUE: Atom = Atom::new_inline("true");
static KW_TYPE: Atom = Atom::new_inline("type");
static KW_VOID: Atom = Atom::new_inline("void");
static KW_WITH: Atom = Atom::new_inline("with");
static KW_ASYNC: Atom = Atom::new_inline("async");
static KW_AWAIT: Atom = Atom::new_inline("await");
static KW_BREAK: Atom = Atom::new_inline("break");
static KW_CATCH: Atom = Atom::new_inline("catch");
static KW_CLASS: Atom = Atom::new_inline("class");
static KW_CONST: Atom = Atom::new_inline("const");
static KW_FALSE: Atom = Atom::new_inline("false");
static KW_INFER: Atom = Atom::new_inline("infer");
static KW_KEYOF: Atom = Atom::new_inline("keyof");
static KW_NEVER: Atom = Atom::new_inline("never");
static KW_SUPER: Atom = Atom::new_inline("super");
static KW_THROW: Atom = Atom::new_inline("throw");
static KW_WHILE: Atom = Atom::new_inline("while");
static KW_YIELD: Atom = Atom::new_inline("yield");
static KW_ASSERT: Atom = Atom::new_inline("assert");
static KW_BIGINT: Atom = Atom::new_inline("bigint");
static KW_DELETE: Atom = Atom::new_inline("delete");
static KW_EXPORT: Atom = Atom::new_inline("export");
static KW_GLOBAL: Atom = Atom::new_inline("global");
static KW_IMPORT: Atom = Atom::new_inline("import");
static KW_MODULE: Atom = Atom::new_inline("module");
static KW_NUMBER: Atom = Atom::new_inline("number");
static KW_OBJECT: Atom = Atom::new_inline("object");
static KW_PUBLIC: Atom = Atom::new_inline("public");
static KW_RETURN: Atom = Atom::new_inline("return");
static KW_STATIC: Atom = Atom::new_inline("static");
static KW_STRING: Atom = Atom::new_inline("string");
static KW_SWITCH: Atom = Atom::new_inline("switch");
static KW_SYMBOL: Atom = Atom::new_inline("symbol");
static KW_TARGET: Atom = Atom::new_inline("target");
static KW_TYPEOF: Atom = Atom::new_inline("typeof");
static KW_UNIQUE: Atom = Atom::new_inline("unique");
static KW_ASSERTS: Atom = Atom::new_inline("asserts");
static KW_BOOLEAN: Atom = Atom::new_inline("boolean");
static KW_DECLARE: Atom = Atom::new_inline("declare");
static KW_DEFAULT: Atom = Atom::new_inline("default");
static KW_EXTENDS: Atom = Atom::new_inline("extends");
static KW_FINALLY: Atom = Atom::new_inline("finally");
static KW_PACKAGE: Atom = Atom::new_inline("package");
static KW_PRIVATE: Atom = Atom::new_inline("private");
static KW_REQUIRE: Atom = Atom::new_inline("require");
static KW_UNKNOWN: Atom = Atom::new_inline("unknown");
static KW_ABSTRACT: Atom = Atom::new_inline("abstract");
static KW_ACCESSOR: Atom = Atom::new_inline("accessor");
static KW_CONTINUE: Atom = Atom::new_inline("continue");
static KW_DEBUGGER: Atom = Atom::new_inline("debugger");
static KW_FUNCTION: Atom = Atom::new_inline("function");
static KW_OVERRIDE: Atom = Atom::new_inline("override");
static KW_READONLY: Atom = Atom::new_inline("readonly");
static KW_INTERFACE: Atom = Atom::new_inline("interface");
static KW_INTRINSIC: Atom = Atom::new_inline("intrinsic");
static KW_NAMESPACE: Atom = Atom::new_inline("namespace");
static KW_PROTECTED: Atom = Atom::new_inline("protected");
static KW_SATISFIES: Atom = Atom::new_inline("satisfies");
static KW_UNDEFINED: Atom = Atom::new_inline("undefined");
static KW_IMPLEMENTS: Atom = Atom::new_inline("implements");
static KW_INSTANCEOF: Atom = Atom::new_inline("instanceof");
static KW_CONSTRUCTOR: Atom = Atom::new_inline("constructor");
impl Kind {
#[must_use]
pub fn is_eof(self) -> bool {
@ -475,110 +388,110 @@ impl Kind {
}
#[must_use]
pub fn match_keyword(s: &str) -> (Self, Atom) {
pub fn match_keyword(s: &str) -> Self {
let len = s.len();
if len == 1 || len >= 12 {
return (Ident, Atom::new(s));
return Ident;
}
Self::match_keyword_impl(s).map_or_else(|| (Ident, Atom::new(s)), |(k, s)| (k, s.clone()))
Self::match_keyword_impl(s)
}
fn match_keyword_impl(s: &str) -> Option<(Self, &'static Atom)> {
fn match_keyword_impl(s: &str) -> Self {
match s {
"as" => Some((As, &KW_AS)),
"do" => Some((Do, &KW_DO)),
"if" => Some((If, &KW_IF)),
"in" => Some((In, &KW_IN)),
"is" => Some((Is, &KW_IS)),
"of" => Some((Of, &KW_OF)),
"as" => As,
"do" => Do,
"if" => If,
"in" => In,
"is" => Is,
"of" => Of,
"any" => Some((Any, &KW_ANY)),
"for" => Some((For, &KW_FOR)),
"get" => Some((Get, &KW_GET)),
"let" => Some((Let, &KW_LET)),
"new" => Some((New, &KW_NEW)),
"out" => Some((Out, &KW_OUT)),
"set" => Some((Set, &KW_SET)),
"try" => Some((Try, &KW_TRY)),
"var" => Some((Var, &KW_VAR)),
"any" => Any,
"for" => For,
"get" => Get,
"let" => Let,
"new" => New,
"out" => Out,
"set" => Set,
"try" => Try,
"var" => Var,
"case" => Some((Case, &KW_CASE)),
"else" => Some((Else, &KW_ELSE)),
"enum" => Some((Enum, &KW_ENUM)),
"from" => Some((From, &KW_FROM)),
"meta" => Some((Meta, &KW_META)),
"null" => Some((Null, &KW_NULL)),
"this" => Some((This, &KW_THIS)),
"true" => Some((True, &KW_TRUE)),
"type" => Some((Type, &KW_TYPE)),
"void" => Some((Void, &KW_VOID)),
"with" => Some((With, &KW_WITH)),
"case" => Case,
"else" => Else,
"enum" => Enum,
"from" => From,
"meta" => Meta,
"null" => Null,
"this" => This,
"true" => True,
"type" => Type,
"void" => Void,
"with" => With,
"async" => Some((Async, &KW_ASYNC)),
"await" => Some((Await, &KW_AWAIT)),
"break" => Some((Break, &KW_BREAK)),
"catch" => Some((Catch, &KW_CATCH)),
"class" => Some((Class, &KW_CLASS)),
"const" => Some((Const, &KW_CONST)),
"false" => Some((False, &KW_FALSE)),
"infer" => Some((Infer, &KW_INFER)),
"keyof" => Some((KeyOf, &KW_KEYOF)),
"never" => Some((Never, &KW_NEVER)),
"super" => Some((Super, &KW_SUPER)),
"throw" => Some((Throw, &KW_THROW)),
"while" => Some((While, &KW_WHILE)),
"yield" => Some((Yield, &KW_YIELD)),
"async" => Async,
"await" => Await,
"break" => Break,
"catch" => Catch,
"class" => Class,
"const" => Const,
"false" => False,
"infer" => Infer,
"keyof" => KeyOf,
"never" => Never,
"super" => Super,
"throw" => Throw,
"while" => While,
"yield" => Yield,
"assert" => Some((Assert, &KW_ASSERT)),
"bigint" => Some((BigInt, &KW_BIGINT)),
"delete" => Some((Delete, &KW_DELETE)),
"export" => Some((Export, &KW_EXPORT)),
"global" => Some((Global, &KW_GLOBAL)),
"import" => Some((Import, &KW_IMPORT)),
"module" => Some((Module, &KW_MODULE)),
"number" => Some((Number, &KW_NUMBER)),
"object" => Some((Object, &KW_OBJECT)),
"public" => Some((Public, &KW_PUBLIC)),
"return" => Some((Return, &KW_RETURN)),
"static" => Some((Static, &KW_STATIC)),
"string" => Some((String, &KW_STRING)),
"switch" => Some((Switch, &KW_SWITCH)),
"symbol" => Some((Symbol, &KW_SYMBOL)),
"target" => Some((Target, &KW_TARGET)),
"typeof" => Some((Typeof, &KW_TYPEOF)),
"unique" => Some((Unique, &KW_UNIQUE)),
"assert" => Assert,
"bigint" => BigInt,
"delete" => Delete,
"export" => Export,
"global" => Global,
"import" => Import,
"module" => Module,
"number" => Number,
"object" => Object,
"public" => Public,
"return" => Return,
"static" => Static,
"string" => String,
"switch" => Switch,
"symbol" => Symbol,
"target" => Target,
"typeof" => Typeof,
"unique" => Unique,
"asserts" => Some((Asserts, &KW_ASSERTS)),
"boolean" => Some((Boolean, &KW_BOOLEAN)),
"declare" => Some((Declare, &KW_DECLARE)),
"default" => Some((Default, &KW_DEFAULT)),
"extends" => Some((Extends, &KW_EXTENDS)),
"finally" => Some((Finally, &KW_FINALLY)),
"package" => Some((Package, &KW_PACKAGE)),
"private" => Some((Private, &KW_PRIVATE)),
"require" => Some((Require, &KW_REQUIRE)),
"unknown" => Some((Unknown, &KW_UNKNOWN)),
"asserts" => Asserts,
"boolean" => Boolean,
"declare" => Declare,
"default" => Default,
"extends" => Extends,
"finally" => Finally,
"package" => Package,
"private" => Private,
"require" => Require,
"unknown" => Unknown,
"abstract" => Some((Abstract, &KW_ABSTRACT)),
"accessor" => Some((Accessor, &KW_ACCESSOR)),
"continue" => Some((Continue, &KW_CONTINUE)),
"debugger" => Some((Debugger, &KW_DEBUGGER)),
"function" => Some((Function, &KW_FUNCTION)),
"override" => Some((Override, &KW_OVERRIDE)),
"readonly" => Some((Readonly, &KW_READONLY)),
"abstract" => Abstract,
"accessor" => Accessor,
"continue" => Continue,
"debugger" => Debugger,
"function" => Function,
"override" => Override,
"readonly" => Readonly,
"interface" => Some((Interface, &KW_INTERFACE)),
"intrinsic" => Some((Intrinsic, &KW_INTRINSIC)),
"namespace" => Some((Namespace, &KW_NAMESPACE)),
"protected" => Some((Protected, &KW_PROTECTED)),
"satisfies" => Some((Satisfies, &KW_SATISFIES)),
"undefined" => Some((Undefined, &KW_UNDEFINED)),
"interface" => Interface,
"intrinsic" => Intrinsic,
"namespace" => Namespace,
"protected" => Protected,
"satisfies" => Satisfies,
"undefined" => Undefined,
"implements" => Some((Implements, &KW_IMPLEMENTS)),
"instanceof" => Some((Instanceof, &KW_INSTANCEOF)),
"implements" => Implements,
"instanceof" => Instanceof,
"constructor" => Some((Constructor, &KW_CONSTRUCTOR)),
_ => None,
"constructor" => Constructor,
_ => Ident,
}
}

View file

@ -39,7 +39,7 @@ pub struct LexerCheckpoint<'a> {
/// Remaining chars to be tokenized
chars: Chars<'a>,
token: Token,
token: Token<'a>,
errors_pos: usize,
}
@ -122,7 +122,7 @@ impl<'a> Lexer<'a> {
}
/// Find the nth lookahead token lazily
pub fn lookahead(&mut self, n: u8) -> &Token {
pub fn lookahead(&mut self, n: u8) -> &Token<'a> {
let n = n as usize;
debug_assert!(n > 0);
@ -162,7 +162,7 @@ impl<'a> Lexer<'a> {
}
/// Main entry point
pub fn next_token(&mut self) -> Token {
pub fn next_token(&mut self) -> Token<'a> {
if let Some(checkpoint) = self.lookahead.pop_front() {
self.current.chars = checkpoint.chars;
self.current.errors_pos = checkpoint.errors_pos;
@ -172,12 +172,12 @@ impl<'a> Lexer<'a> {
self.finish_next(kind)
}
pub fn next_jsx_child(&mut self) -> Token {
pub fn next_jsx_child(&mut self) -> Token<'a> {
let kind = self.read_jsx_child();
self.finish_next(kind)
}
fn finish_next(&mut self, kind: Kind) -> Token {
fn finish_next(&mut self, kind: Kind) -> Token<'a> {
self.current.token.kind = kind;
self.current.token.end = self.offset();
debug_assert!(self.current.token.start <= self.current.token.end);
@ -190,7 +190,7 @@ impl<'a> Lexer<'a> {
/// where a `RegularExpressionLiteral` is permitted
/// Which means the parser needs to re-tokenize on `PrimaryExpression`,
/// `RegularExpressionLiteral` only appears on the right hand side of `PrimaryExpression`
pub fn next_regex(&mut self, kind: Kind) -> Token {
pub fn next_regex(&mut self, kind: Kind) -> Token<'a> {
self.current.token.start = self.offset()
- match kind {
Kind::Slash => 1,
@ -202,7 +202,7 @@ impl<'a> Lexer<'a> {
self.finish_next(kind)
}
pub fn next_right_angle(&mut self) -> Token {
pub fn next_right_angle(&mut self) -> Token<'a> {
let kind = self.read_right_angle();
self.lookahead.clear();
self.finish_next(kind)
@ -210,7 +210,7 @@ impl<'a> Lexer<'a> {
/// Re-tokenize the current `}` token for `TemplateSubstitutionTail`
/// See Section 12, the parser needs to re-tokenize on `TemplateSubstitutionTail`,
pub fn next_template_substitution_tail(&mut self) -> Token {
pub fn next_template_substitution_tail(&mut self) -> Token<'a> {
self.current.token.start = self.offset() - 1;
let kind = self.read_template_literal(Kind::TemplateMiddle, Kind::TemplateTail);
self.lookahead.clear();
@ -218,14 +218,14 @@ impl<'a> Lexer<'a> {
}
/// Expand the current token for `JSXIdentifier`
pub fn next_jsx_identifier(&mut self, prev_len: u32) -> Token {
pub fn next_jsx_identifier(&mut self, prev_len: u32) -> Token<'a> {
let kind = self.read_jsx_identifier(prev_len);
self.lookahead.clear();
self.finish_next(kind)
}
/// Re-tokenize '<<' or '<=' or '<<=' to '<'
pub fn re_lex_as_typescript_l_angle(&mut self, kind: Kind) -> Token {
pub fn re_lex_as_typescript_l_angle(&mut self, kind: Kind) -> Token<'a> {
let offset = match kind {
Kind::ShiftLeft | Kind::LtEq => 2,
Kind::ShiftLeftEq => 3,
@ -239,7 +239,7 @@ impl<'a> Lexer<'a> {
}
/// Re-tokenize '>>' or '>=' or '>>>' or '>>=' or '>>>=' to '>'
pub fn re_lex_as_typescript_r_angle(&mut self, kind: Kind) -> Token {
pub fn re_lex_as_typescript_r_angle(&mut self, kind: Kind) -> Token<'a> {
let offset = match kind {
Kind::ShiftRight | Kind::GtEq => 2,
Kind::ShiftRightEq | Kind::ShiftRight3 => 3,
@ -309,8 +309,8 @@ impl<'a> Lexer<'a> {
}
/// Wrap a string in `TokenValue::String`
fn string_to_token_value(&mut self, s: &'a str) -> TokenValue {
TokenValue::String(Atom::from(s))
fn string_to_token_value(&mut self, s: &'a str) -> TokenValue<'a> {
TokenValue::String(s)
}
fn set_numeric_value(&mut self, kind: Kind, src: &'a str) {
@ -556,9 +556,9 @@ impl<'a> Lexer<'a> {
fn identifier_name_or_keyword(&mut self, builder: AutoCow<'a>) -> Kind {
let (has_escape, text) = self.identifier_name(builder);
let (kind, atom) = Kind::match_keyword(text);
let kind = Kind::match_keyword(text);
self.current.token.escaped = has_escape;
self.current.token.value = TokenValue::String(atom);
self.current.token.value = TokenValue::String(text);
kind
}

View file

@ -6,7 +6,7 @@ use oxc_ast::{ast::RegExpFlags, Atom, Span};
use super::kind::Kind;
#[derive(Debug, Clone, PartialEq, Default)]
pub struct Token {
pub struct Token<'a> {
/// Token Kind
pub kind: Kind,
@ -22,7 +22,7 @@ pub struct Token {
/// Is the original string escaped?
pub escaped: bool,
pub value: TokenValue,
pub value: TokenValue<'a>,
}
#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
@ -32,7 +32,7 @@ fn no_bloat_token() {
assert_eq!(size_of::<Token>(), 56);
}
impl Token {
impl<'a> Token<'a> {
#[must_use]
pub fn span(&self) -> Span {
Span::new(self.start, self.end)
@ -40,11 +40,11 @@ impl Token {
}
#[derive(Debug, Clone, PartialEq)]
pub enum TokenValue {
pub enum TokenValue<'a> {
None,
Number(f64),
BigInt(BigUint),
String(Atom),
String(&'a str),
RegExp(RegExp),
}
@ -54,13 +54,13 @@ pub struct RegExp {
pub flags: RegExpFlags,
}
impl Default for TokenValue {
impl<'a> Default for TokenValue<'a> {
fn default() -> Self {
Self::None
}
}
impl TokenValue {
impl<'a> TokenValue<'a> {
#[must_use]
pub fn as_number(&self) -> f64 {
match self {
@ -86,7 +86,7 @@ impl TokenValue {
}
#[must_use]
pub fn get_atom(&self) -> Option<&Atom> {
pub fn get_string(&self) -> Option<&str> {
match self {
Self::String(s) => Some(s),
_ => None,

View file

@ -45,7 +45,7 @@ pub struct Parser<'a> {
errors: Diagnostics,
/// The current parsing token
token: Token,
token: Token<'a>,
/// The end range of the previous token
prev_token_end: u32,

View file

@ -288,7 +288,7 @@ impl<'a> Parser<'a> {
let mut operator = None;
if !self.at(Kind::Str) {
if let Some(atom) = self.cur_atom() {
if let Some(atom) = self.cur_string() {
operator = TSTypeOperator::from_src(atom);
}
}