mirror of
https://github.com/danbulant/oxc
synced 2026-05-24 12:21:58 +00:00
perf(lexer): jump table (#779)
This commit is contained in:
parent
b2db6f7ef1
commit
c8a215ea5e
3 changed files with 566 additions and 368 deletions
|
|
@ -1,132 +0,0 @@
|
|||
use super::Kind;
|
||||
|
||||
pub const SINGLE_CHAR_TOKENS: &[Kind; 128] = &[
|
||||
/* 0 */ Kind::Undetermined,
|
||||
/* 1 */ Kind::Undetermined,
|
||||
/* 2 */ Kind::Undetermined,
|
||||
/* 3 */ Kind::Undetermined,
|
||||
/* 4 */ Kind::Undetermined,
|
||||
/* 5 */ Kind::Undetermined,
|
||||
/* 6 */ Kind::Undetermined,
|
||||
/* 7 */ Kind::Undetermined,
|
||||
/* 8 */ Kind::Undetermined,
|
||||
/* 9 */ Kind::Undetermined,
|
||||
/* 10 */ Kind::Undetermined,
|
||||
/* 11 */ Kind::Undetermined,
|
||||
/* 12 */ Kind::Undetermined,
|
||||
/* 13 */ Kind::Undetermined,
|
||||
/* 14 */ Kind::Undetermined,
|
||||
/* 15 */ Kind::Undetermined,
|
||||
/* 16 */ Kind::Undetermined,
|
||||
/* 17 */ Kind::Undetermined,
|
||||
/* 18 */ Kind::Undetermined,
|
||||
/* 19 */ Kind::Undetermined,
|
||||
/* 20 */ Kind::Undetermined,
|
||||
/* 21 */ Kind::Undetermined,
|
||||
/* 22 */ Kind::Undetermined,
|
||||
/* 23 */ Kind::Undetermined,
|
||||
/* 24 */ Kind::Undetermined,
|
||||
/* 25 */ Kind::Undetermined,
|
||||
/* 26 */ Kind::Undetermined,
|
||||
/* 27 */ Kind::Undetermined,
|
||||
/* 28 */ Kind::Undetermined,
|
||||
/* 29 */ Kind::Undetermined,
|
||||
/* 30 */ Kind::Undetermined,
|
||||
/* 31 */ Kind::Undetermined,
|
||||
/* 32 */ Kind::Undetermined,
|
||||
/* 33 */ Kind::Undetermined,
|
||||
/* 34 */ Kind::Undetermined,
|
||||
/* 35 */ Kind::Undetermined,
|
||||
/* 36 */ Kind::Undetermined,
|
||||
/* 37 */ Kind::Undetermined,
|
||||
/* 38 */ Kind::Undetermined,
|
||||
/* 39 */ Kind::Undetermined,
|
||||
/* 40 */ Kind::LParen, // 0x28
|
||||
/* 41 */ Kind::RParen, // 0x29
|
||||
/* 42 */ Kind::Undetermined,
|
||||
/* 43 */ Kind::Undetermined,
|
||||
/* 44 */ Kind::Comma, // 0x2C
|
||||
/* 45 */ Kind::Undetermined,
|
||||
/* 46 */ Kind::Undetermined,
|
||||
/* 47 */ Kind::Undetermined,
|
||||
/* 48 */ Kind::Undetermined,
|
||||
/* 49 */ Kind::Undetermined,
|
||||
/* 50 */ Kind::Undetermined,
|
||||
/* 51 */ Kind::Undetermined,
|
||||
/* 52 */ Kind::Undetermined,
|
||||
/* 53 */ Kind::Undetermined,
|
||||
/* 54 */ Kind::Undetermined,
|
||||
/* 55 */ Kind::Undetermined,
|
||||
/* 56 */ Kind::Undetermined,
|
||||
/* 57 */ Kind::Undetermined,
|
||||
/* 58 */ Kind::Colon, // 0x3A
|
||||
/* 59 */ Kind::Semicolon, // 0x3B
|
||||
/* 60 */ Kind::Undetermined,
|
||||
/* 61 */ Kind::Undetermined,
|
||||
/* 62 */ Kind::RAngle, // 0x3E
|
||||
/* 63 */ Kind::Undetermined,
|
||||
/* 64 */ Kind::At,
|
||||
/* 65 */ Kind::Undetermined,
|
||||
/* 66 */ Kind::Undetermined,
|
||||
/* 67 */ Kind::Undetermined,
|
||||
/* 68 */ Kind::Undetermined,
|
||||
/* 69 */ Kind::Undetermined,
|
||||
/* 70 */ Kind::Undetermined,
|
||||
/* 71 */ Kind::Undetermined,
|
||||
/* 72 */ Kind::Undetermined,
|
||||
/* 73 */ Kind::Undetermined,
|
||||
/* 74 */ Kind::Undetermined,
|
||||
/* 75 */ Kind::Undetermined,
|
||||
/* 76 */ Kind::Undetermined,
|
||||
/* 77 */ Kind::Undetermined,
|
||||
/* 78 */ Kind::Undetermined,
|
||||
/* 79 */ Kind::Undetermined,
|
||||
/* 80 */ Kind::Undetermined,
|
||||
/* 81 */ Kind::Undetermined,
|
||||
/* 82 */ Kind::Undetermined,
|
||||
/* 83 */ Kind::Undetermined,
|
||||
/* 84 */ Kind::Undetermined,
|
||||
/* 85 */ Kind::Undetermined,
|
||||
/* 86 */ Kind::Undetermined,
|
||||
/* 87 */ Kind::Undetermined,
|
||||
/* 88 */ Kind::Undetermined,
|
||||
/* 89 */ Kind::Undetermined,
|
||||
/* 90 */ Kind::Undetermined,
|
||||
/* 91 */ Kind::LBrack, // 0x5B
|
||||
/* 92 */ Kind::Undetermined,
|
||||
/* 93 */ Kind::RBrack, // 0x5D
|
||||
/* 94 */ Kind::Undetermined,
|
||||
/* 95 */ Kind::Undetermined,
|
||||
/* 96 */ Kind::Undetermined,
|
||||
/* 97 */ Kind::Undetermined,
|
||||
/* 98 */ Kind::Undetermined,
|
||||
/* 99 */ Kind::Undetermined,
|
||||
/* 100 */ Kind::Undetermined,
|
||||
/* 101 */ Kind::Undetermined,
|
||||
/* 102 */ Kind::Undetermined,
|
||||
/* 103 */ Kind::Undetermined,
|
||||
/* 104 */ Kind::Undetermined,
|
||||
/* 105 */ Kind::Undetermined,
|
||||
/* 106 */ Kind::Undetermined,
|
||||
/* 107 */ Kind::Undetermined,
|
||||
/* 108 */ Kind::Undetermined,
|
||||
/* 109 */ Kind::Undetermined,
|
||||
/* 110 */ Kind::Undetermined,
|
||||
/* 111 */ Kind::Undetermined,
|
||||
/* 112 */ Kind::Undetermined,
|
||||
/* 113 */ Kind::Undetermined,
|
||||
/* 114 */ Kind::Undetermined,
|
||||
/* 115 */ Kind::Undetermined,
|
||||
/* 116 */ Kind::Undetermined,
|
||||
/* 117 */ Kind::Undetermined,
|
||||
/* 118 */ Kind::Undetermined,
|
||||
/* 119 */ Kind::Undetermined,
|
||||
/* 120 */ Kind::Undetermined,
|
||||
/* 121 */ Kind::Undetermined,
|
||||
/* 122 */ Kind::Undetermined,
|
||||
/* 123 */ Kind::LCurly, // 0x7B
|
||||
/* 124 */ Kind::Undetermined,
|
||||
/* 125 */ Kind::RCurly, // 0x7D
|
||||
/* 126 */ Kind::Tilde, // 0x7E
|
||||
/* 127 */ Kind::Undetermined,
|
||||
];
|
||||
|
|
@ -201,10 +201,6 @@ impl Kind {
|
|||
matches!(self, Eof)
|
||||
}
|
||||
|
||||
pub fn is_trivia(self) -> bool {
|
||||
matches!(self, WhiteSpace | NewLine | Comment | MultiLineComment)
|
||||
}
|
||||
|
||||
pub fn is_number(self) -> bool {
|
||||
matches!(
|
||||
self,
|
||||
|
|
|
|||
|
|
@ -5,7 +5,6 @@
|
|||
//! * [rustc](https://github.com/rust-lang/rust/blob/master/compiler/rustc_lexer/src)
|
||||
//! * [v8](https://v8.dev/blog/scanner)
|
||||
|
||||
mod constants;
|
||||
mod kind;
|
||||
mod number;
|
||||
mod string_builder;
|
||||
|
|
@ -20,9 +19,8 @@ use oxc_diagnostics::Error;
|
|||
use oxc_span::{SourceType, Span};
|
||||
use oxc_syntax::{
|
||||
identifier::{
|
||||
is_identifier_part, is_identifier_start_all, is_identifier_start_ascii,
|
||||
is_irregular_line_terminator, is_irregular_whitespace, is_line_terminator, CR, EOF, FF, LF,
|
||||
LS, PS, TAB, VT,
|
||||
is_identifier_part, is_identifier_start_all, is_irregular_line_terminator,
|
||||
is_irregular_whitespace, is_line_terminator, CR, EOF, FF, LF, LS, PS, TAB, VT,
|
||||
},
|
||||
unicode_id_start::is_id_start_unicode,
|
||||
};
|
||||
|
|
@ -30,7 +28,6 @@ pub use token::{RegExp, Token, TokenValue};
|
|||
|
||||
pub use self::kind::Kind;
|
||||
use self::{
|
||||
constants::SINGLE_CHAR_TOKENS,
|
||||
number::{parse_big_int, parse_float, parse_int},
|
||||
string_builder::AutoCow,
|
||||
trivia_builder::TriviaBuilder,
|
||||
|
|
@ -249,6 +246,12 @@ impl<'a> Lexer<'a> {
|
|||
Span::new(self.current.token.start, self.offset())
|
||||
}
|
||||
|
||||
/// Consume the current char
|
||||
#[inline]
|
||||
fn consume_char(&mut self) -> char {
|
||||
self.current.chars.next().unwrap()
|
||||
}
|
||||
|
||||
/// Peek the next char without advancing the position
|
||||
#[inline]
|
||||
fn peek(&self) -> char {
|
||||
|
|
@ -327,11 +330,13 @@ impl<'a> Lexer<'a> {
|
|||
loop {
|
||||
let offset = self.offset();
|
||||
self.current.token.start = offset;
|
||||
let builder = AutoCow::new(self);
|
||||
|
||||
if let Some(c) = self.current.chars.next() {
|
||||
let kind = self.match_char(c, builder);
|
||||
if !kind.is_trivia() {
|
||||
if let Some(c) = self.current.chars.clone().next() {
|
||||
let kind = self.match_char(c);
|
||||
if !matches!(
|
||||
kind,
|
||||
Kind::WhiteSpace | Kind::NewLine | Kind::Comment | Kind::MultiLineComment
|
||||
) {
|
||||
return kind;
|
||||
}
|
||||
} else {
|
||||
|
|
@ -340,111 +345,33 @@ impl<'a> Lexer<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_lines)]
|
||||
fn match_char(&mut self, c: char, mut builder: AutoCow<'a>) -> Kind {
|
||||
// fast path for single character tokens
|
||||
// '{' '}' '(' ')' '[' ']' ';' ',' ':' '~'
|
||||
#[inline]
|
||||
fn match_char(&mut self, c: char) -> Kind {
|
||||
let size = c as usize;
|
||||
|
||||
if size < 128 {
|
||||
let kind = SINGLE_CHAR_TOKENS[size];
|
||||
if kind != Kind::Undetermined {
|
||||
return kind;
|
||||
}
|
||||
// fast path for identifiers
|
||||
if is_identifier_start_ascii(c) {
|
||||
builder.push_matching(c);
|
||||
return self.identifier_name_or_keyword(builder);
|
||||
}
|
||||
return BYTE_HANDLERS[size](self);
|
||||
}
|
||||
// NOTE: matching order is significant here, by real world occurrences
|
||||
// see https://blog.mozilla.org/nnethercote/2011/07/01/faster-javascript-parsing/
|
||||
// > the rough order of frequency for different token kinds is as follows:
|
||||
// identifiers/keywords, ‘.’, ‘=’, strings, decimal numbers, ‘:’, ‘+’, hex/octal numbers, and then everything else
|
||||
|
||||
match c {
|
||||
' ' | '\t' => Kind::WhiteSpace,
|
||||
'\r' | '\n' => {
|
||||
self.current.token.is_on_new_line = true;
|
||||
Kind::NewLine
|
||||
}
|
||||
'.' => {
|
||||
let kind = self.read_dot(&mut builder);
|
||||
if kind.is_number() {
|
||||
self.set_numeric_value(kind, builder.finish(self));
|
||||
}
|
||||
kind
|
||||
}
|
||||
'=' => self.read_equal(),
|
||||
'"' | '\'' => {
|
||||
if self.context == LexerContext::JsxAttributeValue {
|
||||
self.read_jsx_string_literal(c)
|
||||
} else {
|
||||
self.read_string_literal(c)
|
||||
}
|
||||
}
|
||||
'1'..='9' => {
|
||||
let kind = self.decimal_literal_after_first_digit(&mut builder);
|
||||
self.set_numeric_value(kind, builder.finish(self));
|
||||
kind
|
||||
}
|
||||
'+' => self.read_plus(),
|
||||
'-' => self.read_minus().map_or_else(|| self.skip_single_line_comment(), |kind| kind),
|
||||
'0' => {
|
||||
let kind = self.read_zero(&mut builder);
|
||||
self.set_numeric_value(kind, builder.finish(self));
|
||||
kind
|
||||
}
|
||||
'/' => {
|
||||
match self.peek() {
|
||||
'/' => {
|
||||
self.current.chars.next();
|
||||
self.skip_single_line_comment()
|
||||
}
|
||||
'*' => {
|
||||
self.current.chars.next();
|
||||
self.skip_multi_line_comment()
|
||||
}
|
||||
_ => {
|
||||
// regex is handled separately, see `next_regex`
|
||||
self.read_slash()
|
||||
}
|
||||
}
|
||||
}
|
||||
'`' => self.read_template_literal(Kind::TemplateHead, Kind::NoSubstitutionTemplate),
|
||||
'!' => self.read_exclamation(),
|
||||
'%' => self.read_percent(),
|
||||
'*' => self.read_star(),
|
||||
'&' => self.read_ampersand(),
|
||||
'|' => self.read_pipe(),
|
||||
'?' => self.read_question(),
|
||||
'<' => {
|
||||
self.read_left_angle().map_or_else(|| self.skip_single_line_comment(), |kind| kind)
|
||||
}
|
||||
'^' => self.read_caret(),
|
||||
'#' => {
|
||||
// HashbangComment ::
|
||||
// `#!` SingleLineCommentChars?
|
||||
if self.current.token.start == 0 && self.next_eq('!') {
|
||||
self.read_hashbang_comment()
|
||||
} else {
|
||||
builder.get_mut_string_without_current_ascii_char(self);
|
||||
self.private_identifier(builder)
|
||||
}
|
||||
}
|
||||
'\\' => {
|
||||
builder.force_allocation_without_current_ascii_char(self);
|
||||
self.identifier_unicode_escape_sequence(&mut builder, true);
|
||||
self.identifier_name_or_keyword(builder)
|
||||
}
|
||||
c if is_id_start_unicode(c) => {
|
||||
let mut builder = AutoCow::new(self);
|
||||
let c = self.consume_char();
|
||||
builder.push_matching(c);
|
||||
self.identifier_name_or_keyword(builder)
|
||||
self.identifier_name(builder);
|
||||
Kind::Ident
|
||||
}
|
||||
c if is_irregular_whitespace(c) => {
|
||||
self.consume_char();
|
||||
Kind::WhiteSpace
|
||||
}
|
||||
c if is_irregular_whitespace(c) => Kind::WhiteSpace,
|
||||
c if is_irregular_line_terminator(c) => {
|
||||
self.consume_char();
|
||||
self.current.token.is_on_new_line = true;
|
||||
Kind::NewLine
|
||||
}
|
||||
_ => {
|
||||
self.consume_char();
|
||||
self.error(diagnostics::InvalidCharacter(c, self.unterminated_range()));
|
||||
Kind::Undetermined
|
||||
}
|
||||
|
|
@ -490,12 +417,13 @@ impl<'a> Lexer<'a> {
|
|||
}
|
||||
|
||||
/// Section 12.6.1 Identifier Names
|
||||
fn identifier_name(&mut self, mut builder: AutoCow<'a>) -> (bool, &'a str) {
|
||||
fn identifier_tail(&mut self, mut builder: AutoCow<'a>) -> (bool, &'a str) {
|
||||
// ident tail
|
||||
loop {
|
||||
let c = self.peek();
|
||||
if !is_identifier_part(c) {
|
||||
if self.next_eq('\\') {
|
||||
if c == '\\' {
|
||||
self.current.chars.next();
|
||||
builder.force_allocation_without_current_ascii_char(self);
|
||||
self.identifier_unicode_escape_sequence(&mut builder, false);
|
||||
continue;
|
||||
|
|
@ -506,16 +434,21 @@ impl<'a> Lexer<'a> {
|
|||
builder.push_matching(c);
|
||||
}
|
||||
let has_escape = builder.has_escape();
|
||||
|
||||
(has_escape, builder.finish(self))
|
||||
}
|
||||
|
||||
fn identifier_name_or_keyword(&mut self, builder: AutoCow<'a>) -> Kind {
|
||||
let (has_escape, text) = self.identifier_name(builder);
|
||||
let kind = Kind::match_keyword(text);
|
||||
fn identifier_name(&mut self, builder: AutoCow<'a>) -> &'a str {
|
||||
let (has_escape, text) = self.identifier_tail(builder);
|
||||
self.current.token.escaped = has_escape;
|
||||
self.current.token.value = TokenValue::String(text);
|
||||
kind
|
||||
text
|
||||
}
|
||||
|
||||
fn identifier_name_handler(&mut self) -> &'a str {
|
||||
let mut builder = AutoCow::new(self);
|
||||
let c = self.consume_char();
|
||||
builder.push_matching(c);
|
||||
self.identifier_name(builder)
|
||||
}
|
||||
|
||||
/// Section 12.7 Punctuators
|
||||
|
|
@ -574,42 +507,6 @@ impl<'a> Lexer<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
fn read_equal(&mut self) -> Kind {
|
||||
if self.next_eq('=') {
|
||||
if self.next_eq('=') {
|
||||
Kind::Eq3
|
||||
} else {
|
||||
Kind::Eq2
|
||||
}
|
||||
} else if self.next_eq('>') {
|
||||
Kind::Arrow
|
||||
} else {
|
||||
Kind::Eq
|
||||
}
|
||||
}
|
||||
|
||||
fn read_exclamation(&mut self) -> Kind {
|
||||
if self.next_eq('=') {
|
||||
if self.next_eq('=') {
|
||||
Kind::Neq2
|
||||
} else {
|
||||
Kind::Neq
|
||||
}
|
||||
} else {
|
||||
Kind::Bang
|
||||
}
|
||||
}
|
||||
|
||||
fn read_plus(&mut self) -> Kind {
|
||||
if self.next_eq('+') {
|
||||
Kind::Plus2
|
||||
} else if self.next_eq('=') {
|
||||
Kind::PlusEq
|
||||
} else {
|
||||
Kind::Plus
|
||||
}
|
||||
}
|
||||
|
||||
/// returns None for `SingleLineHTMLCloseComment` `-->` in script mode
|
||||
fn read_minus(&mut self) -> Option<Kind> {
|
||||
if self.next_eq('-') {
|
||||
|
|
@ -629,92 +526,6 @@ impl<'a> Lexer<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
fn read_caret(&mut self) -> Kind {
|
||||
if self.next_eq('=') {
|
||||
Kind::CaretEq
|
||||
} else {
|
||||
Kind::Caret
|
||||
}
|
||||
}
|
||||
|
||||
fn read_percent(&mut self) -> Kind {
|
||||
if self.next_eq('=') {
|
||||
Kind::PercentEq
|
||||
} else {
|
||||
Kind::Percent
|
||||
}
|
||||
}
|
||||
|
||||
fn read_star(&mut self) -> Kind {
|
||||
if self.next_eq('*') {
|
||||
if self.next_eq('=') {
|
||||
Kind::Star2Eq
|
||||
} else {
|
||||
Kind::Star2
|
||||
}
|
||||
} else if self.next_eq('=') {
|
||||
Kind::StarEq
|
||||
} else {
|
||||
Kind::Star
|
||||
}
|
||||
}
|
||||
|
||||
fn read_ampersand(&mut self) -> Kind {
|
||||
if self.next_eq('&') {
|
||||
if self.next_eq('=') {
|
||||
Kind::Amp2Eq
|
||||
} else {
|
||||
Kind::Amp2
|
||||
}
|
||||
} else if self.next_eq('=') {
|
||||
Kind::AmpEq
|
||||
} else {
|
||||
Kind::Amp
|
||||
}
|
||||
}
|
||||
|
||||
fn read_pipe(&mut self) -> Kind {
|
||||
if self.next_eq('|') {
|
||||
if self.next_eq('=') {
|
||||
Kind::Pipe2Eq
|
||||
} else {
|
||||
Kind::Pipe2
|
||||
}
|
||||
} else if self.next_eq('=') {
|
||||
Kind::PipeEq
|
||||
} else {
|
||||
Kind::Pipe
|
||||
}
|
||||
}
|
||||
|
||||
fn read_question(&mut self) -> Kind {
|
||||
if self.next_eq('?') {
|
||||
if self.next_eq('=') {
|
||||
Kind::Question2Eq
|
||||
} else {
|
||||
Kind::Question2
|
||||
}
|
||||
} else if self.peek() == '.' {
|
||||
// parse `?.1` as `?` `.1`
|
||||
if self.peek2().is_ascii_digit() {
|
||||
Kind::Question
|
||||
} else {
|
||||
self.current.chars.next();
|
||||
Kind::QuestionDot
|
||||
}
|
||||
} else {
|
||||
Kind::Question
|
||||
}
|
||||
}
|
||||
|
||||
fn read_slash(&mut self) -> Kind {
|
||||
if self.next_eq('=') {
|
||||
Kind::SlashEq
|
||||
} else {
|
||||
Kind::Slash
|
||||
}
|
||||
}
|
||||
|
||||
fn private_identifier(&mut self, mut builder: AutoCow<'a>) -> Kind {
|
||||
let start = self.offset();
|
||||
match self.current.chars.next() {
|
||||
|
|
@ -738,7 +549,7 @@ impl<'a> Lexer<'a> {
|
|||
return Kind::Undetermined;
|
||||
}
|
||||
}
|
||||
let (_, name) = self.identifier_name(builder);
|
||||
let (_, name) = self.identifier_tail(builder);
|
||||
self.current.token.value = self.string_to_token_value(name);
|
||||
Kind::PrivateIdentifier
|
||||
}
|
||||
|
|
@ -1478,3 +1289,526 @@ enum SurrogatePair {
|
|||
// invalid \u Hex4Digits \u Hex4Digits
|
||||
HighLow(u32, u32),
|
||||
}
|
||||
|
||||
type ByteHandler = fn(&mut Lexer<'_>) -> Kind;
|
||||
|
||||
/// Lookup table mapping any incoming byte to a handler function defined below.
|
||||
/// <https://github.com/ratel-rust/ratel-core/blob/master/ratel/src/lexer/mod.rs>
|
||||
#[rustfmt::skip]
|
||||
static BYTE_HANDLERS: [ByteHandler; 128] = [
|
||||
// 0 1 2 3 4 5 6 7 8 9 A B C D E F //
|
||||
ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, SPS, LIN, SPS, SPS, LIN, ERR, ERR, // 0
|
||||
ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, // 1
|
||||
SPS, EXL, QOT, HAS, IDT, PRC, AMP, QOT, PNO, PNC, ATR, PLS, COM, MIN, PRD, SLH, // 2
|
||||
ZER, DIG, DIG, DIG, DIG, DIG, DIG, DIG, DIG, DIG, COL, SEM, LSS, EQL, GTR, QST, // 3
|
||||
AT_, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, // 4
|
||||
IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, BTO, ESC, BTC, CRT, IDT, // 5
|
||||
TPL, L_A, L_B, L_C, L_D, L_E, L_F, L_G, IDT, L_I, IDT, L_K, L_L, L_M, L_N, L_O, // 6
|
||||
L_P, IDT, L_R, L_S, L_T, L_U, L_V, L_W, IDT, L_Y, IDT, BEO, PIP, BEC, TLD, ERR, // 7
|
||||
];
|
||||
|
||||
const ERR: ByteHandler = |lexer| {
|
||||
let c = lexer.consume_char();
|
||||
lexer.error(diagnostics::InvalidCharacter(c, lexer.unterminated_range()));
|
||||
Kind::Undetermined
|
||||
};
|
||||
|
||||
// <TAB> <VT> <FF>
|
||||
const SPS: ByteHandler = |lexer| {
|
||||
lexer.consume_char();
|
||||
Kind::WhiteSpace
|
||||
};
|
||||
|
||||
// '\r' '\n'
|
||||
const LIN: ByteHandler = |lexer| {
|
||||
lexer.consume_char();
|
||||
lexer.current.token.is_on_new_line = true;
|
||||
Kind::NewLine
|
||||
};
|
||||
|
||||
// !
|
||||
const EXL: ByteHandler = |lexer| {
|
||||
lexer.consume_char();
|
||||
if lexer.next_eq('=') {
|
||||
if lexer.next_eq('=') {
|
||||
Kind::Neq2
|
||||
} else {
|
||||
Kind::Neq
|
||||
}
|
||||
} else {
|
||||
Kind::Bang
|
||||
}
|
||||
};
|
||||
|
||||
// ' "
|
||||
const QOT: ByteHandler = |lexer| {
|
||||
let c = lexer.consume_char();
|
||||
if lexer.context == LexerContext::JsxAttributeValue {
|
||||
lexer.read_jsx_string_literal(c)
|
||||
} else {
|
||||
lexer.read_string_literal(c)
|
||||
}
|
||||
};
|
||||
|
||||
// #
|
||||
const HAS: ByteHandler = |lexer| {
|
||||
let mut builder = AutoCow::new(lexer);
|
||||
let c = lexer.consume_char();
|
||||
builder.push_matching(c);
|
||||
// HashbangComment ::
|
||||
// `#!` SingleLineCommentChars?
|
||||
if lexer.current.token.start == 0 && lexer.next_eq('!') {
|
||||
lexer.read_hashbang_comment()
|
||||
} else {
|
||||
builder.get_mut_string_without_current_ascii_char(lexer);
|
||||
lexer.private_identifier(builder)
|
||||
}
|
||||
};
|
||||
|
||||
const IDT: ByteHandler = |lexer| {
|
||||
lexer.identifier_name_handler();
|
||||
Kind::Ident
|
||||
};
|
||||
|
||||
// %
|
||||
const PRC: ByteHandler = |lexer| {
|
||||
lexer.consume_char();
|
||||
if lexer.next_eq('=') {
|
||||
Kind::PercentEq
|
||||
} else {
|
||||
Kind::Percent
|
||||
}
|
||||
};
|
||||
|
||||
// &
|
||||
const AMP: ByteHandler = |lexer| {
|
||||
lexer.consume_char();
|
||||
if lexer.next_eq('&') {
|
||||
if lexer.next_eq('=') {
|
||||
Kind::Amp2Eq
|
||||
} else {
|
||||
Kind::Amp2
|
||||
}
|
||||
} else if lexer.next_eq('=') {
|
||||
Kind::AmpEq
|
||||
} else {
|
||||
Kind::Amp
|
||||
}
|
||||
};
|
||||
|
||||
// (
|
||||
const PNO: ByteHandler = |lexer| {
|
||||
lexer.consume_char();
|
||||
Kind::LParen
|
||||
};
|
||||
|
||||
// )
|
||||
const PNC: ByteHandler = |lexer| {
|
||||
lexer.consume_char();
|
||||
Kind::RParen
|
||||
};
|
||||
|
||||
// *
|
||||
const ATR: ByteHandler = |lexer| {
|
||||
lexer.consume_char();
|
||||
if lexer.next_eq('*') {
|
||||
if lexer.next_eq('=') {
|
||||
Kind::Star2Eq
|
||||
} else {
|
||||
Kind::Star2
|
||||
}
|
||||
} else if lexer.next_eq('=') {
|
||||
Kind::StarEq
|
||||
} else {
|
||||
Kind::Star
|
||||
}
|
||||
};
|
||||
|
||||
// +
|
||||
const PLS: ByteHandler = |lexer| {
|
||||
lexer.consume_char();
|
||||
if lexer.next_eq('+') {
|
||||
Kind::Plus2
|
||||
} else if lexer.next_eq('=') {
|
||||
Kind::PlusEq
|
||||
} else {
|
||||
Kind::Plus
|
||||
}
|
||||
};
|
||||
|
||||
// ,
|
||||
const COM: ByteHandler = |lexer| {
|
||||
lexer.consume_char();
|
||||
Kind::Comma
|
||||
};
|
||||
|
||||
// -
|
||||
const MIN: ByteHandler = |lexer| {
|
||||
lexer.consume_char();
|
||||
lexer.read_minus().unwrap_or_else(|| lexer.skip_single_line_comment())
|
||||
};
|
||||
|
||||
// .
|
||||
const PRD: ByteHandler = |lexer| {
|
||||
let mut builder = AutoCow::new(lexer);
|
||||
let c = lexer.consume_char();
|
||||
builder.push_matching(c);
|
||||
let kind = lexer.read_dot(&mut builder);
|
||||
if kind.is_number() {
|
||||
lexer.set_numeric_value(kind, builder.finish(lexer));
|
||||
}
|
||||
kind
|
||||
};
|
||||
|
||||
// /
|
||||
const SLH: ByteHandler = |lexer| {
|
||||
lexer.consume_char();
|
||||
match lexer.peek() {
|
||||
'/' => {
|
||||
lexer.current.chars.next();
|
||||
lexer.skip_single_line_comment()
|
||||
}
|
||||
'*' => {
|
||||
lexer.current.chars.next();
|
||||
lexer.skip_multi_line_comment()
|
||||
}
|
||||
_ => {
|
||||
// regex is handled separately, see `next_regex`
|
||||
if lexer.next_eq('=') {
|
||||
Kind::SlashEq
|
||||
} else {
|
||||
Kind::Slash
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// 0
|
||||
const ZER: ByteHandler = |lexer| {
|
||||
let mut builder = AutoCow::new(lexer);
|
||||
let c = lexer.consume_char();
|
||||
builder.push_matching(c);
|
||||
let kind = lexer.read_zero(&mut builder);
|
||||
lexer.set_numeric_value(kind, builder.finish(lexer));
|
||||
kind
|
||||
};
|
||||
|
||||
// 1 to 9
|
||||
const DIG: ByteHandler = |lexer| {
|
||||
let mut builder = AutoCow::new(lexer);
|
||||
let c = lexer.consume_char();
|
||||
builder.push_matching(c);
|
||||
let kind = lexer.decimal_literal_after_first_digit(&mut builder);
|
||||
lexer.set_numeric_value(kind, builder.finish(lexer));
|
||||
kind
|
||||
};
|
||||
|
||||
// :
|
||||
const COL: ByteHandler = |lexer| {
|
||||
lexer.consume_char();
|
||||
Kind::Colon
|
||||
};
|
||||
|
||||
// ;
|
||||
const SEM: ByteHandler = |lexer| {
|
||||
lexer.consume_char();
|
||||
Kind::Semicolon
|
||||
};
|
||||
|
||||
// <
|
||||
const LSS: ByteHandler = |lexer| {
|
||||
lexer.consume_char();
|
||||
lexer.read_left_angle().unwrap_or_else(|| lexer.skip_single_line_comment())
|
||||
};
|
||||
|
||||
// =
|
||||
const EQL: ByteHandler = |lexer| {
|
||||
lexer.consume_char();
|
||||
if lexer.next_eq('=') {
|
||||
if lexer.next_eq('=') {
|
||||
Kind::Eq3
|
||||
} else {
|
||||
Kind::Eq2
|
||||
}
|
||||
} else if lexer.next_eq('>') {
|
||||
Kind::Arrow
|
||||
} else {
|
||||
Kind::Eq
|
||||
}
|
||||
};
|
||||
|
||||
// >
|
||||
const GTR: ByteHandler = |lexer| {
|
||||
lexer.consume_char();
|
||||
// `>=` is re-lexed with [Lexer::next_jsx_child]
|
||||
Kind::RAngle
|
||||
};
|
||||
|
||||
// ?
|
||||
const QST: ByteHandler = |lexer| {
|
||||
lexer.consume_char();
|
||||
if lexer.next_eq('?') {
|
||||
if lexer.next_eq('=') {
|
||||
Kind::Question2Eq
|
||||
} else {
|
||||
Kind::Question2
|
||||
}
|
||||
} else if lexer.peek() == '.' {
|
||||
// parse `?.1` as `?` `.1`
|
||||
if lexer.peek2().is_ascii_digit() {
|
||||
Kind::Question
|
||||
} else {
|
||||
lexer.current.chars.next();
|
||||
Kind::QuestionDot
|
||||
}
|
||||
} else {
|
||||
Kind::Question
|
||||
}
|
||||
};
|
||||
|
||||
// @
|
||||
const AT_: ByteHandler = |lexer| {
|
||||
lexer.consume_char();
|
||||
Kind::At
|
||||
};
|
||||
|
||||
// [
|
||||
const BTO: ByteHandler = |lexer| {
|
||||
lexer.consume_char();
|
||||
Kind::LBrack
|
||||
};
|
||||
|
||||
// \
|
||||
const ESC: ByteHandler = |lexer| {
|
||||
let mut builder = AutoCow::new(lexer);
|
||||
let c = lexer.consume_char();
|
||||
builder.push_matching(c);
|
||||
builder.force_allocation_without_current_ascii_char(lexer);
|
||||
lexer.identifier_unicode_escape_sequence(&mut builder, true);
|
||||
let text = lexer.identifier_name(builder);
|
||||
Kind::match_keyword(text)
|
||||
};
|
||||
|
||||
// ]
|
||||
const BTC: ByteHandler = |lexer| {
|
||||
lexer.consume_char();
|
||||
Kind::RBrack
|
||||
};
|
||||
|
||||
// ^
|
||||
const CRT: ByteHandler = |lexer| {
|
||||
lexer.consume_char();
|
||||
if lexer.next_eq('=') {
|
||||
Kind::CaretEq
|
||||
} else {
|
||||
Kind::Caret
|
||||
}
|
||||
};
|
||||
|
||||
// `
|
||||
const TPL: ByteHandler = |lexer| {
|
||||
lexer.consume_char();
|
||||
lexer.read_template_literal(Kind::TemplateHead, Kind::NoSubstitutionTemplate)
|
||||
};
|
||||
|
||||
// {
|
||||
const BEO: ByteHandler = |lexer| {
|
||||
lexer.consume_char();
|
||||
Kind::LCurly
|
||||
};
|
||||
|
||||
// |
|
||||
const PIP: ByteHandler = |lexer| {
|
||||
lexer.consume_char();
|
||||
if lexer.next_eq('|') {
|
||||
if lexer.next_eq('=') {
|
||||
Kind::Pipe2Eq
|
||||
} else {
|
||||
Kind::Pipe2
|
||||
}
|
||||
} else if lexer.next_eq('=') {
|
||||
Kind::PipeEq
|
||||
} else {
|
||||
Kind::Pipe
|
||||
}
|
||||
};
|
||||
|
||||
// }
|
||||
const BEC: ByteHandler = |lexer| {
|
||||
lexer.consume_char();
|
||||
Kind::RCurly
|
||||
};
|
||||
|
||||
// ~
|
||||
const TLD: ByteHandler = |lexer| {
|
||||
lexer.consume_char();
|
||||
Kind::Tilde
|
||||
};
|
||||
|
||||
const L_A: ByteHandler = |lexer| match lexer.identifier_name_handler() {
|
||||
"abstract" => Kind::Abstract,
|
||||
"accessor" => Kind::Accessor,
|
||||
"any" => Kind::Any,
|
||||
"as" => Kind::As,
|
||||
"assert" => Kind::Assert,
|
||||
"asserts" => Kind::Asserts,
|
||||
"async" => Kind::Async,
|
||||
"await" => Kind::Await,
|
||||
_ => Kind::Ident,
|
||||
};
|
||||
|
||||
const L_B: ByteHandler = |lexer| match lexer.identifier_name_handler() {
|
||||
"bigint" => Kind::BigInt,
|
||||
"boolean" => Kind::Boolean,
|
||||
"break" => Kind::Break,
|
||||
_ => Kind::Ident,
|
||||
};
|
||||
|
||||
const L_C: ByteHandler = |lexer| match lexer.identifier_name_handler() {
|
||||
"case" => Kind::Case,
|
||||
"catch" => Kind::Catch,
|
||||
"class" => Kind::Class,
|
||||
"const" => Kind::Const,
|
||||
"constructor" => Kind::Constructor,
|
||||
"continue" => Kind::Continue,
|
||||
_ => Kind::Ident,
|
||||
};
|
||||
|
||||
const L_D: ByteHandler = |lexer| match lexer.identifier_name_handler() {
|
||||
"debugger" => Kind::Debugger,
|
||||
"declare" => Kind::Declare,
|
||||
"default" => Kind::Default,
|
||||
"delete" => Kind::Delete,
|
||||
"do" => Kind::Do,
|
||||
_ => Kind::Ident,
|
||||
};
|
||||
|
||||
const L_E: ByteHandler = |lexer| match lexer.identifier_name_handler() {
|
||||
"else" => Kind::Else,
|
||||
"enum" => Kind::Enum,
|
||||
"export" => Kind::Export,
|
||||
"extends" => Kind::Extends,
|
||||
_ => Kind::Ident,
|
||||
};
|
||||
|
||||
const L_F: ByteHandler = |lexer| match lexer.identifier_name_handler() {
|
||||
"false" => Kind::False,
|
||||
"finally" => Kind::Finally,
|
||||
"for" => Kind::For,
|
||||
"from" => Kind::From,
|
||||
"function" => Kind::Function,
|
||||
_ => Kind::Ident,
|
||||
};
|
||||
|
||||
const L_G: ByteHandler = |lexer| match lexer.identifier_name_handler() {
|
||||
"get" => Kind::Get,
|
||||
"global" => Kind::Global,
|
||||
_ => Kind::Ident,
|
||||
};
|
||||
|
||||
const L_I: ByteHandler = |lexer| match lexer.identifier_name_handler() {
|
||||
"if" => Kind::If,
|
||||
"implements" => Kind::Implements,
|
||||
"import" => Kind::Import,
|
||||
"in" => Kind::In,
|
||||
"infer" => Kind::Infer,
|
||||
"instanceof" => Kind::Instanceof,
|
||||
"interface" => Kind::Interface,
|
||||
"intrinsic" => Kind::Intrinsic,
|
||||
"is" => Kind::Is,
|
||||
_ => Kind::Ident,
|
||||
};
|
||||
|
||||
const L_K: ByteHandler = |lexer| match lexer.identifier_name_handler() {
|
||||
"keyof" => Kind::KeyOf,
|
||||
_ => Kind::Ident,
|
||||
};
|
||||
|
||||
const L_L: ByteHandler = |lexer| match lexer.identifier_name_handler() {
|
||||
"let" => Kind::Let,
|
||||
_ => Kind::Ident,
|
||||
};
|
||||
|
||||
const L_M: ByteHandler = |lexer| match lexer.identifier_name_handler() {
|
||||
"meta" => Kind::Meta,
|
||||
"module" => Kind::Module,
|
||||
_ => Kind::Ident,
|
||||
};
|
||||
|
||||
const L_N: ByteHandler = |lexer| match lexer.identifier_name_handler() {
|
||||
"namespace" => Kind::Namespace,
|
||||
"never" => Kind::Never,
|
||||
"new" => Kind::New,
|
||||
"null" => Kind::Null,
|
||||
"number" => Kind::Number,
|
||||
_ => Kind::Ident,
|
||||
};
|
||||
|
||||
const L_O: ByteHandler = |lexer| match lexer.identifier_name_handler() {
|
||||
"object" => Kind::Object,
|
||||
"of" => Kind::Of,
|
||||
"out" => Kind::Out,
|
||||
"override" => Kind::Override,
|
||||
_ => Kind::Ident,
|
||||
};
|
||||
|
||||
const L_P: ByteHandler = |lexer| match lexer.identifier_name_handler() {
|
||||
"package" => Kind::Package,
|
||||
"private" => Kind::Private,
|
||||
"protected" => Kind::Protected,
|
||||
"public" => Kind::Public,
|
||||
_ => Kind::Ident,
|
||||
};
|
||||
|
||||
const L_R: ByteHandler = |lexer| match lexer.identifier_name_handler() {
|
||||
"readonly" => Kind::Readonly,
|
||||
"require" => Kind::Require,
|
||||
"return" => Kind::Return,
|
||||
_ => Kind::Ident,
|
||||
};
|
||||
|
||||
const L_S: ByteHandler = |lexer| match lexer.identifier_name_handler() {
|
||||
"satisfies" => Kind::Satisfies,
|
||||
"set" => Kind::Set,
|
||||
"static" => Kind::Static,
|
||||
"string" => Kind::String,
|
||||
"super" => Kind::Super,
|
||||
"switch" => Kind::Switch,
|
||||
"symbol" => Kind::Symbol,
|
||||
_ => Kind::Ident,
|
||||
};
|
||||
|
||||
const L_T: ByteHandler = |lexer| match lexer.identifier_name_handler() {
|
||||
"target" => Kind::Target,
|
||||
"this" => Kind::This,
|
||||
"throw" => Kind::Throw,
|
||||
"true" => Kind::True,
|
||||
"try" => Kind::Try,
|
||||
"type" => Kind::Type,
|
||||
"typeof" => Kind::Typeof,
|
||||
_ => Kind::Ident,
|
||||
};
|
||||
|
||||
const L_U: ByteHandler = |lexer| match lexer.identifier_name_handler() {
|
||||
"undefined" => Kind::Undefined,
|
||||
"unique" => Kind::Unique,
|
||||
"unknown" => Kind::Unknown,
|
||||
_ => Kind::Ident,
|
||||
};
|
||||
|
||||
const L_V: ByteHandler = |lexer| match lexer.identifier_name_handler() {
|
||||
"var" => Kind::Var,
|
||||
"void" => Kind::Void,
|
||||
_ => Kind::Ident,
|
||||
};
|
||||
|
||||
const L_W: ByteHandler = |lexer| match lexer.identifier_name_handler() {
|
||||
"while" => Kind::While,
|
||||
"with" => Kind::With,
|
||||
_ => Kind::Ident,
|
||||
};
|
||||
|
||||
const L_Y: ByteHandler = |lexer| match lexer.identifier_name_handler() {
|
||||
"yield" => Kind::Yield,
|
||||
_ => Kind::Ident,
|
||||
};
|
||||
|
|
|
|||
Loading…
Reference in a new issue