mirror of
https://github.com/danbulant/oxc
synced 2026-05-24 12:21:58 +00:00
refactor(parser): macro for ASCII byte handlers (#2066)
As discussed on #2046, it wasn't ideal to have `unsafe { lexer.consume_ascii_char() }` in every byte handler. It also wasn't great to have a safe function `consume_ascii_char()` which could cause UB if called incorrectly (so wasn't really safe at all). This PR achieves the same objective as #2046, but using a macro to define byte handlers for ASCII chars, which builds in the assertion that the next char is guaranteed to be ASCII. Before #2046: ```rs const SPS: ByteHandler = |lexer| { lexer.consume_char(); Kind::WhiteSpace }; ``` After this PR: ```rs ascii_byte_handler!(SPS(lexer) { lexer.consume_char(); Kind::WhiteSpace }); ``` i.e. the bodies of the handlers are unchanged from how they were before https://github.com/oxc-project/oxc/pull/2046. This expands to: ```rs const SPS: ByteHandler = |lexer| { unsafe { let s = lexer.current.chars.as_str(); assert_unchecked!(!s.is_empty()); assert_unchecked!(s.as_bytes()[0] < 128); } lexer.consume_char(); Kind::WhiteSpace }; ``` But due to the assertions the macro inserts, `consume_char()` is now optimized for ASCII characters, and reduces to a single instruction. So the `consume_ascii_char()` function introduced by #2046 is unnecessary, and can be removed again. The "boundary of unsafe" is moved to a new function `handle_byte()` which `read_next_token()` calls. `read_next_token()` is responsible for upholding the safety invariants, which include ensuring that the `ascii_byte_handler!()` macro is not being misused (that last part is strictly speaking a bit of a cheat, but...). I am not a fan of macros, as they're not great for readability. But in this case I don't think it's *too* bad, because: 1. The macro is well-documented. 2. It's not too clever (only one syntax is accepted). 3. It's used repetitively in a clear pattern, and once you've understood one, you understand them all. What do you think? Does this strike a reasonable balance between readability and safety?
This commit is contained in:
parent
18a58d472b
commit
8d5f5b8a49
1 changed files with 217 additions and 201 deletions
|
|
@ -11,7 +11,6 @@ mod string_builder;
|
||||||
mod token;
|
mod token;
|
||||||
mod trivia_builder;
|
mod trivia_builder;
|
||||||
|
|
||||||
use assert_unchecked::assert_unchecked;
|
|
||||||
use rustc_hash::FxHashMap;
|
use rustc_hash::FxHashMap;
|
||||||
use std::{collections::VecDeque, str::Chars};
|
use std::{collections::VecDeque, str::Chars};
|
||||||
|
|
||||||
|
|
@ -271,20 +270,6 @@ impl<'a> Lexer<'a> {
|
||||||
self.current.chars.next().unwrap()
|
self.current.chars.next().unwrap()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Consume the current char when it's known to be ASCII.
|
|
||||||
/// This compiles down to a single instruction, just incrementing `chars` iterator's pointer.
|
|
||||||
/// NOTE: Caller must ensure not at EOF and current char is ASCII.
|
|
||||||
#[inline]
|
|
||||||
fn consume_ascii_char(&mut self) -> char {
|
|
||||||
let s = self.current.chars.as_str();
|
|
||||||
// SAFETY: Caller must ensure not at EOF and current char is ASCII.
|
|
||||||
unsafe {
|
|
||||||
assert_unchecked!(!s.is_empty());
|
|
||||||
assert_unchecked!(s.as_bytes()[0] < 128);
|
|
||||||
}
|
|
||||||
self.current.chars.next().unwrap()
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Peek the next char without advancing the position
|
/// Peek the next char without advancing the position
|
||||||
#[inline]
|
#[inline]
|
||||||
fn peek(&self) -> Option<char> {
|
fn peek(&self) -> Option<char> {
|
||||||
|
|
@ -395,7 +380,9 @@ impl<'a> Lexer<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
let byte = remaining.as_bytes()[0];
|
let byte = remaining.as_bytes()[0];
|
||||||
let kind = BYTE_HANDLERS[byte as usize](self);
|
// SAFETY: Check for `remaining.is_empty()` ensures not at end of file,
|
||||||
|
// and `byte` is the byte at current position of `self.current.chars`.
|
||||||
|
let kind = unsafe { handle_byte(byte, self) };
|
||||||
|
|
||||||
if !matches!(
|
if !matches!(
|
||||||
kind,
|
kind,
|
||||||
|
|
@ -1307,6 +1294,17 @@ enum SurrogatePair {
|
||||||
HighLow(u32, u32),
|
HighLow(u32, u32),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[allow(clippy::unnecessary_safety_comment)]
|
||||||
|
/// Handle next byte of source.
|
||||||
|
/// SAFETY:
|
||||||
|
/// * Lexer must not be at end of file.
|
||||||
|
/// * `byte` must be next byte of source code, corresponding to current position
|
||||||
|
/// of `lexer.current.chars`.
|
||||||
|
/// * Only `BYTE_HANDLERS` for ASCII characters may use the `ascii_byte_handler!()` macro.
|
||||||
|
unsafe fn handle_byte(byte: u8, lexer: &mut Lexer) -> Kind {
|
||||||
|
BYTE_HANDLERS[byte as usize](lexer)
|
||||||
|
}
|
||||||
|
|
||||||
type ByteHandler = fn(&mut Lexer<'_>) -> Kind;
|
type ByteHandler = fn(&mut Lexer<'_>) -> Kind;
|
||||||
|
|
||||||
/// Lookup table mapping any incoming byte to a handler function defined below.
|
/// Lookup table mapping any incoming byte to a handler function defined below.
|
||||||
|
|
@ -1332,33 +1330,82 @@ static BYTE_HANDLERS: [ByteHandler; 256] = [
|
||||||
UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // F
|
UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // F
|
||||||
];
|
];
|
||||||
|
|
||||||
|
#[allow(clippy::unnecessary_safety_comment)]
|
||||||
|
/// Macro for defining byte handler for an ASCII character.
|
||||||
|
///
|
||||||
|
/// In addition to defining a `const` for the handler, it also asserts that lexer
|
||||||
|
/// is not at end of file, and that next char is ASCII.
|
||||||
|
/// Where the handler is for an ASCII character, these assertions are self-evidently true.
|
||||||
|
///
|
||||||
|
/// These assertions produce no runtime code, but hint to the compiler that it can assume that
|
||||||
|
/// next char is ASCII, and it uses that information to optimize the rest of the handler.
|
||||||
|
/// e.g. `lexer.current.chars.next()` becomes just a single assembler instruction.
|
||||||
|
/// Without the assertions, the compiler is unable to deduce the next char is ASCII, due to
|
||||||
|
/// the indirection of the `BYTE_HANDLERS` jump table.
|
||||||
|
///
|
||||||
|
/// These assertions are unchecked (i.e. won't panic) and will cause UB if they're incorrect.
|
||||||
|
///
|
||||||
|
/// SAFETY: Only use this macro to define byte handlers for ASCII characters.
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// ascii_byte_handler!(SPS(lexer) {
|
||||||
|
/// lexer.consume_char();
|
||||||
|
/// Kind::WhiteSpace
|
||||||
|
/// });
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// expands to:
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// const SPS: ByteHandler = |lexer| {
|
||||||
|
/// unsafe {
|
||||||
|
/// use ::assert_unchecked::assert_unchecked;
|
||||||
|
/// let s = lexer.current.chars.as_str();
|
||||||
|
/// assert_unchecked!(!s.is_empty());
|
||||||
|
/// assert_unchecked!(s.as_bytes()[0] < 128);
|
||||||
|
/// }
|
||||||
|
/// lexer.consume_char();
|
||||||
|
/// Kind::WhiteSpace
|
||||||
|
/// };
|
||||||
|
/// ```
|
||||||
|
macro_rules! ascii_byte_handler {
|
||||||
|
($id:ident($lex:ident) $body:expr) => {
|
||||||
|
const $id: ByteHandler = |$lex| {
|
||||||
|
// SAFETY: This macro is only used for ASCII characters
|
||||||
|
unsafe {
|
||||||
|
use ::assert_unchecked::assert_unchecked;
|
||||||
|
let s = $lex.current.chars.as_str();
|
||||||
|
assert_unchecked!(!s.is_empty());
|
||||||
|
assert_unchecked!(s.as_bytes()[0] < 128);
|
||||||
|
}
|
||||||
|
$body
|
||||||
|
};
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
// `\0` `\1` etc
|
// `\0` `\1` etc
|
||||||
const ERR: ByteHandler = |lexer| {
|
ascii_byte_handler!(ERR(lexer) {
|
||||||
// Next char is an ASCII char e.g. `\0`
|
let c = lexer.consume_char();
|
||||||
let c = lexer.consume_ascii_char();
|
|
||||||
lexer.error(diagnostics::InvalidCharacter(c, lexer.unterminated_range()));
|
lexer.error(diagnostics::InvalidCharacter(c, lexer.unterminated_range()));
|
||||||
Kind::Undetermined
|
Kind::Undetermined
|
||||||
};
|
});
|
||||||
|
|
||||||
// <SPACE> <TAB> <VT> <FF>
|
// <SPACE> <TAB> <VT> <FF>
|
||||||
const SPS: ByteHandler = |lexer| {
|
ascii_byte_handler!(SPS(lexer) {
|
||||||
// Next char is an ASCII space character
|
lexer.consume_char();
|
||||||
lexer.consume_ascii_char();
|
|
||||||
Kind::WhiteSpace
|
Kind::WhiteSpace
|
||||||
};
|
});
|
||||||
|
|
||||||
// '\r' '\n'
|
// '\r' '\n'
|
||||||
const LIN: ByteHandler = |lexer| {
|
ascii_byte_handler!(LIN(lexer) {
|
||||||
// Next char is `\r` or `\n`, which are both ASCII
|
lexer.consume_char();
|
||||||
lexer.consume_ascii_char();
|
|
||||||
lexer.current.token.is_on_new_line = true;
|
lexer.current.token.is_on_new_line = true;
|
||||||
Kind::NewLine
|
Kind::NewLine
|
||||||
};
|
});
|
||||||
|
|
||||||
// !
|
// !
|
||||||
const EXL: ByteHandler = |lexer| {
|
ascii_byte_handler!(EXL(lexer) {
|
||||||
// Next char is `!`, which is ASCII
|
lexer.consume_char();
|
||||||
lexer.consume_ascii_char();
|
|
||||||
if lexer.next_eq('=') {
|
if lexer.next_eq('=') {
|
||||||
if lexer.next_eq('=') {
|
if lexer.next_eq('=') {
|
||||||
Kind::Neq2
|
Kind::Neq2
|
||||||
|
|
@ -1368,23 +1415,21 @@ const EXL: ByteHandler = |lexer| {
|
||||||
} else {
|
} else {
|
||||||
Kind::Bang
|
Kind::Bang
|
||||||
}
|
}
|
||||||
};
|
});
|
||||||
|
|
||||||
// ' "
|
// ' "
|
||||||
const QOT: ByteHandler = |lexer| {
|
ascii_byte_handler!(QOT(lexer) {
|
||||||
// Next char is `'` or `"`, which are both ASCII
|
let c = lexer.consume_char();
|
||||||
let c = lexer.consume_ascii_char();
|
|
||||||
if lexer.context == LexerContext::JsxAttributeValue {
|
if lexer.context == LexerContext::JsxAttributeValue {
|
||||||
lexer.read_jsx_string_literal(c)
|
lexer.read_jsx_string_literal(c)
|
||||||
} else {
|
} else {
|
||||||
lexer.read_string_literal(c)
|
lexer.read_string_literal(c)
|
||||||
}
|
}
|
||||||
};
|
});
|
||||||
|
|
||||||
// #
|
// #
|
||||||
const HAS: ByteHandler = |lexer| {
|
ascii_byte_handler!(HAS(lexer) {
|
||||||
// Next char is `#`, which is ASCII
|
lexer.consume_char();
|
||||||
lexer.consume_ascii_char();
|
|
||||||
// HashbangComment ::
|
// HashbangComment ::
|
||||||
// `#!` SingleLineCommentChars?
|
// `#!` SingleLineCommentChars?
|
||||||
if lexer.current.token.start == 0 && lexer.next_eq('!') {
|
if lexer.current.token.start == 0 && lexer.next_eq('!') {
|
||||||
|
|
@ -1392,28 +1437,27 @@ const HAS: ByteHandler = |lexer| {
|
||||||
} else {
|
} else {
|
||||||
lexer.private_identifier()
|
lexer.private_identifier()
|
||||||
}
|
}
|
||||||
};
|
});
|
||||||
|
|
||||||
const IDT: ByteHandler = |lexer| {
|
// `A..=Z`, `a..=z` (except special cases below), `_`, `$`
|
||||||
|
ascii_byte_handler!(IDT(lexer) {
|
||||||
lexer.identifier_name_handler();
|
lexer.identifier_name_handler();
|
||||||
Kind::Ident
|
Kind::Ident
|
||||||
};
|
});
|
||||||
|
|
||||||
// %
|
// %
|
||||||
const PRC: ByteHandler = |lexer| {
|
ascii_byte_handler!(PRC(lexer) {
|
||||||
// Next char is `%`, which is ASCII
|
lexer.consume_char();
|
||||||
lexer.consume_ascii_char();
|
|
||||||
if lexer.next_eq('=') {
|
if lexer.next_eq('=') {
|
||||||
Kind::PercentEq
|
Kind::PercentEq
|
||||||
} else {
|
} else {
|
||||||
Kind::Percent
|
Kind::Percent
|
||||||
}
|
}
|
||||||
};
|
});
|
||||||
|
|
||||||
// &
|
// &
|
||||||
const AMP: ByteHandler = |lexer| {
|
ascii_byte_handler!(AMP(lexer) {
|
||||||
// Next char is `&`, which is ASCII
|
lexer.consume_char();
|
||||||
lexer.consume_ascii_char();
|
|
||||||
if lexer.next_eq('&') {
|
if lexer.next_eq('&') {
|
||||||
if lexer.next_eq('=') {
|
if lexer.next_eq('=') {
|
||||||
Kind::Amp2Eq
|
Kind::Amp2Eq
|
||||||
|
|
@ -1425,26 +1469,23 @@ const AMP: ByteHandler = |lexer| {
|
||||||
} else {
|
} else {
|
||||||
Kind::Amp
|
Kind::Amp
|
||||||
}
|
}
|
||||||
};
|
});
|
||||||
|
|
||||||
// (
|
// (
|
||||||
const PNO: ByteHandler = |lexer| {
|
ascii_byte_handler!(PNO(lexer) {
|
||||||
// Next char is `(`, which is ASCII
|
lexer.consume_char();
|
||||||
lexer.consume_ascii_char();
|
|
||||||
Kind::LParen
|
Kind::LParen
|
||||||
};
|
});
|
||||||
|
|
||||||
// )
|
// )
|
||||||
const PNC: ByteHandler = |lexer| {
|
ascii_byte_handler!(PNC(lexer) {
|
||||||
// Next char is `)`, which is ASCII
|
lexer.consume_char();
|
||||||
lexer.consume_ascii_char();
|
|
||||||
Kind::RParen
|
Kind::RParen
|
||||||
};
|
});
|
||||||
|
|
||||||
// *
|
// *
|
||||||
const ATR: ByteHandler = |lexer| {
|
ascii_byte_handler!(ATR(lexer) {
|
||||||
// Next char is `*`, which is ASCII
|
lexer.consume_char();
|
||||||
lexer.consume_ascii_char();
|
|
||||||
if lexer.next_eq('*') {
|
if lexer.next_eq('*') {
|
||||||
if lexer.next_eq('=') {
|
if lexer.next_eq('=') {
|
||||||
Kind::Star2Eq
|
Kind::Star2Eq
|
||||||
|
|
@ -1456,12 +1497,11 @@ const ATR: ByteHandler = |lexer| {
|
||||||
} else {
|
} else {
|
||||||
Kind::Star
|
Kind::Star
|
||||||
}
|
}
|
||||||
};
|
});
|
||||||
|
|
||||||
// +
|
// +
|
||||||
const PLS: ByteHandler = |lexer| {
|
ascii_byte_handler!(PLS(lexer) {
|
||||||
// Next char is `+`, which is ASCII
|
lexer.consume_char();
|
||||||
lexer.consume_ascii_char();
|
|
||||||
if lexer.next_eq('+') {
|
if lexer.next_eq('+') {
|
||||||
Kind::Plus2
|
Kind::Plus2
|
||||||
} else if lexer.next_eq('=') {
|
} else if lexer.next_eq('=') {
|
||||||
|
|
@ -1469,33 +1509,29 @@ const PLS: ByteHandler = |lexer| {
|
||||||
} else {
|
} else {
|
||||||
Kind::Plus
|
Kind::Plus
|
||||||
}
|
}
|
||||||
};
|
});
|
||||||
|
|
||||||
// ,
|
// ,
|
||||||
const COM: ByteHandler = |lexer| {
|
ascii_byte_handler!(COM(lexer) {
|
||||||
// Next char is `,`, which is ASCII
|
lexer.consume_char();
|
||||||
lexer.consume_ascii_char();
|
|
||||||
Kind::Comma
|
Kind::Comma
|
||||||
};
|
});
|
||||||
|
|
||||||
// -
|
// -
|
||||||
const MIN: ByteHandler = |lexer| {
|
ascii_byte_handler!(MIN(lexer) {
|
||||||
// Next char is `-`, which is ASCII
|
lexer.consume_char();
|
||||||
lexer.consume_ascii_char();
|
|
||||||
lexer.read_minus().unwrap_or_else(|| lexer.skip_single_line_comment())
|
lexer.read_minus().unwrap_or_else(|| lexer.skip_single_line_comment())
|
||||||
};
|
});
|
||||||
|
|
||||||
// .
|
// .
|
||||||
const PRD: ByteHandler = |lexer| {
|
ascii_byte_handler!(PRD(lexer) {
|
||||||
// Next char is `.`, which is ASCII
|
lexer.consume_char();
|
||||||
lexer.consume_ascii_char();
|
|
||||||
lexer.read_dot()
|
lexer.read_dot()
|
||||||
};
|
});
|
||||||
|
|
||||||
// /
|
// /
|
||||||
const SLH: ByteHandler = |lexer| {
|
ascii_byte_handler!(SLH(lexer) {
|
||||||
// Next char is `/`, which is ASCII
|
lexer.consume_char();
|
||||||
lexer.consume_ascii_char();
|
|
||||||
match lexer.peek() {
|
match lexer.peek() {
|
||||||
Some('/') => {
|
Some('/') => {
|
||||||
lexer.current.chars.next();
|
lexer.current.chars.next();
|
||||||
|
|
@ -1514,47 +1550,41 @@ const SLH: ByteHandler = |lexer| {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
});
|
||||||
|
|
||||||
// 0
|
// 0
|
||||||
const ZER: ByteHandler = |lexer| {
|
ascii_byte_handler!(ZER(lexer) {
|
||||||
// Next char is `0`, which is ASCII
|
lexer.consume_char();
|
||||||
lexer.consume_ascii_char();
|
|
||||||
lexer.read_zero()
|
lexer.read_zero()
|
||||||
};
|
});
|
||||||
|
|
||||||
// 1 to 9
|
// 1 to 9
|
||||||
const DIG: ByteHandler = |lexer| {
|
ascii_byte_handler!(DIG(lexer) {
|
||||||
// Next char is an ASCII digit
|
lexer.consume_char();
|
||||||
lexer.consume_ascii_char();
|
|
||||||
lexer.decimal_literal_after_first_digit()
|
lexer.decimal_literal_after_first_digit()
|
||||||
};
|
});
|
||||||
|
|
||||||
// :
|
// :
|
||||||
const COL: ByteHandler = |lexer| {
|
ascii_byte_handler!(COL(lexer) {
|
||||||
// Next char is `:`, which is ASCII
|
lexer.consume_char();
|
||||||
lexer.consume_ascii_char();
|
|
||||||
Kind::Colon
|
Kind::Colon
|
||||||
};
|
});
|
||||||
|
|
||||||
// ;
|
// ;
|
||||||
const SEM: ByteHandler = |lexer| {
|
ascii_byte_handler!(SEM(lexer) {
|
||||||
// Next char is `;`, which is ASCII
|
lexer.consume_char();
|
||||||
lexer.consume_ascii_char();
|
|
||||||
Kind::Semicolon
|
Kind::Semicolon
|
||||||
};
|
});
|
||||||
|
|
||||||
// <
|
// <
|
||||||
const LSS: ByteHandler = |lexer| {
|
ascii_byte_handler!(LSS(lexer) {
|
||||||
// Next char is `<`, which is ASCII
|
lexer.consume_char();
|
||||||
lexer.consume_ascii_char();
|
|
||||||
lexer.read_left_angle().unwrap_or_else(|| lexer.skip_single_line_comment())
|
lexer.read_left_angle().unwrap_or_else(|| lexer.skip_single_line_comment())
|
||||||
};
|
});
|
||||||
|
|
||||||
// =
|
// =
|
||||||
const EQL: ByteHandler = |lexer| {
|
ascii_byte_handler!(EQL(lexer) {
|
||||||
// Next char is `=`, which is ASCII
|
lexer.consume_char();
|
||||||
lexer.consume_ascii_char();
|
|
||||||
if lexer.next_eq('=') {
|
if lexer.next_eq('=') {
|
||||||
if lexer.next_eq('=') {
|
if lexer.next_eq('=') {
|
||||||
Kind::Eq3
|
Kind::Eq3
|
||||||
|
|
@ -1566,20 +1596,18 @@ const EQL: ByteHandler = |lexer| {
|
||||||
} else {
|
} else {
|
||||||
Kind::Eq
|
Kind::Eq
|
||||||
}
|
}
|
||||||
};
|
});
|
||||||
|
|
||||||
// >
|
// >
|
||||||
const GTR: ByteHandler = |lexer| {
|
ascii_byte_handler!(GTR(lexer) {
|
||||||
// Next char is `>`, which is ASCII
|
lexer.consume_char();
|
||||||
lexer.consume_ascii_char();
|
|
||||||
// `>=` is re-lexed with [Lexer::next_jsx_child]
|
// `>=` is re-lexed with [Lexer::next_jsx_child]
|
||||||
Kind::RAngle
|
Kind::RAngle
|
||||||
};
|
});
|
||||||
|
|
||||||
// ?
|
// ?
|
||||||
const QST: ByteHandler = |lexer| {
|
ascii_byte_handler!(QST(lexer) {
|
||||||
// Next char is `?`, which is ASCII
|
lexer.consume_char();
|
||||||
lexer.consume_ascii_char();
|
|
||||||
if lexer.next_eq('?') {
|
if lexer.next_eq('?') {
|
||||||
if lexer.next_eq('=') {
|
if lexer.next_eq('=') {
|
||||||
Kind::Question2Eq
|
Kind::Question2Eq
|
||||||
|
|
@ -1597,72 +1625,61 @@ const QST: ByteHandler = |lexer| {
|
||||||
} else {
|
} else {
|
||||||
Kind::Question
|
Kind::Question
|
||||||
}
|
}
|
||||||
};
|
});
|
||||||
|
|
||||||
// @
|
// @
|
||||||
const AT_: ByteHandler = |lexer| {
|
ascii_byte_handler!(AT_(lexer) {
|
||||||
// Next char is `@`, which is ASCII
|
lexer.consume_char();
|
||||||
lexer.consume_ascii_char();
|
|
||||||
Kind::At
|
Kind::At
|
||||||
};
|
});
|
||||||
|
|
||||||
// [
|
// [
|
||||||
const BTO: ByteHandler = |lexer| {
|
ascii_byte_handler!(BTO(lexer) {
|
||||||
// Next char is `[`, which is ASCII
|
lexer.consume_char();
|
||||||
lexer.consume_ascii_char();
|
|
||||||
Kind::LBrack
|
Kind::LBrack
|
||||||
};
|
});
|
||||||
|
|
||||||
// \
|
// \
|
||||||
const ESC: ByteHandler = |lexer| {
|
ascii_byte_handler!(ESC(lexer) {
|
||||||
let lexer_ref = lexer as &Lexer<'_>;
|
let mut builder = AutoCow::new(lexer);
|
||||||
let mut builder = AutoCow::new(lexer_ref);
|
lexer.consume_char();
|
||||||
// Next char at start of this function was `\`, which is ASCII.
|
|
||||||
// `AutoCow::new` cannot have changed the state of `lexer.current.chars` iterator,
|
|
||||||
// as we explicitly passed it only an immutable reference.
|
|
||||||
lexer.consume_ascii_char();
|
|
||||||
builder.force_allocation_without_current_ascii_char(lexer);
|
builder.force_allocation_without_current_ascii_char(lexer);
|
||||||
lexer.identifier_unicode_escape_sequence(&mut builder, true);
|
lexer.identifier_unicode_escape_sequence(&mut builder, true);
|
||||||
let text = lexer.identifier_name(builder);
|
let text = lexer.identifier_name(builder);
|
||||||
Kind::match_keyword(text)
|
Kind::match_keyword(text)
|
||||||
};
|
});
|
||||||
|
|
||||||
// ]
|
// ]
|
||||||
const BTC: ByteHandler = |lexer| {
|
ascii_byte_handler!(BTC(lexer) {
|
||||||
// Next char is `]`, which is ASCII
|
lexer.consume_char();
|
||||||
lexer.consume_ascii_char();
|
|
||||||
Kind::RBrack
|
Kind::RBrack
|
||||||
};
|
});
|
||||||
|
|
||||||
// ^
|
// ^
|
||||||
const CRT: ByteHandler = |lexer| {
|
ascii_byte_handler!(CRT(lexer) {
|
||||||
// Next char is `^`, which is ASCII
|
lexer.consume_char();
|
||||||
lexer.consume_ascii_char();
|
|
||||||
if lexer.next_eq('=') {
|
if lexer.next_eq('=') {
|
||||||
Kind::CaretEq
|
Kind::CaretEq
|
||||||
} else {
|
} else {
|
||||||
Kind::Caret
|
Kind::Caret
|
||||||
}
|
}
|
||||||
};
|
});
|
||||||
|
|
||||||
// `
|
// `
|
||||||
const TPL: ByteHandler = |lexer| {
|
ascii_byte_handler!(TPL(lexer) {
|
||||||
// Next char is '`', which is ASCII
|
lexer.consume_char();
|
||||||
lexer.consume_ascii_char();
|
|
||||||
lexer.read_template_literal(Kind::TemplateHead, Kind::NoSubstitutionTemplate)
|
lexer.read_template_literal(Kind::TemplateHead, Kind::NoSubstitutionTemplate)
|
||||||
};
|
});
|
||||||
|
|
||||||
// {
|
// {
|
||||||
const BEO: ByteHandler = |lexer| {
|
ascii_byte_handler!(BEO(lexer) {
|
||||||
// Next char is `{`, which is ASCII
|
lexer.consume_char();
|
||||||
lexer.consume_ascii_char();
|
|
||||||
Kind::LCurly
|
Kind::LCurly
|
||||||
};
|
});
|
||||||
|
|
||||||
// |
|
// |
|
||||||
const PIP: ByteHandler = |lexer| {
|
ascii_byte_handler!(PIP(lexer) {
|
||||||
// Next char is `|`, which is ASCII
|
lexer.consume_char();
|
||||||
lexer.consume_ascii_char();
|
|
||||||
if lexer.next_eq('|') {
|
if lexer.next_eq('|') {
|
||||||
if lexer.next_eq('=') {
|
if lexer.next_eq('=') {
|
||||||
Kind::Pipe2Eq
|
Kind::Pipe2Eq
|
||||||
|
|
@ -1674,23 +1691,21 @@ const PIP: ByteHandler = |lexer| {
|
||||||
} else {
|
} else {
|
||||||
Kind::Pipe
|
Kind::Pipe
|
||||||
}
|
}
|
||||||
};
|
});
|
||||||
|
|
||||||
// }
|
// }
|
||||||
const BEC: ByteHandler = |lexer| {
|
ascii_byte_handler!(BEC(lexer) {
|
||||||
// Next char is `}`, which is ASCII
|
lexer.consume_char();
|
||||||
lexer.consume_ascii_char();
|
|
||||||
Kind::RCurly
|
Kind::RCurly
|
||||||
};
|
});
|
||||||
|
|
||||||
// ~
|
// ~
|
||||||
const TLD: ByteHandler = |lexer| {
|
ascii_byte_handler!(TLD(lexer) {
|
||||||
// Next char is `~`, which is ASCII
|
lexer.consume_char();
|
||||||
lexer.consume_ascii_char();
|
|
||||||
Kind::Tilde
|
Kind::Tilde
|
||||||
};
|
});
|
||||||
|
|
||||||
const L_A: ByteHandler = |lexer| match &lexer.identifier_name_handler()[1..] {
|
ascii_byte_handler!(L_A(lexer) match &lexer.identifier_name_handler()[1..] {
|
||||||
"wait" => Kind::Await,
|
"wait" => Kind::Await,
|
||||||
"sync" => Kind::Async,
|
"sync" => Kind::Async,
|
||||||
"bstract" => Kind::Abstract,
|
"bstract" => Kind::Abstract,
|
||||||
|
|
@ -1700,16 +1715,16 @@ const L_A: ByteHandler = |lexer| match &lexer.identifier_name_handler()[1..] {
|
||||||
"ssert" => Kind::Assert,
|
"ssert" => Kind::Assert,
|
||||||
"sserts" => Kind::Asserts,
|
"sserts" => Kind::Asserts,
|
||||||
_ => Kind::Ident,
|
_ => Kind::Ident,
|
||||||
};
|
});
|
||||||
|
|
||||||
const L_B: ByteHandler = |lexer| match &lexer.identifier_name_handler()[1..] {
|
ascii_byte_handler!(L_B(lexer) match &lexer.identifier_name_handler()[1..] {
|
||||||
"reak" => Kind::Break,
|
"reak" => Kind::Break,
|
||||||
"oolean" => Kind::Boolean,
|
"oolean" => Kind::Boolean,
|
||||||
"igint" => Kind::BigInt,
|
"igint" => Kind::BigInt,
|
||||||
_ => Kind::Ident,
|
_ => Kind::Ident,
|
||||||
};
|
});
|
||||||
|
|
||||||
const L_C: ByteHandler = |lexer| match &lexer.identifier_name_handler()[1..] {
|
ascii_byte_handler!(L_C(lexer) match &lexer.identifier_name_handler()[1..] {
|
||||||
"onst" => Kind::Const,
|
"onst" => Kind::Const,
|
||||||
"lass" => Kind::Class,
|
"lass" => Kind::Class,
|
||||||
"ontinue" => Kind::Continue,
|
"ontinue" => Kind::Continue,
|
||||||
|
|
@ -1717,41 +1732,41 @@ const L_C: ByteHandler = |lexer| match &lexer.identifier_name_handler()[1..] {
|
||||||
"ase" => Kind::Case,
|
"ase" => Kind::Case,
|
||||||
"onstructor" => Kind::Constructor,
|
"onstructor" => Kind::Constructor,
|
||||||
_ => Kind::Ident,
|
_ => Kind::Ident,
|
||||||
};
|
});
|
||||||
|
|
||||||
const L_D: ByteHandler = |lexer| match &lexer.identifier_name_handler()[1..] {
|
ascii_byte_handler!(L_D(lexer) match &lexer.identifier_name_handler()[1..] {
|
||||||
"o" => Kind::Do,
|
"o" => Kind::Do,
|
||||||
"elete" => Kind::Delete,
|
"elete" => Kind::Delete,
|
||||||
"eclare" => Kind::Declare,
|
"eclare" => Kind::Declare,
|
||||||
"efault" => Kind::Default,
|
"efault" => Kind::Default,
|
||||||
"ebugger" => Kind::Debugger,
|
"ebugger" => Kind::Debugger,
|
||||||
_ => Kind::Ident,
|
_ => Kind::Ident,
|
||||||
};
|
});
|
||||||
|
|
||||||
const L_E: ByteHandler = |lexer| match &lexer.identifier_name_handler()[1..] {
|
ascii_byte_handler!(L_E(lexer) match &lexer.identifier_name_handler()[1..] {
|
||||||
"lse" => Kind::Else,
|
"lse" => Kind::Else,
|
||||||
"num" => Kind::Enum,
|
"num" => Kind::Enum,
|
||||||
"xport" => Kind::Export,
|
"xport" => Kind::Export,
|
||||||
"xtends" => Kind::Extends,
|
"xtends" => Kind::Extends,
|
||||||
_ => Kind::Ident,
|
_ => Kind::Ident,
|
||||||
};
|
});
|
||||||
|
|
||||||
const L_F: ByteHandler = |lexer| match &lexer.identifier_name_handler()[1..] {
|
ascii_byte_handler!(L_F(lexer) match &lexer.identifier_name_handler()[1..] {
|
||||||
"unction" => Kind::Function,
|
"unction" => Kind::Function,
|
||||||
"alse" => Kind::False,
|
"alse" => Kind::False,
|
||||||
"or" => Kind::For,
|
"or" => Kind::For,
|
||||||
"inally" => Kind::Finally,
|
"inally" => Kind::Finally,
|
||||||
"rom" => Kind::From,
|
"rom" => Kind::From,
|
||||||
_ => Kind::Ident,
|
_ => Kind::Ident,
|
||||||
};
|
});
|
||||||
|
|
||||||
const L_G: ByteHandler = |lexer| match &lexer.identifier_name_handler()[1..] {
|
ascii_byte_handler!(L_G(lexer) match &lexer.identifier_name_handler()[1..] {
|
||||||
"et" => Kind::Get,
|
"et" => Kind::Get,
|
||||||
"lobal" => Kind::Global,
|
"lobal" => Kind::Global,
|
||||||
_ => Kind::Ident,
|
_ => Kind::Ident,
|
||||||
};
|
});
|
||||||
|
|
||||||
const L_I: ByteHandler = |lexer| match &lexer.identifier_name_handler()[1..] {
|
ascii_byte_handler!(L_I(lexer) match &lexer.identifier_name_handler()[1..] {
|
||||||
"f" => Kind::If,
|
"f" => Kind::If,
|
||||||
"nstanceof" => Kind::Instanceof,
|
"nstanceof" => Kind::Instanceof,
|
||||||
"n" => Kind::In,
|
"n" => Kind::In,
|
||||||
|
|
@ -1762,57 +1777,57 @@ const L_I: ByteHandler = |lexer| match &lexer.identifier_name_handler()[1..] {
|
||||||
"ntrinsic" => Kind::Intrinsic,
|
"ntrinsic" => Kind::Intrinsic,
|
||||||
"s" => Kind::Is,
|
"s" => Kind::Is,
|
||||||
_ => Kind::Ident,
|
_ => Kind::Ident,
|
||||||
};
|
});
|
||||||
|
|
||||||
const L_K: ByteHandler = |lexer| match &lexer.identifier_name_handler()[1..] {
|
ascii_byte_handler!(L_K(lexer) match &lexer.identifier_name_handler()[1..] {
|
||||||
"eyof" => Kind::KeyOf,
|
"eyof" => Kind::KeyOf,
|
||||||
_ => Kind::Ident,
|
_ => Kind::Ident,
|
||||||
};
|
});
|
||||||
|
|
||||||
const L_L: ByteHandler = |lexer| match &lexer.identifier_name_handler()[1..] {
|
ascii_byte_handler!(L_L(lexer) match &lexer.identifier_name_handler()[1..] {
|
||||||
"et" => Kind::Let,
|
"et" => Kind::Let,
|
||||||
_ => Kind::Ident,
|
_ => Kind::Ident,
|
||||||
};
|
});
|
||||||
|
|
||||||
const L_M: ByteHandler = |lexer| match &lexer.identifier_name_handler()[1..] {
|
ascii_byte_handler!(L_M(lexer) match &lexer.identifier_name_handler()[1..] {
|
||||||
"eta" => Kind::Meta,
|
"eta" => Kind::Meta,
|
||||||
"odule" => Kind::Module,
|
"odule" => Kind::Module,
|
||||||
_ => Kind::Ident,
|
_ => Kind::Ident,
|
||||||
};
|
});
|
||||||
|
|
||||||
const L_N: ByteHandler = |lexer| match &lexer.identifier_name_handler()[1..] {
|
ascii_byte_handler!(L_N(lexer) match &lexer.identifier_name_handler()[1..] {
|
||||||
"ull" => Kind::Null,
|
"ull" => Kind::Null,
|
||||||
"ew" => Kind::New,
|
"ew" => Kind::New,
|
||||||
"umber" => Kind::Number,
|
"umber" => Kind::Number,
|
||||||
"amespace" => Kind::Namespace,
|
"amespace" => Kind::Namespace,
|
||||||
"ever" => Kind::Never,
|
"ever" => Kind::Never,
|
||||||
_ => Kind::Ident,
|
_ => Kind::Ident,
|
||||||
};
|
});
|
||||||
|
|
||||||
const L_O: ByteHandler = |lexer| match &lexer.identifier_name_handler()[1..] {
|
ascii_byte_handler!(L_O(lexer) match &lexer.identifier_name_handler()[1..] {
|
||||||
"f" => Kind::Of,
|
"f" => Kind::Of,
|
||||||
"bject" => Kind::Object,
|
"bject" => Kind::Object,
|
||||||
"ut" => Kind::Out,
|
"ut" => Kind::Out,
|
||||||
"verride" => Kind::Override,
|
"verride" => Kind::Override,
|
||||||
_ => Kind::Ident,
|
_ => Kind::Ident,
|
||||||
};
|
});
|
||||||
|
|
||||||
const L_P: ByteHandler = |lexer| match &lexer.identifier_name_handler()[1..] {
|
ascii_byte_handler!(L_P(lexer) match &lexer.identifier_name_handler()[1..] {
|
||||||
"ackage" => Kind::Package,
|
"ackage" => Kind::Package,
|
||||||
"rivate" => Kind::Private,
|
"rivate" => Kind::Private,
|
||||||
"rotected" => Kind::Protected,
|
"rotected" => Kind::Protected,
|
||||||
"ublic" => Kind::Public,
|
"ublic" => Kind::Public,
|
||||||
_ => Kind::Ident,
|
_ => Kind::Ident,
|
||||||
};
|
});
|
||||||
|
|
||||||
const L_R: ByteHandler = |lexer| match &lexer.identifier_name_handler()[1..] {
|
ascii_byte_handler!(L_R(lexer) match &lexer.identifier_name_handler()[1..] {
|
||||||
"eturn" => Kind::Return,
|
"eturn" => Kind::Return,
|
||||||
"equire" => Kind::Require,
|
"equire" => Kind::Require,
|
||||||
"eadonly" => Kind::Readonly,
|
"eadonly" => Kind::Readonly,
|
||||||
_ => Kind::Ident,
|
_ => Kind::Ident,
|
||||||
};
|
});
|
||||||
|
|
||||||
const L_S: ByteHandler = |lexer| match &lexer.identifier_name_handler()[1..] {
|
ascii_byte_handler!(L_S(lexer) match &lexer.identifier_name_handler()[1..] {
|
||||||
"et" => Kind::Set,
|
"et" => Kind::Set,
|
||||||
"uper" => Kind::Super,
|
"uper" => Kind::Super,
|
||||||
"witch" => Kind::Switch,
|
"witch" => Kind::Switch,
|
||||||
|
|
@ -1821,9 +1836,9 @@ const L_S: ByteHandler = |lexer| match &lexer.identifier_name_handler()[1..] {
|
||||||
"tring" => Kind::String,
|
"tring" => Kind::String,
|
||||||
"atisfies" => Kind::Satisfies,
|
"atisfies" => Kind::Satisfies,
|
||||||
_ => Kind::Ident,
|
_ => Kind::Ident,
|
||||||
};
|
});
|
||||||
|
|
||||||
const L_T: ByteHandler = |lexer| match &lexer.identifier_name_handler()[1..] {
|
ascii_byte_handler!(L_T(lexer) match &lexer.identifier_name_handler()[1..] {
|
||||||
"his" => Kind::This,
|
"his" => Kind::This,
|
||||||
"rue" => Kind::True,
|
"rue" => Kind::True,
|
||||||
"hrow" => Kind::Throw,
|
"hrow" => Kind::Throw,
|
||||||
|
|
@ -1832,33 +1847,34 @@ const L_T: ByteHandler = |lexer| match &lexer.identifier_name_handler()[1..] {
|
||||||
"arget" => Kind::Target,
|
"arget" => Kind::Target,
|
||||||
"ype" => Kind::Type,
|
"ype" => Kind::Type,
|
||||||
_ => Kind::Ident,
|
_ => Kind::Ident,
|
||||||
};
|
});
|
||||||
|
|
||||||
const L_U: ByteHandler = |lexer| match &lexer.identifier_name_handler()[1..] {
|
ascii_byte_handler!(L_U(lexer) match &lexer.identifier_name_handler()[1..] {
|
||||||
"ndefined" => Kind::Undefined,
|
"ndefined" => Kind::Undefined,
|
||||||
"sing" => Kind::Using,
|
"sing" => Kind::Using,
|
||||||
"nique" => Kind::Unique,
|
"nique" => Kind::Unique,
|
||||||
"nknown" => Kind::Unknown,
|
"nknown" => Kind::Unknown,
|
||||||
_ => Kind::Ident,
|
_ => Kind::Ident,
|
||||||
};
|
});
|
||||||
|
|
||||||
const L_V: ByteHandler = |lexer| match &lexer.identifier_name_handler()[1..] {
|
ascii_byte_handler!(L_V(lexer) match &lexer.identifier_name_handler()[1..] {
|
||||||
"ar" => Kind::Var,
|
"ar" => Kind::Var,
|
||||||
"oid" => Kind::Void,
|
"oid" => Kind::Void,
|
||||||
_ => Kind::Ident,
|
_ => Kind::Ident,
|
||||||
};
|
});
|
||||||
|
|
||||||
const L_W: ByteHandler = |lexer| match &lexer.identifier_name_handler()[1..] {
|
ascii_byte_handler!(L_W(lexer) match &lexer.identifier_name_handler()[1..] {
|
||||||
"hile" => Kind::While,
|
"hile" => Kind::While,
|
||||||
"ith" => Kind::With,
|
"ith" => Kind::With,
|
||||||
_ => Kind::Ident,
|
_ => Kind::Ident,
|
||||||
};
|
});
|
||||||
|
|
||||||
const L_Y: ByteHandler = |lexer| match &lexer.identifier_name_handler()[1..] {
|
ascii_byte_handler!(L_Y(lexer) match &lexer.identifier_name_handler()[1..] {
|
||||||
"ield" => Kind::Yield,
|
"ield" => Kind::Yield,
|
||||||
_ => Kind::Ident,
|
_ => Kind::Ident,
|
||||||
};
|
});
|
||||||
|
|
||||||
// Non-ASCII characters
|
// Non-ASCII characters.
|
||||||
|
// NB: Must not use `ascii_byte_handler!()` macro, as this handler is for non-ASCII chars.
|
||||||
#[allow(clippy::redundant_closure_for_method_calls)]
|
#[allow(clippy::redundant_closure_for_method_calls)]
|
||||||
const UNI: ByteHandler = |lexer| lexer.unicode_char_handler();
|
const UNI: ByteHandler = |lexer| lexer.unicode_char_handler();
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue