oxc/crates/oxc_parser/src/lexer/mod.rs

//! An Ecma-262 Lexer / Tokenizer
//! Prior Arts:
//!     * [jsparagus](https://github.com/mozilla-spidermonkey/jsparagus/blob/master/crates/parser/src)
//!     * [rome](https://github.com/rome/tools/tree/main/crates/rome_js_parser/src/lexer)
//!     * [rustc](https://github.com/rust-lang/rust/blob/master/compiler/rustc_lexer/src)
//!     * [v8](https://v8.dev/blog/scanner)

mod kind;
mod number;
mod string_builder;
mod token;
mod trivia_builder;

use std::{collections::VecDeque, str::Chars};

use oxc_allocator::{Allocator, String};
use oxc_ast::ast::RegExpFlags;
use oxc_diagnostics::Error;
use oxc_span::{SourceType, Span};
use oxc_syntax::{
    identifier::{
        is_identifier_part, is_identifier_start_all, is_irregular_line_terminator,
        is_irregular_whitespace, is_line_terminator, CR, FF, LF, LS, PS, TAB, VT,
    },
    unicode_id_start::is_id_start_unicode,
};
pub use token::{RegExp, Token, TokenValue};

pub use self::kind::Kind;
use self::{
    number::{parse_big_int, parse_float, parse_int},
    string_builder::AutoCow,
    trivia_builder::TriviaBuilder,
};
use crate::diagnostics;

#[derive(Debug, Clone)]
pub struct LexerCheckpoint<'a> {
    /// Remaining chars to be tokenized
    chars: Chars<'a>,

    token: Token<'a>,

    errors_pos: usize,
}

#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum LexerContext {
    Regular,
    /// Lex the next token, returns `JsxString` or any other token
    JsxAttributeValue,
}

pub struct Lexer<'a> {
    allocator: &'a Allocator,

    source: &'a str,

    source_type: SourceType,

    current: LexerCheckpoint<'a>,

    pub(crate) errors: Vec<Error>,

    lookahead: VecDeque<LexerCheckpoint<'a>>,

    context: LexerContext,

    pub(crate) trivia_builder: TriviaBuilder,
}

#[allow(clippy::unused_self)]
impl<'a> Lexer<'a> {
    pub fn new(allocator: &'a Allocator, source: &'a str, source_type: SourceType) -> Self {
        let token = Token {
            // the first token is at the start of file, so is allows on a new line
            is_on_new_line: true,
            ..Token::default()
        };
        let current = LexerCheckpoint { chars: source.chars(), token, errors_pos: 0 };
        Self {
            allocator,
            source,
            source_type,
            current,
            errors: vec![],
            lookahead: VecDeque::with_capacity(4), // 4 is the maximum lookahead for TypeScript
            context: LexerContext::Regular,
            trivia_builder: TriviaBuilder::default(),
        }
    }

    /// Remaining string from `Chars`
    pub fn remaining(&self) -> &'a str {
        self.current.chars.as_str()
    }

    /// Creates a checkpoint storing the current lexer state.
    /// Use `rewind` to restore the lexer to the state stored in the checkpoint.
    pub fn checkpoint(&self) -> LexerCheckpoint<'a> {
        LexerCheckpoint {
            chars: self.current.chars.clone(),
            token: self.current.token.clone(),
            errors_pos: self.errors.len(),
        }
    }

    /// Rewinds the lexer to the same state as when the passed in `checkpoint` was created.
    pub fn rewind(&mut self, checkpoint: LexerCheckpoint<'a>) {
        self.errors.truncate(checkpoint.errors_pos);
        self.current = checkpoint;
        self.lookahead.clear();
    }

    /// Find the nth lookahead token lazily
    pub fn lookahead(&mut self, n: u8) -> &Token<'a> {
        let n = n as usize;
        debug_assert!(n > 0);

        if self.lookahead.len() > n - 1 {
            return &self.lookahead[n - 1].token;
        }

        let checkpoint = self.checkpoint();

        if let Some(checkpoint) = self.lookahead.back() {
            self.current = checkpoint.clone();
        }

        // reset the current token for `read_next_token`,
        // otherwise it will contain the token from
        // `self.current = checkpoint`
        self.current.token = Token::default();

        for _i in self.lookahead.len()..n {
            let kind = self.read_next_token();
            let peeked = self.finish_next(kind);
            self.lookahead.push_back(LexerCheckpoint {
                chars: self.current.chars.clone(),
                token: peeked,
                errors_pos: self.errors.len(),
            });
        }

        self.current = checkpoint;

        &self.lookahead[n - 1].token
    }

    /// Set context
    pub fn set_context(&mut self, context: LexerContext) {
        self.context = context;
    }

    /// Main entry point
    pub fn next_token(&mut self) -> Token<'a> {
        if let Some(checkpoint) = self.lookahead.pop_front() {
            self.current.chars = checkpoint.chars;
            self.current.errors_pos = checkpoint.errors_pos;
            return checkpoint.token;
        }
        let kind = self.read_next_token();
        self.finish_next(kind)
    }

    pub fn next_jsx_child(&mut self) -> Token<'a> {
        self.current.token.start = self.offset();
        let kind = self.read_jsx_child();
        self.finish_next(kind)
    }

    fn finish_next(&mut self, kind: Kind) -> Token<'a> {
        self.current.token.kind = kind;
        self.current.token.end = self.offset();
        debug_assert!(self.current.token.start <= self.current.token.end);
        std::mem::take(&mut self.current.token)
    }

    /// Re-tokenize the current `/` or `/=` and return `RegExp`
    /// See Section 12:
    ///   The `InputElementRegExp` goal symbol is used in all syntactic grammar contexts
    ///   where a `RegularExpressionLiteral` is permitted
    /// Which meams the parser needs to re-tokenize on `PrimaryExpression`,
    /// `RegularExpressionLiteral` only appear on the right hand side of `PrimaryExpression`
    pub fn next_regex(&mut self, kind: Kind) -> Token<'a> {
        self.current.token.start = self.offset()
            - match kind {
                Kind::Slash => 1,
                Kind::SlashEq => 2,
                _ => unreachable!(),
            };
        let kind = self.read_regex();
        self.lookahead.clear();
        self.finish_next(kind)
    }

    pub fn next_right_angle(&mut self) -> Token<'a> {
        let kind = self.read_right_angle();
        self.lookahead.clear();
        self.finish_next(kind)
    }

    /// Re-tokenize the current `}` token for `TemplateSubstitutionTail`
    /// See Section 12, the parser needs to re-tokenize on `TemplateSubstitutionTail`,
    pub fn next_template_substitution_tail(&mut self) -> Token<'a> {
        self.current.token.start = self.offset() - 1;
        let kind = self.read_template_literal(Kind::TemplateMiddle, Kind::TemplateTail);
        self.lookahead.clear();
        self.finish_next(kind)
    }

    /// Expand the current token for `JSXIdentifier`
    pub fn next_jsx_identifier(&mut self, start_offset: u32) -> Token<'a> {
        let kind = self.read_jsx_identifier(start_offset);
        self.lookahead.clear();
        self.finish_next(kind)
    }

    /// Re-tokenize '<<' or '<=' or '<<=' to '<'
    pub fn re_lex_as_typescript_l_angle(&mut self, kind: Kind) -> Token<'a> {
        let offset = match kind {
            Kind::ShiftLeft | Kind::LtEq => 2,
            Kind::ShiftLeftEq => 3,
            _ => unreachable!(),
        };
        self.current.token.start = self.offset() - offset;
        self.current.chars = self.source[self.current.token.start as usize + 1..].chars();
        let kind = Kind::LAngle;
        self.lookahead.clear();
        self.finish_next(kind)
    }

    // ---------- Private Methods ---------- //
    fn error<T: Into<Error>>(&mut self, error: T) {
        self.errors.push(error.into());
    }

    /// Get the length offset from the source, in UTF-8 bytes
    #[inline]
    #[allow(clippy::cast_possible_truncation)]
    fn offset(&self) -> u32 {
        (self.source.len() - self.current.chars.as_str().len()) as u32
    }

    /// Get the current unterminated token range
    fn unterminated_range(&self) -> Span {
        Span::new(self.current.token.start, self.offset())
    }

    /// Consume the current char
    #[inline]
    fn consume_char(&mut self) -> char {
        self.current.chars.next().unwrap()
    }

    /// Peek the next char without advancing the position
    #[inline]
    fn peek(&self) -> Option<char> {
        self.current.chars.clone().next()
    }

    /// Peek the next next char without advancing the position
    #[inline]
    fn peek2(&self) -> Option<char> {
        let mut chars = self.current.chars.clone();
        chars.next();
        chars.next()
    }

    /// Peek the next character, and advance the current position if it matches
    #[inline]
    fn next_eq(&mut self, c: char) -> bool {
        let matched = self.peek() == Some(c);
        if matched {
            self.current.chars.next();
        }
        matched
    }

    fn current_offset(&self) -> Span {
        let offset = self.offset();
        Span::new(offset, offset)
    }

    /// Return `IllegalCharacter` Error or `UnexpectedEnd` if EOF
    fn unexpected_err(&mut self) {
        let offset = self.current_offset();
        match self.peek() {
            Some(c) => self.error(diagnostics::InvalidCharacter(c, offset)),
            None => self.error(diagnostics::UnexpectedEnd(offset)),
        }
    }

    fn set_numeric_value(&mut self, kind: Kind, src: &'a str) {
        let value = match kind {
            Kind::Decimal | Kind::Binary | Kind::Octal | Kind::Hex => {
                src.strip_suffix('n').map_or_else(
                    || parse_int(src, kind).map(TokenValue::Number),
                    |src| parse_big_int(src, kind).map(TokenValue::BigInt),
                )
            }
            Kind::Float | Kind::PositiveExponential | Kind::NegativeExponential => {
                parse_float(src).map(TokenValue::Number)
            }
            Kind::Undetermined => Ok(TokenValue::Number(std::f64::NAN)),
            _ => unreachable!("{kind}"),
        };

        match value {
            Ok(value) => self.current.token.value = value,
            Err(err) => {
                self.error(diagnostics::InvalidNumber(
                    err,
                    Span::new(self.current.token.start, self.offset()),
                ));
                self.current.token.value = TokenValue::Number(std::f64::NAN);
            }
        };
    }

    /// Read each char and set the current token
    /// Whitespace and line terminators are skipped
    fn read_next_token(&mut self) -> Kind {
        self.current.token.start = self.offset();

        loop {
            let offset = self.offset();
            self.current.token.start = offset;

            if let Some(c) = self.current.chars.clone().next() {
                let kind = self.match_char(c);
                if !matches!(
                    kind,
                    Kind::WhiteSpace | Kind::NewLine | Kind::Comment | Kind::MultiLineComment
                ) {
                    return kind;
                }
            } else {
                return Kind::Eof;
            }
        }
    }

    #[inline]
    fn match_char(&mut self, c: char) -> Kind {
        let size = c as usize;

        if size < 128 {
            return BYTE_HANDLERS[size](self);
        }

        match c {
            c if is_id_start_unicode(c) => {
                let mut builder = AutoCow::new(self);
                let c = self.consume_char();
                builder.push_matching(c);
                self.identifier_name(builder);
                Kind::Ident
            }
            c if is_irregular_whitespace(c) => {
                self.consume_char();
                Kind::WhiteSpace
            }
            c if is_irregular_line_terminator(c) => {
                self.consume_char();
                self.current.token.is_on_new_line = true;
                Kind::NewLine
            }
            _ => {
                self.consume_char();
                self.error(diagnostics::InvalidCharacter(c, self.unterminated_range()));
                Kind::Undetermined
            }
        }
    }

    /// Section 12.4 Single Line Comment
    #[allow(clippy::cast_possible_truncation)]
    fn skip_single_line_comment(&mut self) -> Kind {
        let start = self.current.token.start;
        while let Some(c) = self.current.chars.next() {
            if is_line_terminator(c) {
                self.current.token.is_on_new_line = true;
                self.trivia_builder
                    .add_single_line_comment(start, self.offset() - c.len_utf8() as u32);
                return Kind::Comment;
            }
        }
        // EOF
        self.trivia_builder.add_single_line_comment(start, self.offset());
        Kind::Comment
    }

    /// Section 12.4 Multi Line Comment
    fn skip_multi_line_comment(&mut self) -> Kind {
        while let Some(c) = self.current.chars.next() {
            if c == '*' && self.next_eq('/') {
                self.trivia_builder.add_multi_line_comment(self.current.token.start, self.offset());
                return Kind::MultiLineComment;
            }
            if is_line_terminator(c) {
                self.current.token.is_on_new_line = true;
            }
        }
        self.error(diagnostics::UnterminatedMultiLineComment(self.unterminated_range()));
        Kind::Eof
    }

    /// Section 12.5 Hashbang Comments
    fn read_hashbang_comment(&mut self) -> Kind {
        while let Some(c) = self.current.chars.next().as_ref() {
            if is_line_terminator(*c) {
                break;
            }
        }
        self.current.token.is_on_new_line = true;
        Kind::HashbangComment
    }

    /// Section 12.6.1 Identifier Names
    fn identifier_tail(&mut self, mut builder: AutoCow<'a>) -> (bool, &'a str) {
        // ident tail
        while let Some(c) = self.peek() {
            if !is_identifier_part(c) {
                if c == '\\' {
                    self.current.chars.next();
                    builder.force_allocation_without_current_ascii_char(self);
                    self.identifier_unicode_escape_sequence(&mut builder, false);
                    continue;
                }
                break;
            }
            self.current.chars.next();
            builder.push_matching(c);
        }
        let has_escape = builder.has_escape();
        (has_escape, builder.finish(self))
    }

    fn identifier_name(&mut self, builder: AutoCow<'a>) -> &'a str {
        let (has_escape, text) = self.identifier_tail(builder);
        self.current.token.escaped = has_escape;
        self.current.token.value = TokenValue::String(text);
        text
    }

    fn identifier_name_handler(&mut self) -> &'a str {
        let builder = AutoCow::new(self);
        self.consume_char();
        self.identifier_name(builder)
    }

    /// Section 12.7 Punctuators
    fn read_dot(&mut self, builder: &mut AutoCow<'a>) -> Kind {
        if self.peek() == Some('.') && self.peek2() == Some('.') {
            self.current.chars.next();
            self.current.chars.next();
            return Kind::Dot3;
        }
        if self.peek().is_some_and(|c| c.is_ascii_digit()) {
            builder.push_matching('.');
            self.decimal_literal_after_decimal_point(builder)
        } else {
            Kind::Dot
        }
    }

    /// returns None for `SingleLineHTMLOpenComment` `<!--` in script mode
    fn read_left_angle(&mut self) -> Option<Kind> {
        if self.next_eq('<') {
            if self.next_eq('=') {
                Some(Kind::ShiftLeftEq)
            } else {
                Some(Kind::ShiftLeft)
            }
        } else if self.next_eq('=') {
            Some(Kind::LtEq)
        } else if self.peek() == Some('!')
            // SingleLineHTMLOpenComment `<!--` in script mode
            && self.source_type.is_script()
            && self.remaining().starts_with("!--")
        {
            None
        } else {
            Some(Kind::LAngle)
        }
    }

    fn read_right_angle(&mut self) -> Kind {
        if self.next_eq('>') {
            if self.next_eq('>') {
                if self.next_eq('=') {
                    Kind::ShiftRight3Eq
                } else {
                    Kind::ShiftRight3
                }
            } else if self.next_eq('=') {
                Kind::ShiftRightEq
            } else {
                Kind::ShiftRight
            }
        } else if self.next_eq('=') {
            Kind::GtEq
        } else {
            Kind::RAngle
        }
    }

    /// returns None for `SingleLineHTMLCloseComment` `-->` in script mode
    fn read_minus(&mut self) -> Option<Kind> {
        if self.next_eq('-') {
            // SingleLineHTMLCloseComment `-->` in script mode
            if self.current.token.is_on_new_line
                && self.source_type.is_script()
                && self.next_eq('>')
            {
                None
            } else {
                Some(Kind::Minus2)
            }
        } else if self.next_eq('=') {
            Some(Kind::MinusEq)
        } else {
            Some(Kind::Minus)
        }
    }

    fn private_identifier(&mut self, mut builder: AutoCow<'a>) -> Kind {
        let start = self.offset();
        match self.current.chars.next() {
            Some(c) if is_identifier_start_all(c) => {
                builder.push_matching(c);
            }
            Some('\\') => {
                builder.force_allocation_without_current_ascii_char(self);
                self.identifier_unicode_escape_sequence(&mut builder, true);
            }
            Some(c) => {
                #[allow(clippy::cast_possible_truncation)]
                self.error(diagnostics::InvalidCharacter(
                    c,
                    Span::new(start, start + c.len_utf8() as u32),
                ));
                return Kind::Undetermined;
            }
            None => {
                self.error(diagnostics::UnexpectedEnd(Span::new(start, start)));
                return Kind::Undetermined;
            }
        }
        let (_, name) = self.identifier_tail(builder);
        self.current.token.value = TokenValue::String(name);
        Kind::PrivateIdentifier
    }

    /// 12.8.3 Numeric Literals with `0` prefix
    fn read_zero(&mut self, builder: &mut AutoCow<'a>) -> Kind {
        match self.peek() {
            Some('b' | 'B') => self.read_non_decimal(Kind::Binary, builder),
            Some('o' | 'O') => self.read_non_decimal(Kind::Octal, builder),
            Some('x' | 'X') => self.read_non_decimal(Kind::Hex, builder),
            Some(c @ ('e' | 'E')) => {
                self.current.chars.next();
                builder.push_matching(c);
                self.read_decimal_exponent(builder)
            }
            Some('.') => {
                self.current.chars.next();
                builder.push_matching('.');
                self.decimal_literal_after_decimal_point_after_digits(builder)
            }
            Some('n') => {
                self.current.chars.next();
                builder.push_matching('n');
                self.check_after_numeric_literal(Kind::Decimal)
            }
            Some(n) if n.is_ascii_digit() => self.read_legacy_octal(builder),
            _ => self.check_after_numeric_literal(Kind::Decimal),
        }
    }

    fn read_non_decimal(&mut self, kind: Kind, builder: &mut AutoCow<'a>) -> Kind {
        let c = self.current.chars.next().unwrap();
        builder.push_matching(c);

        if self.peek().is_some_and(|c| kind.matches_number_char(c)) {
            let c = self.current.chars.next().unwrap();
            builder.push_matching(c);
        } else {
            self.unexpected_err();
            return Kind::Undetermined;
        }

        while let Some(c) = self.peek() {
            match c {
                '_' => {
                    self.current.chars.next();
                    builder.force_allocation_without_current_ascii_char(self);
                    if self.peek().is_some_and(|c| kind.matches_number_char(c)) {
                        let c = self.current.chars.next().unwrap();
                        builder.push_matching(c);
                    } else {
                        self.unexpected_err();
                        return Kind::Undetermined;
                    }
                }
                c if kind.matches_number_char(c) => {
                    self.current.chars.next();
                    builder.push_matching(c);
                }
                _ => break,
            }
        }
        if self.peek() == Some('n') {
            self.current.chars.next();
            builder.push_matching('n');
        }
        self.check_after_numeric_literal(kind)
    }

    fn read_legacy_octal(&mut self, builder: &mut AutoCow<'a>) -> Kind {
        let mut kind = Kind::Octal;
        loop {
            match self.peek() {
                Some('0'..='7') => {
                    self.current.chars.next();
                }
                Some('8'..='9') => {
                    self.current.chars.next();
                    kind = Kind::Decimal;
                }
                _ => break,
            }
        }

        match self.peek() {
            // allow 08.5 and 09.5
            Some('.') if kind == Kind::Decimal => {
                self.current.chars.next();
                builder.push_matching('.');
                self.decimal_literal_after_decimal_point_after_digits(builder)
            }
            // allow 08e1 and 09e1
            Some('e') if kind == Kind::Decimal => {
                self.current.chars.next();
                builder.push_matching('e');
                self.read_decimal_exponent(builder)
            }
            _ => self.check_after_numeric_literal(kind),
        }
    }

    fn decimal_literal_after_first_digit(&mut self, builder: &mut AutoCow<'a>) -> Kind {
        self.read_decimal_digits_after_first_digit(builder);
        if self.next_eq('.') {
            builder.push_matching('.');
            return self.decimal_literal_after_decimal_point_after_digits(builder);
        } else if self.next_eq('n') {
            builder.push_matching('n');
            return self.check_after_numeric_literal(Kind::Decimal);
        }

        let kind = self.optional_exponent(builder).map_or(Kind::Decimal, |kind| kind);
        self.check_after_numeric_literal(kind)
    }

    fn read_decimal_exponent(&mut self, builder: &mut AutoCow<'a>) -> Kind {
        let kind = match self.peek() {
            Some('-') => {
                self.current.chars.next();
                builder.push_matching('-');
                Kind::NegativeExponential
            }
            Some('+') => {
                self.current.chars.next();
                builder.push_matching('+');
                Kind::PositiveExponential
            }
            _ => Kind::PositiveExponential,
        };
        self.read_decimal_digits(builder);
        kind
    }

    fn read_decimal_digits(&mut self, builder: &mut AutoCow<'a>) {
        if self.peek().is_some_and(|c| c.is_ascii_digit()) {
            let c = self.current.chars.next().unwrap();
            builder.push_matching(c);
        } else {
            self.unexpected_err();
            return;
        }

        self.read_decimal_digits_after_first_digit(builder);
    }

    fn read_decimal_digits_after_first_digit(&mut self, builder: &mut AutoCow<'a>) {
        while let Some(c) = self.peek() {
            match c {
                '_' => {
                    self.current.chars.next();
                    builder.force_allocation_without_current_ascii_char(self);
                    if self.peek().is_some_and(|c| c.is_ascii_digit()) {
                        let c = self.current.chars.next().unwrap();
                        builder.push_matching(c);
                    } else {
                        self.unexpected_err();
                        return;
                    }
                }
                c @ '0'..='9' => {
                    self.current.chars.next();
                    builder.push_matching(c);
                }
                _ => break,
            }
        }
    }

    fn decimal_literal_after_decimal_point(&mut self, builder: &mut AutoCow<'a>) -> Kind {
        self.read_decimal_digits(builder);
        self.optional_exponent(builder);
        self.check_after_numeric_literal(Kind::Float)
    }

    fn decimal_literal_after_decimal_point_after_digits(
        &mut self,
        builder: &mut AutoCow<'a>,
    ) -> Kind {
        self.optional_decimal_digits(builder);
        self.optional_exponent(builder);
        self.check_after_numeric_literal(Kind::Float)
    }

    fn optional_decimal_digits(&mut self, builder: &mut AutoCow<'a>) {
        if self.peek().is_some_and(|c| c.is_ascii_digit()) {
            let c = self.current.chars.next().unwrap();
            builder.push_matching(c);
        } else {
            return;
        }
        self.read_decimal_digits_after_first_digit(builder);
    }

    fn optional_exponent(&mut self, builder: &mut AutoCow<'a>) -> Option<Kind> {
        if matches!(self.peek(), Some('e' | 'E')) {
            let c = self.current.chars.next().unwrap();
            builder.push_matching(c);
            return Some(self.read_decimal_exponent(builder));
        }
        None
    }

    fn check_after_numeric_literal(&mut self, kind: Kind) -> Kind {
        let offset = self.offset();
        // The SourceCharacter immediately following a NumericLiteral must not be an IdentifierStart or DecimalDigit.
        let c = self.peek();
        if c.is_none() || c.is_some_and(|ch| !ch.is_ascii_digit() && !is_identifier_start_all(ch)) {
            return kind;
        }
        self.current.chars.next();
        while let Some(c) = self.peek() {
            if is_identifier_start_all(c) {
                self.current.chars.next();
            } else {
                break;
            }
        }
        self.error(diagnostics::InvalidNumberEnd(Span::new(offset, self.offset())));
        Kind::Undetermined
    }

    /// 12.8.4 String Literals
    fn read_string_literal(&mut self, delimiter: char) -> Kind {
        let mut builder = AutoCow::new(self);
        loop {
            match self.current.chars.next() {
                None | Some('\r' | '\n') => {
                    self.error(diagnostics::UnterminatedString(self.unterminated_range()));
                    return Kind::Undetermined;
                }
                Some(c @ ('"' | '\'')) => {
                    if c == delimiter {
                        self.current.token.value =
                            TokenValue::String(builder.finish_without_push(self));
                        return Kind::Str;
                    }
                    builder.push_matching(c);
                }
                Some('\\') => {
                    let start = self.offset() - 1;
                    let text = builder.get_mut_string_without_current_ascii_char(self);
                    let mut is_valid_escape_sequence = true;
                    self.read_string_escape_sequence(text, false, &mut is_valid_escape_sequence);
                    if !is_valid_escape_sequence {
                        let range = Span::new(start, self.offset());
                        self.error(diagnostics::InvalidEscapeSequence(range));
                    }
                }
                Some(c) => {
                    builder.push_matching(c);
                }
            }
        }
    }

    /// 12.8.5 Regular Expression Literals
    fn read_regex(&mut self) -> Kind {
        let start = self.current.token.start + 1; // +1 to exclude `/`
        let mut in_escape = false;
        let mut in_character_class = false;
        loop {
            match self.current.chars.next() {
                None => {
                    self.error(diagnostics::UnterminatedRegExp(self.unterminated_range()));
                    return Kind::Undetermined;
                }
                Some(c) if is_line_terminator(c) => {
                    self.error(diagnostics::UnterminatedRegExp(self.unterminated_range()));
                    return Kind::Undetermined;
                }
                Some(c) => {
                    if in_escape {
                        in_escape = false;
                    } else if c == '/' && !in_character_class {
                        break;
                    } else if c == '[' {
                        in_character_class = true;
                    } else if c == '\\' {
                        in_escape = true;
                    } else if c == ']' {
                        in_character_class = false;
                    }
                }
            }
        }

        let end = self.offset() - 1; // -1 to exclude `/`
        let pattern = &self.source[start as usize..end as usize];

        let mut flags = RegExpFlags::empty();

        while let Some(ch @ ('$' | '_' | 'a'..='z' | 'A'..='Z' | '0'..='9')) = self.peek() {
            self.current.chars.next();
            if !ch.is_ascii_lowercase() {
                self.error(diagnostics::RegExpFlag(ch, self.current_offset()));
                continue;
            }
            let flag = match ch {
                'g' => RegExpFlags::G,
                'i' => RegExpFlags::I,
                'm' => RegExpFlags::M,
                's' => RegExpFlags::S,
                'u' => RegExpFlags::U,
                'y' => RegExpFlags::Y,
                'd' => RegExpFlags::D,
                'v' => RegExpFlags::V,
                _ => {
                    self.error(diagnostics::RegExpFlag(ch, self.current_offset()));
                    continue;
                }
            };
            if flags.contains(flag) {
                self.error(diagnostics::RegExpFlagTwice(ch, self.current_offset()));
                continue;
            }
            flags |= flag;
        }

        self.current.token.value = TokenValue::RegExp(RegExp { pattern, flags });

        Kind::RegExp
    }

    /// 12.8.6 Template Literal Lexical Components
    fn read_template_literal(&mut self, substitute: Kind, tail: Kind) -> Kind {
        let mut builder = AutoCow::new(self);
        let mut is_valid_escape_sequence = true;
        while let Some(c) = self.current.chars.next() {
            match c {
                '$' if self.peek() == Some('{') => {
                    if is_valid_escape_sequence {
                        self.current.token.value =
                            TokenValue::String(builder.finish_without_push(self));
                    }
                    self.current.chars.next();
                    return substitute;
                }
                '`' => {
                    if is_valid_escape_sequence {
                        self.current.token.value =
                            TokenValue::String(builder.finish_without_push(self));
                    }
                    return tail;
                }
                CR => {
                    builder.force_allocation_without_current_ascii_char(self);
                    if self.next_eq(LF) {
                        builder.push_different(LF);
                    }
                }
                '\\' => {
                    let text = builder.get_mut_string_without_current_ascii_char(self);
                    self.read_string_escape_sequence(text, true, &mut is_valid_escape_sequence);
                }
                _ => builder.push_matching(c),
            }
        }
        self.error(diagnostics::UnterminatedString(self.unterminated_range()));
        Kind::Undetermined
    }

    /// `JSXIdentifier` :
    ///   `IdentifierStart`
    ///   `JSXIdentifier` `IdentifierPart`
    ///   `JSXIdentifier` [no `WhiteSpace` or Comment here] -
    fn read_jsx_identifier(&mut self, start_offset: u32) -> Kind {
        let prev_str = &self.source[start_offset as usize..self.offset() as usize];

        let mut builder = AutoCow::new(self);
        while let Some(c) = self.peek() {
            if c == '-' || is_identifier_start_all(c) {
                self.current.chars.next();
                builder.push_matching(c);
                while let Some(c) = self.peek() {
                    if is_identifier_part(c) {
                        let c = self.current.chars.next().unwrap();
                        builder.push_matching(c);
                    } else {
                        break;
                    }
                }
            } else {
                break;
            }
        }
        let mut s = String::from_str_in(prev_str, self.allocator);
        s.push_str(builder.finish(self));
        self.current.token.value = TokenValue::String(s.into_bump_str());
        Kind::Ident
    }

    /// [`JSXChild`](https://facebook.github.io/jsx/#prod-JSXChild)
    /// `JSXChild` :
    /// `JSXText`
    /// `JSXElement`
    /// `JSXFragment`
    /// { `JSXChildExpressionopt` }
    fn read_jsx_child(&mut self) -> Kind {
        match self.peek() {
            Some('<') => {
                self.current.chars.next();
                Kind::LAngle
            }
            Some('{') => {
                self.current.chars.next();
                Kind::LCurly
            }
            Some(c) => {
                let mut builder = AutoCow::new(self);
                builder.push_matching(c);
                loop {
                    // `>` and `}` are errors in TypeScript but not Babel
                    // let's make this less strict so we can parse more code
                    if matches!(self.peek(), Some('{' | '<')) {
                        break;
                    }
                    if let Some(c) = self.current.chars.next() {
                        builder.push_matching(c);
                    } else {
                        break;
                    }
                }
                self.current.token.value = TokenValue::String(builder.finish(self));
                Kind::JSXText
            }
            None => Kind::Eof,
        }
    }

    /// `JSXDoubleStringCharacters` ::
    ///   `JSXDoubleStringCharacter` `JSXDoubleStringCharactersopt`
    /// `JSXDoubleStringCharacter` ::
    ///   `JSXStringCharacter` but not "
    /// `JSXSingleStringCharacters` ::
    ///   `JSXSingleStringCharacter` `JSXSingleStringCharactersopt`
    /// `JSXSingleStringCharacter` ::
    ///   `JSXStringCharacter` but not '
    /// `JSXStringCharacter` ::
    ///   `SourceCharacter` but not one of `HTMLCharacterReference`
    fn read_jsx_string_literal(&mut self, delimiter: char) -> Kind {
        let mut builder = AutoCow::new(self);
        loop {
            match self.current.chars.next() {
                Some(c @ ('"' | '\'')) => {
                    if c == delimiter {
                        self.current.token.value =
                            TokenValue::String(builder.finish_without_push(self));
                        return Kind::Str;
                    }
                    builder.push_matching(c);
                }
                Some(other) => {
                    builder.push_matching(other);
                }
                None => {
                    self.error(diagnostics::UnterminatedString(self.unterminated_range()));
                    return Kind::Undetermined;
                }
            }
        }
    }

    /* ---------- utils ---------- */

    /// Identifier `UnicodeEscapeSequence`
    ///   \u `Hex4Digits`
    ///   \u{ `CodePoint` }
    fn identifier_unicode_escape_sequence(
        &mut self,
        builder: &mut AutoCow<'a>,
        check_identifier_start: bool,
    ) {
        let start = self.offset();
        if self.current.chars.next() != Some('u') {
            let range = Span::new(start, self.offset());
            self.error(diagnostics::UnicodeEscapeSequence(range));
            return;
        }

        let value = match self.peek() {
            Some('{') => self.unicode_code_point(),
            _ => self.surrogate_pair(),
        };

        let Some(value) = value else {
            let range = Span::new(start, self.offset());
            self.error(diagnostics::UnicodeEscapeSequence(range));
            return;
        };

        // For Identifiers, surrogate pair is an invalid grammar, e.g. `var \uD800\uDEA7`.
        let ch = match value {
            SurrogatePair::Astral(..) | SurrogatePair::HighLow(..) => {
                let range = Span::new(start, self.offset());
                self.error(diagnostics::UnicodeEscapeSequence(range));
                return;
            }
            SurrogatePair::CodePoint(code_point) => {
                if let Ok(ch) = char::try_from(code_point) {
                    ch
                } else {
                    let range = Span::new(start, self.offset());
                    self.error(diagnostics::UnicodeEscapeSequence(range));
                    return;
                }
            }
        };

        let is_valid = if check_identifier_start {
            is_identifier_start_all(ch)
        } else {
            is_identifier_part(ch)
        };

        if !is_valid {
            self.error(diagnostics::InvalidCharacter(ch, self.current_offset()));
            return;
        }

        builder.push_different(ch);
    }

    /// String `UnicodeEscapeSequence`
    ///   \u `Hex4Digits`
    ///   \u `Hex4Digits` \u `Hex4Digits`
    ///   \u{ `CodePoint` }
    fn string_unicode_escape_sequence(
        &mut self,
        text: &mut String<'a>,
        is_valid_escape_sequence: &mut bool,
    ) {
        let value = match self.peek() {
            Some('{') => self.unicode_code_point(),
            _ => self.surrogate_pair(),
        };

        let Some(value) = value else {
            // error raised within the parser by `diagnostics::TemplateLiteral`
            *is_valid_escape_sequence = false;
            return;
        };

        // For strings and templates, surrogate pairs are valid grammar, e.g. `"\uD83D\uDE00" === 😀`
        // values are interpreted as is if they fall out of range
        match value {
            SurrogatePair::CodePoint(code_point) | SurrogatePair::Astral(code_point) => {
                if let Ok(ch) = char::try_from(code_point) {
                    text.push(ch);
                } else {
                    text.push_str("\\u");
                    text.push_str(format!("{code_point:x}").as_str());
                }
            }
            SurrogatePair::HighLow(high, low) => {
                text.push_str("\\u");
                text.push_str(format!("{high:x}").as_str());
                text.push_str("\\u");
                text.push_str(format!("{low:x}").as_str());
            }
        }
    }

    fn unicode_code_point(&mut self) -> Option<SurrogatePair> {
        if !self.next_eq('{') {
            return None;
        }
        let value = self.code_point()?;
        if !self.next_eq('}') {
            return None;
        }
        Some(SurrogatePair::CodePoint(value))
    }

    fn hex_4_digits(&mut self) -> Option<u32> {
        let mut value = 0;
        for _ in 0..4 {
            value = (value << 4) | self.hex_digit()?;
        }
        Some(value)
    }

    fn hex_digit(&mut self) -> Option<u32> {
        let value = match self.peek() {
            Some(c @ '0'..='9') => c as u32 - '0' as u32,
            Some(c @ 'a'..='f') => 10 + (c as u32 - 'a' as u32),
            Some(c @ 'A'..='F') => 10 + (c as u32 - 'A' as u32),
            _ => return None,
        };
        self.current.chars.next();
        Some(value)
    }

    fn code_point(&mut self) -> Option<u32> {
        let mut value = self.hex_digit()?;
        while let Some(next) = self.hex_digit() {
            value = (value << 4) | next;
            if value > 0x0010_FFFF {
                return None;
            }
        }
        Some(value)
    }

    /// Surrogate pairs
    /// See background info:
    ///   * `https://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae`
    ///   * `https://mathiasbynens.be/notes/javascript-identifiers-es6`
    fn surrogate_pair(&mut self) -> Option<SurrogatePair> {
        let high = self.hex_4_digits()?;
        // The first code unit of a surrogate pair is always in the range from 0xD800 to 0xDBFF, and is called a high surrogate or a lead surrogate.
        if !((0xD800..=0xDBFF).contains(&high)
            && self.peek() == Some('\\')
            && self.peek2() == Some('u'))
        {
            return Some(SurrogatePair::CodePoint(high));
        }

        self.current.chars.next();
        self.current.chars.next();

        let low = self.hex_4_digits()?;

        // The second code unit of a surrogate pair is always in the range from 0xDC00 to 0xDFFF, and is called a low surrogate or a trail surrogate.
        if !(0xDC00..=0xDFFF).contains(&low) {
            return Some(SurrogatePair::HighLow(high, low));
        }

        // `https://tc39.es/ecma262/#sec-utf16decodesurrogatepair`
        let astral_code_point = (high - 0xD800) * 0x400 + low - 0xDC00 + 0x10000;

        Some(SurrogatePair::Astral(astral_code_point))
    }

    // EscapeSequence ::
    fn read_string_escape_sequence(
        &mut self,
        text: &mut String<'a>,
        in_template: bool,
        is_valid_escape_sequence: &mut bool,
    ) {
        match self.current.chars.next() {
            None => {
                self.error(diagnostics::UnterminatedString(self.unterminated_range()));
            }
            Some(c) => match c {
                // \ LineTerminatorSequence
                // LineTerminatorSequence ::
                // <LF>
                // <CR> [lookahead ≠ <LF>]
                // <LS>
                // <PS>
                // <CR> <LF>
                LF | LS | PS => {}
                CR => {
                    self.next_eq(LF);
                }
                // SingleEscapeCharacter :: one of
                //   ' " \ b f n r t v
                '\'' | '"' | '\\' => text.push(c),
                'b' => text.push('\u{8}'),
                'f' => text.push(FF),
                'n' => text.push(LF),
                'r' => text.push(CR),
                't' => text.push(TAB),
                'v' => text.push(VT),
                // HexEscapeSequence
                'x' => {
                    self.hex_digit()
                        .and_then(|value1| {
                            let value2 = self.hex_digit()?;
                            Some((value1, value2))
                        })
                        .map(|(value1, value2)| (value1 << 4) | value2)
                        .and_then(|value| char::try_from(value).ok())
                        .map_or_else(
                            || {
                                *is_valid_escape_sequence = false;
                            },
                            |c| {
                                text.push(c);
                            },
                        );
                }
                // UnicodeEscapeSequence
                'u' => {
                    self.string_unicode_escape_sequence(text, is_valid_escape_sequence);
                }
                // 0 [lookahead ∉ DecimalDigit]
                '0' if !self.peek().is_some_and(|c| c.is_ascii_digit()) => text.push('\0'),
                // Section 12.8.4 String Literals
                // LegacyOctalEscapeSequence
                // NonOctalDecimalEscapeSequence
                a @ '0'..='7' if !in_template => {
                    let mut num = String::new_in(self.allocator);
                    num.push(a);
                    match a {
                        '4'..='7' => {
                            if matches!(self.peek(), Some('0'..='7')) {
                                let b = self.current.chars.next().unwrap();
                                num.push(b);
                            }
                        }
                        '0'..='3' => {
                            if matches!(self.peek(), Some('0'..='7')) {
                                let b = self.current.chars.next().unwrap();
                                num.push(b);
                                if matches!(self.peek(), Some('0'..='7')) {
                                    let c = self.current.chars.next().unwrap();
                                    num.push(c);
                                }
                            }
                        }
                        _ => {}
                    }

                    let value =
                        char::from_u32(u32::from_str_radix(num.as_str(), 8).unwrap()).unwrap();
                    text.push(value);
                }
                '0' if in_template && self.peek().is_some_and(|c| c.is_ascii_digit()) => {
                    self.current.chars.next();
                    // error raised within the parser by `diagnostics::TemplateLiteral`
                    *is_valid_escape_sequence = false;
                }
                // NotEscapeSequence :: DecimalDigit but not 0
                '1'..='9' if in_template => {
                    // error raised within the parser by `diagnostics::TemplateLiteral`
                    *is_valid_escape_sequence = false;
                }
                other => {
                    // NonOctalDecimalEscapeSequence \8 \9 in strict mode
                    text.push(other);
                }
            },
        }
    }
}

enum SurrogatePair {
    // valid \u Hex4Digits \u Hex4Digits
    Astral(u32),
    // valid \u Hex4Digits
    CodePoint(u32),
    // invalid \u Hex4Digits \u Hex4Digits
    HighLow(u32, u32),
}

type ByteHandler = fn(&mut Lexer<'_>) -> Kind;

/// Lookup table mapping any incoming byte to a handler function defined below.
/// <https://github.com/ratel-rust/ratel-core/blob/master/ratel/src/lexer/mod.rs>
#[rustfmt::skip]
static BYTE_HANDLERS: [ByteHandler; 128] = [
//  0    1    2    3    4    5    6    7    8    9    A    B    C    D    E    F    //
    ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, SPS, LIN, SPS, SPS, LIN, ERR, ERR, // 0
    ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, // 1
    SPS, EXL, QOT, HAS, IDT, PRC, AMP, QOT, PNO, PNC, ATR, PLS, COM, MIN, PRD, SLH, // 2
    ZER, DIG, DIG, DIG, DIG, DIG, DIG, DIG, DIG, DIG, COL, SEM, LSS, EQL, GTR, QST, // 3
    AT_, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, // 4
    IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, BTO, ESC, BTC, CRT, IDT, // 5
    TPL, L_A, L_B, L_C, L_D, L_E, L_F, L_G, IDT, L_I, IDT, L_K, L_L, L_M, L_N, L_O, // 6
    L_P, IDT, L_R, L_S, L_T, L_U, L_V, L_W, IDT, L_Y, IDT, BEO, PIP, BEC, TLD, ERR, // 7
];

const ERR: ByteHandler = |lexer| {
    let c = lexer.consume_char();
    lexer.error(diagnostics::InvalidCharacter(c, lexer.unterminated_range()));
    Kind::Undetermined
};

// <TAB> <VT> <FF>
const SPS: ByteHandler = |lexer| {
    lexer.consume_char();
    Kind::WhiteSpace
};

// '\r' '\n'
const LIN: ByteHandler = |lexer| {
    lexer.consume_char();
    lexer.current.token.is_on_new_line = true;
    Kind::NewLine
};

// !
const EXL: ByteHandler = |lexer| {
    lexer.consume_char();
    if lexer.next_eq('=') {
        if lexer.next_eq('=') {
            Kind::Neq2
        } else {
            Kind::Neq
        }
    } else {
        Kind::Bang
    }
};

// ' "
const QOT: ByteHandler = |lexer| {
    let c = lexer.consume_char();
    if lexer.context == LexerContext::JsxAttributeValue {
        lexer.read_jsx_string_literal(c)
    } else {
        lexer.read_string_literal(c)
    }
};

// #
const HAS: ByteHandler = |lexer| {
    let mut builder = AutoCow::new(lexer);
    let c = lexer.consume_char();
    builder.push_matching(c);
    // HashbangComment ::
    //     `#!` SingleLineCommentChars?
    if lexer.current.token.start == 0 && lexer.next_eq('!') {
        lexer.read_hashbang_comment()
    } else {
        builder.get_mut_string_without_current_ascii_char(lexer);
        lexer.private_identifier(builder)
    }
};

const IDT: ByteHandler = |lexer| {
    lexer.identifier_name_handler();
    Kind::Ident
};

// %
const PRC: ByteHandler = |lexer| {
    lexer.consume_char();
    if lexer.next_eq('=') {
        Kind::PercentEq
    } else {
        Kind::Percent
    }
};

// &
const AMP: ByteHandler = |lexer| {
    lexer.consume_char();
    if lexer.next_eq('&') {
        if lexer.next_eq('=') {
            Kind::Amp2Eq
        } else {
            Kind::Amp2
        }
    } else if lexer.next_eq('=') {
        Kind::AmpEq
    } else {
        Kind::Amp
    }
};

// (
const PNO: ByteHandler = |lexer| {
    lexer.consume_char();
    Kind::LParen
};

// )
const PNC: ByteHandler = |lexer| {
    lexer.consume_char();
    Kind::RParen
};

// *
const ATR: ByteHandler = |lexer| {
    lexer.consume_char();
    if lexer.next_eq('*') {
        if lexer.next_eq('=') {
            Kind::Star2Eq
        } else {
            Kind::Star2
        }
    } else if lexer.next_eq('=') {
        Kind::StarEq
    } else {
        Kind::Star
    }
};

// +
const PLS: ByteHandler = |lexer| {
    lexer.consume_char();
    if lexer.next_eq('+') {
        Kind::Plus2
    } else if lexer.next_eq('=') {
        Kind::PlusEq
    } else {
        Kind::Plus
    }
};

// ,
const COM: ByteHandler = |lexer| {
    lexer.consume_char();
    Kind::Comma
};

// -
const MIN: ByteHandler = |lexer| {
    lexer.consume_char();
    lexer.read_minus().unwrap_or_else(|| lexer.skip_single_line_comment())
};

// .
const PRD: ByteHandler = |lexer| {
    let mut builder = AutoCow::new(lexer);
    let c = lexer.consume_char();
    builder.push_matching(c);
    let kind = lexer.read_dot(&mut builder);
    if kind.is_number() {
        lexer.set_numeric_value(kind, builder.finish(lexer));
    }
    kind
};

// /
const SLH: ByteHandler = |lexer| {
    lexer.consume_char();
    match lexer.peek() {
        Some('/') => {
            lexer.current.chars.next();
            lexer.skip_single_line_comment()
        }
        Some('*') => {
            lexer.current.chars.next();
            lexer.skip_multi_line_comment()
        }
        _ => {
            // regex is handled separately, see `next_regex`
            if lexer.next_eq('=') {
                Kind::SlashEq
            } else {
                Kind::Slash
            }
        }
    }
};

// 0
const ZER: ByteHandler = |lexer| {
    let mut builder = AutoCow::new(lexer);
    let c = lexer.consume_char();
    builder.push_matching(c);
    let kind = lexer.read_zero(&mut builder);
    lexer.set_numeric_value(kind, builder.finish(lexer));
    kind
};

// 1 to 9
const DIG: ByteHandler = |lexer| {
    let mut builder = AutoCow::new(lexer);
    let c = lexer.consume_char();
    builder.push_matching(c);
    let kind = lexer.decimal_literal_after_first_digit(&mut builder);
    lexer.set_numeric_value(kind, builder.finish(lexer));
    kind
};

// :
const COL: ByteHandler = |lexer| {
    lexer.consume_char();
    Kind::Colon
};

// ;
const SEM: ByteHandler = |lexer| {
    lexer.consume_char();
    Kind::Semicolon
};

// <
const LSS: ByteHandler = |lexer| {
    lexer.consume_char();
    lexer.read_left_angle().unwrap_or_else(|| lexer.skip_single_line_comment())
};

// =
const EQL: ByteHandler = |lexer| {
    lexer.consume_char();
    if lexer.next_eq('=') {
        if lexer.next_eq('=') {
            Kind::Eq3
        } else {
            Kind::Eq2
        }
    } else if lexer.next_eq('>') {
        Kind::Arrow
    } else {
        Kind::Eq
    }
};

// >
const GTR: ByteHandler = |lexer| {
    lexer.consume_char();
    // `>=` is re-lexed with [Lexer::next_jsx_child]
    Kind::RAngle
};

// ?
const QST: ByteHandler = |lexer| {
    lexer.consume_char();
    if lexer.next_eq('?') {
        if lexer.next_eq('=') {
            Kind::Question2Eq
        } else {
            Kind::Question2
        }
    } else if lexer.peek() == Some('.') {
        // parse `?.1` as `?` `.1`
        if lexer.peek2().is_some_and(|c| c.is_ascii_digit()) {
            Kind::Question
        } else {
            lexer.current.chars.next();
            Kind::QuestionDot
        }
    } else {
        Kind::Question
    }
};

// @
const AT_: ByteHandler = |lexer| {
    lexer.consume_char();
    Kind::At
};

// [
const BTO: ByteHandler = |lexer| {
    lexer.consume_char();
    Kind::LBrack
};

// \
const ESC: ByteHandler = |lexer| {
    let mut builder = AutoCow::new(lexer);
    let c = lexer.consume_char();
    builder.push_matching(c);
    builder.force_allocation_without_current_ascii_char(lexer);
    lexer.identifier_unicode_escape_sequence(&mut builder, true);
    let text = lexer.identifier_name(builder);
    Kind::match_keyword(text)
};

// ]
const BTC: ByteHandler = |lexer| {
    lexer.consume_char();
    Kind::RBrack
};

// ^
const CRT: ByteHandler = |lexer| {
    lexer.consume_char();
    if lexer.next_eq('=') {
        Kind::CaretEq
    } else {
        Kind::Caret
    }
};

// `
const TPL: ByteHandler = |lexer| {
    lexer.consume_char();
    lexer.read_template_literal(Kind::TemplateHead, Kind::NoSubstitutionTemplate)
};

// {
const BEO: ByteHandler = |lexer| {
    lexer.consume_char();
    Kind::LCurly
};

// |
const PIP: ByteHandler = |lexer| {
    lexer.consume_char();
    if lexer.next_eq('|') {
        if lexer.next_eq('=') {
            Kind::Pipe2Eq
        } else {
            Kind::Pipe2
        }
    } else if lexer.next_eq('=') {
        Kind::PipeEq
    } else {
        Kind::Pipe
    }
};

// }
const BEC: ByteHandler = |lexer| {
    lexer.consume_char();
    Kind::RCurly
};

// ~
const TLD: ByteHandler = |lexer| {
    lexer.consume_char();
    Kind::Tilde
};

const L_A: ByteHandler = |lexer| match &lexer.identifier_name_handler()[1..] {
    "wait" => Kind::Await,
    "sync" => Kind::Async,
    "bstract" => Kind::Abstract,
    "ccessor" => Kind::Accessor,
    "ny" => Kind::Any,
    "s" => Kind::As,
    "ssert" => Kind::Assert,
    "sserts" => Kind::Asserts,
    _ => Kind::Ident,
};

const L_B: ByteHandler = |lexer| match &lexer.identifier_name_handler()[1..] {
    "reak" => Kind::Break,
    "oolean" => Kind::Boolean,
    "igint" => Kind::BigInt,
    _ => Kind::Ident,
};

const L_C: ByteHandler = |lexer| match &lexer.identifier_name_handler()[1..] {
    "onst" => Kind::Const,
    "lass" => Kind::Class,
    "ontinue" => Kind::Continue,
    "atch" => Kind::Catch,
    "ase" => Kind::Case,
    "onstructor" => Kind::Constructor,
    _ => Kind::Ident,
};

const L_D: ByteHandler = |lexer| match &lexer.identifier_name_handler()[1..] {
    "o" => Kind::Do,
    "elete" => Kind::Delete,
    "eclare" => Kind::Declare,
    "efault" => Kind::Default,
    "ebugger" => Kind::Debugger,
    _ => Kind::Ident,
};

const L_E: ByteHandler = |lexer| match &lexer.identifier_name_handler()[1..] {
    "lse" => Kind::Else,
    "num" => Kind::Enum,
    "xport" => Kind::Export,
    "xtends" => Kind::Extends,
    _ => Kind::Ident,
};

const L_F: ByteHandler = |lexer| match &lexer.identifier_name_handler()[1..] {
    "unction" => Kind::Function,
    "alse" => Kind::False,
    "or" => Kind::For,
    "inally" => Kind::Finally,
    "rom" => Kind::From,
    _ => Kind::Ident,
};

const L_G: ByteHandler = |lexer| match &lexer.identifier_name_handler()[1..] {
    "et" => Kind::Get,
    "lobal" => Kind::Global,
    _ => Kind::Ident,
};

const L_I: ByteHandler = |lexer| match &lexer.identifier_name_handler()[1..] {
    "f" => Kind::If,
    "nstanceof" => Kind::Instanceof,
    "n" => Kind::In,
    "mplements" => Kind::Implements,
    "mport" => Kind::Import,
    "nfer" => Kind::Infer,
    "nterface" => Kind::Interface,
    "ntrinsic" => Kind::Intrinsic,
    "s" => Kind::Is,
    _ => Kind::Ident,
};

const L_K: ByteHandler = |lexer| match &lexer.identifier_name_handler()[1..] {
    "eyof" => Kind::KeyOf,
    _ => Kind::Ident,
};

const L_L: ByteHandler = |lexer| match &lexer.identifier_name_handler()[1..] {
    "et" => Kind::Let,
    _ => Kind::Ident,
};

const L_M: ByteHandler = |lexer| match &lexer.identifier_name_handler()[1..] {
    "eta" => Kind::Meta,
    "odule" => Kind::Module,
    _ => Kind::Ident,
};

const L_N: ByteHandler = |lexer| match &lexer.identifier_name_handler()[1..] {
    "ull" => Kind::Null,
    "ew" => Kind::New,
    "umber" => Kind::Number,
    "amespace" => Kind::Namespace,
    "ever" => Kind::Never,
    _ => Kind::Ident,
};

const L_O: ByteHandler = |lexer| match &lexer.identifier_name_handler()[1..] {
    "f" => Kind::Of,
    "bject" => Kind::Object,
    "ut" => Kind::Out,
    "verride" => Kind::Override,
    _ => Kind::Ident,
};

const L_P: ByteHandler = |lexer| match &lexer.identifier_name_handler()[1..] {
    "ackage" => Kind::Package,
    "rivate" => Kind::Private,
    "rotected" => Kind::Protected,
    "ublic" => Kind::Public,
    _ => Kind::Ident,
};

const L_R: ByteHandler = |lexer| match &lexer.identifier_name_handler()[1..] {
    "eturn" => Kind::Return,
    "equire" => Kind::Require,
    "eadonly" => Kind::Readonly,
    _ => Kind::Ident,
};

const L_S: ByteHandler = |lexer| match &lexer.identifier_name_handler()[1..] {
    "et" => Kind::Set,
    "uper" => Kind::Super,
    "witch" => Kind::Switch,
    "tatic" => Kind::Static,
    "ymbol" => Kind::Symbol,
    "tring" => Kind::String,
    "atisfies" => Kind::Satisfies,
    _ => Kind::Ident,
};

const L_T: ByteHandler = |lexer| match &lexer.identifier_name_handler()[1..] {
    "his" => Kind::This,
    "rue" => Kind::True,
    "hrow" => Kind::Throw,
    "ry" => Kind::Try,
    "ypeof" => Kind::Typeof,
    "arget" => Kind::Target,
    "ype" => Kind::Type,
    _ => Kind::Ident,
};

const L_U: ByteHandler = |lexer| match &lexer.identifier_name_handler()[1..] {
    "ndefined" => Kind::Undefined,
    "sing" => Kind::Using,
    "nique" => Kind::Unique,
    "nknown" => Kind::Unknown,
    _ => Kind::Ident,
};

const L_V: ByteHandler = |lexer| match &lexer.identifier_name_handler()[1..] {
    "ar" => Kind::Var,
    "oid" => Kind::Void,
    _ => Kind::Ident,
};

const L_W: ByteHandler = |lexer| match &lexer.identifier_name_handler()[1..] {
    "hile" => Kind::While,
    "ith" => Kind::With,
    _ => Kind::Ident,
};

const L_Y: ByteHandler = |lexer| match &lexer.identifier_name_handler()[1..] {
    "ield" => Kind::Yield,
    _ => Kind::Ident,
};