mirror of
https://github.com/danbulant/oxc
synced 2026-05-24 20:32:10 +00:00
perf(parser): faster lexing JSX identifiers (#2557)
Speed up lexing JSX identifier continuations (i.e. after `-`), by searching for end of identifier byte-by-byte. Change does not register on benchmarks, only because benchmarks don't contain any `<Foo-Bar />` identifiers, so don't exercise this code path.
This commit is contained in:
parent
22c84c546a
commit
e793063f75
1 changed files with 36 additions and 9 deletions
|
|
@ -1,9 +1,16 @@
|
|||
use super::{Kind, Lexer, Token};
|
||||
use super::{
|
||||
cold_branch,
|
||||
search::{byte_search, safe_byte_match_table, SafeByteMatchTable},
|
||||
Kind, Lexer, Token,
|
||||
};
|
||||
use crate::diagnostics;
|
||||
|
||||
use memchr::{memchr, memchr2};
|
||||
use oxc_syntax::identifier::is_identifier_part;
|
||||
|
||||
static NOT_ASCII_JSX_ID_CONTINUE_TABLE: SafeByteMatchTable =
|
||||
safe_byte_match_table!(|b| !(b.is_ascii_alphanumeric() || matches!(b, b'_' | b'$' | b'-')));
|
||||
|
||||
impl<'a> Lexer<'a> {
|
||||
/// `JSXDoubleStringCharacters` ::
|
||||
/// `JSXDoubleStringCharacter` `JSXDoubleStringCharactersopt`
|
||||
|
|
@ -92,19 +99,39 @@ impl<'a> Lexer<'a> {
|
|||
/// `JSXIdentifier` `IdentifierPart`
|
||||
/// `JSXIdentifier` [no `WhiteSpace` or Comment here] -
|
||||
pub(crate) fn continue_lex_jsx_identifier(&mut self) -> Option<Token> {
|
||||
if self.peek() != Some('-') {
|
||||
if self.source.peek_byte() != Some(b'-') {
|
||||
return None;
|
||||
}
|
||||
self.consume_char();
|
||||
while let Some(c) = self.peek() {
|
||||
if c == '-' || is_identifier_part(c) {
|
||||
self.consume_char();
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Clear the current lookahead `Minus` Token
|
||||
self.lookahead.clear();
|
||||
|
||||
// Consume bytes which are part of identifier tail
|
||||
let next_byte = byte_search! {
|
||||
lexer: self,
|
||||
table: NOT_ASCII_JSX_ID_CONTINUE_TABLE,
|
||||
handle_eof: {
|
||||
return Some(self.finish_next(Kind::Ident));
|
||||
},
|
||||
};
|
||||
|
||||
// Found a matching byte.
|
||||
// Either end of identifier found, or a Unicode char.
|
||||
if !next_byte.is_ascii() {
|
||||
// Unicode chars are rare in identifiers, so cold branch to keep common path for ASCII
|
||||
// as fast as possible
|
||||
cold_branch(|| {
|
||||
while let Some(c) = self.peek() {
|
||||
if c == '-' || is_identifier_part(c) {
|
||||
self.consume_char();
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
Some(self.finish_next(Kind::Ident))
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue