mirror of
https://github.com/danbulant/oxc
synced 2026-05-24 12:21:58 +00:00
perf(parser): lexer match byte not char (#2025)
Two related changes to the lexer's `read_next_token()`:
1. Hint to the branch predictor that Unicode identifiers and non-standard whitespace are rare by marking that branch `#[cold]`.
2. The branch is on whether the next character is ASCII or not. This check only requires reading 1 byte, as ASCII characters are always a single byte in UTF-8. So only do the work of getting a `char` in the cold path, once it has been established that the character is not ASCII and this work is required.
This commit is contained in:
parent
a356918d83
commit
60a927d8f5
1 changed files with 20 additions and 17 deletions
|
|
@@ -374,28 +374,31 @@ impl<'a> Lexer<'a> {
|
||||||
let offset = self.offset();
|
let offset = self.offset();
|
||||||
self.current.token.start = offset;
|
self.current.token.start = offset;
|
||||||
|
|
||||||
if let Some(c) = self.current.chars.clone().next() {
|
let remaining = self.current.chars.as_str();
|
||||||
let kind = self.match_char(c);
|
if remaining.is_empty() {
|
||||||
if !matches!(
|
|
||||||
kind,
|
|
||||||
Kind::WhiteSpace | Kind::NewLine | Kind::Comment | Kind::MultiLineComment
|
|
||||||
) {
|
|
||||||
return kind;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
return Kind::Eof;
|
return Kind::Eof;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let byte = remaining.as_bytes()[0];
|
||||||
|
let kind = if byte < 128 {
|
||||||
|
BYTE_HANDLERS[byte as usize](self)
|
||||||
|
} else {
|
||||||
|
self.match_unicode_char()
|
||||||
|
};
|
||||||
|
|
||||||
|
if !matches!(
|
||||||
|
kind,
|
||||||
|
Kind::WhiteSpace | Kind::NewLine | Kind::Comment | Kind::MultiLineComment
|
||||||
|
) {
|
||||||
|
return kind;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
// `#[cold]` to hint to branch predictor that unicode identifiers and irregular whitespace are rare
|
||||||
fn match_char(&mut self, c: char) -> Kind {
|
#[cold]
|
||||||
let size = c as usize;
|
fn match_unicode_char(&mut self) -> Kind {
|
||||||
|
let c = self.current.chars.clone().next().unwrap();
|
||||||
if size < 128 {
|
|
||||||
return BYTE_HANDLERS[size](self);
|
|
||||||
}
|
|
||||||
|
|
||||||
match c {
|
match c {
|
||||||
c if is_id_start_unicode(c) => {
|
c if is_id_start_unicode(c) => {
|
||||||
let mut builder = AutoCow::new(self);
|
let mut builder = AutoCow::new(self);
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue