mirror of
https://github.com/danbulant/oxc
synced 2026-05-24 12:21:58 +00:00
perf(parser): optimize Lexer::hex_digit (#4572)
Optimize `Lexer::hex_digit`. Rather than checking for `A-F` and `a-f` separately, we can check for both ranges in one go. `b'A' | 32 == b'a'` (and the same holds for every other ASCII letter), so matching against `b | 32` checks for a matching letter, lower or upper case, in a single operation.
This commit is contained in:
parent
247b2afae7
commit
25679e6277
1 changed files with 23 additions and 7 deletions
|
|
@@ -160,14 +160,30 @@ impl<'a> Lexer<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn hex_digit(&mut self) -> Option<u32> {
|
fn hex_digit(&mut self) -> Option<u32> {
|
||||||
let value = match self.peek_byte() {
|
// Reduce instructions and remove 1 branch by comparing against `A-F` and `a-f` simultaneously
|
||||||
Some(b @ b'0'..=b'9') => u32::from(b) - '0' as u32,
|
// https://godbolt.org/z/9caMMzvP3
|
||||||
Some(b @ b'a'..=b'f') => 10 + (u32::from(b) - 'a' as u32),
|
let value = if let Some(b) = self.peek_byte() {
|
||||||
Some(b @ b'A'..=b'F') => 10 + (u32::from(b) - 'A' as u32),
|
if b.is_ascii_digit() {
|
||||||
_ => return None,
|
b - b'0'
|
||||||
|
} else {
|
||||||
|
// Match `A-F` or `a-f`. `b | 32` converts uppercase letters to lowercase,
|
||||||
|
// but leaves lowercase as they are
|
||||||
|
let lower_case = b | 32;
|
||||||
|
if matches!(lower_case, b'a'..=b'f') {
|
||||||
|
lower_case + 10 - b'a'
|
||||||
|
} else {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return None;
|
||||||
};
|
};
|
||||||
self.consume_char();
|
// Because of `b | 32` above, compiler cannot deduce that next byte is definitely ASCII
|
||||||
Some(value)
|
// so `next_byte_unchecked` is necessary to produce compact assembly, rather than `consume_char`.
|
||||||
|
// SAFETY: This code is only reachable if there is a byte remaining, and it's ASCII.
|
||||||
|
// Therefore it's safe to consume that byte, and will leave position on a UTF-8 char boundary.
|
||||||
|
unsafe { self.source.next_byte_unchecked() };
|
||||||
|
Some(u32::from(value))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn code_point(&mut self) -> Option<u32> {
|
fn code_point(&mut self) -> Option<u32> {
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue