mirror of
https://github.com/danbulant/oxc
synced 2026-05-19 04:08:41 +00:00
perf(parser): optimize Lexer::hex_digit (#4572)
Optimize `Lexer::hex_digit`. Rather than checking for `A-F` and `a-f` separately, we can check for both ranges in one go. `b'A' | 32 == b'a'` (and the same holds for every other ASCII letter), so matching against `b | 32` tests for a matching letter in either lower or upper case with a single operation.
This commit is contained in:
parent
247b2afae7
commit
25679e6277
1 changed files with 23 additions and 7 deletions
|
|
@ -160,14 +160,30 @@ impl<'a> Lexer<'a> {
|
|||
}
|
||||
|
||||
fn hex_digit(&mut self) -> Option<u32> {
|
||||
let value = match self.peek_byte() {
|
||||
Some(b @ b'0'..=b'9') => u32::from(b) - '0' as u32,
|
||||
Some(b @ b'a'..=b'f') => 10 + (u32::from(b) - 'a' as u32),
|
||||
Some(b @ b'A'..=b'F') => 10 + (u32::from(b) - 'A' as u32),
|
||||
_ => return None,
|
||||
// Reduce instructions and remove 1 branch by comparing against `A-F` and `a-f` simultaneously
|
||||
// https://godbolt.org/z/9caMMzvP3
|
||||
let value = if let Some(b) = self.peek_byte() {
|
||||
if b.is_ascii_digit() {
|
||||
b - b'0'
|
||||
} else {
|
||||
// Match `A-F` or `a-f`. `b | 32` converts uppercase letters to lowercase,
|
||||
// but leaves lowercase as they are
|
||||
let lower_case = b | 32;
|
||||
if matches!(lower_case, b'a'..=b'f') {
|
||||
lower_case + 10 - b'a'
|
||||
} else {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
return None;
|
||||
};
|
||||
self.consume_char();
|
||||
Some(value)
|
||||
// Because of `b | 32` above, compiler cannot deduce that next byte is definitely ASCII
|
||||
// so `next_byte_unchecked` is necessary to produce compact assembly, rather than `consume_char`.
|
||||
// SAFETY: This code is only reachable if there is a byte remaining, and it's ASCII.
|
||||
// Therefore it's safe to consume that byte, and will leave position on a UTF-8 char boundary.
|
||||
unsafe { self.source.next_byte_unchecked() };
|
||||
Some(u32::from(value))
|
||||
}
|
||||
|
||||
fn code_point(&mut self) -> Option<u32> {
|
||||
|
|
|
|||
Loading…
Reference in a new issue