refactor(parser): fetch 2 bytes in ? byte handler (#4563)

The lexer's `?` byte handler needs to fetch the next 2 bytes, so do that in one shot rather than bounds-checking twice.
This commit is contained in:
overlookmotel 2024-07-31 00:28:49 +00:00
parent 565eccf631
commit 0acc4a726e
3 changed files with 31 additions and 37 deletions

View file

@ -412,22 +412,39 @@ ascii_byte_handler!(GTR(lexer) {
// ?
ascii_byte_handler!(QST(lexer) {
lexer.consume_char();
if lexer.next_ascii_char_eq(b'?') {
if lexer.next_ascii_char_eq(b'=') {
Kind::Question2Eq
} else {
Kind::Question2
}
} else if lexer.peek_byte() == Some(b'.') {
// parse `?.1` as `?` `.1`
if lexer.peek_char2().is_some_and(|c| c.is_ascii_digit()) {
Kind::Question
} else {
lexer.consume_char();
Kind::QuestionDot
if let Some(next_2_bytes) = lexer.peek_2_bytes() {
match next_2_bytes[0] {
b'?' => {
if next_2_bytes[1] == b'=' {
lexer.consume_char();
lexer.consume_char();
Kind::Question2Eq
} else {
lexer.consume_char();
Kind::Question2
}
}
// parse `?.1` as `?` `.1`
b'.' if !next_2_bytes[1].is_ascii_digit() => {
lexer.consume_char();
Kind::QuestionDot
}
_ => Kind::Question,
}
} else {
Kind::Question
// At EOF, or only 1 byte left
match lexer.peek_byte() {
Some(b'?') => {
lexer.consume_char();
Kind::Question2
}
Some(b'.') => {
lexer.consume_char();
Kind::QuestionDot
}
_ => Kind::Question,
}
}
});

View file

@ -269,12 +269,6 @@ impl<'a> Lexer<'a> {
self.source.peek_char()
}
/// Peek the next next char without advancing the position
#[inline]
fn peek_char2(&self) -> Option<char> {
self.source.peek_char2()
}
/// Peek the next byte, and advance the current position if it matches
/// the given ASCII char.
// `#[inline(always)]` to make sure the `assert!` gets optimized out.

View file

@ -470,23 +470,6 @@ impl<'a> Source<'a> {
Some(c)
}
/// Peek next next char of source, without consuming it.
#[inline]
pub(super) fn peek_char2(&self) -> Option<char> {
// Handle EOF
if self.is_eof() {
return None;
}
// Check invariant that `ptr` is on a UTF-8 character boundary.
debug_assert!(!is_utf8_cont_byte(self.peek_byte().unwrap()));
let mut chars = self.remaining().chars();
// SAFETY: We already checked not at EOF, so `chars.next()` must return `Some(_)`
unsafe { chars.next().unwrap_unchecked() };
chars.next()
}
/// Peek next byte of source without consuming it.
#[inline]
pub(super) fn peek_byte(&self) -> Option<u8> {