mirror of
https://github.com/danbulant/oxc
synced 2026-05-19 04:08:41 +00:00
refactor(parser): add Lexer::consume_2_chars (#4569)
Add `Lexer::consume_2_chars` to replace the repeated pair `lexer.consume_char(); lexer.consume_char();`. Mostly this just makes the code neater, but it *may* also help the compiler elide bounds checks when the call is preceded by `lexer.peek_2_bytes()`.
This commit is contained in:
parent
649913e6cd
commit
9e5be78af5
5 changed files with 45 additions and 6 deletions
|
|
@ -417,8 +417,7 @@ ascii_byte_handler!(QST(lexer) {
|
|||
match next_2_bytes[0] {
|
||||
b'?' => {
|
||||
if next_2_bytes[1] == b'=' {
|
||||
lexer.consume_char();
|
||||
lexer.consume_char();
|
||||
lexer.consume_2_chars();
|
||||
Kind::Question2Eq
|
||||
} else {
|
||||
lexer.consume_char();
|
||||
|
|
|
|||
|
|
@ -251,6 +251,18 @@ impl<'a> Lexer<'a> {
|
|||
self.source.next_char().unwrap()
|
||||
}
|
||||
|
||||
/// Consume the current char and the next if not at EOF
///
/// Thin wrapper: forwards to `self.source.next_2_chars()` and returns its
/// result unchanged, so a `None` from the source is passed straight through
/// to the caller.
|
||||
#[inline]
|
||||
fn next_2_chars(&mut self) -> Option<[char; 2]> {
|
||||
self.source.next_2_chars()
|
||||
}
|
||||
|
||||
/// Consume the current char and the next
///
/// Returns the 2 consumed chars.
///
/// # Panics
/// Panics if `next_2_chars` returns `None` (the result is unwrapped).
/// Callers are expected to have already established that 2 chars are
/// available, e.g. by inspecting `peek_2_bytes()` first.
|
||||
#[inline]
|
||||
fn consume_2_chars(&mut self) -> [char; 2] {
|
||||
self.next_2_chars().unwrap()
|
||||
}
|
||||
|
||||
/// Peek the next byte without advancing the position
|
||||
#[inline]
|
||||
fn peek_byte(&self) -> Option<u8> {
|
||||
|
|
|
|||
|
|
@ -4,8 +4,7 @@ impl<'a> Lexer<'a> {
|
|||
/// Section 12.8 Punctuators
|
||||
pub(super) fn read_dot(&mut self) -> Kind {
|
||||
if self.peek_2_bytes() == Some([b'.', b'.']) {
|
||||
self.consume_char();
|
||||
self.consume_char();
|
||||
self.consume_2_chars();
|
||||
return Kind::Dot3;
|
||||
}
|
||||
if self.peek_byte().is_some_and(|b| b.is_ascii_digit()) {
|
||||
|
|
|
|||
|
|
@ -374,6 +374,36 @@ impl<'a> Source<'a> {
|
|||
Some(c)
|
||||
}
|
||||
|
||||
/// Get next 2 chars of source, and advance position to after them.
///
/// Returns `None` if `peek_2_bytes()` returns `None`, or if a second char
/// cannot be read after the first. In both `None` cases `self.ptr` is left
/// unmodified, so nothing is consumed.
///
/// Fast path: when the next 2 bytes are both ASCII, they are returned directly
/// and the pointer is bumped by 2. Otherwise the chars are decoded via a
/// `Chars` iterator over the remaining source.
|
||||
#[inline]
|
||||
pub(super) fn next_2_chars(&mut self) -> Option<[char; 2]> {
|
||||
// Check not at EOF and handle if 2 x ASCII bytes
|
||||
let [byte1, byte2] = self.peek_2_bytes()?;
|
||||
if byte1.is_ascii() && byte2.is_ascii() {
|
||||
// SAFETY: We just checked that there are at least 2 bytes remaining,
|
||||
// and next 2 bytes are ASCII, so advancing by 2 bytes must put `ptr`
|
||||
// in bounds and on a UTF-8 character boundary
|
||||
unsafe { self.ptr = self.ptr.add(2) };
|
||||
return Some([byte1 as char, byte2 as char]);
|
||||
}
|
||||
|
||||
// At least one of the next 2 bytes is non-ASCII, i.e. at least one of the
|
||||
// next 2 chars is a multi-byte Unicode character.
|
||||
// Check invariant that `ptr` is on a UTF-8 character boundary.
|
||||
debug_assert!(!is_utf8_cont_byte(byte1));
|
||||
|
||||
// Create a `Chars` iterator, get next 2 chars from it, and then update `self.ptr`
|
||||
// to match `Chars` iterator's updated pointer afterwards.
|
||||
// `Chars` iterator upholds same invariants as `Source`, so its pointer is guaranteed
|
||||
// to be valid as `self.ptr`.
|
||||
let mut chars = self.remaining().chars();
|
||||
// SAFETY: We know that there's 2 bytes to be consumed, so first call to
|
||||
// `chars.next()` must return `Some(_)`
|
||||
let c1 = unsafe { chars.next().unwrap_unchecked() };
|
||||
// The first char may be a multi-byte char that uses up all remaining bytes,
|
||||
// in which case there is no 2nd char. Exit with `None` *before* `self.ptr`
|
||||
// is updated below, so that nothing is consumed.
|
||||
let c2 = chars.next()?;
|
||||
// Both chars were read; move `self.ptr` to the start of what `chars` has left.
|
||||
self.ptr = chars.as_str().as_ptr();
|
||||
Some([c1, c2])
|
||||
|
||||
}
|
||||
|
||||
/// Get next byte of source, and advance position to after it.
|
||||
///
|
||||
/// # SAFETY
|
||||
|
|
|
|||
|
|
@ -194,8 +194,7 @@ impl<'a> Lexer<'a> {
|
|||
return Some(SurrogatePair::CodePoint(high));
|
||||
}
|
||||
|
||||
self.consume_char();
|
||||
self.consume_char();
|
||||
self.consume_2_chars();
|
||||
|
||||
let low = self.hex_4_digits()?;
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue