perf(parser): optimize conditional advance on ASCII values (#4298)

Part of https://github.com/oxc-project/oxc/issues/3291.
2026-05-19 12:19:15 +00:00 · 2024-07-27 01:17:25 +00:00 · 2024-07-27 01:17:25 +00:00 · 868fc87885
commit 868fc87885
parent e2735ca2c5
6 changed files with 69 additions and 46 deletions
--- a/crates/oxc_parser/src/lexer/byte_handlers.rs
+++ b/crates/oxc_parser/src/lexer/byte_handlers.rs
@ -209,8 +209,8 @@ ascii_byte_handler!(LIN(lexer) {
 // !
 ascii_byte_handler!(EXL(lexer) {
    lexer.consume_char();
-    if lexer.next_eq('=') {
-        if lexer.next_eq('=') {
+    if lexer.next_ascii_char_eq(b'=') {
+        if lexer.next_ascii_char_eq(b'=') {
            Kind::Neq2
        } else {
            Kind::Neq
@ -237,7 +237,7 @@ ascii_byte_handler!(HAS(lexer) {
    lexer.consume_char();
    // HashbangComment ::
    //     `#!` SingleLineCommentChars?
-    if lexer.token.start == 0 && lexer.next_eq('!') {
+    if lexer.token.start == 0 && lexer.next_ascii_char_eq(b'!') {
        lexer.read_hashbang_comment()
    } else {
        lexer.private_identifier()
@ -252,7 +252,7 @@ ascii_identifier_handler!(IDT(_id_without_first_char) {
 // %
 ascii_byte_handler!(PRC(lexer) {
    lexer.consume_char();
-    if lexer.next_eq('=') {
+    if lexer.next_ascii_char_eq(b'=') {
        Kind::PercentEq
    } else {
        Kind::Percent
@ -262,13 +262,13 @@ ascii_byte_handler!(PRC(lexer) {
 // &
 ascii_byte_handler!(AMP(lexer) {
    lexer.consume_char();
-    if lexer.next_eq('&') {
-        if lexer.next_eq('=') {
+    if lexer.next_ascii_char_eq(b'&') {
+        if lexer.next_ascii_char_eq(b'=') {
            Kind::Amp2Eq
        } else {
            Kind::Amp2
        }
-    } else if lexer.next_eq('=') {
+    } else if lexer.next_ascii_char_eq(b'=') {
        Kind::AmpEq
    } else {
        Kind::Amp
@ -290,13 +290,13 @@ ascii_byte_handler!(PNC(lexer) {
 // *
 ascii_byte_handler!(ATR(lexer) {
    lexer.consume_char();
-    if lexer.next_eq('*') {
-        if lexer.next_eq('=') {
+    if lexer.next_ascii_char_eq(b'*') {
+        if lexer.next_ascii_char_eq(b'=') {
            Kind::Star2Eq
        } else {
            Kind::Star2
        }
-    } else if lexer.next_eq('=') {
+    } else if lexer.next_ascii_char_eq(b'=') {
        Kind::StarEq
    } else {
        Kind::Star
@ -306,9 +306,9 @@ ascii_byte_handler!(ATR(lexer) {
 // +
 ascii_byte_handler!(PLS(lexer) {
    lexer.consume_char();
-    if lexer.next_eq('+') {
+    if lexer.next_ascii_char_eq(b'+') {
        Kind::Plus2
-    } else if lexer.next_eq('=') {
+    } else if lexer.next_ascii_char_eq(b'=') {
        Kind::PlusEq
    } else {
        Kind::Plus
@ -347,7 +347,7 @@ ascii_byte_handler!(SLH(lexer) {
        }
        _ => {
            // regex is handled separately, see `next_regex`
-            if lexer.next_eq('=') {
+            if lexer.next_ascii_char_eq(b'=') {
                Kind::SlashEq
            } else {
                Kind::Slash
@ -389,13 +389,13 @@ ascii_byte_handler!(LSS(lexer) {
 // =
 ascii_byte_handler!(EQL(lexer) {
    lexer.consume_char();
-    if lexer.next_eq('=') {
-        if lexer.next_eq('=') {
+    if lexer.next_ascii_char_eq(b'=') {
+        if lexer.next_ascii_char_eq(b'=') {
            Kind::Eq3
        } else {
            Kind::Eq2
        }
-    } else if lexer.next_eq('>') {
+    } else if lexer.next_ascii_char_eq(b'>') {
        Kind::Arrow
    } else {
        Kind::Eq
@ -412,8 +412,8 @@ ascii_byte_handler!(GTR(lexer) {
 // ?
 ascii_byte_handler!(QST(lexer) {
    lexer.consume_char();
-    if lexer.next_eq('?') {
-        if lexer.next_eq('=') {
+    if lexer.next_ascii_char_eq(b'?') {
+        if lexer.next_ascii_char_eq(b'=') {
            Kind::Question2Eq
        } else {
            Kind::Question2
@ -457,7 +457,7 @@ ascii_byte_handler!(BTC(lexer) {
 // ^
 ascii_byte_handler!(CRT(lexer) {
    lexer.consume_char();
-    if lexer.next_eq('=') {
+    if lexer.next_ascii_char_eq(b'=') {
        Kind::CaretEq
    } else {
        Kind::Caret
@ -479,13 +479,13 @@ ascii_byte_handler!(BEO(lexer) {
 // |
 ascii_byte_handler!(PIP(lexer) {
    lexer.consume_char();
-    if lexer.next_eq('|') {
-        if lexer.next_eq('=') {
+    if lexer.next_ascii_char_eq(b'|') {
+        if lexer.next_ascii_char_eq(b'=') {
            Kind::Pipe2Eq
        } else {
            Kind::Pipe2
        }
-    } else if lexer.next_eq('=') {
+    } else if lexer.next_ascii_char_eq(b'=') {
        Kind::PipeEq
    } else {
        Kind::Pipe
--- a/crates/oxc_parser/src/lexer/mod.rs
+++ b/crates/oxc_parser/src/lexer/mod.rs
@ -263,14 +263,16 @@ impl<'a> Lexer<'a> {
        self.source.peek_char2()
    }

-    /// Peek the next character, and advance the current position if it matches
-    #[inline]
-    fn next_eq(&mut self, c: char) -> bool {
-        let matched = self.peek() == Some(c);
-        if matched {
-            self.source.next_char().unwrap();
-        }
-        matched
+    /// Peek the next byte, and advance the current position if it matches
+    /// the given ASCII char.
+    #[allow(clippy::inline_always)]
+    #[inline(always)]
+    fn next_ascii_char_eq(&mut self, b: u8) -> bool {
+        // TODO: can be replaced by `std::ascii::Char` once stabilized.
+        // https://github.com/rust-lang/rust/issues/110998
+        assert!(b.is_ascii());
+        // SAFETY: `b` is a valid ASCII char.
+        unsafe { self.source.advance_if_ascii_eq(b) }
    }

    fn current_offset(&self) -> Span {
--- a/crates/oxc_parser/src/lexer/numeric.rs
+++ b/crates/oxc_parser/src/lexer/numeric.rs
@ -29,9 +29,9 @@ impl<'a> Lexer<'a> {

    pub(super) fn decimal_literal_after_first_digit(&mut self) -> Kind {
        self.read_decimal_digits_after_first_digit();
-        if self.next_eq('.') {
+        if self.next_ascii_char_eq(b'.') {
            return self.decimal_literal_after_decimal_point_after_digits();
-        } else if self.next_eq('n') {
+        } else if self.next_ascii_char_eq(b'n') {
            return self.check_after_numeric_literal(Kind::Decimal);
        }

--- a/crates/oxc_parser/src/lexer/punctuation.rs
+++ b/crates/oxc_parser/src/lexer/punctuation.rs
@ -17,13 +17,13 @@ impl<'a> Lexer<'a> {

    /// returns None for `SingleLineHTMLOpenComment` `<!--` in script mode
    pub(super) fn read_left_angle(&mut self) -> Option<Kind> {
-        if self.next_eq('<') {
-            if self.next_eq('=') {
+        if self.next_ascii_char_eq(b'<') {
+            if self.next_ascii_char_eq(b'=') {
                Some(Kind::ShiftLeftEq)
            } else {
                Some(Kind::ShiftLeft)
            }
-        } else if self.next_eq('=') {
+        } else if self.next_ascii_char_eq(b'=') {
            Some(Kind::LtEq)
        } else if self.peek() == Some('!')
            // SingleLineHTMLOpenComment `<!--` in script mode
@ -38,14 +38,17 @@ impl<'a> Lexer<'a> {

    /// returns None for `SingleLineHTMLCloseComment` `-->` in script mode
    pub(super) fn read_minus(&mut self) -> Option<Kind> {
-        if self.next_eq('-') {
+        if self.next_ascii_char_eq(b'-') {
            // SingleLineHTMLCloseComment `-->` in script mode
-            if self.token.is_on_new_line && self.source_type.is_script() && self.next_eq('>') {
+            if self.token.is_on_new_line
+                && self.source_type.is_script()
+                && self.next_ascii_char_eq(b'>')
+            {
                None
            } else {
                Some(Kind::Minus2)
            }
-        } else if self.next_eq('=') {
+        } else if self.next_ascii_char_eq(b'=') {
            Some(Kind::MinusEq)
        } else {
            Some(Kind::Minus)
@ -59,19 +62,19 @@ impl<'a> Lexer<'a> {
    }

    fn read_right_angle(&mut self) -> Kind {
-        if self.next_eq('>') {
-            if self.next_eq('>') {
-                if self.next_eq('=') {
+        if self.next_ascii_char_eq(b'>') {
+            if self.next_ascii_char_eq(b'>') {
+                if self.next_ascii_char_eq(b'=') {
                    Kind::ShiftRight3Eq
                } else {
                    Kind::ShiftRight3
                }
-            } else if self.next_eq('=') {
+            } else if self.next_ascii_char_eq(b'=') {
                Kind::ShiftRightEq
            } else {
                Kind::ShiftRight
            }
-        } else if self.next_eq('=') {
+        } else if self.next_ascii_char_eq(b'=') {
            Kind::GtEq
        } else {
            Kind::RAngle
--- a/crates/oxc_parser/src/lexer/source.rs
+++ b/crates/oxc_parser/src/lexer/source.rs
@ -197,6 +197,24 @@ impl<'a> Source<'a> {
        self.ptr = self.end;
    }

+    /// Advance `Source`'s cursor by one byte if it is equal to the given ASCII value.
+    ///
+    /// # Safety
+    ///
+    /// Caller must ensure that `ascii_byte` is a valid ASCII character.
+    #[allow(clippy::inline_always)]
+    #[inline(always)]
+    pub(super) unsafe fn advance_if_ascii_eq(&mut self, ascii_byte: u8) -> bool {
+        debug_assert!(ascii_byte.is_ascii());
+        let matched = self.peek_byte() == Some(ascii_byte);
+        if matched {
+            // SAFETY: The next byte exists and is ASCII (a complete 1-byte UTF-8 char),
+            // so stepping past it leaves `ptr` on a UTF-8 char boundary.
+            self.ptr = unsafe { self.ptr.add(1) };
+        }
+        matched
+    }
+
    /// Get string slice from a `SourcePosition` up to the current position of `Source`.
    pub(super) fn str_from_pos_to_current(&self, pos: SourcePosition) -> &'a str {
        assert!(pos.ptr <= self.ptr);
--- a/crates/oxc_parser/src/lexer/unicode.rs
+++ b/crates/oxc_parser/src/lexer/unicode.rs
@ -141,11 +141,11 @@ impl<'a> Lexer<'a> {
    }

    fn unicode_code_point(&mut self) -> Option<SurrogatePair> {
-        if !self.next_eq('{') {
+        if !self.next_ascii_char_eq(b'{') {
            return None;
        }
        let value = self.code_point()?;
-        if !self.next_eq('}') {
+        if !self.next_ascii_char_eq(b'}') {
            return None;
        }
        Some(SurrogatePair::CodePoint(value))
@ -232,7 +232,7 @@ impl<'a> Lexer<'a> {
                // <CR> <LF>
                LF | LS | PS => {}
                CR => {
-                    self.next_eq(LF);
+                    self.next_ascii_char_eq(b'\n');
                }
                // SingleEscapeCharacter :: one of
                //   ' " \ b f n r t v