refactor(parser): combine token kinds for skipped tokens (#2072)

Small optimization to the lexer. Whitespace, line breaks, and comments are all skipped by `read_next_token()`. At present there is a different `Kind` for each, and `read_next_token()` decides whether to skip with `matches!(kind, Kind::WhiteSpace | Kind::NewLine | Kind::Comment | Kind::MultiLineComment)`. These `Kind`s are used for no other purpose, so there seems little reason to differentiate them. This PR combines them all into `Kind::Skip`, so the test of whether to skip is reduced to `kind == Kind::Skip`. This only produces a ~0.3% performance bump on parser benchmarks, but the change is simple, so why not?
2026-05-24 20:32:10 +00:00 · 2024-01-18 13:14:12 +00:00 · 2024-01-18 13:14:12 +00:00 · 0e32618664
commit 0e32618664
parent d7ecd21801
2 changed files with 10 additions and 20 deletions
--- a/crates/oxc_parser/src/lexer/kind.rs
+++ b/crates/oxc_parser/src/lexer/kind.rs
@@ -8,10 +8,7 @@ pub enum Kind {
    Undetermined,
    #[default]
    Eof,
-    WhiteSpace,
+    Skip, // Whitespace, line breaks, comments
-    NewLine,
-    Comment,
-    MultiLineComment,
    // 12.5 Hashbang Comments
    HashbangComment,
    // 12.7.1 identifier
@@ -482,11 +479,8 @@ impl Kind {
        match self {
            Undetermined => "Unknown",
            Eof => "EOF",
-            NewLine => "\n",
+            Skip => "Skipped",
-            Comment => "//",
-            MultiLineComment => "/** */",
            HashbangComment => "#!",
-            WhiteSpace => " ",
            Ident => "Identifier",
            Await => "await",
            Break => "break",
--- a/crates/oxc_parser/src/lexer/mod.rs
+++ b/crates/oxc_parser/src/lexer/mod.rs
@@ -383,11 +383,7 @@ impl<'a> Lexer<'a> {
            // SAFETY: Check for `remaining.is_empty()` ensures not at end of file,
            // and `byte` is the byte at current position of `self.current.chars`.
            let kind = unsafe { handle_byte(byte, self) };
-
+            if kind != Kind::Skip {
-            if !matches!(
-                kind,
-                Kind::WhiteSpace | Kind::NewLine | Kind::Comment | Kind::MultiLineComment
-            ) {
                return kind;
            }
        }
@@ -407,12 +403,12 @@ impl<'a> Lexer<'a> {
                self.trivia_builder
                    .add_irregular_whitespace(self.current.token.start, self.offset());
                self.consume_char();
-                Kind::WhiteSpace
+                Kind::Skip
            }
            c if is_irregular_line_terminator(c) => {
                self.consume_char();
                self.current.token.is_on_new_line = true;
-                Kind::NewLine
+                Kind::Skip
            }
            _ => {
                self.consume_char();
@@ -431,12 +427,12 @@ impl<'a> Lexer<'a> {
                self.current.token.is_on_new_line = true;
                self.trivia_builder
                    .add_single_line_comment(start, self.offset() - c.len_utf8() as u32);
-                return Kind::Comment;
+                return Kind::Skip;
            }
        }
        // EOF
        self.trivia_builder.add_single_line_comment(start, self.offset());
-        Kind::Comment
+        Kind::Skip
    }
    /// Section 12.4 Multi Line Comment
@@ -444,7 +440,7 @@ impl<'a> Lexer<'a> {
        while let Some(c) = self.current.chars.next() {
            if c == '*' && self.next_eq('/') {
                self.trivia_builder.add_multi_line_comment(self.current.token.start, self.offset());
-                return Kind::MultiLineComment;
+                return Kind::Skip;
            }
            if is_line_terminator(c) {
                self.current.token.is_on_new_line = true;
@@ -1393,14 +1389,14 @@ ascii_byte_handler!(ERR(lexer) {
 // <SPACE> <TAB> <VT> <FF>
 ascii_byte_handler!(SPS(lexer) {
    lexer.consume_char();
-    Kind::WhiteSpace
+    Kind::Skip
 });
 // '\r' '\n'
 ascii_byte_handler!(LIN(lexer) {
    lexer.consume_char();
    lexer.current.token.is_on_new_line = true;
-    Kind::NewLine
+    Kind::Skip
 });
 // !