From 0e326186647379bac460725ea52261ab750bb7a9 Mon Sep 17 00:00:00 2001
From: overlookmotel <theoverlookmotel@gmail.com>
Date: Thu, 18 Jan 2024 13:14:12 +0000
Subject: [PATCH] refactor(parser): combine token kinds for skipped tokens
 (#2072)

Small optimization to the lexer.

Whitespace, line breaks, and comments are all skipped by
`read_next_token()`.

At present there's a different `Kind` for each, and `read_next_token()`
decides whether to skip with `matches!(kind, Kind::WhiteSpace |
Kind::NewLine | Kind::Comment | Kind::MultiLineComment)`.

These `Kind`s are used for no other purpose, so there seems little
reason to differentiate them.

This PR combines them all into `Kind::Skip`, so the test of whether
to skip is reduced to `kind == Kind::Skip`.

This only produces a ~0.3% performance bump on parser benchmarks, but
why not?
---
 crates/oxc_parser/src/lexer/kind.rs | 10 ++--------
 crates/oxc_parser/src/lexer/mod.rs  | 20 ++++++++------------
 2 files changed, 10 insertions(+), 20 deletions(-)

diff --git a/crates/oxc_parser/src/lexer/kind.rs b/crates/oxc_parser/src/lexer/kind.rs
index 2d623e976..ed8861a04 100644
--- a/crates/oxc_parser/src/lexer/kind.rs
+++ b/crates/oxc_parser/src/lexer/kind.rs
@@ -8,10 +8,7 @@ pub enum Kind {
     Undetermined,
     #[default]
     Eof,
-    WhiteSpace,
-    NewLine,
-    Comment,
-    MultiLineComment,
+    Skip, // Whitespace, line breaks, comments
     // 12.5 Hashbang Comments
     HashbangComment,
     // 12.7.1 identifier
@@ -482,11 +479,8 @@ impl Kind {
         match self {
             Undetermined => "Unknown",
             Eof => "EOF",
-            NewLine => "\n",
-            Comment => "//",
-            MultiLineComment => "/** */",
+            Skip => "Skipped",
             HashbangComment => "#!",
-            WhiteSpace => " ",
             Ident => "Identifier",
             Await => "await",
             Break => "break",
diff --git a/crates/oxc_parser/src/lexer/mod.rs b/crates/oxc_parser/src/lexer/mod.rs
index c3d698eda..ea661af88 100644
--- a/crates/oxc_parser/src/lexer/mod.rs
+++ b/crates/oxc_parser/src/lexer/mod.rs
@@ -383,11 +383,7 @@ impl<'a> Lexer<'a> {
             // SAFETY: Check for `remaining.is_empty()` ensures not at end of file,
             // and `byte` is the byte at current position of `self.current.chars`.
             let kind = unsafe { handle_byte(byte, self) };
-
-            if !matches!(
-                kind,
-                Kind::WhiteSpace | Kind::NewLine | Kind::Comment | Kind::MultiLineComment
-            ) {
+            if kind != Kind::Skip {
                 return kind;
             }
         }
@@ -407,12 +403,12 @@ impl<'a> Lexer<'a> {
                 self.trivia_builder
                     .add_irregular_whitespace(self.current.token.start, self.offset());
                 self.consume_char();
-                Kind::WhiteSpace
+                Kind::Skip
             }
             c if is_irregular_line_terminator(c) => {
                 self.consume_char();
                 self.current.token.is_on_new_line = true;
-                Kind::NewLine
+                Kind::Skip
             }
             _ => {
                 self.consume_char();
@@ -431,12 +427,12 @@ impl<'a> Lexer<'a> {
                 self.current.token.is_on_new_line = true;
                 self.trivia_builder
                     .add_single_line_comment(start, self.offset() - c.len_utf8() as u32);
-                return Kind::Comment;
+                return Kind::Skip;
             }
         }
         // EOF
         self.trivia_builder.add_single_line_comment(start, self.offset());
-        Kind::Comment
+        Kind::Skip
     }
 
     /// Section 12.4 Multi Line Comment
@@ -444,7 +440,7 @@ impl<'a> Lexer<'a> {
         while let Some(c) = self.current.chars.next() {
             if c == '*' && self.next_eq('/') {
                 self.trivia_builder.add_multi_line_comment(self.current.token.start, self.offset());
-                return Kind::MultiLineComment;
+                return Kind::Skip;
             }
             if is_line_terminator(c) {
                 self.current.token.is_on_new_line = true;
@@ -1393,14 +1389,14 @@ ascii_byte_handler!(ERR(lexer) {
 // <SPACE> <TAB> <VT> <FF>
 ascii_byte_handler!(SPS(lexer) {
     lexer.consume_char();
-    Kind::WhiteSpace
+    Kind::Skip
 });
 
 // '\r' '\n'
 ascii_byte_handler!(LIN(lexer) {
     lexer.consume_char();
     lexer.current.token.is_on_new_line = true;
-    Kind::NewLine
+    Kind::Skip
 });
 
 // !