From 0e326186647379bac460725ea52261ab750bb7a9 Mon Sep 17 00:00:00 2001 From: overlookmotel Date: Thu, 18 Jan 2024 13:14:12 +0000 Subject: [PATCH] refactor(parser): combine token kinds for skipped tokens (#2072) Small optimization to the lexer. Whitespace, line breaks, and comments are all skipped by `read_next_token()`. At present there's a different `Kind` for each, and `read_next_token()` decides whether to skip with `matches!(kind, Kind::WhiteSpace | Kind::NewLine | Kind::Comment | Kind::MultiLineComment)`. These `Kind`s are used for no other purpose, so there seems little reason to differentiate them. This PR combines them all into `Kind::Skip`, so then the test of whether to skip is reduced to `kind == Kind::Skip`. Only produces ~0.3% performance bump on parser benchmarks. But, why not?... --- crates/oxc_parser/src/lexer/kind.rs | 10 ++-------- crates/oxc_parser/src/lexer/mod.rs | 20 ++++++++------------ 2 files changed, 10 insertions(+), 20 deletions(-) diff --git a/crates/oxc_parser/src/lexer/kind.rs b/crates/oxc_parser/src/lexer/kind.rs index 2d623e976..ed8861a04 100644 --- a/crates/oxc_parser/src/lexer/kind.rs +++ b/crates/oxc_parser/src/lexer/kind.rs @@ -8,10 +8,7 @@ pub enum Kind { Undetermined, #[default] Eof, - WhiteSpace, - NewLine, - Comment, - MultiLineComment, + Skip, // Whitespace, line breaks, comments // 12.5 Hashbang Comments HashbangComment, // 12.7.1 identifier @@ -482,11 +479,8 @@ impl Kind { match self { Undetermined => "Unknown", Eof => "EOF", - NewLine => "\n", - Comment => "//", - MultiLineComment => "/** */", + Skip => "Skipped", HashbangComment => "#!", - WhiteSpace => " ", Ident => "Identifier", Await => "await", Break => "break", diff --git a/crates/oxc_parser/src/lexer/mod.rs b/crates/oxc_parser/src/lexer/mod.rs index c3d698eda..ea661af88 100644 --- a/crates/oxc_parser/src/lexer/mod.rs +++ b/crates/oxc_parser/src/lexer/mod.rs @@ -383,11 +383,7 @@ impl<'a> Lexer<'a> { // SAFETY: Check for `remaining.is_empty()` ensures not at end of file, // and `byte` is the byte at current position of `self.current.chars`. let kind = unsafe { handle_byte(byte, self) }; - - if !matches!( - kind, - Kind::WhiteSpace | Kind::NewLine | Kind::Comment | Kind::MultiLineComment - ) { + if kind != Kind::Skip { return kind; } } @@ -407,12 +403,12 @@ impl<'a> Lexer<'a> { self.trivia_builder .add_irregular_whitespace(self.current.token.start, self.offset()); self.consume_char(); - Kind::WhiteSpace + Kind::Skip } c if is_irregular_line_terminator(c) => { self.consume_char(); self.current.token.is_on_new_line = true; - Kind::NewLine + Kind::Skip } _ => { self.consume_char(); @@ -431,12 +427,12 @@ impl<'a> Lexer<'a> { self.current.token.is_on_new_line = true; self.trivia_builder .add_single_line_comment(start, self.offset() - c.len_utf8() as u32); - return Kind::Comment; + return Kind::Skip; } } // EOF self.trivia_builder.add_single_line_comment(start, self.offset()); - Kind::Comment + Kind::Skip } /// Section 12.4 Multi Line Comment @@ -444,7 +440,7 @@ impl<'a> Lexer<'a> { while let Some(c) = self.current.chars.next() { if c == '*' && self.next_eq('/') { self.trivia_builder.add_multi_line_comment(self.current.token.start, self.offset()); - return Kind::MultiLineComment; + return Kind::Skip; } if is_line_terminator(c) { self.current.token.is_on_new_line = true; @@ -1393,14 +1389,14 @@ ascii_byte_handler!(ERR(lexer) { // ascii_byte_handler!(SPS(lexer) { lexer.consume_char(); - Kind::WhiteSpace + Kind::Skip }); // '\r' '\n' ascii_byte_handler!(LIN(lexer) { lexer.consume_char(); lexer.current.token.is_on_new_line = true; - Kind::NewLine + Kind::Skip }); // !