From 8376f15b9a377b557af6796e9870bbb8663f61a1 Mon Sep 17 00:00:00 2001
From: overlookmotel
Date: Fri, 9 Feb 2024 04:02:51 +0000
Subject: [PATCH] perf(parser): eat whitespace after line break (#2353)

Uses the `byte_search!` macro introduced in #2352 to consume whitespace after a line break.
---
 crates/oxc_parser/src/lexer/byte_handlers.rs |  3 +--
 crates/oxc_parser/src/lexer/mod.rs           |  1 +
 crates/oxc_parser/src/lexer/whitespace.rs    | 28 ++++++++++++++++++++
 3 files changed, 30 insertions(+), 2 deletions(-)
 create mode 100644 crates/oxc_parser/src/lexer/whitespace.rs

diff --git a/crates/oxc_parser/src/lexer/byte_handlers.rs b/crates/oxc_parser/src/lexer/byte_handlers.rs
index ef720d115..67beef8ee 100644
--- a/crates/oxc_parser/src/lexer/byte_handlers.rs
+++ b/crates/oxc_parser/src/lexer/byte_handlers.rs
@@ -203,8 +203,7 @@ ascii_byte_handler!(ISP(lexer) {
 // '\r' '\n'
 ascii_byte_handler!(LIN(lexer) {
     lexer.consume_char();
-    lexer.token.is_on_new_line = true;
-    Kind::Skip
+    lexer.line_break_handler()
 });
 
 // !
diff --git a/crates/oxc_parser/src/lexer/mod.rs b/crates/oxc_parser/src/lexer/mod.rs
index ac18ad91d..41ed2c6eb 100644
--- a/crates/oxc_parser/src/lexer/mod.rs
+++ b/crates/oxc_parser/src/lexer/mod.rs
@@ -25,6 +25,7 @@ mod token;
 mod trivia_builder;
 mod typescript;
 mod unicode;
+mod whitespace;
 
 use rustc_hash::FxHashMap;
 use std::collections::VecDeque;
diff --git a/crates/oxc_parser/src/lexer/whitespace.rs b/crates/oxc_parser/src/lexer/whitespace.rs
new file mode 100644
index 000000000..1f6300341
--- /dev/null
+++ b/crates/oxc_parser/src/lexer/whitespace.rs
@@ -0,0 +1,28 @@
+use super::{
+    search::{byte_search, safe_byte_match_table, SafeByteMatchTable},
+    Kind, Lexer,
+};
+
+static NOT_REGULAR_WHITESPACE_OR_LINE_BREAK_TABLE: SafeByteMatchTable =
+    safe_byte_match_table!(|b| !matches!(b, b' ' | b'\t' | b'\r' | b'\n'));
+
+impl<'a> Lexer<'a> {
+    pub(super) fn line_break_handler(&mut self) -> Kind {
+        self.token.is_on_new_line = true;
+
+        // Indentation is common after a line break, so consume it here, together with any
+        // further line breaks. Only b' ', b'\t', b'\r' and b'\n' count as regular (see table).
+        // Irregular line breaks and whitespace are not consumed.
+        // They're uncommon, so leave them for the next call to `handle_byte` to take care of.
+        byte_search! {
+            lexer: self,
+            table: NOT_REGULAR_WHITESPACE_OR_LINE_BREAK_TABLE,
+            handle_match: |_next_byte, _start| {
+                Kind::Skip
+            },
+            handle_eof: |_start| {
+                Kind::Skip
+            },
+        };
+    }
+}