# Patch summary (leading text before the first `diff --git` header; patch tools skip it).
#
# 1. lexer/byte_handlers.rs: the `LIN` byte handler (commented as handling '\r' and '\n')
#    still consumes the character, but instead of setting `lexer.token.is_on_new_line = true`
#    and returning `Kind::Skip` inline, it now returns `lexer.line_break_handler()`.
#
# 2. lexer/mod.rs: declares the new private `whitespace` module.
#
# 3. lexer/whitespace.rs (new file):
#    - `NOT_REGULAR_WHITESPACE_OR_LINE_BREAK_TABLE` is built from a predicate that is true
#      for every byte other than b' ', b'\t', b'\r' and b'\n'.
#    - `Lexer::line_break_handler` sets `self.token.is_on_new_line = true`, then invokes
#      `byte_search!` with that table. Both `handle_match` and `handle_eof` evaluate to
#      `Kind::Skip`.
#    - Per the in-code comment, the intent is to consume indentation and any further
#      regular line breaks in one go, leaving irregular whitespace and line breaks for
#      the next `handle_byte` call.
#
# NOTE(review): `byte_search!` is defined in the `search` module, which is not part of this
# patch. The macro invocation ends with `;` inside a function returning `Kind`, so the
# handlers' values presumably leave the function via the macro expansion -- confirm
# against the macro definition.
diff --git a/crates/oxc_parser/src/lexer/byte_handlers.rs b/crates/oxc_parser/src/lexer/byte_handlers.rs
index ef720d115..67beef8ee 100644
--- a/crates/oxc_parser/src/lexer/byte_handlers.rs
+++ b/crates/oxc_parser/src/lexer/byte_handlers.rs
@@ -203,8 +203,7 @@ ascii_byte_handler!(ISP(lexer) {
 // '\r' '\n'
 ascii_byte_handler!(LIN(lexer) {
     lexer.consume_char();
-    lexer.token.is_on_new_line = true;
-    Kind::Skip
+    lexer.line_break_handler()
 });
 
 // !
diff --git a/crates/oxc_parser/src/lexer/mod.rs b/crates/oxc_parser/src/lexer/mod.rs
index ac18ad91d..41ed2c6eb 100644
--- a/crates/oxc_parser/src/lexer/mod.rs
+++ b/crates/oxc_parser/src/lexer/mod.rs
@@ -25,6 +25,7 @@ mod token;
 mod trivia_builder;
 mod typescript;
 mod unicode;
+mod whitespace;
 
 use rustc_hash::FxHashMap;
 use std::collections::VecDeque;
diff --git a/crates/oxc_parser/src/lexer/whitespace.rs b/crates/oxc_parser/src/lexer/whitespace.rs
new file mode 100644
index 000000000..1f6300341
--- /dev/null
+++ b/crates/oxc_parser/src/lexer/whitespace.rs
@@ -0,0 +1,28 @@
+use super::{
+    search::{byte_search, safe_byte_match_table, SafeByteMatchTable},
+    Kind, Lexer,
+};
+
+static NOT_REGULAR_WHITESPACE_OR_LINE_BREAK_TABLE: SafeByteMatchTable =
+    safe_byte_match_table!(|b| !matches!(b, b' ' | b'\t' | b'\r' | b'\n'));
+
+impl<'a> Lexer<'a> {
+    pub(super) fn line_break_handler(&mut self) -> Kind {
+        self.token.is_on_new_line = true;
+
+        // Indentation is common after a line break.
+        // Consume it, along with any further line breaks.
+        // Irregular line breaks and whitespace are not consumed.
+        // They're uncommon, so leave them for the next call to `handle_byte` to take care of.
+        byte_search! {
+            lexer: self,
+            table: NOT_REGULAR_WHITESPACE_OR_LINE_BREAK_TABLE,
+            handle_match: |_next_byte, _start| {
+                Kind::Skip
+            },
+            handle_eof: |_start| {
+                Kind::Skip
+            },
+        };
+    }
+}