From 622a2c37fa565b9f55b9af355de25f9cb42a2806 Mon Sep 17 00:00:00 2001 From: overlookmotel Date: Wed, 31 Jan 2024 13:35:46 +0000 Subject: [PATCH] refactor(lexer): don't use `lexer.current.chars` directly (#2237) This PR replaces most usages of `lexer.current.chars.next()` with `lexer.consume_char()`, or a new function `lexer.next_char()`. This is a preparatory step towards replacing the `Chars` iterator with something more flexible which can also consume bytes (not `char`s), and this PR was intended as a pure refactor. Surprisingly, there is also a small performance bump (no idea why!). There's an additional benefit: Using `consume_char()` everywhere we believe there's definitely a char there to be consumed will make logic errors produce a panic, rather than silently outputting garbage. --- crates/oxc_parser/src/lexer/byte_handlers.rs | 6 +-- crates/oxc_parser/src/lexer/comment.rs | 6 +-- crates/oxc_parser/src/lexer/identifier.rs | 6 +-- crates/oxc_parser/src/lexer/jsx.rs | 12 ++--- crates/oxc_parser/src/lexer/mod.rs | 6 +++ crates/oxc_parser/src/lexer/numeric.rs | 46 +++++++++---------- crates/oxc_parser/src/lexer/punctuation.rs | 4 +- crates/oxc_parser/src/lexer/regex.rs | 4 +- crates/oxc_parser/src/lexer/string.rs | 2 +- crates/oxc_parser/src/lexer/string_builder.rs | 4 +- crates/oxc_parser/src/lexer/template.rs | 4 +- crates/oxc_parser/src/lexer/unicode.rs | 14 +++--- 12 files changed, 60 insertions(+), 54 deletions(-) diff --git a/crates/oxc_parser/src/lexer/byte_handlers.rs b/crates/oxc_parser/src/lexer/byte_handlers.rs index 4c2926ad1..b486e10e0 100644 --- a/crates/oxc_parser/src/lexer/byte_handlers.rs +++ b/crates/oxc_parser/src/lexer/byte_handlers.rs @@ -242,11 +242,11 @@ ascii_byte_handler!(SLH(lexer) { lexer.consume_char(); match lexer.peek() { Some('/') => { - lexer.current.chars.next(); + lexer.consume_char(); lexer.skip_single_line_comment() } Some('*') => { - lexer.current.chars.next(); + lexer.consume_char(); lexer.skip_multi_line_comment() } 
_ => { @@ -327,7 +327,7 @@ ascii_byte_handler!(QST(lexer) { if lexer.peek2().is_some_and(|c| c.is_ascii_digit()) { Kind::Question } else { - lexer.current.chars.next(); + lexer.consume_char(); Kind::QuestionDot } } else { diff --git a/crates/oxc_parser/src/lexer/comment.rs b/crates/oxc_parser/src/lexer/comment.rs index f195796ba..5ac3ef5f0 100644 --- a/crates/oxc_parser/src/lexer/comment.rs +++ b/crates/oxc_parser/src/lexer/comment.rs @@ -8,7 +8,7 @@ impl<'a> Lexer<'a> { #[allow(clippy::cast_possible_truncation)] pub(super) fn skip_single_line_comment(&mut self) -> Kind { let start = self.current.token.start; - while let Some(c) = self.current.chars.next() { + while let Some(c) = self.next_char() { if is_line_terminator(c) { self.current.token.is_on_new_line = true; self.trivia_builder @@ -23,7 +23,7 @@ impl<'a> Lexer<'a> { /// Section 12.4 Multi Line Comment pub(super) fn skip_multi_line_comment(&mut self) -> Kind { - while let Some(c) = self.current.chars.next() { + while let Some(c) = self.next_char() { if c == '*' && self.next_eq('/') { self.trivia_builder.add_multi_line_comment(self.current.token.start, self.offset()); return Kind::Skip; @@ -38,7 +38,7 @@ impl<'a> Lexer<'a> { /// Section 12.5 Hashbang Comments pub(super) fn read_hashbang_comment(&mut self) -> Kind { - while let Some(c) = self.current.chars.next().as_ref() { + while let Some(c) = self.next_char().as_ref() { if is_line_terminator(*c) { break; } diff --git a/crates/oxc_parser/src/lexer/identifier.rs b/crates/oxc_parser/src/lexer/identifier.rs index 272dd32f8..f28a3d6ec 100644 --- a/crates/oxc_parser/src/lexer/identifier.rs +++ b/crates/oxc_parser/src/lexer/identifier.rs @@ -18,7 +18,7 @@ impl<'a> Lexer<'a> { pub(super) fn private_identifier(&mut self) -> Kind { let mut builder = AutoCow::new(self); let start = self.offset(); - match self.current.chars.next() { + match self.next_char() { Some(c) if is_identifier_start(c) => { builder.push_matching(c); } @@ -48,14 +48,14 @@ impl<'a> Lexer<'a> { 
while let Some(c) = self.peek() { if !is_identifier_part(c) { if c == '\\' { - self.current.chars.next(); + self.consume_char(); builder.force_allocation_without_current_ascii_char(self); self.identifier_unicode_escape_sequence(&mut builder, false); continue; } break; } - self.current.chars.next(); + self.consume_char(); builder.push_matching(c); } let has_escape = builder.has_escape(); diff --git a/crates/oxc_parser/src/lexer/jsx.rs b/crates/oxc_parser/src/lexer/jsx.rs index 94b4d0e7e..d04275f53 100644 --- a/crates/oxc_parser/src/lexer/jsx.rs +++ b/crates/oxc_parser/src/lexer/jsx.rs @@ -17,7 +17,7 @@ impl<'a> Lexer<'a> { pub(super) fn read_jsx_string_literal(&mut self, delimiter: char) -> Kind { let mut builder = AutoCow::new(self); loop { - match self.current.chars.next() { + match self.next_char() { Some(c @ ('"' | '\'')) => { if c == delimiter { self.save_string(builder.has_escape(), builder.finish_without_push(self)); @@ -58,11 +58,11 @@ impl<'a> Lexer<'a> { fn read_jsx_child(&mut self) -> Kind { match self.peek() { Some('<') => { - self.current.chars.next(); + self.consume_char(); Kind::LAngle } Some('{') => { - self.current.chars.next(); + self.consume_char(); Kind::LCurly } Some(_) => { @@ -74,7 +74,7 @@ impl<'a> Lexer<'a> { if self.peek().is_some_and(|c| c == '{' || c == '<') { break; } - if self.current.chars.next().is_none() { + if self.next_char().is_none() { break; } } @@ -91,10 +91,10 @@ impl<'a> Lexer<'a> { fn read_jsx_identifier(&mut self, _start_offset: u32) -> Kind { while let Some(c) = self.peek() { if c == '-' || is_identifier_start(c) { - self.current.chars.next(); + self.consume_char(); while let Some(c) = self.peek() { if is_identifier_part(c) { - self.current.chars.next(); + self.consume_char(); } else { break; } diff --git a/crates/oxc_parser/src/lexer/mod.rs b/crates/oxc_parser/src/lexer/mod.rs index 2c14e2d85..3051a6244 100644 --- a/crates/oxc_parser/src/lexer/mod.rs +++ b/crates/oxc_parser/src/lexer/mod.rs @@ -212,6 +212,12 @@ impl<'a> 
Lexer<'a> { Span::new(self.current.token.start, self.offset()) } + /// Consume the current char if not at EOF + #[inline] + fn next_char(&mut self) -> Option { + self.current.chars.next() + } + /// Consume the current char #[inline] fn consume_char(&mut self) -> char { diff --git a/crates/oxc_parser/src/lexer/numeric.rs b/crates/oxc_parser/src/lexer/numeric.rs index 8dcc27d05..560bab2fd 100644 --- a/crates/oxc_parser/src/lexer/numeric.rs +++ b/crates/oxc_parser/src/lexer/numeric.rs @@ -11,15 +11,15 @@ impl<'a> Lexer<'a> { Some('o' | 'O') => self.read_non_decimal(Kind::Octal), Some('x' | 'X') => self.read_non_decimal(Kind::Hex), Some('e' | 'E') => { - self.current.chars.next(); + self.consume_char(); self.read_decimal_exponent() } Some('.') => { - self.current.chars.next(); + self.consume_char(); self.decimal_literal_after_decimal_point_after_digits() } Some('n') => { - self.current.chars.next(); + self.consume_char(); self.check_after_numeric_literal(Kind::Decimal) } Some(n) if n.is_ascii_digit() => self.read_legacy_octal(), @@ -40,10 +40,10 @@ impl<'a> Lexer<'a> { } fn read_non_decimal(&mut self, kind: Kind) -> Kind { - self.current.chars.next(); + self.consume_char(); if self.peek().is_some_and(|c| kind.matches_number_char(c)) { - self.current.chars.next(); + self.consume_char(); } else { self.unexpected_err(); return Kind::Undetermined; @@ -52,22 +52,22 @@ impl<'a> Lexer<'a> { while let Some(c) = self.peek() { match c { '_' => { - self.current.chars.next(); + self.consume_char(); if self.peek().is_some_and(|c| kind.matches_number_char(c)) { - self.current.chars.next(); + self.consume_char(); } else { self.unexpected_err(); return Kind::Undetermined; } } c if kind.matches_number_char(c) => { - self.current.chars.next(); + self.consume_char(); } _ => break, } } if self.peek() == Some('n') { - self.current.chars.next(); + self.consume_char(); } self.check_after_numeric_literal(kind) } @@ -77,10 +77,10 @@ impl<'a> Lexer<'a> { loop { match self.peek() { 
Some('0'..='7') => { - self.current.chars.next(); + self.consume_char(); } Some('8'..='9') => { - self.current.chars.next(); + self.consume_char(); kind = Kind::Decimal; } _ => break, @@ -90,12 +90,12 @@ impl<'a> Lexer<'a> { match self.peek() { // allow 08.5 and 09.5 Some('.') if kind == Kind::Decimal => { - self.current.chars.next(); + self.consume_char(); self.decimal_literal_after_decimal_point_after_digits() } // allow 08e1 and 09e1 Some('e') if kind == Kind::Decimal => { - self.current.chars.next(); + self.consume_char(); self.read_decimal_exponent() } _ => self.check_after_numeric_literal(kind), @@ -105,11 +105,11 @@ impl<'a> Lexer<'a> { fn read_decimal_exponent(&mut self) -> Kind { let kind = match self.peek() { Some('-') => { - self.current.chars.next(); + self.consume_char(); Kind::NegativeExponential } Some('+') => { - self.current.chars.next(); + self.consume_char(); Kind::PositiveExponential } _ => Kind::PositiveExponential, @@ -120,7 +120,7 @@ impl<'a> Lexer<'a> { fn read_decimal_digits(&mut self) { if self.peek().is_some_and(|c| c.is_ascii_digit()) { - self.current.chars.next(); + self.consume_char(); } else { self.unexpected_err(); return; @@ -133,16 +133,16 @@ impl<'a> Lexer<'a> { while let Some(c) = self.peek() { match c { '_' => { - self.current.chars.next(); + self.consume_char(); if self.peek().is_some_and(|c| c.is_ascii_digit()) { - self.current.chars.next(); + self.consume_char(); } else { self.unexpected_err(); return; } } '0'..='9' => { - self.current.chars.next(); + self.consume_char(); } _ => break, } @@ -163,7 +163,7 @@ impl<'a> Lexer<'a> { fn optional_decimal_digits(&mut self) { if self.peek().is_some_and(|c| c.is_ascii_digit()) { - self.current.chars.next(); + self.consume_char(); } else { return; } @@ -172,7 +172,7 @@ impl<'a> Lexer<'a> { fn optional_exponent(&mut self) -> Option { if matches!(self.peek(), Some('e' | 'E')) { - self.current.chars.next(); + self.consume_char(); return Some(self.read_decimal_exponent()); } None @@ -185,10 
+185,10 @@ impl<'a> Lexer<'a> { if c.is_none() || c.is_some_and(|ch| !ch.is_ascii_digit() && !is_identifier_start(ch)) { return kind; } - self.current.chars.next(); + self.consume_char(); while let Some(c) = self.peek() { if is_identifier_start(c) { - self.current.chars.next(); + self.consume_char(); } else { break; } diff --git a/crates/oxc_parser/src/lexer/punctuation.rs b/crates/oxc_parser/src/lexer/punctuation.rs index e119a45b5..067f41d35 100644 --- a/crates/oxc_parser/src/lexer/punctuation.rs +++ b/crates/oxc_parser/src/lexer/punctuation.rs @@ -4,8 +4,8 @@ impl<'a> Lexer<'a> { /// Section 12.8 Punctuators pub(super) fn read_dot(&mut self) -> Kind { if self.peek() == Some('.') && self.peek2() == Some('.') { - self.current.chars.next(); - self.current.chars.next(); + self.consume_char(); + self.consume_char(); return Kind::Dot3; } if self.peek().is_some_and(|c| c.is_ascii_digit()) { diff --git a/crates/oxc_parser/src/lexer/regex.rs b/crates/oxc_parser/src/lexer/regex.rs index 084e1175d..96159296a 100644 --- a/crates/oxc_parser/src/lexer/regex.rs +++ b/crates/oxc_parser/src/lexer/regex.rs @@ -28,7 +28,7 @@ impl<'a> Lexer<'a> { let mut in_escape = false; let mut in_character_class = false; loop { - match self.current.chars.next() { + match self.next_char() { None => { self.error(diagnostics::UnterminatedRegExp(self.unterminated_range())); return (self.offset(), RegExpFlags::empty()); @@ -59,7 +59,7 @@ impl<'a> Lexer<'a> { let mut flags = RegExpFlags::empty(); while let Some(ch @ ('$' | '_' | 'a'..='z' | 'A'..='Z' | '0'..='9')) = self.peek() { - self.current.chars.next(); + self.consume_char(); let flag = if let Ok(flag) = RegExpFlags::try_from(ch) { flag } else { diff --git a/crates/oxc_parser/src/lexer/string.rs b/crates/oxc_parser/src/lexer/string.rs index f2f0c14b0..5fd5e2132 100644 --- a/crates/oxc_parser/src/lexer/string.rs +++ b/crates/oxc_parser/src/lexer/string.rs @@ -6,7 +6,7 @@ impl<'a> Lexer<'a> { pub(super) fn read_string_literal(&mut self, delimiter: 
char) -> Kind { let mut builder = AutoCow::new(self); loop { - match self.current.chars.next() { + match self.next_char() { None | Some('\r' | '\n') => { self.error(diagnostics::UnterminatedString(self.unterminated_range())); return Kind::Undetermined; diff --git a/crates/oxc_parser/src/lexer/string_builder.rs b/crates/oxc_parser/src/lexer/string_builder.rs index 8f648e3ed..eee31a9d9 100644 --- a/crates/oxc_parser/src/lexer/string_builder.rs +++ b/crates/oxc_parser/src/lexer/string_builder.rs @@ -15,14 +15,14 @@ impl<'a> AutoCow<'a> { AutoCow { start, value: None } } - // Push a char that matches lexer.chars().next() + // Push a char that matches lexer.current.chars().next() pub fn push_matching(&mut self, c: char) { if let Some(text) = &mut self.value { text.push(c); } } - // Push a different character than lexer.chars().next(). + // Push a different character than lexer.current.chars().next(). // force_allocation_without_current_ascii_char must be called before this. pub fn push_different(&mut self, c: char) { debug_assert!(self.value.is_some()); diff --git a/crates/oxc_parser/src/lexer/template.rs b/crates/oxc_parser/src/lexer/template.rs index 661bfda4f..812cd8622 100644 --- a/crates/oxc_parser/src/lexer/template.rs +++ b/crates/oxc_parser/src/lexer/template.rs @@ -8,7 +8,7 @@ impl<'a> Lexer<'a> { pub(super) fn read_template_literal(&mut self, substitute: Kind, tail: Kind) -> Kind { let mut builder = AutoCow::new(self); let mut is_valid_escape_sequence = true; - while let Some(c) = self.current.chars.next() { + while let Some(c) = self.next_char() { match c { '$' if self.peek() == Some('{') => { self.save_template_string( @@ -16,7 +16,7 @@ impl<'a> Lexer<'a> { builder.has_escape(), builder.finish_without_push(self), ); - self.current.chars.next(); + self.consume_char(); return substitute; } '`' => { diff --git a/crates/oxc_parser/src/lexer/unicode.rs b/crates/oxc_parser/src/lexer/unicode.rs index fe8f08f49..0a122b674 100644 --- 
a/crates/oxc_parser/src/lexer/unicode.rs +++ b/crates/oxc_parser/src/lexer/unicode.rs @@ -18,7 +18,7 @@ enum SurrogatePair { impl<'a> Lexer<'a> { pub(super) fn unicode_char_handler(&mut self) -> Kind { - let c = self.current.chars.clone().next().unwrap(); + let c = self.peek().unwrap(); match c { c if is_identifier_start_unicode(c) => { let mut builder = AutoCow::new(self); @@ -55,7 +55,7 @@ impl<'a> Lexer<'a> { check_identifier_start: bool, ) { let start = self.offset(); - if self.current.chars.next() != Some('u') { + if self.next_char() != Some('u') { let range = Span::new(start, self.offset()); self.error(diagnostics::UnicodeEscapeSequence(range)); return; @@ -167,7 +167,7 @@ impl<'a> Lexer<'a> { Some(c @ 'A'..='F') => 10 + (c as u32 - 'A' as u32), _ => return None, }; - self.current.chars.next(); + self.consume_char(); Some(value) } @@ -196,8 +196,8 @@ impl<'a> Lexer<'a> { return Some(SurrogatePair::CodePoint(high)); } - self.current.chars.next(); - self.current.chars.next(); + self.next_char(); + self.next_char(); let low = self.hex_4_digits()?; @@ -219,7 +219,7 @@ impl<'a> Lexer<'a> { in_template: bool, is_valid_escape_sequence: &mut bool, ) { - match self.current.chars.next() { + match self.next_char() { None => { self.error(diagnostics::UnterminatedString(self.unterminated_range())); } @@ -299,7 +299,7 @@ impl<'a> Lexer<'a> { text.push(value); } '0' if in_template && self.peek().is_some_and(|c| c.is_ascii_digit()) => { - self.current.chars.next(); + self.consume_char(); // error raised within the parser by `diagnostics::TemplateLiteral` *is_valid_escape_sequence = false; }