From 622a2c37fa565b9f55b9af355de25f9cb42a2806 Mon Sep 17 00:00:00 2001
From: overlookmotel <theoverlookmotel@gmail.com>
Date: Wed, 31 Jan 2024 13:35:46 +0000
Subject: [PATCH] refactor(lexer): don't use `lexer.current.chars` directly
 (#2237)

This PR replaces most usages of `lexer.current.chars.next()` with
`lexer.consume_char()`, or a new function `lexer.next_char()`.

This is a preparatory step towards replacing the `Chars` iterator with
something more flexible which can also consume bytes (not `char`s).
This PR was intended as a pure refactor, but surprisingly there is also
a small performance bump (no idea why!).

There's an additional benefit: using `consume_char()` wherever we
believe there is definitely a char to be consumed makes logic errors
produce a panic, rather than silently outputting garbage.
---
 crates/oxc_parser/src/lexer/byte_handlers.rs  |  6 +--
 crates/oxc_parser/src/lexer/comment.rs        |  6 +--
 crates/oxc_parser/src/lexer/identifier.rs     |  6 +--
 crates/oxc_parser/src/lexer/jsx.rs            | 12 ++---
 crates/oxc_parser/src/lexer/mod.rs            |  6 +++
 crates/oxc_parser/src/lexer/numeric.rs        | 46 +++++++++----------
 crates/oxc_parser/src/lexer/punctuation.rs    |  4 +-
 crates/oxc_parser/src/lexer/regex.rs          |  4 +-
 crates/oxc_parser/src/lexer/string.rs         |  2 +-
 crates/oxc_parser/src/lexer/string_builder.rs |  4 +-
 crates/oxc_parser/src/lexer/template.rs       |  4 +-
 crates/oxc_parser/src/lexer/unicode.rs        | 14 +++---
 12 files changed, 60 insertions(+), 54 deletions(-)

diff --git a/crates/oxc_parser/src/lexer/byte_handlers.rs b/crates/oxc_parser/src/lexer/byte_handlers.rs
index 4c2926ad1..b486e10e0 100644
--- a/crates/oxc_parser/src/lexer/byte_handlers.rs
+++ b/crates/oxc_parser/src/lexer/byte_handlers.rs
@@ -242,11 +242,11 @@ ascii_byte_handler!(SLH(lexer) {
     lexer.consume_char();
     match lexer.peek() {
         Some('/') => {
-            lexer.current.chars.next();
+            lexer.consume_char();
             lexer.skip_single_line_comment()
         }
         Some('*') => {
-            lexer.current.chars.next();
+            lexer.consume_char();
             lexer.skip_multi_line_comment()
         }
         _ => {
@@ -327,7 +327,7 @@ ascii_byte_handler!(QST(lexer) {
         if lexer.peek2().is_some_and(|c| c.is_ascii_digit()) {
             Kind::Question
         } else {
-            lexer.current.chars.next();
+            lexer.consume_char();
             Kind::QuestionDot
         }
     } else {
diff --git a/crates/oxc_parser/src/lexer/comment.rs b/crates/oxc_parser/src/lexer/comment.rs
index f195796ba..5ac3ef5f0 100644
--- a/crates/oxc_parser/src/lexer/comment.rs
+++ b/crates/oxc_parser/src/lexer/comment.rs
@@ -8,7 +8,7 @@ impl<'a> Lexer<'a> {
     #[allow(clippy::cast_possible_truncation)]
     pub(super) fn skip_single_line_comment(&mut self) -> Kind {
         let start = self.current.token.start;
-        while let Some(c) = self.current.chars.next() {
+        while let Some(c) = self.next_char() {
             if is_line_terminator(c) {
                 self.current.token.is_on_new_line = true;
                 self.trivia_builder
@@ -23,7 +23,7 @@ impl<'a> Lexer<'a> {
 
     /// Section 12.4 Multi Line Comment
     pub(super) fn skip_multi_line_comment(&mut self) -> Kind {
-        while let Some(c) = self.current.chars.next() {
+        while let Some(c) = self.next_char() {
             if c == '*' && self.next_eq('/') {
                 self.trivia_builder.add_multi_line_comment(self.current.token.start, self.offset());
                 return Kind::Skip;
@@ -38,7 +38,7 @@ impl<'a> Lexer<'a> {
 
     /// Section 12.5 Hashbang Comments
     pub(super) fn read_hashbang_comment(&mut self) -> Kind {
-        while let Some(c) = self.current.chars.next().as_ref() {
+        while let Some(c) = self.next_char().as_ref() {
             if is_line_terminator(*c) {
                 break;
             }
diff --git a/crates/oxc_parser/src/lexer/identifier.rs b/crates/oxc_parser/src/lexer/identifier.rs
index 272dd32f8..f28a3d6ec 100644
--- a/crates/oxc_parser/src/lexer/identifier.rs
+++ b/crates/oxc_parser/src/lexer/identifier.rs
@@ -18,7 +18,7 @@ impl<'a> Lexer<'a> {
     pub(super) fn private_identifier(&mut self) -> Kind {
         let mut builder = AutoCow::new(self);
         let start = self.offset();
-        match self.current.chars.next() {
+        match self.next_char() {
             Some(c) if is_identifier_start(c) => {
                 builder.push_matching(c);
             }
@@ -48,14 +48,14 @@ impl<'a> Lexer<'a> {
         while let Some(c) = self.peek() {
             if !is_identifier_part(c) {
                 if c == '\\' {
-                    self.current.chars.next();
+                    self.consume_char();
                     builder.force_allocation_without_current_ascii_char(self);
                     self.identifier_unicode_escape_sequence(&mut builder, false);
                     continue;
                 }
                 break;
             }
-            self.current.chars.next();
+            self.consume_char();
             builder.push_matching(c);
         }
         let has_escape = builder.has_escape();
diff --git a/crates/oxc_parser/src/lexer/jsx.rs b/crates/oxc_parser/src/lexer/jsx.rs
index 94b4d0e7e..d04275f53 100644
--- a/crates/oxc_parser/src/lexer/jsx.rs
+++ b/crates/oxc_parser/src/lexer/jsx.rs
@@ -17,7 +17,7 @@ impl<'a> Lexer<'a> {
     pub(super) fn read_jsx_string_literal(&mut self, delimiter: char) -> Kind {
         let mut builder = AutoCow::new(self);
         loop {
-            match self.current.chars.next() {
+            match self.next_char() {
                 Some(c @ ('"' | '\'')) => {
                     if c == delimiter {
                         self.save_string(builder.has_escape(), builder.finish_without_push(self));
@@ -58,11 +58,11 @@ impl<'a> Lexer<'a> {
     fn read_jsx_child(&mut self) -> Kind {
         match self.peek() {
             Some('<') => {
-                self.current.chars.next();
+                self.consume_char();
                 Kind::LAngle
             }
             Some('{') => {
-                self.current.chars.next();
+                self.consume_char();
                 Kind::LCurly
             }
             Some(_) => {
@@ -74,7 +74,7 @@ impl<'a> Lexer<'a> {
                     if self.peek().is_some_and(|c| c == '{' || c == '<') {
                         break;
                     }
-                    if self.current.chars.next().is_none() {
+                    if self.next_char().is_none() {
                         break;
                     }
                 }
@@ -91,10 +91,10 @@ impl<'a> Lexer<'a> {
     fn read_jsx_identifier(&mut self, _start_offset: u32) -> Kind {
         while let Some(c) = self.peek() {
             if c == '-' || is_identifier_start(c) {
-                self.current.chars.next();
+                self.consume_char();
                 while let Some(c) = self.peek() {
                     if is_identifier_part(c) {
-                        self.current.chars.next();
+                        self.consume_char();
                     } else {
                         break;
                     }
diff --git a/crates/oxc_parser/src/lexer/mod.rs b/crates/oxc_parser/src/lexer/mod.rs
index 2c14e2d85..3051a6244 100644
--- a/crates/oxc_parser/src/lexer/mod.rs
+++ b/crates/oxc_parser/src/lexer/mod.rs
@@ -212,6 +212,12 @@ impl<'a> Lexer<'a> {
         Span::new(self.current.token.start, self.offset())
     }
 
+    /// Consume the current char if not at EOF
+    #[inline]
+    fn next_char(&mut self) -> Option<char> {
+        self.current.chars.next()
+    }
+
     /// Consume the current char
     #[inline]
     fn consume_char(&mut self) -> char {
diff --git a/crates/oxc_parser/src/lexer/numeric.rs b/crates/oxc_parser/src/lexer/numeric.rs
index 8dcc27d05..560bab2fd 100644
--- a/crates/oxc_parser/src/lexer/numeric.rs
+++ b/crates/oxc_parser/src/lexer/numeric.rs
@@ -11,15 +11,15 @@ impl<'a> Lexer<'a> {
             Some('o' | 'O') => self.read_non_decimal(Kind::Octal),
             Some('x' | 'X') => self.read_non_decimal(Kind::Hex),
             Some('e' | 'E') => {
-                self.current.chars.next();
+                self.consume_char();
                 self.read_decimal_exponent()
             }
             Some('.') => {
-                self.current.chars.next();
+                self.consume_char();
                 self.decimal_literal_after_decimal_point_after_digits()
             }
             Some('n') => {
-                self.current.chars.next();
+                self.consume_char();
                 self.check_after_numeric_literal(Kind::Decimal)
             }
             Some(n) if n.is_ascii_digit() => self.read_legacy_octal(),
@@ -40,10 +40,10 @@ impl<'a> Lexer<'a> {
     }
 
     fn read_non_decimal(&mut self, kind: Kind) -> Kind {
-        self.current.chars.next();
+        self.consume_char();
 
         if self.peek().is_some_and(|c| kind.matches_number_char(c)) {
-            self.current.chars.next();
+            self.consume_char();
         } else {
             self.unexpected_err();
             return Kind::Undetermined;
@@ -52,22 +52,22 @@ impl<'a> Lexer<'a> {
         while let Some(c) = self.peek() {
             match c {
                 '_' => {
-                    self.current.chars.next();
+                    self.consume_char();
                     if self.peek().is_some_and(|c| kind.matches_number_char(c)) {
-                        self.current.chars.next();
+                        self.consume_char();
                     } else {
                         self.unexpected_err();
                         return Kind::Undetermined;
                     }
                 }
                 c if kind.matches_number_char(c) => {
-                    self.current.chars.next();
+                    self.consume_char();
                 }
                 _ => break,
             }
         }
         if self.peek() == Some('n') {
-            self.current.chars.next();
+            self.consume_char();
         }
         self.check_after_numeric_literal(kind)
     }
@@ -77,10 +77,10 @@ impl<'a> Lexer<'a> {
         loop {
             match self.peek() {
                 Some('0'..='7') => {
-                    self.current.chars.next();
+                    self.consume_char();
                 }
                 Some('8'..='9') => {
-                    self.current.chars.next();
+                    self.consume_char();
                     kind = Kind::Decimal;
                 }
                 _ => break,
@@ -90,12 +90,12 @@ impl<'a> Lexer<'a> {
         match self.peek() {
             // allow 08.5 and 09.5
             Some('.') if kind == Kind::Decimal => {
-                self.current.chars.next();
+                self.consume_char();
                 self.decimal_literal_after_decimal_point_after_digits()
             }
             // allow 08e1 and 09e1
             Some('e') if kind == Kind::Decimal => {
-                self.current.chars.next();
+                self.consume_char();
                 self.read_decimal_exponent()
             }
             _ => self.check_after_numeric_literal(kind),
@@ -105,11 +105,11 @@ impl<'a> Lexer<'a> {
     fn read_decimal_exponent(&mut self) -> Kind {
         let kind = match self.peek() {
             Some('-') => {
-                self.current.chars.next();
+                self.consume_char();
                 Kind::NegativeExponential
             }
             Some('+') => {
-                self.current.chars.next();
+                self.consume_char();
                 Kind::PositiveExponential
             }
             _ => Kind::PositiveExponential,
@@ -120,7 +120,7 @@ impl<'a> Lexer<'a> {
 
     fn read_decimal_digits(&mut self) {
         if self.peek().is_some_and(|c| c.is_ascii_digit()) {
-            self.current.chars.next();
+            self.consume_char();
         } else {
             self.unexpected_err();
             return;
@@ -133,16 +133,16 @@ impl<'a> Lexer<'a> {
         while let Some(c) = self.peek() {
             match c {
                 '_' => {
-                    self.current.chars.next();
+                    self.consume_char();
                     if self.peek().is_some_and(|c| c.is_ascii_digit()) {
-                        self.current.chars.next();
+                        self.consume_char();
                     } else {
                         self.unexpected_err();
                         return;
                     }
                 }
                 '0'..='9' => {
-                    self.current.chars.next();
+                    self.consume_char();
                 }
                 _ => break,
             }
@@ -163,7 +163,7 @@ impl<'a> Lexer<'a> {
 
     fn optional_decimal_digits(&mut self) {
         if self.peek().is_some_and(|c| c.is_ascii_digit()) {
-            self.current.chars.next();
+            self.consume_char();
         } else {
             return;
         }
@@ -172,7 +172,7 @@ impl<'a> Lexer<'a> {
 
     fn optional_exponent(&mut self) -> Option<Kind> {
         if matches!(self.peek(), Some('e' | 'E')) {
-            self.current.chars.next();
+            self.consume_char();
             return Some(self.read_decimal_exponent());
         }
         None
@@ -185,10 +185,10 @@ impl<'a> Lexer<'a> {
         if c.is_none() || c.is_some_and(|ch| !ch.is_ascii_digit() && !is_identifier_start(ch)) {
             return kind;
         }
-        self.current.chars.next();
+        self.consume_char();
         while let Some(c) = self.peek() {
             if is_identifier_start(c) {
-                self.current.chars.next();
+                self.consume_char();
             } else {
                 break;
             }
diff --git a/crates/oxc_parser/src/lexer/punctuation.rs b/crates/oxc_parser/src/lexer/punctuation.rs
index e119a45b5..067f41d35 100644
--- a/crates/oxc_parser/src/lexer/punctuation.rs
+++ b/crates/oxc_parser/src/lexer/punctuation.rs
@@ -4,8 +4,8 @@ impl<'a> Lexer<'a> {
     /// Section 12.8 Punctuators
     pub(super) fn read_dot(&mut self) -> Kind {
         if self.peek() == Some('.') && self.peek2() == Some('.') {
-            self.current.chars.next();
-            self.current.chars.next();
+            self.consume_char();
+            self.consume_char();
             return Kind::Dot3;
         }
         if self.peek().is_some_and(|c| c.is_ascii_digit()) {
diff --git a/crates/oxc_parser/src/lexer/regex.rs b/crates/oxc_parser/src/lexer/regex.rs
index 084e1175d..96159296a 100644
--- a/crates/oxc_parser/src/lexer/regex.rs
+++ b/crates/oxc_parser/src/lexer/regex.rs
@@ -28,7 +28,7 @@ impl<'a> Lexer<'a> {
         let mut in_escape = false;
         let mut in_character_class = false;
         loop {
-            match self.current.chars.next() {
+            match self.next_char() {
                 None => {
                     self.error(diagnostics::UnterminatedRegExp(self.unterminated_range()));
                     return (self.offset(), RegExpFlags::empty());
@@ -59,7 +59,7 @@ impl<'a> Lexer<'a> {
         let mut flags = RegExpFlags::empty();
 
         while let Some(ch @ ('$' | '_' | 'a'..='z' | 'A'..='Z' | '0'..='9')) = self.peek() {
-            self.current.chars.next();
+            self.consume_char();
             let flag = if let Ok(flag) = RegExpFlags::try_from(ch) {
                 flag
             } else {
diff --git a/crates/oxc_parser/src/lexer/string.rs b/crates/oxc_parser/src/lexer/string.rs
index f2f0c14b0..5fd5e2132 100644
--- a/crates/oxc_parser/src/lexer/string.rs
+++ b/crates/oxc_parser/src/lexer/string.rs
@@ -6,7 +6,7 @@ impl<'a> Lexer<'a> {
     pub(super) fn read_string_literal(&mut self, delimiter: char) -> Kind {
         let mut builder = AutoCow::new(self);
         loop {
-            match self.current.chars.next() {
+            match self.next_char() {
                 None | Some('\r' | '\n') => {
                     self.error(diagnostics::UnterminatedString(self.unterminated_range()));
                     return Kind::Undetermined;
diff --git a/crates/oxc_parser/src/lexer/string_builder.rs b/crates/oxc_parser/src/lexer/string_builder.rs
index 8f648e3ed..eee31a9d9 100644
--- a/crates/oxc_parser/src/lexer/string_builder.rs
+++ b/crates/oxc_parser/src/lexer/string_builder.rs
@@ -15,14 +15,14 @@ impl<'a> AutoCow<'a> {
         AutoCow { start, value: None }
     }
 
-    // Push a char that matches lexer.chars().next()
+    // Push a char that matches lexer.current.chars.next()
     pub fn push_matching(&mut self, c: char) {
         if let Some(text) = &mut self.value {
             text.push(c);
         }
     }
 
-    // Push a different character than lexer.chars().next().
+    // Push a different character than lexer.current.chars.next().
     // force_allocation_without_current_ascii_char must be called before this.
     pub fn push_different(&mut self, c: char) {
         debug_assert!(self.value.is_some());
diff --git a/crates/oxc_parser/src/lexer/template.rs b/crates/oxc_parser/src/lexer/template.rs
index 661bfda4f..812cd8622 100644
--- a/crates/oxc_parser/src/lexer/template.rs
+++ b/crates/oxc_parser/src/lexer/template.rs
@@ -8,7 +8,7 @@ impl<'a> Lexer<'a> {
     pub(super) fn read_template_literal(&mut self, substitute: Kind, tail: Kind) -> Kind {
         let mut builder = AutoCow::new(self);
         let mut is_valid_escape_sequence = true;
-        while let Some(c) = self.current.chars.next() {
+        while let Some(c) = self.next_char() {
             match c {
                 '$' if self.peek() == Some('{') => {
                     self.save_template_string(
@@ -16,7 +16,7 @@ impl<'a> Lexer<'a> {
                         builder.has_escape(),
                         builder.finish_without_push(self),
                     );
-                    self.current.chars.next();
+                    self.consume_char();
                     return substitute;
                 }
                 '`' => {
diff --git a/crates/oxc_parser/src/lexer/unicode.rs b/crates/oxc_parser/src/lexer/unicode.rs
index fe8f08f49..0a122b674 100644
--- a/crates/oxc_parser/src/lexer/unicode.rs
+++ b/crates/oxc_parser/src/lexer/unicode.rs
@@ -18,7 +18,7 @@ enum SurrogatePair {
 
 impl<'a> Lexer<'a> {
     pub(super) fn unicode_char_handler(&mut self) -> Kind {
-        let c = self.current.chars.clone().next().unwrap();
+        let c = self.peek().unwrap();
         match c {
             c if is_identifier_start_unicode(c) => {
                 let mut builder = AutoCow::new(self);
@@ -55,7 +55,7 @@ impl<'a> Lexer<'a> {
         check_identifier_start: bool,
     ) {
         let start = self.offset();
-        if self.current.chars.next() != Some('u') {
+        if self.next_char() != Some('u') {
             let range = Span::new(start, self.offset());
             self.error(diagnostics::UnicodeEscapeSequence(range));
             return;
@@ -167,7 +167,7 @@ impl<'a> Lexer<'a> {
             Some(c @ 'A'..='F') => 10 + (c as u32 - 'A' as u32),
             _ => return None,
         };
-        self.current.chars.next();
+        self.consume_char();
         Some(value)
     }
 
@@ -196,8 +196,8 @@ impl<'a> Lexer<'a> {
             return Some(SurrogatePair::CodePoint(high));
         }
 
-        self.current.chars.next();
-        self.current.chars.next();
+        self.next_char();
+        self.next_char();
 
         let low = self.hex_4_digits()?;
 
@@ -219,7 +219,7 @@ impl<'a> Lexer<'a> {
         in_template: bool,
         is_valid_escape_sequence: &mut bool,
     ) {
-        match self.current.chars.next() {
+        match self.next_char() {
             None => {
                 self.error(diagnostics::UnterminatedString(self.unterminated_range()));
             }
@@ -299,7 +299,7 @@ impl<'a> Lexer<'a> {
                     text.push(value);
                 }
                 '0' if in_template && self.peek().is_some_and(|c| c.is_ascii_digit()) => {
-                    self.current.chars.next();
+                    self.consume_char();
                     // error raised within the parser by `diagnostics::TemplateLiteral`
                     *is_valid_escape_sequence = false;
                 }