From 24ded3cb1530cb5cf69d021cb96d67318082954f Mon Sep 17 00:00:00 2001
From: overlookmotel <theoverlookmotel@gmail.com>
Date: Wed, 28 Feb 2024 06:39:23 +0000
Subject: [PATCH] perf(parser): lex JSX strings with `memchr` (#2528)

Simplify lexing of JSX string attributes. The search is for a single
byte value (the closing quote), so it doesn't require a byte table and
can use `memchr` instead.

This change doesn't really register on benchmarks, but it's one step
closer to removing `AutoCow` and to transitioning all the searches in
the lexer to byte-by-byte searches.
---
 crates/oxc_parser/src/lexer/jsx.rs    | 45 +++++++++++++++------------
 crates/oxc_parser/src/lexer/string.rs | 31 ++++++++----------
 2 files changed, 38 insertions(+), 38 deletions(-)

diff --git a/crates/oxc_parser/src/lexer/jsx.rs b/crates/oxc_parser/src/lexer/jsx.rs
index 307109c34..c1c506fef 100644
--- a/crates/oxc_parser/src/lexer/jsx.rs
+++ b/crates/oxc_parser/src/lexer/jsx.rs
@@ -1,6 +1,7 @@
-use super::{AutoCow, Kind, Lexer, Token};
+use super::{Kind, Lexer, Token};
 use crate::diagnostics;
 
+use memchr::memchr;
 use oxc_syntax::identifier::{is_identifier_part, is_identifier_start};
 
 impl<'a> Lexer<'a> {
@@ -14,25 +15,29 @@ impl<'a> Lexer<'a> {
     ///   `JSXStringCharacter` but not '
     /// `JSXStringCharacter` ::
     ///   `SourceCharacter` but not one of `HTMLCharacterReference`
-    pub(super) fn read_jsx_string_literal(&mut self, delimiter: char) -> Kind {
-        let mut builder = AutoCow::new(self);
-        loop {
-            match self.next_char() {
-                Some(c @ ('"' | '\'')) => {
-                    if c == delimiter {
-                        self.save_string(builder.has_escape(), builder.finish_without_push(self));
-                        return Kind::Str;
-                    }
-                    builder.push_matching(c);
-                }
-                Some(other) => {
-                    builder.push_matching(other);
-                }
-                None => {
-                    self.error(diagnostics::UnterminatedString(self.unterminated_range()));
-                    return Kind::Undetermined;
-                }
-            }
+
+    /// Read JSX string literal.
+    /// # SAFETY
+    /// * `delimiter` must be an ASCII character.
+    /// * Next char in `lexer.source` must be ASCII.
+    pub(super) unsafe fn read_jsx_string_literal(&mut self, delimiter: u8) -> Kind {
+        debug_assert!(delimiter.is_ascii());
+        // Skip opening quote.
+        // SAFETY: Caller guarantees next byte is ASCII, so `.add(1)` is a UTF-8 char boundary
+        let after_opening_quote = self.source.position().add(1);
+        let remaining = self.source.str_from_pos_to_end(after_opening_quote);
+
+        let len = memchr(delimiter, remaining.as_bytes());
+        if let Some(len) = len {
+            // SAFETY: `after_opening_quote` + `len` is position of delimiter.
+            // Caller guarantees delimiter is ASCII, so 1 byte after it is a UTF-8 char boundary.
+            let after_closing_quote = after_opening_quote.add(len + 1);
+            self.source.set_position(after_closing_quote);
+            Kind::Str
+        } else {
+            self.source.advance_to_end();
+            self.error(diagnostics::UnterminatedString(self.unterminated_range()));
+            Kind::Undetermined
         }
     }
 
diff --git a/crates/oxc_parser/src/lexer/string.rs b/crates/oxc_parser/src/lexer/string.rs
index 8325d2f95..a37aee099 100644
--- a/crates/oxc_parser/src/lexer/string.rs
+++ b/crates/oxc_parser/src/lexer/string.rs
@@ -25,6 +25,13 @@ static SINGLE_QUOTE_STRING_END_TABLE: SafeByteMatchTable =
 /// `$table` must only match `$delimiter`, '\', '\r' or '\n'.
 macro_rules! handle_string_literal {
     ($lexer:ident, $delimiter:expr, $table:ident) => {{
+        debug_assert!($delimiter.is_ascii());
+
+        if $lexer.context == LexerContext::JsxAttributeValue {
+            // SAFETY: Caller guarantees `$delimiter` is ASCII, and next char is ASCII
+            return $lexer.read_jsx_string_literal($delimiter);
+        }
+
         // Skip opening quote.
         // SAFETY: Caller guarantees next byte is ASCII, so safe to advance past it.
         let after_opening_quote = $lexer.source.position().add(1);
@@ -157,30 +164,18 @@ impl<'a> Lexer<'a> {
     /// # SAFETY
     /// Next character must be `"`.
     pub(super) unsafe fn read_string_literal_double_quote(&mut self) -> Kind {
-        if self.context == LexerContext::JsxAttributeValue {
-            // SAFETY: Caller guarantees next char is `"`
-            self.source.next_byte_unchecked();
-            self.read_jsx_string_literal('"')
-        } else {
-            // SAFETY: Caller guarantees next char is `"`, which is ASCII.
-            // b'"' is an ASCII byte. `DOUBLE_QUOTE_STRING_END_TABLE` is a `SafeByteMatchTable`.
-            unsafe { handle_string_literal!(self, b'"', DOUBLE_QUOTE_STRING_END_TABLE) }
-        }
+        // SAFETY: Caller guarantees next char is `"`, which is ASCII.
+        // b'"' is an ASCII byte. `DOUBLE_QUOTE_STRING_END_TABLE` is a `SafeByteMatchTable`.
+        unsafe { handle_string_literal!(self, b'"', DOUBLE_QUOTE_STRING_END_TABLE) }
     }
 
     /// Read string literal delimited with `'`.
     /// # SAFETY
     /// Next character must be `'`.
     pub(super) unsafe fn read_string_literal_single_quote(&mut self) -> Kind {
-        if self.context == LexerContext::JsxAttributeValue {
-            // SAFETY: Caller guarantees next char is `'`
-            self.source.next_byte_unchecked();
-            self.read_jsx_string_literal('\'')
-        } else {
-            // SAFETY: Caller guarantees next char is `"`, which is ASCII.
-            // b'\'' is an ASCII byte. `SINGLE_QUOTE_STRING_END_TABLE` is a `SafeByteMatchTable`.
-            unsafe { handle_string_literal!(self, b'\'', SINGLE_QUOTE_STRING_END_TABLE) }
-        }
+        // SAFETY: Caller guarantees next char is `'`, which is ASCII.
+        // b'\'' is an ASCII byte. `SINGLE_QUOTE_STRING_END_TABLE` is a `SafeByteMatchTable`.
+        unsafe { handle_string_literal!(self, b'\'', SINGLE_QUOTE_STRING_END_TABLE) }
     }
 
     /// Save the string if it is escaped