perf(codegen): check last char with byte methods (#6509)

When checking what last char in buffer is, avoid calculating a `char` when only comparing to an ASCII char (byte) anyway.
2026-05-19 04:08:41 +00:00 · 2024-10-14 01:09:02 +00:00 · 2024-10-14 01:09:02 +00:00 · 77f3a1a9dd
commit 77f3a1a9dd
parent 18b68ff90e
4 changed files with 106 additions and 22 deletions
--- a/crates/oxc_codegen/src/code_buffer.rs
+++ b/crates/oxc_codegen/src/code_buffer.rs
@ -125,17 +125,55 @@ impl CodeBuffer {
    /// let mut code = CodeBuffer::new();
    /// code.print_str("foo");
    ///
-    /// assert_eq!(code.peek_nth_back(0), Some('o'));
-    /// assert_eq!(code.peek_nth_back(2), Some('f'));
-    /// assert_eq!(code.peek_nth_back(3), None);
+    /// assert_eq!(code.peek_nth_char_back(0), Some('o'));
+    /// assert_eq!(code.peek_nth_char_back(2), Some('f'));
+    /// assert_eq!(code.peek_nth_char_back(3), None);
    /// ```
    #[inline]
    #[must_use = "Peeking is pointless if the peeked char isn't used"]
-    pub fn peek_nth_back(&self, n: usize) -> Option<char> {
+    pub fn peek_nth_char_back(&self, n: usize) -> Option<char> {
        // SAFETY: All methods of `CodeBuffer` ensure `buf` is valid UTF-8
        unsafe { std::str::from_utf8_unchecked(&self.buf) }.chars().nth_back(n)
    }

+    /// Peek the `n`th byte from the end of the buffer.
+    ///
+    /// When `n` is zero, the last byte is returned.
+    /// Returns [`None`] if `n` exceeds the length of the buffer.
+    ///
+    /// # Example
+    /// ```
+    /// use oxc_codegen::CodeBuffer;
+    /// let mut code = CodeBuffer::new();
+    /// code.print_str("foo");
+    ///
+    /// assert_eq!(code.peek_nth_byte_back(0), Some(b'o'));
+    /// assert_eq!(code.peek_nth_byte_back(2), Some(b'f'));
+    /// assert_eq!(code.peek_nth_byte_back(3), None);
+    /// ```
+    #[inline]
+    #[must_use = "Peeking is pointless if the peeked char isn't used"]
+    pub fn peek_nth_byte_back(&self, n: usize) -> Option<u8> {
+        let len = self.len();
+        if n < len {
+            Some(self.buf[len - 1 - n])
+        } else {
+            None
+        }
+    }
+
+    /// Peek the last byte from the end of the buffer.
+    #[inline]
+    pub fn last_byte(&self) -> Option<u8> {
+        self.buf.last().copied()
+    }
+
+    /// Peek the last char from the end of the buffer.
+    #[inline]
+    pub fn last_char(&self) -> Option<char> {
+        self.peek_nth_char_back(0)
+    }
+
    /// Push a single ASCII byte into the buffer.
    ///
    /// # Panics
@ -460,12 +498,40 @@ mod test {
    }

    #[test]
-    fn peek_nth_back() {
+    fn peek_nth_char_back() {
        let mut code = CodeBuffer::new();
-        code.print_str("foo");
+        code.print_str("bar");

-        assert_eq!(code.peek_nth_back(0), Some('o'));
-        assert_eq!(code.peek_nth_back(2), Some('f'));
-        assert_eq!(code.peek_nth_back(3), None);
+        assert_eq!(code.peek_nth_char_back(0), Some('r'));
+        assert_eq!(code.peek_nth_char_back(1), Some('a'));
+        assert_eq!(code.peek_nth_char_back(2), Some('b'));
+        assert_eq!(code.peek_nth_char_back(3), None);
+    }
+
+    #[test]
+    fn peek_nth_byte_back() {
+        let mut code = CodeBuffer::new();
+        code.print_str("bar");
+
+        assert_eq!(code.peek_nth_byte_back(0), Some(b'r'));
+        assert_eq!(code.peek_nth_byte_back(1), Some(b'a'));
+        assert_eq!(code.peek_nth_byte_back(2), Some(b'b'));
+        assert_eq!(code.peek_nth_byte_back(3), None);
+    }
+
+    #[test]
+    fn last_byte() {
+        let mut code = CodeBuffer::new();
+        assert_eq!(code.last_byte(), None);
+        code.print_str("bar");
+        assert_eq!(code.last_byte(), Some(b'r'));
+    }
+
+    #[test]
+    fn last_char() {
+        let mut code = CodeBuffer::new();
+        assert_eq!(code.last_char(), None);
+        code.print_str("bar");
+        assert_eq!(code.last_char(), Some('r'));
    }
 }
--- a/crates/oxc_codegen/src/comment.rs
+++ b/crates/oxc_codegen/src/comment.rs
@ -87,7 +87,7 @@ impl<'a> Codegen<'a> {

        if comments.first().is_some_and(|c| c.preceded_by_newline) {
            // Skip printing newline if this comment is already on a newline.
-            if self.peek_nth_back(0).is_some_and(|c| c != '\n' && c != '\t') {
+            if self.last_byte().is_some_and(|b| b != b'\n' && b != b'\t') {
                self.print_hard_newline();
                self.print_indent();
            }
--- a/crates/oxc_codegen/src/gen.rs
+++ b/crates/oxc_codegen/src/gen.rs
@ -1194,11 +1194,11 @@ impl<'a> Gen for BigIntLiteral<'a> {
 impl<'a> Gen for RegExpLiteral<'a> {
    fn gen(&self, p: &mut Codegen, _ctx: Context) {
        p.add_source_mapping(self.span.start);
-        let last = p.peek_nth_back(0);
+        let last = p.last_byte();
        let pattern_text = self.regex.pattern.source_text(p.source_text);
        // Avoid forming a single-line comment or "</script" sequence
-        if Some('/') == last
-            || (Some('<') == last && pattern_text.cow_to_lowercase().starts_with("script"))
+        if last == Some(b'/')
+            || (last == Some(b'<') && pattern_text.cow_to_lowercase().starts_with("script"))
        {
            p.print_hard_space();
        }
--- a/crates/oxc_codegen/src/lib.rs
+++ b/crates/oxc_codegen/src/lib.rs
@ -19,7 +19,7 @@ use oxc_ast::ast::{
 use oxc_mangler::Mangler;
 use oxc_span::{GetSpan, Span};
 use oxc_syntax::{
-    identifier::is_identifier_part,
+    identifier::{is_identifier_part, is_identifier_part_ascii},
    operator::{BinaryOperator, UnaryOperator, UpdateOperator},
    precedence::Precedence,
 };
@ -292,17 +292,31 @@ impl<'a> Codegen<'a> {

    #[inline]
    fn print_space_before_identifier(&mut self) {
-        if self
-            .peek_nth_back(0)
-            .is_some_and(|ch| is_identifier_part(ch) || self.prev_reg_exp_end == self.code.len())
-        {
-            self.print_hard_space();
+        let Some(byte) = self.last_byte() else { return };
+
+        if self.prev_reg_exp_end != self.code.len() {
+            let is_identifier = if byte.is_ascii() {
+                // Fast path for ASCII (very common case)
+                is_identifier_part_ascii(byte as char)
+            } else {
+                is_identifier_part(self.last_char().unwrap())
+            };
+            if !is_identifier {
+                return;
+            }
        }
+
+        self.print_hard_space();
    }

    #[inline]
-    fn peek_nth_back(&self, n: usize) -> Option<char> {
-        self.code.peek_nth_back(n)
+    fn last_byte(&self) -> Option<u8> {
+        self.code.last_byte()
+    }
+
+    #[inline]
+    fn last_char(&self) -> Option<char> {
+        self.code.last_char()
    }

    #[inline]
@ -533,7 +547,11 @@ impl<'a> Codegen<'a> {
            || ((prev == bin_op_sub || prev == un_op_neg)
                && (next == bin_op_sub || next == un_op_neg || next == un_op_pre_dec))
            || (prev == un_op_post_dec && next == bin_op_gt)
-            || (prev == un_op_not && next == un_op_pre_dec && self.peek_nth_back(1) == Some('<'))
+            || (prev == un_op_not
+                && next == un_op_pre_dec
+                // `prev == UnaryOperator::LogicalNot` which means last byte is ASCII,
+                // and therefore previous character is 1 byte from end of buffer
+                && self.code.peek_nth_byte_back(1) == Some(b'<'))
        {
            self.print_hard_space();
        }