perf(codegen): faster writing indentation (#7820)

Write indentation faster. Previously, indentation was written to the buffer with a call to `memset`, which is relatively expensive. Where indentation is `<= 16` (the common case), write 16 tabs with a single 16-byte XMM write, which is faster.
2026-05-19 04:08:41 +00:00 · 2024-12-13 05:17:32 +00:00 · 2024-12-13 05:17:32 +00:00 · 4448b63692
commit 4448b63692
parent 14896cb318
2 changed files with 59 additions and 4 deletions
--- a/crates/oxc_codegen/src/code_buffer.rs
+++ b/crates/oxc_codegen/src/code_buffer.rs
@ -75,6 +75,15 @@ impl CodeBuffer {
        self.buf.len()
    }

+    /// Returns the capacity of the buffer in bytes.
+    ///
+    /// This is *not* the same as capacity in characters,
+    /// since non-ASCII characters require multiple bytes.
+    #[inline]
+    pub fn capacity(&self) -> usize {
+        self.buf.capacity()
+    }
+
    /// Returns `true` if the buffer contains no characters.
    ///
    /// # Example
@ -365,6 +374,55 @@ impl CodeBuffer {
        self.buf.extend(bytes);
    }

+    /// Print `n` tab characters into the buffer (indentation).
+    ///
+    /// Optimized on the assumption that more than 16 levels of indentation is rare.
+    ///
+    /// Fast path is to write 16 bytes of tabs in a single load + store,
+    /// but only advance `len` by `n` bytes. This avoids a `memset` function call.
+    ///
+    /// Take alternative slow path if either:
+    /// 1. `n > 16`.
+    /// 2. Less than 16 bytes spare capacity in buffer (needs to grow).
+    /// Both of these cases should be rare.
+    ///
+    /// <https://godbolt.org/z/e1EP5cnPc>
+    #[inline]
+    pub fn print_indent(&mut self, n: usize) {
+        /// Size of chunks to write indent in.
+        /// 16 bytes is the largest register size (XMM) available on all x86_64 targets.
+        const CHUNK_SIZE: usize = 16;
+
+        #[cold]
+        #[inline(never)]
+        fn write_slow(code_buffer: &mut CodeBuffer, n: usize) {
+            code_buffer.buf.extend(std::iter::repeat(b'\t').take(n));
+        }
+
+        let len = self.len();
+        let spare_capacity = self.capacity() - len;
+        if n > CHUNK_SIZE || spare_capacity < CHUNK_SIZE {
+            write_slow(self, n);
+            return;
+        }
+
+        // Write 16 tabs into buffer.
+        // On x86_64, this is 1 XMM register load + 1 XMM store (16 byte copy).
+        // SAFETY: We checked there are at least 16 bytes spare capacity.
+        unsafe {
+            let ptr = self.buf.as_mut_ptr().add(len).cast::<[u8; CHUNK_SIZE]>();
+            ptr.write([b'\t'; CHUNK_SIZE]);
+        }
+
+        // Update length of buffer.
+        // SAFETY: We checked there's at least 16 bytes spare capacity, and `n <= 16`,
+        // so `len + n` cannot exceed capacity.
+        // `len` cannot exceed `isize::MAX`, so `len + n` cannot wrap around.
+        unsafe {
+            self.buf.set_len(len + n);
+        }
+    }
+
    /// Get contents of buffer as a byte slice.
    ///
    /// # Example
--- a/crates/oxc_codegen/src/lib.rs
+++ b/crates/oxc_codegen/src/lib.rs
@ -350,10 +350,7 @@ impl<'a> Codegen<'a> {
            self.print_next_indent_as_space = false;
            return;
        }
-        // SAFETY: this iterator only yields tabs, which are always valid ASCII characters.
-        unsafe {
-            self.code.print_bytes_unchecked(std::iter::repeat(b'\t').take(self.indent as usize));
-        }
+        self.code.print_indent(self.indent as usize);
    }

    #[inline]