mirror of
https://github.com/danbulant/oxc
synced 2026-05-19 04:08:41 +00:00
perf(codegen): faster writing indentation (#7820)
Write indentation faster. Previously, indentation was written to the buffer with a call to `memset`, which is relatively expensive. Where indentation is `<= 16` (the common case), write 16 tabs with a single 16-byte XMM store instead, which is faster.
This commit is contained in:
parent
14896cb318
commit
4448b63692
2 changed files with 59 additions and 4 deletions
|
|
@ -75,6 +75,15 @@ impl CodeBuffer {
|
|||
self.buf.len()
|
||||
}
|
||||
|
||||
/// Returns the capacity of the buffer in bytes.
|
||||
///
|
||||
/// This is *not* the same as capacity in characters,
|
||||
/// since non-ASCII characters require multiple bytes.
|
||||
#[inline]
|
||||
pub fn capacity(&self) -> usize {
|
||||
self.buf.capacity()
|
||||
}
|
||||
|
||||
/// Returns `true` if the buffer contains no characters.
|
||||
///
|
||||
/// # Example
|
||||
|
|
@ -365,6 +374,55 @@ impl CodeBuffer {
|
|||
self.buf.extend(bytes);
|
||||
}
|
||||
|
||||
/// Print `n` tab characters into the buffer (indentation).
|
||||
///
|
||||
/// Optimized on assumption that more than 16 levels of indentation is rare.
|
||||
///
|
||||
/// Fast path is to write 16 bytes of tabs in a single load + store,
|
||||
/// but only advance `len` by `n` bytes. This avoids a `memset` function call.
|
||||
///
|
||||
/// Take alternative slow path if either:
|
||||
/// 1. `n > 16`.
|
||||
/// 2. Less than 16 bytes spare capacity in buffer (needs to grow).
|
||||
/// Both of these cases should be rare.
|
||||
///
|
||||
/// <https://godbolt.org/z/e1EP5cnPc>
|
||||
#[inline]
|
||||
pub fn print_indent(&mut self, n: usize) {
|
||||
/// Size of chunks to write indent in.
|
||||
/// 16 is largest register size (XMM) available on all x86_64 targets.
|
||||
const CHUNK_SIZE: usize = 16;
|
||||
|
||||
#[cold]
|
||||
#[inline(never)]
|
||||
fn write_slow(code_buffer: &mut CodeBuffer, n: usize) {
|
||||
code_buffer.buf.extend(std::iter::repeat(b'\t').take(n));
|
||||
}
|
||||
|
||||
let len = self.len();
|
||||
let spare_capacity = self.capacity() - len;
|
||||
if n > CHUNK_SIZE || spare_capacity < CHUNK_SIZE {
|
||||
write_slow(self, n);
|
||||
return;
|
||||
}
|
||||
|
||||
// Write 16 tabs into buffer.
|
||||
// On x86_64, this is 1 XMM register load + 1 XMM store (16 byte copy).
|
||||
// SAFETY: We checked there are at least 16 bytes spare capacity.
|
||||
unsafe {
|
||||
let ptr = self.buf.as_mut_ptr().add(len).cast::<[u8; CHUNK_SIZE]>();
|
||||
ptr.write([b'\t'; CHUNK_SIZE]);
|
||||
}
|
||||
|
||||
// Update length of buffer.
|
||||
// SAFETY: We checked there's at least 16 bytes spare capacity, and `n <= 16`,
|
||||
// so `len + n` cannot exceed capacity.
|
||||
// `len` cannot exceed `isize::MAX`, so `len + n` cannot wrap around.
|
||||
unsafe {
|
||||
self.buf.set_len(len + n);
|
||||
}
|
||||
}
|
||||
|
||||
/// Get contents of buffer as a byte slice.
|
||||
///
|
||||
/// # Example
|
||||
|
|
|
|||
|
|
@ -350,10 +350,7 @@ impl<'a> Codegen<'a> {
|
|||
self.print_next_indent_as_space = false;
|
||||
return;
|
||||
}
|
||||
// SAFETY: this iterator only yields tabs, which are always valid ASCII characters.
|
||||
unsafe {
|
||||
self.code.print_bytes_unchecked(std::iter::repeat(b'\t').take(self.indent as usize));
|
||||
}
|
||||
self.code.print_indent(self.indent as usize);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
|
|
|
|||
Loading…
Reference in a new issue