mirror of
https://github.com/danbulant/oxc
synced 2026-05-24 12:21:58 +00:00
perf(codegen): faster writing indentation (#7820)
Write indentation faster. Previously was writing indentation to buffer with a call to `memset`, which is relatively expensive. Where indentation `<= 16` (common case), write 16 tabs with a single 16-byte XMM write, which is faster.
This commit is contained in:
parent
14896cb318
commit
4448b63692
2 changed files with 59 additions and 4 deletions
|
|
@ -75,6 +75,15 @@ impl CodeBuffer {
|
||||||
self.buf.len()
|
self.buf.len()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the capacity of the buffer in bytes.
|
||||||
|
///
|
||||||
|
/// This is *not* the same as capacity in characters,
|
||||||
|
/// since non-ASCII characters require multiple bytes.
|
||||||
|
#[inline]
|
||||||
|
pub fn capacity(&self) -> usize {
|
||||||
|
self.buf.capacity()
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns `true` if the buffer contains no characters.
|
/// Returns `true` if the buffer contains no characters.
|
||||||
///
|
///
|
||||||
/// # Example
|
/// # Example
|
||||||
|
|
@ -365,6 +374,55 @@ impl CodeBuffer {
|
||||||
self.buf.extend(bytes);
|
self.buf.extend(bytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Print `n` tab characters into the buffer (indentation).
|
||||||
|
///
|
||||||
|
/// Optimized on assumption that more that 16 levels of indentation is rare.
|
||||||
|
///
|
||||||
|
/// Fast path is to write 16 bytes of tabs in a single load + store,
|
||||||
|
/// but only advance `len` by `n` bytes. This avoids a `memset` function call.
|
||||||
|
///
|
||||||
|
/// Take alternative slow path if either:
|
||||||
|
/// 1. `n > 16`.
|
||||||
|
/// 2. Less than 16 bytes spare capacity in buffer (needs to grow).
|
||||||
|
/// Both of these cases should be rare.
|
||||||
|
///
|
||||||
|
/// <https://godbolt.org/z/e1EP5cnPc>
|
||||||
|
#[inline]
|
||||||
|
pub fn print_indent(&mut self, n: usize) {
|
||||||
|
/// Size of chunks to write indent in.
|
||||||
|
/// 16 is largest register size (XMM) available on all x86_84 targets.
|
||||||
|
const CHUNK_SIZE: usize = 16;
|
||||||
|
|
||||||
|
#[cold]
|
||||||
|
#[inline(never)]
|
||||||
|
fn write_slow(code_buffer: &mut CodeBuffer, n: usize) {
|
||||||
|
code_buffer.buf.extend(std::iter::repeat(b'\t').take(n));
|
||||||
|
}
|
||||||
|
|
||||||
|
let len = self.len();
|
||||||
|
let spare_capacity = self.capacity() - len;
|
||||||
|
if n > CHUNK_SIZE || spare_capacity < CHUNK_SIZE {
|
||||||
|
write_slow(self, n);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write 16 tabs into buffer.
|
||||||
|
// On x86_86, this is 1 XMM register load + 1 XMM store (16 byte copy).
|
||||||
|
// SAFETY: We checked there are at least 16 bytes spare capacity.
|
||||||
|
unsafe {
|
||||||
|
let ptr = self.buf.as_mut_ptr().add(len).cast::<[u8; CHUNK_SIZE]>();
|
||||||
|
ptr.write([b'\t'; CHUNK_SIZE]);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update length of buffer.
|
||||||
|
// SAFETY: We checked there's at least 16 bytes spare capacity, and `n <= 16`,
|
||||||
|
// so `len + n` cannot exceed capacity.
|
||||||
|
// `len` cannot exceed `isize::MAX`, so `len + n` cannot wrap around.
|
||||||
|
unsafe {
|
||||||
|
self.buf.set_len(len + n);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Get contents of buffer as a byte slice.
|
/// Get contents of buffer as a byte slice.
|
||||||
///
|
///
|
||||||
/// # Example
|
/// # Example
|
||||||
|
|
|
||||||
|
|
@ -350,10 +350,7 @@ impl<'a> Codegen<'a> {
|
||||||
self.print_next_indent_as_space = false;
|
self.print_next_indent_as_space = false;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
// SAFETY: this iterator only yields tabs, which are always valid ASCII characters.
|
self.code.print_indent(self.indent as usize);
|
||||||
unsafe {
|
|
||||||
self.code.print_bytes_unchecked(std::iter::repeat(b'\t').take(self.indent as usize));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue