mirror of
https://github.com/danbulant/oxc
synced 2026-05-19 04:08:41 +00:00
perf(codegen): check last char with byte methods (#6509)
When checking what last char in buffer is, avoid calculating a `char` when only comparing to an ASCII char (byte) anyway.
This commit is contained in:
parent
18b68ff90e
commit
77f3a1a9dd
4 changed files with 106 additions and 22 deletions
|
|
@ -125,17 +125,55 @@ impl CodeBuffer {
|
|||
/// let mut code = CodeBuffer::new();
|
||||
/// code.print_str("foo");
|
||||
///
|
||||
/// assert_eq!(code.peek_nth_back(0), Some('o'));
|
||||
/// assert_eq!(code.peek_nth_back(2), Some('f'));
|
||||
/// assert_eq!(code.peek_nth_back(3), None);
|
||||
/// assert_eq!(code.peek_nth_char_back(0), Some('o'));
|
||||
/// assert_eq!(code.peek_nth_char_back(2), Some('f'));
|
||||
/// assert_eq!(code.peek_nth_char_back(3), None);
|
||||
/// ```
|
||||
#[inline]
|
||||
#[must_use = "Peeking is pointless if the peeked char isn't used"]
|
||||
pub fn peek_nth_back(&self, n: usize) -> Option<char> {
|
||||
pub fn peek_nth_char_back(&self, n: usize) -> Option<char> {
|
||||
// SAFETY: All methods of `CodeBuffer` ensure `buf` is valid UTF-8
|
||||
unsafe { std::str::from_utf8_unchecked(&self.buf) }.chars().nth_back(n)
|
||||
}
|
||||
|
||||
/// Peek the `n`th byte from the end of the buffer.
|
||||
///
|
||||
/// When `n` is zero, the last byte is returned.
|
||||
/// Returns [`None`] if `n` exceeds the length of the buffer.
|
||||
///
|
||||
/// # Example
|
||||
/// ```
|
||||
/// use oxc_codegen::CodeBuffer;
|
||||
/// let mut code = CodeBuffer::new();
|
||||
/// code.print_str("foo");
|
||||
///
|
||||
/// assert_eq!(code.peek_nth_byte_back(0), Some(b'o'));
|
||||
/// assert_eq!(code.peek_nth_byte_back(2), Some(b'f'));
|
||||
/// assert_eq!(code.peek_nth_byte_back(3), None);
|
||||
/// ```
|
||||
#[inline]
|
||||
#[must_use = "Peeking is pointless if the peeked char isn't used"]
|
||||
pub fn peek_nth_byte_back(&self, n: usize) -> Option<u8> {
|
||||
let len = self.len();
|
||||
if n < len {
|
||||
Some(self.buf[len - 1 - n])
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Peek the last byte from the end of the buffer.
|
||||
#[inline]
|
||||
pub fn last_byte(&self) -> Option<u8> {
|
||||
self.buf.last().copied()
|
||||
}
|
||||
|
||||
/// Peek the last char from the end of the buffer.
|
||||
#[inline]
|
||||
pub fn last_char(&self) -> Option<char> {
|
||||
self.peek_nth_char_back(0)
|
||||
}
|
||||
|
||||
/// Push a single ASCII byte into the buffer.
|
||||
///
|
||||
/// # Panics
|
||||
|
|
@ -460,12 +498,40 @@ mod test {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn peek_nth_back() {
|
||||
fn peek_nth_char_back() {
|
||||
let mut code = CodeBuffer::new();
|
||||
code.print_str("foo");
|
||||
code.print_str("bar");
|
||||
|
||||
assert_eq!(code.peek_nth_back(0), Some('o'));
|
||||
assert_eq!(code.peek_nth_back(2), Some('f'));
|
||||
assert_eq!(code.peek_nth_back(3), None);
|
||||
assert_eq!(code.peek_nth_char_back(0), Some('r'));
|
||||
assert_eq!(code.peek_nth_char_back(1), Some('a'));
|
||||
assert_eq!(code.peek_nth_char_back(2), Some('b'));
|
||||
assert_eq!(code.peek_nth_char_back(3), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn peek_nth_byte_back() {
|
||||
let mut code = CodeBuffer::new();
|
||||
code.print_str("bar");
|
||||
|
||||
assert_eq!(code.peek_nth_byte_back(0), Some(b'r'));
|
||||
assert_eq!(code.peek_nth_byte_back(1), Some(b'a'));
|
||||
assert_eq!(code.peek_nth_byte_back(2), Some(b'b'));
|
||||
assert_eq!(code.peek_nth_byte_back(3), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn last_byte() {
|
||||
let mut code = CodeBuffer::new();
|
||||
assert_eq!(code.last_byte(), None);
|
||||
code.print_str("bar");
|
||||
assert_eq!(code.last_byte(), Some(b'r'));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn last_char() {
|
||||
let mut code = CodeBuffer::new();
|
||||
assert_eq!(code.last_char(), None);
|
||||
code.print_str("bar");
|
||||
assert_eq!(code.last_char(), Some('r'));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -87,7 +87,7 @@ impl<'a> Codegen<'a> {
|
|||
|
||||
if comments.first().is_some_and(|c| c.preceded_by_newline) {
|
||||
// Skip printing newline if this comment is already on a newline.
|
||||
if self.peek_nth_back(0).is_some_and(|c| c != '\n' && c != '\t') {
|
||||
if self.last_byte().is_some_and(|b| b != b'\n' && b != b'\t') {
|
||||
self.print_hard_newline();
|
||||
self.print_indent();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1194,11 +1194,11 @@ impl<'a> Gen for BigIntLiteral<'a> {
|
|||
impl<'a> Gen for RegExpLiteral<'a> {
|
||||
fn gen(&self, p: &mut Codegen, _ctx: Context) {
|
||||
p.add_source_mapping(self.span.start);
|
||||
let last = p.peek_nth_back(0);
|
||||
let last = p.last_byte();
|
||||
let pattern_text = self.regex.pattern.source_text(p.source_text);
|
||||
// Avoid forming a single-line comment or "</script" sequence
|
||||
if Some('/') == last
|
||||
|| (Some('<') == last && pattern_text.cow_to_lowercase().starts_with("script"))
|
||||
if last == Some(b'/')
|
||||
|| (last == Some(b'<') && pattern_text.cow_to_lowercase().starts_with("script"))
|
||||
{
|
||||
p.print_hard_space();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ use oxc_ast::ast::{
|
|||
use oxc_mangler::Mangler;
|
||||
use oxc_span::{GetSpan, Span};
|
||||
use oxc_syntax::{
|
||||
identifier::is_identifier_part,
|
||||
identifier::{is_identifier_part, is_identifier_part_ascii},
|
||||
operator::{BinaryOperator, UnaryOperator, UpdateOperator},
|
||||
precedence::Precedence,
|
||||
};
|
||||
|
|
@ -292,17 +292,31 @@ impl<'a> Codegen<'a> {
|
|||
|
||||
#[inline]
|
||||
fn print_space_before_identifier(&mut self) {
|
||||
if self
|
||||
.peek_nth_back(0)
|
||||
.is_some_and(|ch| is_identifier_part(ch) || self.prev_reg_exp_end == self.code.len())
|
||||
{
|
||||
self.print_hard_space();
|
||||
let Some(byte) = self.last_byte() else { return };
|
||||
|
||||
if self.prev_reg_exp_end != self.code.len() {
|
||||
let is_identifier = if byte.is_ascii() {
|
||||
// Fast path for ASCII (very common case)
|
||||
is_identifier_part_ascii(byte as char)
|
||||
} else {
|
||||
is_identifier_part(self.last_char().unwrap())
|
||||
};
|
||||
if !is_identifier {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
self.print_hard_space();
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn peek_nth_back(&self, n: usize) -> Option<char> {
|
||||
self.code.peek_nth_back(n)
|
||||
fn last_byte(&self) -> Option<u8> {
|
||||
self.code.last_byte()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn last_char(&self) -> Option<char> {
|
||||
self.code.last_char()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
|
|
@ -533,7 +547,11 @@ impl<'a> Codegen<'a> {
|
|||
|| ((prev == bin_op_sub || prev == un_op_neg)
|
||||
&& (next == bin_op_sub || next == un_op_neg || next == un_op_pre_dec))
|
||||
|| (prev == un_op_post_dec && next == bin_op_gt)
|
||||
|| (prev == un_op_not && next == un_op_pre_dec && self.peek_nth_back(1) == Some('<'))
|
||||
|| (prev == un_op_not
|
||||
&& next == un_op_pre_dec
|
||||
// `prev == UnaryOperator::LogicalNot` which means last byte is ASCII,
|
||||
// and therefore previous character is 1 byte from end of buffer
|
||||
&& self.code.peek_nth_byte_back(1) == Some(b'<'))
|
||||
{
|
||||
self.print_hard_space();
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue