mirror of
https://github.com/danbulant/oxc
synced 2026-05-24 20:32:10 +00:00
refactor(parser): name byte handler functions (#2301)
This PR solves the problem of lexer byte handlers all being called `core::ops::function::FnOnce::call_once` in the flame graphs on CodSpeed, by defining them as named functions instead of closures. Pure refactor, no substantive changes.
This commit is contained in:
parent
018674c56c
commit
9811c3a2c3
1 changed file with 63 additions and 17 deletions
|
|
@ -8,7 +8,7 @@ use crate::diagnostics;
|
||||||
/// * Lexer must not be at end of file.
|
/// * Lexer must not be at end of file.
|
||||||
/// * `byte` must be next byte of source code, corresponding to current position
|
/// * `byte` must be next byte of source code, corresponding to current position
|
||||||
/// of `lexer.current.chars`.
|
/// of `lexer.current.chars`.
|
||||||
/// * Only `BYTE_HANDLERS` for ASCII characters may use the `ascii_byte_handler!()` macro.
|
/// * Only `BYTE_HANDLERS` for ASCII characters may use the `ascii_byte_handler!` macro.
|
||||||
pub(super) unsafe fn handle_byte(byte: u8, lexer: &mut Lexer) -> Kind {
|
pub(super) unsafe fn handle_byte(byte: u8, lexer: &mut Lexer) -> Kind {
|
||||||
BYTE_HANDLERS[byte as usize](lexer)
|
BYTE_HANDLERS[byte as usize](lexer)
|
||||||
}
|
}
|
||||||
|
|
@ -38,6 +38,41 @@ static BYTE_HANDLERS: [ByteHandler; 256] = [
|
||||||
UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UER, UER, UER, UER, UER, UER, UER, UER, // F
|
UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UER, UER, UER, UER, UER, UER, UER, UER, // F
|
||||||
];
|
];
|
||||||
|
|
||||||
|
/// Macro for defining a byte handler.
|
||||||
|
///
|
||||||
|
/// Use `ascii_byte_handler!` macro for ASCII characters, which adds optimizations for ASCII.
|
||||||
|
///
|
||||||
|
/// Handlers are defined as functions instead of closures, so they have names in flame graphs.
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// byte_handler!(UNI(lexer) {
|
||||||
|
/// lexer.unicode_char_handler()
|
||||||
|
/// });
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// expands to:
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// const UNI: ByteHandler = {
|
||||||
|
/// #[allow(non_snake_case)]
|
||||||
|
/// fn UNI(lexer: &mut Lexer) -> Kind {
|
||||||
|
/// lexer.unicode_char_handler()
|
||||||
|
/// }
|
||||||
|
/// UNI
|
||||||
|
/// };
|
||||||
|
/// ```
|
||||||
|
macro_rules! byte_handler {
|
||||||
|
($id:ident($lex:ident) $body:expr) => {
|
||||||
|
const $id: ByteHandler = {
|
||||||
|
#[allow(non_snake_case)]
|
||||||
|
fn $id($lex: &mut Lexer) -> Kind {
|
||||||
|
$body
|
||||||
|
}
|
||||||
|
$id
|
||||||
|
};
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
#[allow(clippy::unnecessary_safety_comment)]
|
#[allow(clippy::unnecessary_safety_comment)]
|
||||||
/// Macro for defining byte handler for an ASCII character.
|
/// Macro for defining byte handler for an ASCII character.
|
||||||
///
|
///
|
||||||
|
|
@ -53,7 +88,8 @@ static BYTE_HANDLERS: [ByteHandler; 256] = [
|
||||||
///
|
///
|
||||||
/// These assertions are unchecked (i.e. won't panic) and will cause UB if they're incorrect.
|
/// These assertions are unchecked (i.e. won't panic) and will cause UB if they're incorrect.
|
||||||
///
|
///
|
||||||
/// SAFETY: Only use this macro to define byte handlers for ASCII characters.
|
/// # Safety
|
||||||
|
/// Only use this macro to define byte handlers for ASCII characters.
|
||||||
///
|
///
|
||||||
/// ```
|
/// ```
|
||||||
/// ascii_byte_handler!(SPS(lexer) {
|
/// ascii_byte_handler!(SPS(lexer) {
|
||||||
|
|
@ -65,20 +101,27 @@ static BYTE_HANDLERS: [ByteHandler; 256] = [
|
||||||
/// expands to:
|
/// expands to:
|
||||||
///
|
///
|
||||||
/// ```
|
/// ```
|
||||||
/// const SPS: ByteHandler = |lexer| {
|
/// const SPS: ByteHandler = {
|
||||||
/// unsafe {
|
/// #[allow(non_snake_case)]
|
||||||
/// use assert_unchecked::assert_unchecked;
|
/// fn SPS(lexer: &mut Lexer) -> Kind {
|
||||||
/// let s = lexer.current.chars.as_str();
|
/// // SAFETY: This macro is only used for ASCII characters
|
||||||
/// assert_unchecked!(!s.is_empty());
|
/// unsafe {
|
||||||
/// assert_unchecked!(s.as_bytes()[0] < 128);
|
/// use assert_unchecked::assert_unchecked;
|
||||||
|
/// let s = lexer.current.chars.as_str();
|
||||||
|
/// assert_unchecked!(!s.is_empty());
|
||||||
|
/// assert_unchecked!(s.as_bytes()[0] < 128);
|
||||||
|
/// }
|
||||||
|
/// {
|
||||||
|
/// lexer.consume_char();
|
||||||
|
/// Kind::WhiteSpace
|
||||||
|
/// }
|
||||||
/// }
|
/// }
|
||||||
/// lexer.consume_char();
|
/// SPS
|
||||||
/// Kind::WhiteSpace
|
|
||||||
/// };
|
/// };
|
||||||
/// ```
|
/// ```
|
||||||
macro_rules! ascii_byte_handler {
|
macro_rules! ascii_byte_handler {
|
||||||
($id:ident($lex:ident) $body:expr) => {
|
($id:ident($lex:ident) $body:expr) => {
|
||||||
const $id: ByteHandler = |$lex| {
|
byte_handler!($id($lex) {
|
||||||
// SAFETY: This macro is only used for ASCII characters
|
// SAFETY: This macro is only used for ASCII characters
|
||||||
unsafe {
|
unsafe {
|
||||||
use assert_unchecked::assert_unchecked;
|
use assert_unchecked::assert_unchecked;
|
||||||
|
|
@ -87,7 +130,7 @@ macro_rules! ascii_byte_handler {
|
||||||
assert_unchecked!(s.as_bytes()[0] < 128);
|
assert_unchecked!(s.as_bytes()[0] < 128);
|
||||||
}
|
}
|
||||||
$body
|
$body
|
||||||
};
|
});
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -590,14 +633,17 @@ ascii_byte_handler!(L_Y(lexer) match &lexer.identifier_name_handler()[1..] {
|
||||||
});
|
});
|
||||||
|
|
||||||
// Non-ASCII characters.
|
// Non-ASCII characters.
|
||||||
// NB: Must not use `ascii_byte_handler!()` macro, as this handler is for non-ASCII chars.
|
// NB: Must not use `ascii_byte_handler!` macro, as this handler is for non-ASCII chars.
|
||||||
#[allow(clippy::redundant_closure_for_method_calls)]
|
byte_handler!(UNI(lexer) {
|
||||||
const UNI: ByteHandler = |lexer| lexer.unicode_char_handler();
|
lexer.unicode_char_handler()
|
||||||
|
});
|
||||||
|
|
||||||
// UTF-8 continuation bytes (128-191) (i.e. middle of a multi-byte UTF-8 sequence)
|
// UTF-8 continuation bytes (128-191) (i.e. middle of a multi-byte UTF-8 sequence)
|
||||||
// + and byte values which are not legal in UTF-8 strings (248-255).
|
// + and byte values which are not legal in UTF-8 strings (248-255).
|
||||||
// `handle_byte()` should only be called with 1st byte of a valid UTF-8 char,
|
// `handle_byte()` should only be called with 1st byte of a valid UTF-8 char,
|
||||||
// so something has gone wrong if we get here.
|
// so something has gone wrong if we get here.
|
||||||
// https://en.wikipedia.org/wiki/UTF-8
|
// https://en.wikipedia.org/wiki/UTF-8
|
||||||
// NB: Must not use `ascii_byte_handler!()` macro, as this handler is for non-ASCII bytes.
|
// NB: Must not use `ascii_byte_handler!` macro, as this handler is for non-ASCII bytes.
|
||||||
const UER: ByteHandler = |_| unreachable!();
|
byte_handler!(UER(_lexer) {
|
||||||
|
unreachable!();
|
||||||
|
});
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue