mirror of
https://github.com/danbulant/oxc
synced 2026-05-21 05:08:45 +00:00
perf(lexer): dedupe numeric separator check (#3283)
## What This PR Does Updates numeric literal token lexing to record when separator characters (`_`) are found in a new `Token` flag. This then gets passed to `parse_int` and `parse_float`, removing the need for a second `_` check in those two functions. When run locally, I see no change to lexer benchmarks and minor improvements to codegen benchmarks. For some reason, semantic and source map benches seem to be doing slightly worse. Note that I attempted to implement this with `bitflags!` (making `escaped` and `is_on_newline` flags as well) and this caused performance degradation. My best guess is that it turned reads on these flags from a `mov` to a `mov` + a binary and. --------- Co-authored-by: Boshen <boshenc@gmail.com>
This commit is contained in:
parent
dad47a553e
commit
508dae6f8f
4 changed files with 68 additions and 11 deletions
|
|
@ -286,8 +286,12 @@ impl<'a> ParserImpl<'a> {
|
|||
let token = self.cur_token();
|
||||
let src = self.cur_src();
|
||||
let value = match token.kind {
|
||||
Kind::Decimal | Kind::Binary | Kind::Octal | Kind::Hex => parse_int(src, token.kind),
|
||||
Kind::Float | Kind::PositiveExponential | Kind::NegativeExponential => parse_float(src),
|
||||
Kind::Decimal | Kind::Binary | Kind::Octal | Kind::Hex => {
|
||||
parse_int(src, token.kind, token.has_separator())
|
||||
}
|
||||
Kind::Float | Kind::PositiveExponential | Kind::NegativeExponential => {
|
||||
parse_float(src, token.has_separator())
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
.map_err(|err| diagnostics::invalid_number(err, token.span()))?;
|
||||
|
|
|
|||
|
|
@ -6,13 +6,35 @@ use std::borrow::Cow;
|
|||
|
||||
use super::kind::Kind;
|
||||
|
||||
// the string passed in has `_` removed from the lexer
|
||||
pub fn parse_int(s: &str, kind: Kind) -> Result<f64, &'static str> {
|
||||
if kind == Kind::Decimal {
|
||||
return parse_float(s);
|
||||
}
|
||||
let s = if s.contains('_') { Cow::Owned(s.replace('_', "")) } else { Cow::Borrowed(s) };
|
||||
pub fn parse_int(s: &str, kind: Kind, has_sep: bool) -> Result<f64, &'static str> {
|
||||
let s = if has_sep { Cow::Owned(s.replace('_', "")) } else { Cow::Borrowed(s) };
|
||||
let s = s.as_ref();
|
||||
debug_assert!(!s.contains('_'));
|
||||
|
||||
// SAFETY: we just checked that `s` has no `_` characters
|
||||
unsafe { parse_int_without_underscores_unchecked(s, kind) }
|
||||
}
|
||||
|
||||
pub fn parse_float(s: &str, has_sep: bool) -> Result<f64, &'static str> {
|
||||
let s = if has_sep { Cow::Owned(s.replace('_', "")) } else { Cow::Borrowed(s) };
|
||||
debug_assert!(!s.contains('_'));
|
||||
|
||||
// SAFETY: we just checked that `s` has no `_` characters
|
||||
unsafe { parse_float_without_underscores_unchecked(&s) }
|
||||
}
|
||||
|
||||
/// # Safety
|
||||
///
|
||||
/// This function assumes that all `_` characters have been stripped from `s`.
|
||||
/// Violating this assumption does _not_ cause UB. However, this function is
|
||||
/// marked as unsafe to ensure consumers are aware of the assumption.
|
||||
unsafe fn parse_int_without_underscores_unchecked(
|
||||
s: &str,
|
||||
kind: Kind,
|
||||
) -> Result<f64, &'static str> {
|
||||
if kind == Kind::Decimal {
|
||||
return parse_float_without_underscores_unchecked(s);
|
||||
}
|
||||
match kind {
|
||||
Kind::Binary => Ok(parse_binary(&s[2..])),
|
||||
Kind::Octal => {
|
||||
|
|
@ -28,8 +50,12 @@ pub fn parse_int(s: &str, kind: Kind) -> Result<f64, &'static str> {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn parse_float(s: &str) -> Result<f64, &'static str> {
|
||||
let s = if s.contains('_') { Cow::Owned(s.replace('_', "")) } else { Cow::Borrowed(s) };
|
||||
/// # Safety
|
||||
///
|
||||
/// This function assumes that all `_` characters have been stripped from `s`.
|
||||
/// Violating this assumption does _not_ cause UB. However, this function is
|
||||
/// marked as unsafe to ensure consumers are aware of the assumption.
|
||||
unsafe fn parse_float_without_underscores_unchecked(s: &str) -> Result<f64, &'static str> {
|
||||
s.parse::<f64>().map_err(|_| "invalid float")
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -53,6 +53,11 @@ impl<'a> Lexer<'a> {
|
|||
match c {
|
||||
'_' => {
|
||||
self.consume_char();
|
||||
// NOTE: it looks invalid numeric tokens are still parsed.
|
||||
// This seems to be a waste. It also requires us to put this
|
||||
// call here instead of after we ensure the next character
|
||||
// is a number character
|
||||
self.token.set_has_separator();
|
||||
if self.peek().is_some_and(|c| kind.matches_number_char(c)) {
|
||||
self.consume_char();
|
||||
} else {
|
||||
|
|
@ -134,6 +139,11 @@ impl<'a> Lexer<'a> {
|
|||
match c {
|
||||
'_' => {
|
||||
self.consume_char();
|
||||
// NOTE: it looks invalid numeric tokens are still parsed.
|
||||
// This seems to be a waste. It also requires us to put this
|
||||
// call here instead of after we ensure the next character
|
||||
// is an ASCII digit
|
||||
self.token.set_has_separator();
|
||||
if self.peek().is_some_and(|c| c.is_ascii_digit()) {
|
||||
self.consume_char();
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -26,10 +26,16 @@ pub struct Token {
|
|||
/// [Lexer::escaped_templates]: [super::Lexer::escaped_templates]
|
||||
pub escaped: bool,
|
||||
|
||||
/// True if for numeric literal tokens that contain separator characters (`_`).
|
||||
///
|
||||
/// Numeric literals are defined in Section 12.9.3 of the ECMAScript
|
||||
/// standard and include [`Kind::Decimal`], [`Kind::Binary`],
|
||||
/// [`Kind::Octal`], [`Kind::Hex`], etc.
|
||||
has_separator: bool,
|
||||
|
||||
// Padding to fill to 16 bytes.
|
||||
// This makes copying a `Token` 1 x xmmword load & store, rather than 1 x dword + 1 x qword
|
||||
// and `Token::default()` is 1 x xmmword store, rather than 1 x dword + 1 x qword.
|
||||
_padding1: u8,
|
||||
_padding2: u32,
|
||||
}
|
||||
|
||||
|
|
@ -50,4 +56,15 @@ impl Token {
|
|||
pub fn escaped(&self) -> bool {
|
||||
self.escaped
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn has_separator(&self) -> bool {
|
||||
debug_assert!(!self.has_separator || self.kind.is_number());
|
||||
self.has_separator
|
||||
}
|
||||
|
||||
pub(crate) fn set_has_separator(&mut self) {
|
||||
debug_assert!(!self.has_separator || self.kind.is_number() || self.kind == Kind::default());
|
||||
self.has_separator = true;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue