mirror of
https://github.com/danbulant/oxc
synced 2026-05-25 04:42:10 +00:00
perf(parser): lex JSX strings with memchr (#2528)
Simplify lexing of JSX string attributes. Because the search is for a single byte value (the closing quote), it doesn't require a byte table, so use `memchr` instead. This change doesn't really register on benchmarks, but it's one step closer to removing `AutoCow` and to transitioning all the searches in the lexer to byte-by-byte.
This commit is contained in:
parent
f760108094
commit
24ded3cb15
2 changed files with 38 additions and 38 deletions
|
|
@@ -1,6 +1,7 @@
|
||||||
use super::{AutoCow, Kind, Lexer, Token};
|
use super::{Kind, Lexer, Token};
|
||||||
use crate::diagnostics;
|
use crate::diagnostics;
|
||||||
|
|
||||||
|
use memchr::memchr;
|
||||||
use oxc_syntax::identifier::{is_identifier_part, is_identifier_start};
|
use oxc_syntax::identifier::{is_identifier_part, is_identifier_start};
|
||||||
|
|
||||||
impl<'a> Lexer<'a> {
|
impl<'a> Lexer<'a> {
|
||||||
|
|
@@ -14,25 +15,29 @@ impl<'a> Lexer<'a> {
|
||||||
/// `JSXStringCharacter` but not '
|
/// `JSXStringCharacter` but not '
|
||||||
/// `JSXStringCharacter` ::
|
/// `JSXStringCharacter` ::
|
||||||
/// `SourceCharacter` but not one of `HTMLCharacterReference`
|
/// `SourceCharacter` but not one of `HTMLCharacterReference`
|
||||||
pub(super) fn read_jsx_string_literal(&mut self, delimiter: char) -> Kind {
|
|
||||||
let mut builder = AutoCow::new(self);
|
/// Read JSX string literal.
|
||||||
loop {
|
/// # SAFETY
|
||||||
match self.next_char() {
|
/// * `delimiter` must be an ASCII character.
|
||||||
Some(c @ ('"' | '\'')) => {
|
/// * Next char in `lexer.source` must be ASCII.
|
||||||
if c == delimiter {
|
pub(super) unsafe fn read_jsx_string_literal(&mut self, delimiter: u8) -> Kind {
|
||||||
self.save_string(builder.has_escape(), builder.finish_without_push(self));
|
// Skip opening quote
|
||||||
return Kind::Str;
|
debug_assert!(delimiter.is_ascii());
|
||||||
}
|
// SAFETY: Caller guarantees next byte is ASCII, so `.add(1)` is a UTF-8 char boundary
|
||||||
builder.push_matching(c);
|
let after_opening_quote = self.source.position().add(1);
|
||||||
}
|
let remaining = self.source.str_from_pos_to_end(after_opening_quote);
|
||||||
Some(other) => {
|
|
||||||
builder.push_matching(other);
|
let len = memchr(delimiter, remaining.as_bytes());
|
||||||
}
|
if let Some(len) = len {
|
||||||
None => {
|
// SAFETY: `after_opening_quote` + `len` is position of delimiter.
|
||||||
self.error(diagnostics::UnterminatedString(self.unterminated_range()));
|
// Caller guarantees delimiter is ASCII, so 1 byte after it is a UTF-8 char boundary.
|
||||||
return Kind::Undetermined;
|
let after_closing_quote = after_opening_quote.add(len + 1);
|
||||||
}
|
self.source.set_position(after_closing_quote);
|
||||||
}
|
Kind::Str
|
||||||
|
} else {
|
||||||
|
self.source.advance_to_end();
|
||||||
|
self.error(diagnostics::UnterminatedString(self.unterminated_range()));
|
||||||
|
Kind::Undetermined
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@@ -25,6 +25,13 @@ static SINGLE_QUOTE_STRING_END_TABLE: SafeByteMatchTable =
|
||||||
/// `$table` must only match `$delimiter`, '\', '\r' or '\n'.
|
/// `$table` must only match `$delimiter`, '\', '\r' or '\n'.
|
||||||
macro_rules! handle_string_literal {
|
macro_rules! handle_string_literal {
|
||||||
($lexer:ident, $delimiter:expr, $table:ident) => {{
|
($lexer:ident, $delimiter:expr, $table:ident) => {{
|
||||||
|
debug_assert!($delimiter.is_ascii());
|
||||||
|
|
||||||
|
if $lexer.context == LexerContext::JsxAttributeValue {
|
||||||
|
// SAFETY: Caller guarantees `$delimiter` is ASCII, and next char is ASCII
|
||||||
|
return $lexer.read_jsx_string_literal($delimiter);
|
||||||
|
}
|
||||||
|
|
||||||
// Skip opening quote.
|
// Skip opening quote.
|
||||||
// SAFETY: Caller guarantees next byte is ASCII, so safe to advance past it.
|
// SAFETY: Caller guarantees next byte is ASCII, so safe to advance past it.
|
||||||
let after_opening_quote = $lexer.source.position().add(1);
|
let after_opening_quote = $lexer.source.position().add(1);
|
||||||
|
|
@@ -157,30 +164,18 @@ impl<'a> Lexer<'a> {
|
||||||
/// # SAFETY
|
/// # SAFETY
|
||||||
/// Next character must be `"`.
|
/// Next character must be `"`.
|
||||||
pub(super) unsafe fn read_string_literal_double_quote(&mut self) -> Kind {
|
pub(super) unsafe fn read_string_literal_double_quote(&mut self) -> Kind {
|
||||||
if self.context == LexerContext::JsxAttributeValue {
|
// SAFETY: Caller guarantees next char is `"`, which is ASCII.
|
||||||
// SAFETY: Caller guarantees next char is `"`
|
// b'"' is an ASCII byte. `DOUBLE_QUOTE_STRING_END_TABLE` is a `SafeByteMatchTable`.
|
||||||
self.source.next_byte_unchecked();
|
unsafe { handle_string_literal!(self, b'"', DOUBLE_QUOTE_STRING_END_TABLE) }
|
||||||
self.read_jsx_string_literal('"')
|
|
||||||
} else {
|
|
||||||
// SAFETY: Caller guarantees next char is `"`, which is ASCII.
|
|
||||||
// b'"' is an ASCII byte. `DOUBLE_QUOTE_STRING_END_TABLE` is a `SafeByteMatchTable`.
|
|
||||||
unsafe { handle_string_literal!(self, b'"', DOUBLE_QUOTE_STRING_END_TABLE) }
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Read string literal delimited with `'`.
|
/// Read string literal delimited with `'`.
|
||||||
/// # SAFETY
|
/// # SAFETY
|
||||||
/// Next character must be `'`.
|
/// Next character must be `'`.
|
||||||
pub(super) unsafe fn read_string_literal_single_quote(&mut self) -> Kind {
|
pub(super) unsafe fn read_string_literal_single_quote(&mut self) -> Kind {
|
||||||
if self.context == LexerContext::JsxAttributeValue {
|
// SAFETY: Caller guarantees next char is `'`, which is ASCII.
|
||||||
// SAFETY: Caller guarantees next char is `'`
|
// b'\'' is an ASCII byte. `SINGLE_QUOTE_STRING_END_TABLE` is a `SafeByteMatchTable`.
|
||||||
self.source.next_byte_unchecked();
|
unsafe { handle_string_literal!(self, b'\'', SINGLE_QUOTE_STRING_END_TABLE) }
|
||||||
self.read_jsx_string_literal('\'')
|
|
||||||
} else {
|
|
||||||
// SAFETY: Caller guarantees next char is `"`, which is ASCII.
|
|
||||||
// b'\'' is an ASCII byte. `SINGLE_QUOTE_STRING_END_TABLE` is a `SafeByteMatchTable`.
|
|
||||||
unsafe { handle_string_literal!(self, b'\'', SINGLE_QUOTE_STRING_END_TABLE) }
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Save the string if it is escaped
|
/// Save the string if it is escaped
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue