mirror of
https://github.com/danbulant/oxc
synced 2026-05-25 04:42:10 +00:00
refactor(parser): simplify byte_search macro (#2552)
This PR greatly simplifies the `byte_search!` macro, mainly by removing `cold_branch()` from the "not enough bytes remaining for a batch" branch. That allows a refactoring in which `handle_match` and `continue_if` no longer need to be written out twice. The effect on performance is inconsistent - a little better on some benchmarks, a little worse on others - but not by a significant amount either way. In my view, the benefit of making the macro simpler outweighs a small speed loss anyway.
This commit is contained in:
parent
25e03cb0ef
commit
34ecdd58d8
1 changed file with 32 additions and 54 deletions
|
|
@ -479,9 +479,8 @@ macro_rules! byte_search {
|
||||||
|
|
||||||
let mut $pos = $start;
|
let mut $pos = $start;
|
||||||
#[allow(unused_unsafe)] // Silence warnings if macro called in unsafe code
|
#[allow(unused_unsafe)] // Silence warnings if macro called in unsafe code
|
||||||
'outer: loop {
|
let $match_byte = 'outer: loop {
|
||||||
#[allow(clippy::redundant_else)]
|
let $continue_byte = if $pos.addr() <= $lexer.source.end_for_batch_search_addr() {
|
||||||
if $pos.addr() <= $lexer.source.end_for_batch_search_addr() {
|
|
||||||
// Search a batch of `SEARCH_BATCH_SIZE` bytes.
|
// Search a batch of `SEARCH_BATCH_SIZE` bytes.
|
||||||
//
|
//
|
||||||
// `'inner: loop {}` is not a real loop - it always exits on first turn.
|
// `'inner: loop {}` is not a real loop - it always exits on first turn.
|
||||||
|
|
@ -494,7 +493,7 @@ macro_rules! byte_search {
|
||||||
// `$pos.addr() <= lexer.source.end_for_batch_search_addr()` check above ensures
|
// `$pos.addr() <= lexer.source.end_for_batch_search_addr()` check above ensures
|
||||||
// there are at least `SEARCH_BATCH_SIZE` bytes remaining in `lexer.source`.
|
// there are at least `SEARCH_BATCH_SIZE` bytes remaining in `lexer.source`.
|
||||||
// So calls to `$pos.read()` and `$pos.add(1)` in this loop cannot go out of bounds.
|
// So calls to `$pos.read()` and `$pos.add(1)` in this loop cannot go out of bounds.
|
||||||
let $match_byte = 'inner: loop {
|
'inner: loop {
|
||||||
for _i in 0..crate::lexer::search::SEARCH_BATCH_SIZE {
|
for _i in 0..crate::lexer::search::SEARCH_BATCH_SIZE {
|
||||||
// SAFETY: `$pos` cannot go out of bounds in this loop (see above)
|
// SAFETY: `$pos` cannot go out of bounds in this loop (see above)
|
||||||
let byte = unsafe { $pos.read() };
|
let byte = unsafe { $pos.read() };
|
||||||
|
|
@ -509,57 +508,17 @@ macro_rules! byte_search {
|
||||||
}
|
}
|
||||||
// No match in batch - search next batch
|
// No match in batch - search next batch
|
||||||
continue 'outer;
|
continue 'outer;
|
||||||
};
|
|
||||||
|
|
||||||
// Found match. Check if should continue.
|
|
||||||
{
|
|
||||||
let $continue_byte = $match_byte;
|
|
||||||
if $should_continue {
|
|
||||||
// Not a match after all - continue searching.
|
|
||||||
// SAFETY: `pos` is not at end of source, so safe to advance 1 byte.
|
|
||||||
// See above about UTF-8 character boundaries invariant.
|
|
||||||
$pos = unsafe { $pos.add(1) };
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Advance `lexer.source`'s position up to `$pos`, consuming unmatched bytes.
|
|
||||||
// SAFETY: See above about UTF-8 character boundaries invariant.
|
|
||||||
$lexer.source.set_position($pos);
|
|
||||||
|
|
||||||
let $match_start = $start;
|
|
||||||
return $match_handler;
|
|
||||||
} else {
|
} else {
|
||||||
// Not enough bytes remaining to process as a batch.
|
// Not enough bytes remaining for a batch. Process byte-by-byte.
|
||||||
// This branch marked `#[cold]` as should be very uncommon in normal-length JS files.
|
// Same as above, `'inner: loop {}` is not a real loop here - always exits on first turn.
|
||||||
// Very short JS files will be penalized, but they'll be very fast to parse anyway.
|
let end_addr = $lexer.source.end_addr();
|
||||||
// TODO: Could extend very short files with padding during parser initialization
|
'inner: loop {
|
||||||
// to remove that problem.
|
|
||||||
return crate::lexer::cold_branch(|| {
|
|
||||||
let end_addr = $lexer.source.end_addr();
|
|
||||||
while $pos.addr() < end_addr {
|
while $pos.addr() < end_addr {
|
||||||
// SAFETY: `pos` is not at end of source, so safe to read a byte
|
// SAFETY: `pos` is not at end of source, so safe to read a byte
|
||||||
let $match_byte = unsafe { $pos.read() };
|
let byte = unsafe { $pos.read() };
|
||||||
if $table.matches($match_byte) {
|
if $table.matches(byte) {
|
||||||
// Found match.
|
break 'inner byte;
|
||||||
// Check if should continue.
|
|
||||||
{
|
|
||||||
let $continue_byte = $match_byte;
|
|
||||||
if $should_continue {
|
|
||||||
// Not a match after all - continue searching.
|
|
||||||
// SAFETY: `pos` is not at end of source, so safe to advance 1 byte.
|
|
||||||
// See above about UTF-8 character boundaries invariant.
|
|
||||||
$pos = unsafe { $pos.add(1) };
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Advance `lexer.source`'s position up to `pos`, consuming unmatched bytes.
|
|
||||||
// SAFETY: See above about UTF-8 character boundaries invariant.
|
|
||||||
$lexer.source.set_position($pos);
|
|
||||||
|
|
||||||
let $match_start = $start;
|
|
||||||
return $match_handler;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// No match - continue searching
|
// No match - continue searching
|
||||||
|
|
@ -573,10 +532,29 @@ macro_rules! byte_search {
|
||||||
$lexer.source.set_position($pos);
|
$lexer.source.set_position($pos);
|
||||||
|
|
||||||
let $eof_start = $start;
|
let $eof_start = $start;
|
||||||
$eof_handler
|
return $eof_handler;
|
||||||
});
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Found match. Check if should continue.
|
||||||
|
if $should_continue {
|
||||||
|
// Not a match after all - continue searching.
|
||||||
|
// SAFETY: `pos` is not at end of source, so safe to advance 1 byte.
|
||||||
|
// See above about UTF-8 character boundaries invariant.
|
||||||
|
$pos = unsafe { $pos.add(1) };
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
// Match confirmed
|
||||||
|
break $continue_byte;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Advance `lexer.source`'s position up to `$pos`, consuming unmatched bytes.
|
||||||
|
// SAFETY: See above about UTF-8 character boundaries invariant.
|
||||||
|
$lexer.source.set_position($pos);
|
||||||
|
|
||||||
|
let $match_start = $start;
|
||||||
|
return $match_handler;
|
||||||
}};
|
}};
|
||||||
}
|
}
|
||||||
pub(crate) use byte_search;
|
pub(crate) use byte_search;
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue