mirror of
https://github.com/danbulant/oxc
synced 2026-05-24 12:21:58 +00:00
refactor(parser): small efficiencies in byte_search macro usage (#2554)
A few small efficiencies in usage of `byte_search` macro for lexing comments.
This commit is contained in:
parent
3d354d44a3
commit
c579620701
1 changed file with 17 additions and 11 deletions
|
|
@ -38,7 +38,7 @@ impl<'a> Lexer<'a> {
|
||||||
self.trivia_builder
|
self.trivia_builder
|
||||||
.add_single_line_comment(self.token.start, self.source.offset_of(pos));
|
.add_single_line_comment(self.token.start, self.source.offset_of(pos));
|
||||||
// SAFETY: Safe to consume `\r` or `\n` as both are ASCII
|
// SAFETY: Safe to consume `\r` or `\n` as both are ASCII
|
||||||
unsafe { pos = pos.add(1) };
|
pos = unsafe { pos.add(1) };
|
||||||
// We've found the end. Do not continue searching.
|
// We've found the end. Do not continue searching.
|
||||||
false
|
false
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -48,14 +48,14 @@ impl<'a> Lexer<'a> {
|
||||||
// SAFETY: Next byte is `0xE2` which is always 1st byte of a 3-byte UTF-8 char.
|
// SAFETY: Next byte is `0xE2` which is always 1st byte of a 3-byte UTF-8 char.
|
||||||
// So safe to advance `pos` by 1 and read 2 bytes.
|
// So safe to advance `pos` by 1 and read 2 bytes.
|
||||||
let next2 = unsafe { pos.add(1).read2() };
|
let next2 = unsafe { pos.add(1).read2() };
|
||||||
if next2 == LS_BYTES_2_AND_3 || next2 == PS_BYTES_2_AND_3 {
|
if matches!(next2, LS_BYTES_2_AND_3 | PS_BYTES_2_AND_3) {
|
||||||
// Irregular line break
|
// Irregular line break
|
||||||
self.trivia_builder
|
self.trivia_builder
|
||||||
.add_single_line_comment(self.token.start, self.source.offset_of(pos));
|
.add_single_line_comment(self.token.start, self.source.offset_of(pos));
|
||||||
// Advance `pos` to after this char.
|
// Advance `pos` to after this char.
|
||||||
// SAFETY: `0xE2` is always 1st byte of a 3-byte UTF-8 char,
|
// SAFETY: `0xE2` is always 1st byte of a 3-byte UTF-8 char,
|
||||||
// so consuming 3 bytes will place `pos` on next UTF-8 char boundary.
|
// so consuming 3 bytes will place `pos` on next UTF-8 char boundary.
|
||||||
unsafe { pos = pos.add(3) };
|
pos = unsafe { pos.add(3) };
|
||||||
// We've found the end. Do not continue searching.
|
// We've found the end. Do not continue searching.
|
||||||
false
|
false
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -63,7 +63,7 @@ impl<'a> Lexer<'a> {
|
||||||
// Skip 3 bytes (macro skips 1 already, so skip 2 here), and continue searching.
|
// Skip 3 bytes (macro skips 1 already, so skip 2 here), and continue searching.
|
||||||
// SAFETY: `0xE2` is always 1st byte of a 3-byte UTF-8 char,
|
// SAFETY: `0xE2` is always 1st byte of a 3-byte UTF-8 char,
|
||||||
// so consuming 3 bytes will place `pos` on next UTF-8 char boundary.
|
// so consuming 3 bytes will place `pos` on next UTF-8 char boundary.
|
||||||
unsafe { pos = pos.add(2) };
|
pos = unsafe { pos.add(2) };
|
||||||
true
|
true
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
@ -93,13 +93,15 @@ impl<'a> Lexer<'a> {
|
||||||
continue_if: |next_byte, pos| {
|
continue_if: |next_byte, pos| {
|
||||||
// Match found. Decide whether to continue searching.
|
// Match found. Decide whether to continue searching.
|
||||||
if next_byte == b'*' {
|
if next_byte == b'*' {
|
||||||
if pos.addr() < self.source.end_addr() - 1 {
|
// SAFETY: Next byte is `*` (ASCII) so after it is UTF-8 char boundary
|
||||||
|
let after_star = unsafe { pos.add(1) };
|
||||||
|
if after_star.addr() < self.source.end_addr() {
|
||||||
// If next byte isn't `/`, continue
|
// If next byte isn't `/`, continue
|
||||||
// SAFETY: Have checked there's at least 1 further byte to read
|
// SAFETY: Have checked there's at least 1 further byte to read
|
||||||
if unsafe { pos.add(1).read() } == b'/' {
|
if unsafe { after_star.read() } == b'/' {
|
||||||
// Consume `*/`
|
// Consume `*/`
|
||||||
// SAFETY: Consuming `*/` leaves `pos` on a UTF-8 char boundary
|
// SAFETY: Consuming `*/` leaves `pos` on a UTF-8 char boundary
|
||||||
unsafe { pos = pos.add(2) };
|
pos = unsafe { pos.add(2) };
|
||||||
false
|
false
|
||||||
} else {
|
} else {
|
||||||
true
|
true
|
||||||
|
|
@ -115,8 +117,11 @@ impl<'a> Lexer<'a> {
|
||||||
cold_branch(|| {
|
cold_branch(|| {
|
||||||
// SAFETY: Next byte is `0xE2` which is always 1st byte of a 3-byte UTF-8 char.
|
// SAFETY: Next byte is `0xE2` which is always 1st byte of a 3-byte UTF-8 char.
|
||||||
// So safe to advance `pos` by 1 and read 2 bytes.
|
// So safe to advance `pos` by 1 and read 2 bytes.
|
||||||
let next2 = unsafe { pos.add(1).read2() };
|
let next2 = unsafe {
|
||||||
if next2 == LS_BYTES_2_AND_3 || next2 == PS_BYTES_2_AND_3 {
|
pos = pos.add(1);
|
||||||
|
pos.read2()
|
||||||
|
};
|
||||||
|
if matches!(next2, LS_BYTES_2_AND_3 | PS_BYTES_2_AND_3) {
|
||||||
// Irregular line break
|
// Irregular line break
|
||||||
self.token.is_on_new_line = true;
|
self.token.is_on_new_line = true;
|
||||||
// Ideally we'd go on to `skip_multi_line_comment_after_line_break` here
|
// Ideally we'd go on to `skip_multi_line_comment_after_line_break` here
|
||||||
|
|
@ -124,10 +129,11 @@ impl<'a> Lexer<'a> {
|
||||||
// But irregular line breaks are rare anyway.
|
// But irregular line breaks are rare anyway.
|
||||||
}
|
}
|
||||||
// Either way, continue searching.
|
// Either way, continue searching.
|
||||||
// Skip 3 bytes (macro skips 1 already, so skip 2 here), and continue searching.
|
// Skip 3 bytes (skipped 1 byte above, macro skips 1 more, so skip 1 more here
|
||||||
|
// to make 3), and continue searching.
|
||||||
// SAFETY: `0xE2` is always 1st byte of a 3-byte UTF-8 char,
|
// SAFETY: `0xE2` is always 1st byte of a 3-byte UTF-8 char,
|
||||||
// so consuming 3 bytes will place `pos` on next UTF-8 char boundary.
|
// so consuming 3 bytes will place `pos` on next UTF-8 char boundary.
|
||||||
unsafe { pos = pos.add(2) };
|
pos = unsafe { pos.add(1) };
|
||||||
true
|
true
|
||||||
})
|
})
|
||||||
} else {
|
} else {
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue