refactor(parser): small efficiencies in byte_search macro usage (#2554)

A few small efficiencies in usage of `byte_search` macro for lexing comments.
2026-05-24 12:21:58 +00:00 · 2024-03-01 13:23:34 +00:00 · 2024-03-01 13:23:34 +00:00 · c579620701
commit c579620701
parent 3d354d44a3
1 changed files with 17 additions and 11 deletions
--- a/crates/oxc_parser/src/lexer/comment.rs
+++ b/crates/oxc_parser/src/lexer/comment.rs
@ -38,7 +38,7 @@ impl<'a> Lexer<'a> {
                    self.trivia_builder
                        .add_single_line_comment(self.token.start, self.source.offset_of(pos));
                    // SAFETY: Safe to consume `\r` or `\n` as both are ASCII
-                    unsafe { pos = pos.add(1) };
+                    pos = unsafe { pos.add(1) };
                    // We've found the end. Do not continue searching.
                    false
                } else {
@ -48,14 +48,14 @@ impl<'a> Lexer<'a> {
                        // SAFETY: Next byte is `0xE2` which is always 1st byte of a 3-byte UTF-8 char.
                        // So safe to advance `pos` by 1 and read 2 bytes.
                        let next2 = unsafe { pos.add(1).read2() };
-                        if next2 == LS_BYTES_2_AND_3 || next2 == PS_BYTES_2_AND_3 {
+                        if matches!(next2, LS_BYTES_2_AND_3 | PS_BYTES_2_AND_3) {
                            // Irregular line break
                            self.trivia_builder
                                .add_single_line_comment(self.token.start, self.source.offset_of(pos));
                            // Advance `pos` to after this char.
                            // SAFETY: `0xE2` is always 1st byte of a 3-byte UTF-8 char,
                            // so consuming 3 bytes will place `pos` on next UTF-8 char boundary.
-                            unsafe { pos = pos.add(3) };
+                            pos = unsafe { pos.add(3) };
                            // We've found the end. Do not continue searching.
                            false
                        } else {
@ -63,7 +63,7 @@ impl<'a> Lexer<'a> {
                            // Skip 3 bytes (macro skips 1 already, so skip 2 here), and continue searching.
                            // SAFETY: `0xE2` is always 1st byte of a 3-byte UTF-8 char,
                            // so consuming 3 bytes will place `pos` on next UTF-8 char boundary.
-                            unsafe { pos = pos.add(2) };
+                            pos = unsafe { pos.add(2) };
                            true
                        }
                    })
@ -93,13 +93,15 @@ impl<'a> Lexer<'a> {
            continue_if: |next_byte, pos| {
                // Match found. Decide whether to continue searching.
                if next_byte == b'*' {
-                    if pos.addr() < self.source.end_addr() - 1 {
+                    // SAFETY: Next byte is `*` (ASCII) so after it is UTF-8 char boundary
                    let after_star = unsafe { pos.add(1) };
                    if after_star.addr() < self.source.end_addr() {
                        // If next byte isn't `/`, continue
                        // SAFETY: Have checked there's at least 1 further byte to read
-                        if unsafe { pos.add(1).read() } == b'/' {
+                        if unsafe { after_star.read() } == b'/' {
                            // Consume `*/`
                            // SAFETY: Consuming `*/` leaves `pos` on a UTF-8 char boundary
-                            unsafe { pos = pos.add(2) };
+                            pos = unsafe { pos.add(2) };
                            false
                        } else {
                            true
@ -115,8 +117,11 @@ impl<'a> Lexer<'a> {
                    cold_branch(|| {
                        // SAFETY: Next byte is `0xE2` which is always 1st byte of a 3-byte UTF-8 char.
                        // So safe to advance `pos` by 1 and read 2 bytes.
-                        let next2 = unsafe { pos.add(1).read2() };
+                        let next2 = unsafe {
-                        if next2 == LS_BYTES_2_AND_3 || next2 == PS_BYTES_2_AND_3 {
+                            pos = pos.add(1);
                            pos.read2()
                        };
                        if matches!(next2, LS_BYTES_2_AND_3 | PS_BYTES_2_AND_3) {
                            // Irregular line break
                            self.token.is_on_new_line = true;
                            // Ideally we'd go on to `skip_multi_line_comment_after_line_break` here
@ -124,10 +129,11 @@ impl<'a> Lexer<'a> {
                            // But irregular line breaks are rare anyway.
                        }
                        // Either way, continue searching.
-                        // Skip 3 bytes (macro skips 1 already, so skip 2 here), and continue searching.
+                        // Skip 3 bytes (skipped 1 byte above, macro skips 1 more, so skip 1 more here
                        // to make 3), and continue searching.
                        // SAFETY: `0xE2` is always 1st byte of a 3-byte UTF-8 char,
                        // so consuming 3 bytes will place `pos` on next UTF-8 char boundary.
-                        unsafe { pos = pos.add(2) };
+                        pos = unsafe { pos.add(1) };
                        true
                    })
                } else {