refactor(parser): improve lexer pointer maths (#5233)

Small tweaks to pointer maths in the lexer, which may result in slightly more compact assembly.
This commit is contained in:
overlookmotel 2024-08-26 14:22:36 +00:00
parent 7ccde4b853
commit a3ddfdd9d7
2 changed files with 5 additions and 9 deletions

View file

@ -115,10 +115,7 @@ impl<'a> Lexer<'a> {
cold_branch(|| {
// SAFETY: Next byte is `0xE2` which is always 1st byte of a 3-byte UTF-8 char.
// So safe to advance `pos` by 1 and read 2 bytes.
let next2 = unsafe {
pos = pos.add(1);
pos.read2()
};
let next2 = unsafe { pos.add(1).read2() };
if matches!(next2, LS_BYTES_2_AND_3 | PS_BYTES_2_AND_3) {
// Irregular line break
self.token.is_on_new_line = true;
@ -127,11 +124,10 @@ impl<'a> Lexer<'a> {
// But irregular line breaks are rare anyway.
}
// Either way, continue searching.
// Skip 3 bytes (skipped 1 byte above, macro skips 1 more, so skip 1 more here
// to make 3), and continue searching.
// Skip 3 bytes (macro skips 1 already, so skip 2 here), and continue searching.
// SAFETY: `0xE2` is always 1st byte of a 3-byte UTF-8 char,
// so consuming 3 bytes will place `pos` on next UTF-8 char boundary.
pos = unsafe { pos.add(1) };
pos = unsafe { pos.add(2) };
true
})
} else {

View file

@ -40,7 +40,7 @@ impl<'a> Lexer<'a> {
if unsafe { after_dollar.read() } == b'{' {
// Skip `${` and stop searching.
// SAFETY: Consuming `${` leaves `pos` on a UTF-8 char boundary.
pos = unsafe { after_dollar.add(1) };
pos = unsafe { pos.add(2) };
false
} else {
// Not `${`. Continue searching.
@ -212,7 +212,7 @@ impl<'a> Lexer<'a> {
// Skip `${` and stop searching.
// SAFETY: Consuming `${` leaves `pos` on a UTF-8 char boundary.
pos = after_dollar.add(1);
pos = pos.add(2);
false
} else {
// Not `${`. Continue searching.