From 20679d1e1e91104282f71e04ebcb23134e70c594 Mon Sep 17 00:00:00 2001 From: overlookmotel Date: Tue, 30 Jan 2024 03:47:26 +0000 Subject: [PATCH] perf(parser): pad `Token` to 16 bytes (#2211) Counter-intuitively, it seems that *increasing* the size of `Token` improves performance slightly. This appears to be because when `Token` is 16 bytes, copying `Token` is a single 16-byte load/store. At present, it's 12 bytes which requires an 8-byte load/store + a 4-byte load/store. https://godbolt.org/z/KPYsn3ab7 This suggests that either: 1. #2010 could be reverted at no cost, and the overhead of the hash table removed. or: 2. We need to get `Token` down to 8 bytes! I have an idea how to *maybe* do (2), so I'd suggest leaving it as is for now until I've been able to research that. NB I also tried putting `#[repr(align(16))]` on `Token` so that copying uses aligned loads/stores. That [hurt the benchmarks very slightly](https://codspeed.io/overlookmotel/oxc/branches/lexer-pad-token), though it might produce a gain on architectures where unaligned loads are more expensive (ARM64 I think?). But I can't test that theory, so have left it out. 
--- crates/oxc_parser/src/lexer/mod.rs | 7 ++----- crates/oxc_parser/src/lexer/token.rs | 12 +++++++++++- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/crates/oxc_parser/src/lexer/mod.rs b/crates/oxc_parser/src/lexer/mod.rs index 0589f46a8..b275d657b 100644 --- a/crates/oxc_parser/src/lexer/mod.rs +++ b/crates/oxc_parser/src/lexer/mod.rs @@ -83,11 +83,8 @@ impl<'a> Lexer<'a> { source = "\0"; } - let token = Token { - // the first token is at the start of file, so is allows on a new line - is_on_new_line: true, - ..Token::default() - }; + // The first token is at the start of file, so is always on a new line + let token = Token::new_on_new_line(); let current = LexerCheckpoint { chars: source.chars(), token, errors_pos: 0 }; Self { allocator, diff --git a/crates/oxc_parser/src/lexer/token.rs b/crates/oxc_parser/src/lexer/token.rs index 5d2f57641..122c5080b 100644 --- a/crates/oxc_parser/src/lexer/token.rs +++ b/crates/oxc_parser/src/lexer/token.rs @@ -25,14 +25,24 @@ pub struct Token { /// [Lexer::escaped_strings]: [super::Lexer::escaped_strings] /// [Lexer::escaped_templates]: [super::Lexer::escaped_templates] pub escaped: bool, + + // Padding to fill to 16 bytes. + // This makes copying a `Token` 1 x xmmword load & store, rather than 1 x dword + 1 x qword + // and `Token::default()` is 1 x xmmword store, rather than 1 x dword + 1 x qword. + _padding1: u8, + _padding2: u32, } #[cfg(target_pointer_width = "64")] mod size_asserts { - oxc_index::assert_eq_size!(super::Token, [u8; 12]); + oxc_index::assert_eq_size!(super::Token, [u8; 16]); } impl Token { + pub(super) fn new_on_new_line() -> Self { + Self { is_on_new_line: true, ..Self::default() } + } + pub fn span(&self) -> Span { Span::new(self.start, self.end) }