From 36c718ee82301bed4af294c21ffd4a039dcdec46 Mon Sep 17 00:00:00 2001 From: overlookmotel Date: Sun, 21 Jan 2024 14:32:50 +0000 Subject: [PATCH] feat(tasks): benchmarks for lexer (#2101) This PR adds benchmarks for the lexer. I'm doing some work on optimizing the lexer and I thought it'd be useful to see the effects of changes in isolation, separate from the parser. These benchmarks may not be ideal to keep long-term, but for now it'd be useful. In order to do so, it's necessary for `oxc_parser` crate to expose the lexer, but have done that without adding it to the docs, and using an alias `__lexer`. --- crates/oxc_parser/src/lexer/token.rs | 5 ++- crates/oxc_parser/src/lib.rs | 6 ++++ tasks/benchmark/Cargo.toml | 4 +++ tasks/benchmark/benches/lexer.rs | 46 ++++++++++++++++++++++++++++ 4 files changed, 60 insertions(+), 1 deletion(-) create mode 100644 tasks/benchmark/benches/lexer.rs diff --git a/crates/oxc_parser/src/lexer/token.rs b/crates/oxc_parser/src/lexer/token.rs index bd07135c8..5d2f57641 100644 --- a/crates/oxc_parser/src/lexer/token.rs +++ b/crates/oxc_parser/src/lexer/token.rs @@ -20,7 +20,10 @@ pub struct Token { /// True if the identifier / string / template kinds has escaped strings. /// The escaped strings are saved in [Lexer::escaped_strings] and [Lexer::escaped_templates] by - /// [Token::start] + /// [Token::start]. + /// + /// [Lexer::escaped_strings]: [super::Lexer::escaped_strings] + /// [Lexer::escaped_templates]: [super::Lexer::escaped_templates] pub escaped: bool, } diff --git a/crates/oxc_parser/src/lib.rs b/crates/oxc_parser/src/lib.rs index 96308407f..278f00daf 100644 --- a/crates/oxc_parser/src/lib.rs +++ b/crates/oxc_parser/src/lib.rs @@ -84,6 +84,12 @@ use crate::{ state::ParserState, }; +// Expose lexer for benchmarks +#[doc(hidden)] +pub mod __lexer { + pub use super::lexer::{Kind, Lexer, Token}; +} + /// Maximum length of source in bytes which can be parsed (~4 GiB). 
// Span's start and end are u32s, so size limit is u32::MAX bytes. pub const MAX_LEN: usize = u32::MAX as usize; diff --git a/tasks/benchmark/Cargo.toml b/tasks/benchmark/Cargo.toml index 69f966372..14f83ab1e 100644 --- a/tasks/benchmark/Cargo.toml +++ b/tasks/benchmark/Cargo.toml @@ -43,6 +43,10 @@ harness = false name = "minifier" harness = false +[[bench]] +name = "lexer" +harness = false + [dependencies] oxc_allocator = { workspace = true } oxc_linter = { workspace = true } diff --git a/tasks/benchmark/benches/lexer.rs b/tasks/benchmark/benches/lexer.rs new file mode 100644 index 000000000..4ac5cdce6 --- /dev/null +++ b/tasks/benchmark/benches/lexer.rs @@ -0,0 +1,46 @@ +use oxc_allocator::Allocator; +use oxc_benchmark::{criterion_group, criterion_main, BenchmarkId, Criterion}; +use oxc_parser::__lexer::{Kind, Lexer}; +use oxc_span::SourceType; +use oxc_tasks_common::{TestFile, TestFiles}; + +fn bench_lexer(criterion: &mut Criterion) { + let mut group = criterion.benchmark_group("lexer"); + + // Lexer lacks awareness of JS grammar, so it gets confused by a few things without the parser + // driving it, notably escapes in regexps and template strings. + // So simplify the input for it, by removing backslashes and converting template strings to + // normal string literals. + let files = TestFiles::complicated() + .files() + .iter() + .map(|file| TestFile { + url: file.url.clone(), + file_name: file.file_name.clone(), + source_text: file.source_text.replace('\\', " ").replace('`', "'"), + }) + .collect::<Vec<_>>(); + + for file in files { + let source_type = SourceType::from_path(&file.file_name).unwrap(); + group.bench_with_input( + BenchmarkId::from_parameter(&file.file_name), + &file.source_text, + |b, source_text| { + b.iter_with_large_drop(|| { + // Include the allocator drop time to make time measurement consistent. 
+ // Otherwise the allocator will allocate huge memory chunks (by power of two) from the + // system allocator, which makes time measurement unequal during long runs. + let allocator = Allocator::default(); + let mut lexer = Lexer::new(&allocator, source_text, source_type); + while lexer.next_token().kind != Kind::Eof {} + allocator + }); + }, + ); + } + group.finish(); +} + +criterion_group!(lexer, bench_lexer); +criterion_main!(lexer);