feat(tasks): benchmarks for lexer (#2101)

This PR adds benchmarks for the lexer. I'm doing some work on optimizing the lexer, and I thought it'd be useful to see the effects of changes in isolation, separate from the parser. These benchmarks may not be ideal to keep long-term, but for now they'd be useful. To make this possible, the `oxc_parser` crate has to expose the lexer; I have done that without adding it to the docs, under the alias `__lexer`.
2026-05-25 12:51:57 +00:00 · 2024-01-21 14:32:50 +00:00 · 2024-01-21 14:32:50 +00:00 · 36c718ee82
commit 36c718ee82
parent 16b32616c4
4 changed files with 60 additions and 1 deletions
--- a/crates/oxc_parser/src/lexer/token.rs
+++ b/crates/oxc_parser/src/lexer/token.rs
@@ -20,7 +20,10 @@ pub struct Token {
    /// True if the identifier / string / template kinds has escaped strings.
    /// The escaped strings are saved in [Lexer::escaped_strings] and [Lexer::escaped_templates] by
-    /// [Token::start]
+    /// [Token::start].
    ///
    /// [Lexer::escaped_strings]: [super::Lexer::escaped_strings]
    /// [Lexer::escaped_templates]: [super::Lexer::escaped_templates]
    pub escaped: bool,
 }
--- a/crates/oxc_parser/src/lib.rs
+++ b/crates/oxc_parser/src/lib.rs
@@ -84,6 +84,12 @@ use crate::{
    state::ParserState,
 };
 // Expose lexer for benchmarks, so that they can drive it directly without going through
 // the parser.
 // The module is marked `#[doc(hidden)]` and has a `__`-prefixed name because it is not meant
 // to appear in the crate's documentation.
 #[doc(hidden)]
 pub mod __lexer {
    // Re-export only the lexer items: `Kind`, `Lexer` and `Token`.
    pub use super::lexer::{Kind, Lexer, Token};
 }
 /// Maximum length of source in bytes which can be parsed (~4 GiB).
 // Span's start and end are u32s, so size limit is u32::MAX bytes.
 pub const MAX_LEN: usize = u32::MAX as usize;
--- a/tasks/benchmark/Cargo.toml
+++ b/tasks/benchmark/Cargo.toml
@@ -43,6 +43,10 @@ harness = false
 name    = "minifier"
 harness = false
 [[bench]]
 name    = "lexer"
 harness = false
 [dependencies]
 oxc_allocator    = { workspace = true }
 oxc_linter       = { workspace = true }
--- a/tasks/benchmark/benches/lexer.rs
+++ b/tasks/benchmark/benches/lexer.rs
@@ -0,0 +1,46 @@
 use oxc_allocator::Allocator;
 use oxc_benchmark::{criterion_group, criterion_main, BenchmarkId, Criterion};
 use oxc_parser::__lexer::{Kind, Lexer};
 use oxc_span::SourceType;
 use oxc_tasks_common::{TestFile, TestFiles};
 /// Benchmarks the lexer in isolation on the "complicated" test files.
 ///
 /// Registers one benchmark per file in the `lexer` group. Each iteration creates a fresh
 /// allocator and lexer, then pulls tokens until `Kind::Eof` is reached.
 fn bench_lexer(criterion: &mut Criterion) {
    let mut group = criterion.benchmark_group("lexer");

    // Without the parser driving it, the lexer has no knowledge of JS grammar and trips over
    // a few constructs, notably escapes in regexps and template strings.
    // Work around that by feeding it simplified input: every backslash becomes a space and
    // every backtick becomes a single quote (turning template strings into plain string
    // literals).
    let originals = TestFiles::complicated();
    let mut inputs = Vec::new();
    for original in originals.files().iter() {
        let without_backslashes = original.source_text.replace('\\', " ");
        let simplified = without_backslashes.replace('`', "'");
        inputs.push(TestFile {
            url: original.url.clone(),
            file_name: original.file_name.clone(),
            source_text: simplified,
        });
    }

    for input in &inputs {
        let source_type = SourceType::from_path(&input.file_name).unwrap();
        let id = BenchmarkId::from_parameter(&input.file_name);
        group.bench_with_input(id, &input.source_text, |bencher, text| {
            bencher.iter_with_large_drop(|| {
                // Return the allocator from the routine rather than dropping it here, so that
                // `iter_with_large_drop` is responsible for disposing of it.
                // Rationale carried over from the original: the allocator grabs huge
                // (power-of-two sized) chunks from the system allocator, which makes time
                // measurement uneven during long runs.
                let allocator = Allocator::default();
                let mut lexer = Lexer::new(&allocator, text, source_type);
                // Drain the token stream; only the lexing work itself is of interest.
                loop {
                    if lexer.next_token().kind == Kind::Eof {
                        break;
                    }
                }
                allocator
            });
        });
    }

    group.finish();
 }
 criterion_group!(lexer, bench_lexer);
 criterion_main!(lexer);