mirror of
https://github.com/danbulant/oxc
synced 2026-05-25 12:51:57 +00:00
feat(tasks): benchmarks for lexer (#2101)
This PR adds benchmarks for the lexer. I'm doing some work on optimizing the lexer, and I thought it'd be useful to see the effects of changes in isolation, separate from the parser. These benchmarks may not be ideal to keep long-term, but they are useful for now. To make this possible, the `oxc_parser` crate has to expose the lexer; I have done that without adding it to the docs, under the alias `__lexer`.
This commit is contained in:
parent
16b32616c4
commit
36c718ee82
4 changed files with 60 additions and 1 deletions
|
|
@ -20,7 +20,10 @@ pub struct Token {
|
||||||
|
|
||||||
/// True if the identifier / string / template kinds has escaped strings.
|
/// True if the identifier / string / template kinds has escaped strings.
|
||||||
/// The escaped strings are saved in [Lexer::escaped_strings] and [Lexer::escaped_templates] by
|
/// The escaped strings are saved in [Lexer::escaped_strings] and [Lexer::escaped_templates] by
|
||||||
/// [Token::start]
|
/// [Token::start].
|
||||||
|
///
|
||||||
|
/// [Lexer::escaped_strings]: [super::Lexer::escaped_strings]
|
||||||
|
/// [Lexer::escaped_templates]: [super::Lexer::escaped_templates]
|
||||||
pub escaped: bool,
|
pub escaped: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -84,6 +84,12 @@ use crate::{
|
||||||
state::ParserState,
|
state::ParserState,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Expose lexer for benchmarks.
// The module is hidden from the generated docs (`#[doc(hidden)]`) and re-exports `Kind`,
// `Lexer` and `Token` from `super::lexer` under the `__lexer` name.
|
||||||
|
#[doc(hidden)]
|
||||||
|
pub mod __lexer {
|
||||||
|
pub use super::lexer::{Kind, Lexer, Token};
|
||||||
|
}
|
||||||
|
|
||||||
/// Maximum length of source in bytes which can be parsed (~4 GiB).
|
/// Maximum length of source in bytes which can be parsed (~4 GiB).
|
||||||
// Span's start and end are u32s, so size limit is u32::MAX bytes.
|
// Span's start and end are u32s, so size limit is u32::MAX bytes.
|
||||||
pub const MAX_LEN: usize = u32::MAX as usize;
|
pub const MAX_LEN: usize = u32::MAX as usize;
|
||||||
|
|
|
||||||
|
|
@ -43,6 +43,10 @@ harness = false
|
||||||
name = "minifier"
|
name = "minifier"
|
||||||
harness = false
|
harness = false
|
||||||
|
|
||||||
|
[[bench]]
|
||||||
|
name = "lexer"
|
||||||
|
harness = false
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
oxc_allocator = { workspace = true }
|
oxc_allocator = { workspace = true }
|
||||||
oxc_linter = { workspace = true }
|
oxc_linter = { workspace = true }
|
||||||
|
|
|
||||||
46
tasks/benchmark/benches/lexer.rs
Normal file
46
tasks/benchmark/benches/lexer.rs
Normal file
|
|
@ -0,0 +1,46 @@
|
||||||
|
use oxc_allocator::Allocator;
|
||||||
|
use oxc_benchmark::{criterion_group, criterion_main, BenchmarkId, Criterion};
|
||||||
|
use oxc_parser::__lexer::{Kind, Lexer};
|
||||||
|
use oxc_span::SourceType;
|
||||||
|
use oxc_tasks_common::{TestFile, TestFiles};
|
||||||
|
|
||||||
|
fn bench_lexer(criterion: &mut Criterion) {
|
||||||
|
let mut group = criterion.benchmark_group("lexer");
|
||||||
|
|
||||||
|
// Lexer lacks awareness of JS grammar, so it gets confused by a few things without the parser
|
||||||
|
// driving it, notably escapes in regexps and template strings.
|
||||||
|
// So simplify the input for it, by removing backslashes and converting template strings to
|
||||||
|
// normal string literals.
|
||||||
|
let files = TestFiles::complicated()
|
||||||
|
.files()
|
||||||
|
.iter()
|
||||||
|
.map(|file| TestFile {
|
||||||
|
url: file.url.clone(),
|
||||||
|
file_name: file.file_name.clone(),
|
||||||
|
source_text: file.source_text.replace('\\', " ").replace('`', "'"),
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
|
for file in files {
|
||||||
|
let source_type = SourceType::from_path(&file.file_name).unwrap();
|
||||||
|
group.bench_with_input(
|
||||||
|
BenchmarkId::from_parameter(&file.file_name),
|
||||||
|
&file.source_text,
|
||||||
|
|b, source_text| {
|
||||||
|
b.iter_with_large_drop(|| {
|
||||||
|
// Include the allocator drop time to make time measurement consistent.
|
||||||
|
// Otherwise the allocator will allocate huge memory chunks (by power of two) from the
|
||||||
|
// system allocator, which makes time measurement unequal during long runs.
|
||||||
|
let allocator = Allocator::default();
|
||||||
|
let mut lexer = Lexer::new(&allocator, source_text, source_type);
|
||||||
|
while lexer.next_token().kind != Kind::Eof {}
|
||||||
|
allocator
|
||||||
|
});
|
||||||
|
},
|
||||||
|
);
|
||||||
|
}
|
||||||
|
group.finish();
|
||||||
|
}
|
||||||
|
|
||||||
|
criterion_group!(lexer, bench_lexer);
|
||||||
|
criterion_main!(lexer);
|
||||||
Loading…
Reference in a new issue