mirror of
https://github.com/danbulant/oxc
synced 2026-05-21 13:18:59 +00:00
parser(refactor): promise only one Source on a thread at a time (#2340)
Introduce invariant that only a single `lexer::Source` can exist on a thread at one time. This is a preparatory step for #2341. 2 notes: (1) Restriction is only 1 x `ParserImpl` / `Lexer` / `Source` on 1 *thread* at a time, not globally. So this does not prevent parsing multiple files simultaneously on different threads. (2) Restriction does not apply to public type `Parser`, only `ParserImpl`. `ParserImpl`s are not created in `Parser::new`, but instead in `Parser::parse`, where they're created and then immediately consumed. So the end user is also free to create multiple `Parser` instances (if they want to for some reason) on the same thread.
This commit is contained in:
parent
ebc08d4e1e
commit
aef593fb50
7 changed files with 119 additions and 24 deletions
|
|
@ -35,3 +35,7 @@ oxc_ast = { workspace = true, features = ["serde"] }
|
|||
miette = { workspace = true, features = ["fancy-no-backtrace"] }
|
||||
serde_json = { workspace = true }
|
||||
ouroboros = "0.18.3" # for `multi-thread` example
|
||||
|
||||
[features]
|
||||
# Expose Lexer for benchmarks
|
||||
benchmarking = []
|
||||
|
|
|
|||
|
|
@ -44,7 +44,7 @@ pub use self::{
|
|||
number::{parse_big_int, parse_float, parse_int},
|
||||
token::Token,
|
||||
};
|
||||
use crate::diagnostics;
|
||||
use crate::{diagnostics, UniquePromise};
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct LexerCheckpoint<'a> {
|
||||
|
|
@ -97,8 +97,17 @@ pub struct Lexer<'a> {
|
|||
|
||||
#[allow(clippy::unused_self)]
|
||||
impl<'a> Lexer<'a> {
|
||||
pub fn new(allocator: &'a Allocator, source_text: &'a str, source_type: SourceType) -> Self {
|
||||
let source = Source::new(source_text);
|
||||
/// Create new `Lexer`.
|
||||
///
|
||||
/// Requiring a `UniquePromise` to be provided guarantees only 1 `Lexer` can exist
|
||||
/// on a single thread at one time.
|
||||
pub(super) fn new(
|
||||
allocator: &'a Allocator,
|
||||
source_text: &'a str,
|
||||
source_type: SourceType,
|
||||
unique: UniquePromise,
|
||||
) -> Self {
|
||||
let source = Source::new(source_text, unique);
|
||||
|
||||
// The first token is at the start of file, so is allowed on a new line
|
||||
let token = Token::new_on_new_line();
|
||||
|
|
@ -116,6 +125,18 @@ impl<'a> Lexer<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Backdoor to create a `Lexer` without holding a `UniquePromise`, for benchmarks.
|
||||
/// This function must NOT be exposed in public API as it breaks safety invariants.
|
||||
#[cfg(feature = "benchmarking")]
|
||||
pub fn new_for_benchmarks(
|
||||
allocator: &'a Allocator,
|
||||
source_text: &'a str,
|
||||
source_type: SourceType,
|
||||
) -> Self {
|
||||
let unique = UniquePromise::new_for_tests();
|
||||
Self::new(allocator, source_text, source_type, unique)
|
||||
}
|
||||
|
||||
/// Remaining string from `Chars`
|
||||
pub fn remaining(&self) -> &'a str {
|
||||
self.source.remaining()
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
#![allow(clippy::unnecessary_safety_comment)]
|
||||
|
||||
use crate::MAX_LEN;
|
||||
use crate::{UniquePromise, MAX_LEN};
|
||||
|
||||
use std::{marker::PhantomData, slice, str};
|
||||
|
||||
|
|
@ -72,7 +72,11 @@ pub(super) struct Source<'a> {
|
|||
|
||||
impl<'a> Source<'a> {
|
||||
/// Create `Source` from `&str`.
|
||||
pub(super) fn new(mut source_text: &'a str) -> Self {
|
||||
///
|
||||
/// Requiring a `UniquePromise` to be provided guarantees only 1 `Source` can exist
|
||||
/// on a single thread at one time.
|
||||
#[allow(clippy::needless_pass_by_value)]
|
||||
pub(super) fn new(mut source_text: &'a str, _unique: UniquePromise) -> Self {
|
||||
// If source text exceeds size limit, substitute a short source text which will fail to parse.
|
||||
// `Parser::parse` will convert error to `diagnostics::OverlongSource`.
|
||||
if source_text.len() > MAX_LEN {
|
||||
|
|
|
|||
|
|
@ -71,7 +71,13 @@ mod jsx;
|
|||
mod ts;
|
||||
|
||||
mod diagnostics;
|
||||
|
||||
// Expose lexer only in benchmarks
|
||||
#[cfg(not(feature = "benchmarking"))]
|
||||
mod lexer;
|
||||
#[cfg(feature = "benchmarking")]
|
||||
#[doc(hidden)]
|
||||
pub mod lexer;
|
||||
|
||||
use context::{Context, StatementContext};
|
||||
use oxc_allocator::Allocator;
|
||||
|
|
@ -84,12 +90,6 @@ use crate::{
|
|||
state::ParserState,
|
||||
};
|
||||
|
||||
// Expose lexer for benchmarks
|
||||
#[doc(hidden)]
|
||||
pub mod __lexer {
|
||||
pub use super::lexer::{Kind, Lexer, Token};
|
||||
}
|
||||
|
||||
/// Maximum length of source which can be parsed (in bytes).
|
||||
/// ~4 GiB on 64-bit systems, ~2 GiB on 32-bit systems.
|
||||
// Length is constrained by 2 factors:
|
||||
|
|
@ -165,17 +165,63 @@ impl<'a> Parser<'a> {
|
|||
self.options.preserve_parens = allow;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// Main entry point
|
||||
mod parser_parse {
|
||||
use super::*;
|
||||
|
||||
/// `UniquePromise` is a way to use the type system to enforce the invariant that only
|
||||
/// a single `ParserImpl`, `Lexer` and `lexer::Source` can exist at any time on a thread.
|
||||
/// This constraint is required to guarantee the soundness of some methods of these types
|
||||
/// e.g. `Source::set_position`.
|
||||
///
|
||||
/// Returns an empty `Program` on unrecoverable error,
|
||||
/// Recoverable errors are stored inside `errors`.
|
||||
pub fn parse(self) -> ParserReturn<'a> {
|
||||
let parser =
|
||||
ParserImpl::new(self.allocator, self.source_text, self.source_type, self.options);
|
||||
parser.parse()
|
||||
/// `ParserImpl::new`, `Lexer::new` and `lexer::Source::new` all require a `UniquePromise`
|
||||
/// to be provided to them. `UniquePromise::new` is not visible outside this module, so only
|
||||
/// `Parser::parse` can create one, and it only calls `ParserImpl::new` once.
|
||||
/// This enforces the invariant throughout the entire parser.
|
||||
///
|
||||
/// `UniquePromise` is a zero-sized type and has no runtime cost. It's purely for the type-checker.
|
||||
///
|
||||
/// `UniquePromise::new_for_tests` is a backdoor for unit tests and benchmarks, so they can create a
|
||||
/// `ParserImpl` or `Lexer`, and manipulate it directly, for testing/benchmarking purposes.
|
||||
pub(crate) struct UniquePromise {
|
||||
_dummy: (),
|
||||
}
|
||||
|
||||
impl UniquePromise {
|
||||
#[inline]
|
||||
fn new() -> Self {
|
||||
Self { _dummy: () }
|
||||
}
|
||||
|
||||
/// Backdoor for tests/benchmarks to create a `UniquePromise` (see above).
|
||||
/// This function must NOT be exposed outside of tests and benchmarks,
|
||||
/// as it allows circumventing safety invariants of the parser.
|
||||
#[cfg(any(test, feature = "benchmarking"))]
|
||||
pub fn new_for_tests() -> Self {
|
||||
Self { _dummy: () }
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Parser<'a> {
|
||||
/// Main entry point
|
||||
///
|
||||
/// Returns an empty `Program` on unrecoverable error,
|
||||
/// Recoverable errors are stored inside `errors`.
|
||||
pub fn parse(self) -> ParserReturn<'a> {
|
||||
let unique = UniquePromise::new();
|
||||
let parser = ParserImpl::new(
|
||||
self.allocator,
|
||||
self.source_text,
|
||||
self.source_type,
|
||||
self.options,
|
||||
unique,
|
||||
);
|
||||
parser.parse()
|
||||
}
|
||||
}
|
||||
}
|
||||
use parser_parse::UniquePromise;
|
||||
|
||||
/// Implementation of parser.
|
||||
/// `Parser` is just a public wrapper, the guts of the implementation is in this type.
|
||||
|
|
@ -213,15 +259,20 @@ struct ParserImpl<'a> {
|
|||
}
|
||||
|
||||
impl<'a> ParserImpl<'a> {
|
||||
/// Create a new parser
|
||||
/// Create a new `ParserImpl`.
|
||||
///
|
||||
/// Requiring a `UniquePromise` to be provided guarantees only 1 `ParserImpl` can exist
|
||||
/// on a single thread at one time.
|
||||
#[inline]
|
||||
pub fn new(
|
||||
allocator: &'a Allocator,
|
||||
source_text: &'a str,
|
||||
source_type: SourceType,
|
||||
options: ParserOptions,
|
||||
unique: UniquePromise,
|
||||
) -> Self {
|
||||
Self {
|
||||
lexer: Lexer::new(allocator, source_text, source_type),
|
||||
lexer: Lexer::new(allocator, source_text, source_type, unique),
|
||||
source_type,
|
||||
source_text,
|
||||
errors: vec![],
|
||||
|
|
@ -234,10 +285,24 @@ impl<'a> ParserImpl<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Backdoor to create a `ParserImpl` without holding a `UniquePromise`, for unit tests.
|
||||
/// This function must NOT be exposed in public API as it breaks safety invariants.
|
||||
#[cfg(test)]
|
||||
fn new_for_tests(
|
||||
allocator: &'a Allocator,
|
||||
source_text: &'a str,
|
||||
source_type: SourceType,
|
||||
options: ParserOptions,
|
||||
) -> Self {
|
||||
let unique = UniquePromise::new_for_tests();
|
||||
Self::new(allocator, source_text, source_type, options, unique)
|
||||
}
|
||||
|
||||
/// Main entry point
|
||||
///
|
||||
/// Returns an empty `Program` on unrecoverable error,
|
||||
/// Recoverable errors are stored inside `errors`.
|
||||
#[inline]
|
||||
pub fn parse(mut self) -> ParserReturn<'a> {
|
||||
let (program, panicked) = match self.parse_program() {
|
||||
Ok(program) => (program, false),
|
||||
|
|
|
|||
|
|
@ -85,7 +85,8 @@ mod test_is_declaration {
|
|||
fn run_check(source: &str, expected: bool) {
|
||||
let alloc = Allocator::default();
|
||||
let source_type = SourceType::default().with_typescript(true);
|
||||
let mut parser = ParserImpl::new(&alloc, source, source_type, ParserOptions::default());
|
||||
let mut parser =
|
||||
ParserImpl::new_for_tests(&alloc, source, source_type, ParserOptions::default());
|
||||
// Get the parser to the first token.
|
||||
parser.bump_any();
|
||||
assert_eq!(expected, parser.at_start_of_ts_declaration());
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@ harness = false
|
|||
oxc_allocator = { workspace = true }
|
||||
oxc_linter = { workspace = true }
|
||||
oxc_minifier = { workspace = true }
|
||||
oxc_parser = { workspace = true }
|
||||
oxc_parser = { workspace = true, features = ["benchmarking"] }
|
||||
oxc_prettier = { workspace = true }
|
||||
oxc_semantic = { workspace = true }
|
||||
oxc_span = { workspace = true }
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
use oxc_allocator::Allocator;
|
||||
use oxc_benchmark::{criterion_group, criterion_main, BenchmarkId, Criterion};
|
||||
use oxc_parser::__lexer::{Kind, Lexer};
|
||||
use oxc_parser::lexer::{Kind, Lexer};
|
||||
use oxc_span::SourceType;
|
||||
use oxc_tasks_common::{TestFile, TestFiles};
|
||||
|
||||
|
|
@ -32,7 +32,7 @@ fn bench_lexer(criterion: &mut Criterion) {
|
|||
// Otherwise the allocator will allocate huge memory chunks (by power of two) from the
|
||||
// system allocator, which makes time measurement unequal during long runs.
|
||||
let allocator = Allocator::default();
|
||||
let mut lexer = Lexer::new(&allocator, source_text, source_type);
|
||||
let mut lexer = Lexer::new_for_benchmarks(&allocator, source_text, source_type);
|
||||
while lexer.next_token().kind != Kind::Eof {}
|
||||
allocator
|
||||
});
|
||||
|
|
|
|||
Loading…
Reference in a new issue