mirror of
https://github.com/danbulant/oxc
synced 2026-05-24 12:21:58 +00:00
parser(refactor): promise only one Source on a thread at a time (#2340)
Introduce the invariant that only a single `lexer::Source` can exist on a thread at one time. This is a preparatory step for #2341. Two notes: (1) The restriction is only 1 x `ParserImpl` / `Lexer` / `Source` on 1 *thread* at a time, not globally, so this does not prevent parsing multiple files simultaneously on different threads. (2) The restriction does not apply to the public type `Parser`, only to `ParserImpl`. `ParserImpl`s are not created in `Parser::new`, but instead in `Parser::parse`, where they're created and then immediately consumed. So the end user is also free to create multiple `Parser` instances (if they want to for some reason) on the same thread.
This commit is contained in:
parent
ebc08d4e1e
commit
aef593fb50
7 changed files with 119 additions and 24 deletions
|
|
@ -35,3 +35,7 @@ oxc_ast = { workspace = true, features = ["serde"] }
|
||||||
miette = { workspace = true, features = ["fancy-no-backtrace"] }
|
miette = { workspace = true, features = ["fancy-no-backtrace"] }
|
||||||
serde_json = { workspace = true }
|
serde_json = { workspace = true }
|
||||||
ouroboros = "0.18.3" # for `multi-thread` example
|
ouroboros = "0.18.3" # for `multi-thread` example
|
||||||
|
|
||||||
|
[features]
|
||||||
|
# Expose Lexer for benchmarks
|
||||||
|
benchmarking = []
|
||||||
|
|
|
||||||
|
|
@ -44,7 +44,7 @@ pub use self::{
|
||||||
number::{parse_big_int, parse_float, parse_int},
|
number::{parse_big_int, parse_float, parse_int},
|
||||||
token::Token,
|
token::Token,
|
||||||
};
|
};
|
||||||
use crate::diagnostics;
|
use crate::{diagnostics, UniquePromise};
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy)]
|
#[derive(Debug, Clone, Copy)]
|
||||||
pub struct LexerCheckpoint<'a> {
|
pub struct LexerCheckpoint<'a> {
|
||||||
|
|
@ -97,8 +97,17 @@ pub struct Lexer<'a> {
|
||||||
|
|
||||||
#[allow(clippy::unused_self)]
|
#[allow(clippy::unused_self)]
|
||||||
impl<'a> Lexer<'a> {
|
impl<'a> Lexer<'a> {
|
||||||
pub fn new(allocator: &'a Allocator, source_text: &'a str, source_type: SourceType) -> Self {
|
/// Create new `Lexer`.
|
||||||
let source = Source::new(source_text);
|
///
|
||||||
|
/// Requiring a `UniquePromise` to be provided guarantees only 1 `Lexer` can exist
|
||||||
|
/// on a single thread at one time.
|
||||||
|
pub(super) fn new(
|
||||||
|
allocator: &'a Allocator,
|
||||||
|
source_text: &'a str,
|
||||||
|
source_type: SourceType,
|
||||||
|
unique: UniquePromise,
|
||||||
|
) -> Self {
|
||||||
|
let source = Source::new(source_text, unique);
|
||||||
|
|
||||||
// The first token is at the start of file, so it is allowed on a new line
|
// The first token is at the start of file, so it is allowed on a new line
|
||||||
let token = Token::new_on_new_line();
|
let token = Token::new_on_new_line();
|
||||||
|
|
@ -116,6 +125,18 @@ impl<'a> Lexer<'a> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Backdoor to create a `Lexer` without holding a `UniquePromise`, for benchmarks.
|
||||||
|
/// This function must NOT be exposed in public API as it breaks safety invariants.
|
||||||
|
#[cfg(feature = "benchmarking")]
|
||||||
|
pub fn new_for_benchmarks(
|
||||||
|
allocator: &'a Allocator,
|
||||||
|
source_text: &'a str,
|
||||||
|
source_type: SourceType,
|
||||||
|
) -> Self {
|
||||||
|
let unique = UniquePromise::new_for_tests();
|
||||||
|
Self::new(allocator, source_text, source_type, unique)
|
||||||
|
}
|
||||||
|
|
||||||
/// Remaining string from `Chars`
|
/// Remaining string from `Chars`
|
||||||
pub fn remaining(&self) -> &'a str {
|
pub fn remaining(&self) -> &'a str {
|
||||||
self.source.remaining()
|
self.source.remaining()
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
#![allow(clippy::unnecessary_safety_comment)]
|
#![allow(clippy::unnecessary_safety_comment)]
|
||||||
|
|
||||||
use crate::MAX_LEN;
|
use crate::{UniquePromise, MAX_LEN};
|
||||||
|
|
||||||
use std::{marker::PhantomData, slice, str};
|
use std::{marker::PhantomData, slice, str};
|
||||||
|
|
||||||
|
|
@ -72,7 +72,11 @@ pub(super) struct Source<'a> {
|
||||||
|
|
||||||
impl<'a> Source<'a> {
|
impl<'a> Source<'a> {
|
||||||
/// Create `Source` from `&str`.
|
/// Create `Source` from `&str`.
|
||||||
pub(super) fn new(mut source_text: &'a str) -> Self {
|
///
|
||||||
|
/// Requiring a `UniquePromise` to be provided guarantees only 1 `Source` can exist
|
||||||
|
/// on a single thread at one time.
|
||||||
|
#[allow(clippy::needless_pass_by_value)]
|
||||||
|
pub(super) fn new(mut source_text: &'a str, _unique: UniquePromise) -> Self {
|
||||||
// If source text exceeds size limit, substitute a short source text which will fail to parse.
|
// If source text exceeds size limit, substitute a short source text which will fail to parse.
|
||||||
// `Parser::parse` will convert error to `diagnostics::OverlongSource`.
|
// `Parser::parse` will convert error to `diagnostics::OverlongSource`.
|
||||||
if source_text.len() > MAX_LEN {
|
if source_text.len() > MAX_LEN {
|
||||||
|
|
|
||||||
|
|
@ -71,7 +71,13 @@ mod jsx;
|
||||||
mod ts;
|
mod ts;
|
||||||
|
|
||||||
mod diagnostics;
|
mod diagnostics;
|
||||||
|
|
||||||
|
// Expose lexer only in benchmarks
|
||||||
|
#[cfg(not(feature = "benchmarking"))]
|
||||||
mod lexer;
|
mod lexer;
|
||||||
|
#[cfg(feature = "benchmarking")]
|
||||||
|
#[doc(hidden)]
|
||||||
|
pub mod lexer;
|
||||||
|
|
||||||
use context::{Context, StatementContext};
|
use context::{Context, StatementContext};
|
||||||
use oxc_allocator::Allocator;
|
use oxc_allocator::Allocator;
|
||||||
|
|
@ -84,12 +90,6 @@ use crate::{
|
||||||
state::ParserState,
|
state::ParserState,
|
||||||
};
|
};
|
||||||
|
|
||||||
// Expose lexer for benchmarks
|
|
||||||
#[doc(hidden)]
|
|
||||||
pub mod __lexer {
|
|
||||||
pub use super::lexer::{Kind, Lexer, Token};
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Maximum length of source which can be parsed (in bytes).
|
/// Maximum length of source which can be parsed (in bytes).
|
||||||
/// ~4 GiB on 64-bit systems, ~2 GiB on 32-bit systems.
|
/// ~4 GiB on 64-bit systems, ~2 GiB on 32-bit systems.
|
||||||
// Length is constrained by 2 factors:
|
// Length is constrained by 2 factors:
|
||||||
|
|
@ -165,17 +165,63 @@ impl<'a> Parser<'a> {
|
||||||
self.options.preserve_parens = allow;
|
self.options.preserve_parens = allow;
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
mod parser_parse {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
/// `UniquePromise` is a way to use the type system to enforce the invariant that only
|
||||||
|
/// a single `ParserImpl`, `Lexer` and `lexer::Source` can exist at any time on a thread.
|
||||||
|
/// This constraint is required to guarantee the soundness of some methods of these types
|
||||||
|
/// e.g. `Source::set_position`.
|
||||||
|
///
|
||||||
|
/// `ParserImpl::new`, `Lexer::new` and `lexer::Source::new` all require a `UniquePromise`
|
||||||
|
/// to be provided to them. `UniquePromise::new` is not visible outside this module, so only
|
||||||
|
/// `Parser::parse` can create one, and it only calls `ParserImpl::new` once.
|
||||||
|
/// This enforces the invariant throughout the entire parser.
|
||||||
|
///
|
||||||
|
/// `UniquePromise` is a zero-sized type and has no runtime cost. It's purely for the type-checker.
|
||||||
|
///
|
||||||
|
/// `UniquePromise::new_for_tests` is a backdoor for unit tests and benchmarks, so they can create a
|
||||||
|
/// `ParserImpl` or `Lexer`, and manipulate it directly, for testing/benchmarking purposes.
|
||||||
|
pub(crate) struct UniquePromise {
|
||||||
|
_dummy: (),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl UniquePromise {
|
||||||
|
#[inline]
|
||||||
|
fn new() -> Self {
|
||||||
|
Self { _dummy: () }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Backdoor for tests/benchmarks to create a `UniquePromise` (see above).
|
||||||
|
/// This function must NOT be exposed outside of tests and benchmarks,
|
||||||
|
/// as it allows circumventing safety invariants of the parser.
|
||||||
|
#[cfg(any(test, feature = "benchmarking"))]
|
||||||
|
pub fn new_for_tests() -> Self {
|
||||||
|
Self { _dummy: () }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> Parser<'a> {
|
||||||
/// Main entry point
|
/// Main entry point
|
||||||
///
|
///
|
||||||
/// Returns an empty `Program` on unrecoverable error,
|
/// Returns an empty `Program` on unrecoverable error,
|
||||||
/// Recoverable errors are stored inside `errors`.
|
/// Recoverable errors are stored inside `errors`.
|
||||||
pub fn parse(self) -> ParserReturn<'a> {
|
pub fn parse(self) -> ParserReturn<'a> {
|
||||||
let parser =
|
let unique = UniquePromise::new();
|
||||||
ParserImpl::new(self.allocator, self.source_text, self.source_type, self.options);
|
let parser = ParserImpl::new(
|
||||||
|
self.allocator,
|
||||||
|
self.source_text,
|
||||||
|
self.source_type,
|
||||||
|
self.options,
|
||||||
|
unique,
|
||||||
|
);
|
||||||
parser.parse()
|
parser.parse()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
use parser_parse::UniquePromise;
|
||||||
|
|
||||||
/// Implementation of parser.
|
/// Implementation of parser.
|
||||||
/// `Parser` is just a public wrapper, the guts of the implementation is in this type.
|
/// `Parser` is just a public wrapper, the guts of the implementation is in this type.
|
||||||
|
|
@ -213,15 +259,20 @@ struct ParserImpl<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> ParserImpl<'a> {
|
impl<'a> ParserImpl<'a> {
|
||||||
/// Create a new parser
|
/// Create a new `ParserImpl`.
|
||||||
|
///
|
||||||
|
/// Requiring a `UniquePromise` to be provided guarantees only 1 `ParserImpl` can exist
|
||||||
|
/// on a single thread at one time.
|
||||||
|
#[inline]
|
||||||
pub fn new(
|
pub fn new(
|
||||||
allocator: &'a Allocator,
|
allocator: &'a Allocator,
|
||||||
source_text: &'a str,
|
source_text: &'a str,
|
||||||
source_type: SourceType,
|
source_type: SourceType,
|
||||||
options: ParserOptions,
|
options: ParserOptions,
|
||||||
|
unique: UniquePromise,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
Self {
|
Self {
|
||||||
lexer: Lexer::new(allocator, source_text, source_type),
|
lexer: Lexer::new(allocator, source_text, source_type, unique),
|
||||||
source_type,
|
source_type,
|
||||||
source_text,
|
source_text,
|
||||||
errors: vec![],
|
errors: vec![],
|
||||||
|
|
@ -234,10 +285,24 @@ impl<'a> ParserImpl<'a> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Backdoor to create a `ParserImpl` without holding a `UniquePromise`, for unit tests.
|
||||||
|
/// This function must NOT be exposed in public API as it breaks safety invariants.
|
||||||
|
#[cfg(test)]
|
||||||
|
fn new_for_tests(
|
||||||
|
allocator: &'a Allocator,
|
||||||
|
source_text: &'a str,
|
||||||
|
source_type: SourceType,
|
||||||
|
options: ParserOptions,
|
||||||
|
) -> Self {
|
||||||
|
let unique = UniquePromise::new_for_tests();
|
||||||
|
Self::new(allocator, source_text, source_type, options, unique)
|
||||||
|
}
|
||||||
|
|
||||||
/// Main entry point
|
/// Main entry point
|
||||||
///
|
///
|
||||||
/// Returns an empty `Program` on unrecoverable error,
|
/// Returns an empty `Program` on unrecoverable error,
|
||||||
/// Recoverable errors are stored inside `errors`.
|
/// Recoverable errors are stored inside `errors`.
|
||||||
|
#[inline]
|
||||||
pub fn parse(mut self) -> ParserReturn<'a> {
|
pub fn parse(mut self) -> ParserReturn<'a> {
|
||||||
let (program, panicked) = match self.parse_program() {
|
let (program, panicked) = match self.parse_program() {
|
||||||
Ok(program) => (program, false),
|
Ok(program) => (program, false),
|
||||||
|
|
|
||||||
|
|
@ -85,7 +85,8 @@ mod test_is_declaration {
|
||||||
fn run_check(source: &str, expected: bool) {
|
fn run_check(source: &str, expected: bool) {
|
||||||
let alloc = Allocator::default();
|
let alloc = Allocator::default();
|
||||||
let source_type = SourceType::default().with_typescript(true);
|
let source_type = SourceType::default().with_typescript(true);
|
||||||
let mut parser = ParserImpl::new(&alloc, source, source_type, ParserOptions::default());
|
let mut parser =
|
||||||
|
ParserImpl::new_for_tests(&alloc, source, source_type, ParserOptions::default());
|
||||||
// Get the parser to the first token.
|
// Get the parser to the first token.
|
||||||
parser.bump_any();
|
parser.bump_any();
|
||||||
assert_eq!(expected, parser.at_start_of_ts_declaration());
|
assert_eq!(expected, parser.at_start_of_ts_declaration());
|
||||||
|
|
|
||||||
|
|
@ -51,7 +51,7 @@ harness = false
|
||||||
oxc_allocator = { workspace = true }
|
oxc_allocator = { workspace = true }
|
||||||
oxc_linter = { workspace = true }
|
oxc_linter = { workspace = true }
|
||||||
oxc_minifier = { workspace = true }
|
oxc_minifier = { workspace = true }
|
||||||
oxc_parser = { workspace = true }
|
oxc_parser = { workspace = true, features = ["benchmarking"] }
|
||||||
oxc_prettier = { workspace = true }
|
oxc_prettier = { workspace = true }
|
||||||
oxc_semantic = { workspace = true }
|
oxc_semantic = { workspace = true }
|
||||||
oxc_span = { workspace = true }
|
oxc_span = { workspace = true }
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
use oxc_allocator::Allocator;
|
use oxc_allocator::Allocator;
|
||||||
use oxc_benchmark::{criterion_group, criterion_main, BenchmarkId, Criterion};
|
use oxc_benchmark::{criterion_group, criterion_main, BenchmarkId, Criterion};
|
||||||
use oxc_parser::__lexer::{Kind, Lexer};
|
use oxc_parser::lexer::{Kind, Lexer};
|
||||||
use oxc_span::SourceType;
|
use oxc_span::SourceType;
|
||||||
use oxc_tasks_common::{TestFile, TestFiles};
|
use oxc_tasks_common::{TestFile, TestFiles};
|
||||||
|
|
||||||
|
|
@ -32,7 +32,7 @@ fn bench_lexer(criterion: &mut Criterion) {
|
||||||
// Otherwise the allocator will allocate huge memory chunks (by power of two) from the
|
// Otherwise the allocator will allocate huge memory chunks (by power of two) from the
|
||||||
// system allocator, which makes time measurement unequal during long runs.
|
// system allocator, which makes time measurement unequal during long runs.
|
||||||
let allocator = Allocator::default();
|
let allocator = Allocator::default();
|
||||||
let mut lexer = Lexer::new(&allocator, source_text, source_type);
|
let mut lexer = Lexer::new_for_benchmarks(&allocator, source_text, source_type);
|
||||||
while lexer.next_token().kind != Kind::Eof {}
|
while lexer.next_token().kind != Kind::Eof {}
|
||||||
allocator
|
allocator
|
||||||
});
|
});
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue