From aef593fb50c22bf06ea5ee3a5e4e7d688f71b893 Mon Sep 17 00:00:00 2001 From: overlookmotel Date: Thu, 8 Feb 2024 06:51:17 +0000 Subject: [PATCH] parser(refactor): promise only one `Source` on a thread at a time (#2340) Introduce invariant that only a single `lexer::Source` can exist on a thread at one time. This is a preparatory step for #2341. 2 notes: Restriction is only 1 x `ParserImpl` / `Lexer` / `Source` on 1 *thread* at a time, not globally. So this does not prevent parsing multiple files simultaneously on different threads. Restriction does not apply to public type `Parser`, only `ParserImpl`. `ParserImpl`s are not created in created in `Parser::new`, but instead in `Parser::parse`, where they're created and then immediately consumed. So the end user is also free to create multiple `Parser` instances (if they want to for some reason) on the same thread. --- crates/oxc_parser/Cargo.toml | 4 ++ crates/oxc_parser/src/lexer/mod.rs | 27 ++++++- crates/oxc_parser/src/lexer/source.rs | 8 ++- crates/oxc_parser/src/lib.rs | 95 +++++++++++++++++++++---- crates/oxc_parser/src/ts/declaration.rs | 3 +- tasks/benchmark/Cargo.toml | 2 +- tasks/benchmark/benches/lexer.rs | 4 +- 7 files changed, 119 insertions(+), 24 deletions(-) diff --git a/crates/oxc_parser/Cargo.toml b/crates/oxc_parser/Cargo.toml index 0e92950e9..e6cc4f48b 100644 --- a/crates/oxc_parser/Cargo.toml +++ b/crates/oxc_parser/Cargo.toml @@ -35,3 +35,7 @@ oxc_ast = { workspace = true, features = ["serde"] } miette = { workspace = true, features = ["fancy-no-backtrace"] } serde_json = { workspace = true } ouroboros = "0.18.3" # for `multi-thread` example + +[features] +# Expose Lexer for benchmarks +benchmarking = [] diff --git a/crates/oxc_parser/src/lexer/mod.rs b/crates/oxc_parser/src/lexer/mod.rs index 3134f3422..120abb841 100644 --- a/crates/oxc_parser/src/lexer/mod.rs +++ b/crates/oxc_parser/src/lexer/mod.rs @@ -44,7 +44,7 @@ pub use self::{ number::{parse_big_int, parse_float, parse_int}, token::Token, }; -use crate::diagnostics; +use crate::{diagnostics, UniquePromise}; #[derive(Debug, Clone, Copy)] pub struct LexerCheckpoint<'a> { @@ -97,8 +97,17 @@ pub struct Lexer<'a> { #[allow(clippy::unused_self)] impl<'a> Lexer<'a> { - pub fn new(allocator: &'a Allocator, source_text: &'a str, source_type: SourceType) -> Self { - let source = Source::new(source_text); + /// Create new `Lexer`. + /// + /// Requiring a `UniquePromise` to be provided guarantees only 1 `Lexer` can exist + /// on a single thread at one time. + pub(super) fn new( + allocator: &'a Allocator, + source_text: &'a str, + source_type: SourceType, + unique: UniquePromise, + ) -> Self { + let source = Source::new(source_text, unique); // The first token is at the start of file, so is allows on a new line let token = Token::new_on_new_line(); @@ -116,6 +125,18 @@ impl<'a> Lexer<'a> { } } + /// Backdoor to create a `Lexer` without holding a `UniquePromise`, for benchmarks. + /// This function must NOT be exposed in public API as it breaks safety invariants. + #[cfg(feature = "benchmarking")] + pub fn new_for_benchmarks( + allocator: &'a Allocator, + source_text: &'a str, + source_type: SourceType, + ) -> Self { + let unique = UniquePromise::new_for_tests(); + Self::new(allocator, source_text, source_type, unique) + } + /// Remaining string from `Chars` pub fn remaining(&self) -> &'a str { self.source.remaining() diff --git a/crates/oxc_parser/src/lexer/source.rs b/crates/oxc_parser/src/lexer/source.rs index 25a492288..07f4397ca 100644 --- a/crates/oxc_parser/src/lexer/source.rs +++ b/crates/oxc_parser/src/lexer/source.rs @@ -1,6 +1,6 @@ #![allow(clippy::unnecessary_safety_comment)] -use crate::MAX_LEN; +use crate::{UniquePromise, MAX_LEN}; use std::{marker::PhantomData, slice, str}; @@ -72,7 +72,11 @@ pub(super) struct Source<'a> { impl<'a> Source<'a> { /// Create `Source` from `&str`. - pub(super) fn new(mut source_text: &'a str) -> Self { + /// + /// Requiring a `UniquePromise` to be provided guarantees only 1 `Source` can exist + /// on a single thread at one time. + #[allow(clippy::needless_pass_by_value)] + pub(super) fn new(mut source_text: &'a str, _unique: UniquePromise) -> Self { // If source text exceeds size limit, substitute a short source text which will fail to parse. // `Parser::parse` will convert error to `diagnostics::OverlongSource`. if source_text.len() > MAX_LEN { diff --git a/crates/oxc_parser/src/lib.rs b/crates/oxc_parser/src/lib.rs index 719c0cbc4..25cadf86d 100644 --- a/crates/oxc_parser/src/lib.rs +++ b/crates/oxc_parser/src/lib.rs @@ -71,7 +71,13 @@ mod jsx; mod ts; mod diagnostics; + +// Expose lexer only in benchmarks +#[cfg(not(feature = "benchmarking"))] mod lexer; +#[cfg(feature = "benchmarking")] +#[doc(hidden)] +pub mod lexer; use context::{Context, StatementContext}; use oxc_allocator::Allocator; @@ -84,12 +90,6 @@ use crate::{ state::ParserState, }; -// Expose lexer for benchmarks -#[doc(hidden)] -pub mod __lexer { - pub use super::lexer::{Kind, Lexer, Token}; -} - /// Maximum length of source which can be parsed (in bytes). /// ~4 GiB on 64-bit systems, ~2 GiB on 32-bit systems. // Length is constrained by 2 factors: @@ -165,17 +165,63 @@ impl<'a> Parser<'a> { self.options.preserve_parens = allow; self } +} - /// Main entry point +mod parser_parse { + use super::*; + + /// `UniquePromise` is a way to use the type system to enforce the invariant that only + /// a single `ParserImpl`, `Lexer` and `lexer::Source` can exist at any time on a thread. + /// This constraint is required to guarantee the soundness of some methods of these types + /// e.g. `Source::set_position`. /// - /// Returns an empty `Program` on unrecoverable error, - /// Recoverable errors are stored inside `errors`. - pub fn parse(self) -> ParserReturn<'a> { - let parser = - ParserImpl::new(self.allocator, self.source_text, self.source_type, self.options); - parser.parse() + /// `ParserImpl::new`, `Lexer::new` and `lexer::Source::new` all require a `UniquePromise` + /// to be provided to them. `UniquePromise::new` is not visible outside this module, so only + /// `Parser::parse` can create one, and it only calls `ParserImpl::new` once. + /// This enforces the invariant throughout the entire parser. + /// + /// `UniquePromise` is a zero-sized type and has no runtime cost. It's purely for the type-checker. + /// + /// `UniquePromise::new_for_tests` is a backdoor for unit tests and benchmarks, so they can create a + /// `ParserImpl` or `Lexer`, and manipulate it directly, for testing/benchmarking purposes. + pub(crate) struct UniquePromise { + _dummy: (), + } + + impl UniquePromise { + #[inline] + fn new() -> Self { + Self { _dummy: () } + } + + /// Backdoor for tests/benchmarks to create a `UniquePromise` (see above). + /// This function must NOT be exposed outside of tests and benchmarks, + /// as it allows circumventing safety invariants of the parser. + #[cfg(any(test, feature = "benchmarking"))] + pub fn new_for_tests() -> Self { + Self { _dummy: () } + } + } + + impl<'a> Parser<'a> { + /// Main entry point + /// + /// Returns an empty `Program` on unrecoverable error, + /// Recoverable errors are stored inside `errors`. + pub fn parse(self) -> ParserReturn<'a> { + let unique = UniquePromise::new(); + let parser = ParserImpl::new( + self.allocator, + self.source_text, + self.source_type, + self.options, + unique, + ); + parser.parse() + } } } +use parser_parse::UniquePromise; /// Implementation of parser. /// `Parser` is just a public wrapper, the guts of the implementation is in this type. @@ -213,15 +259,20 @@ struct ParserImpl<'a> { } impl<'a> ParserImpl<'a> { - /// Create a new parser + /// Create a new `ParserImpl`. + /// + /// Requiring a `UniquePromise` to be provided guarantees only 1 `ParserImpl` can exist + /// on a single thread at one time. + #[inline] pub fn new( allocator: &'a Allocator, source_text: &'a str, source_type: SourceType, options: ParserOptions, + unique: UniquePromise, ) -> Self { Self { - lexer: Lexer::new(allocator, source_text, source_type), + lexer: Lexer::new(allocator, source_text, source_type, unique), source_type, source_text, errors: vec![], @@ -234,10 +285,24 @@ impl<'a> ParserImpl<'a> { } } + /// Backdoor to create a `ParserImpl` without holding a `UniquePromise`, for unit tests. + /// This function must NOT be exposed in public API as it breaks safety invariants. + #[cfg(test)] + fn new_for_tests( + allocator: &'a Allocator, + source_text: &'a str, + source_type: SourceType, + options: ParserOptions, + ) -> Self { + let unique = UniquePromise::new_for_tests(); + Self::new(allocator, source_text, source_type, options, unique) + } + /// Main entry point /// /// Returns an empty `Program` on unrecoverable error, /// Recoverable errors are stored inside `errors`. + #[inline] pub fn parse(mut self) -> ParserReturn<'a> { let (program, panicked) = match self.parse_program() { Ok(program) => (program, false), diff --git a/crates/oxc_parser/src/ts/declaration.rs b/crates/oxc_parser/src/ts/declaration.rs index e70aee357..36f8136b6 100644 --- a/crates/oxc_parser/src/ts/declaration.rs +++ b/crates/oxc_parser/src/ts/declaration.rs @@ -85,7 +85,8 @@ mod test_is_declaration { fn run_check(source: &str, expected: bool) { let alloc = Allocator::default(); let source_type = SourceType::default().with_typescript(true); - let mut parser = ParserImpl::new(&alloc, source, source_type, ParserOptions::default()); + let mut parser = + ParserImpl::new_for_tests(&alloc, source, source_type, ParserOptions::default()); // Get the parser to the first token. parser.bump_any(); assert_eq!(expected, parser.at_start_of_ts_declaration()); diff --git a/tasks/benchmark/Cargo.toml b/tasks/benchmark/Cargo.toml index 14f83ab1e..c395881e1 100644 --- a/tasks/benchmark/Cargo.toml +++ b/tasks/benchmark/Cargo.toml @@ -51,7 +51,7 @@ harness = false oxc_allocator = { workspace = true } oxc_linter = { workspace = true } oxc_minifier = { workspace = true } -oxc_parser = { workspace = true } +oxc_parser = { workspace = true, features = ["benchmarking"] } oxc_prettier = { workspace = true } oxc_semantic = { workspace = true } oxc_span = { workspace = true } diff --git a/tasks/benchmark/benches/lexer.rs b/tasks/benchmark/benches/lexer.rs index 4ac5cdce6..6d2284767 100644 --- a/tasks/benchmark/benches/lexer.rs +++ b/tasks/benchmark/benches/lexer.rs @@ -1,6 +1,6 @@ use oxc_allocator::Allocator; use oxc_benchmark::{criterion_group, criterion_main, BenchmarkId, Criterion}; -use oxc_parser::__lexer::{Kind, Lexer}; +use oxc_parser::lexer::{Kind, Lexer}; use oxc_span::SourceType; use oxc_tasks_common::{TestFile, TestFiles}; @@ -32,7 +32,7 @@ fn bench_lexer(criterion: &mut Criterion) { // Otherwise the allocator will allocate huge memory chunks (by power of two) from the // system allocator, which makes time measurement unequal during long runs. let allocator = Allocator::default(); - let mut lexer = Lexer::new(&allocator, source_text, source_type); + let mut lexer = Lexer::new_for_benchmarks(&allocator, source_text, source_type); while lexer.next_token().kind != Kind::Eof {} allocator });