parser(refactor): promise only one Source on a thread at a time (#2340)

Introduce invariant that only a single `lexer::Source` can exist on a thread at one time. This is a preparatory step for #2341. 2 notes: Restriction is only 1 x `ParserImpl` / `Lexer` / `Source` on 1 *thread* at a time, not globally. So this does not prevent parsing multiple files simultaneously on different threads. Restriction does not apply to public type `Parser`, only `ParserImpl`. `ParserImpl`s are not created in created in `Parser::new`, but instead in `Parser::parse`, where they're created and then immediately consumed. So the end user is also free to create multiple `Parser` instances (if they want to for some reason) on the same thread.
2026-05-21 13:18:59 +00:00 · 2024-02-08 06:51:17 +00:00 · 2024-02-08 06:51:17 +00:00 · aef593fb50
commit aef593fb50
parent ebc08d4e1e
7 changed files with 119 additions and 24 deletions
--- a/crates/oxc_parser/Cargo.toml
+++ b/crates/oxc_parser/Cargo.toml
@ -35,3 +35,7 @@ oxc_ast    = { workspace = true, features = ["serde"] }
 miette     = { workspace = true, features = ["fancy-no-backtrace"] }
 serde_json = { workspace = true }
 ouroboros  = "0.18.3"                                                # for `multi-thread` example
+
+[features]
+# Expose Lexer for benchmarks
+benchmarking = []
--- a/crates/oxc_parser/src/lexer/mod.rs
+++ b/crates/oxc_parser/src/lexer/mod.rs
@ -44,7 +44,7 @@ pub use self::{
    number::{parse_big_int, parse_float, parse_int},
    token::Token,
 };
-use crate::diagnostics;
+use crate::{diagnostics, UniquePromise};

 #[derive(Debug, Clone, Copy)]
 pub struct LexerCheckpoint<'a> {
@ -97,8 +97,17 @@ pub struct Lexer<'a> {

 #[allow(clippy::unused_self)]
 impl<'a> Lexer<'a> {
-    pub fn new(allocator: &'a Allocator, source_text: &'a str, source_type: SourceType) -> Self {
-        let source = Source::new(source_text);
+    /// Create new `Lexer`.
+    ///
+    /// Requiring a `UniquePromise` to be provided guarantees only 1 `Lexer` can exist
+    /// on a single thread at one time.
+    pub(super) fn new(
+        allocator: &'a Allocator,
+        source_text: &'a str,
+        source_type: SourceType,
+        unique: UniquePromise,
+    ) -> Self {
+        let source = Source::new(source_text, unique);

        // The first token is at the start of file, so is allows on a new line
        let token = Token::new_on_new_line();
@ -116,6 +125,18 @@ impl<'a> Lexer<'a> {
        }
    }

+    /// Backdoor to create a `Lexer` without holding a `UniquePromise`, for benchmarks.
+    /// This function must NOT be exposed in public API as it breaks safety invariants.
+    #[cfg(feature = "benchmarking")]
+    pub fn new_for_benchmarks(
+        allocator: &'a Allocator,
+        source_text: &'a str,
+        source_type: SourceType,
+    ) -> Self {
+        let unique = UniquePromise::new_for_tests();
+        Self::new(allocator, source_text, source_type, unique)
+    }
+
    /// Remaining string from `Chars`
    pub fn remaining(&self) -> &'a str {
        self.source.remaining()
--- a/crates/oxc_parser/src/lexer/source.rs
+++ b/crates/oxc_parser/src/lexer/source.rs
@ -1,6 +1,6 @@
 #![allow(clippy::unnecessary_safety_comment)]

-use crate::MAX_LEN;
+use crate::{UniquePromise, MAX_LEN};

 use std::{marker::PhantomData, slice, str};

@ -72,7 +72,11 @@ pub(super) struct Source<'a> {

 impl<'a> Source<'a> {
    /// Create `Source` from `&str`.
-    pub(super) fn new(mut source_text: &'a str) -> Self {
+    ///
+    /// Requiring a `UniquePromise` to be provided guarantees only 1 `Source` can exist
+    /// on a single thread at one time.
+    #[allow(clippy::needless_pass_by_value)]
+    pub(super) fn new(mut source_text: &'a str, _unique: UniquePromise) -> Self {
        // If source text exceeds size limit, substitute a short source text which will fail to parse.
        // `Parser::parse` will convert error to `diagnostics::OverlongSource`.
        if source_text.len() > MAX_LEN {
--- a/crates/oxc_parser/src/lib.rs
+++ b/crates/oxc_parser/src/lib.rs
@ -71,7 +71,13 @@ mod jsx;
 mod ts;

 mod diagnostics;
+
+// Expose lexer only in benchmarks
+#[cfg(not(feature = "benchmarking"))]
 mod lexer;
+#[cfg(feature = "benchmarking")]
+#[doc(hidden)]
+pub mod lexer;

 use context::{Context, StatementContext};
 use oxc_allocator::Allocator;
@ -84,12 +90,6 @@ use crate::{
    state::ParserState,
 };

-// Expose lexer for benchmarks
-#[doc(hidden)]
-pub mod __lexer {
-    pub use super::lexer::{Kind, Lexer, Token};
-}
-
 /// Maximum length of source which can be parsed (in bytes).
 /// ~4 GiB on 64-bit systems, ~2 GiB on 32-bit systems.
 // Length is constrained by 2 factors:
@ -165,17 +165,63 @@ impl<'a> Parser<'a> {
        self.options.preserve_parens = allow;
        self
    }
+}

-    /// Main entry point
+mod parser_parse {
+    use super::*;
+
+    /// `UniquePromise` is a way to use the type system to enforce the invariant that only
+    /// a single `ParserImpl`, `Lexer` and `lexer::Source` can exist at any time on a thread.
+    /// This constraint is required to guarantee the soundness of some methods of these types
+    /// e.g. `Source::set_position`.
    ///
-    /// Returns an empty `Program` on unrecoverable error,
-    /// Recoverable errors are stored inside `errors`.
-    pub fn parse(self) -> ParserReturn<'a> {
-        let parser =
-            ParserImpl::new(self.allocator, self.source_text, self.source_type, self.options);
-        parser.parse()
+    /// `ParserImpl::new`, `Lexer::new` and `lexer::Source::new` all require a `UniquePromise`
+    /// to be provided to them. `UniquePromise::new` is not visible outside this module, so only
+    /// `Parser::parse` can create one, and it only calls `ParserImpl::new` once.
+    /// This enforces the invariant throughout the entire parser.
+    ///
+    /// `UniquePromise` is a zero-sized type and has no runtime cost. It's purely for the type-checker.
+    ///
+    /// `UniquePromise::new_for_tests` is a backdoor for unit tests and benchmarks, so they can create a
+    /// `ParserImpl` or `Lexer`, and manipulate it directly, for testing/benchmarking purposes.
+    pub(crate) struct UniquePromise {
+        _dummy: (),
+    }
+
+    impl UniquePromise {
+        #[inline]
+        fn new() -> Self {
+            Self { _dummy: () }
+        }
+
+        /// Backdoor for tests/benchmarks to create a `UniquePromise` (see above).
+        /// This function must NOT be exposed outside of tests and benchmarks,
+        /// as it allows circumventing safety invariants of the parser.
+        #[cfg(any(test, feature = "benchmarking"))]
+        pub fn new_for_tests() -> Self {
+            Self { _dummy: () }
+        }
+    }
+
+    impl<'a> Parser<'a> {
+        /// Main entry point
+        ///
+        /// Returns an empty `Program` on unrecoverable error,
+        /// Recoverable errors are stored inside `errors`.
+        pub fn parse(self) -> ParserReturn<'a> {
+            let unique = UniquePromise::new();
+            let parser = ParserImpl::new(
+                self.allocator,
+                self.source_text,
+                self.source_type,
+                self.options,
+                unique,
+            );
+            parser.parse()
+        }
    }
 }
+use parser_parse::UniquePromise;

 /// Implementation of parser.
 /// `Parser` is just a public wrapper, the guts of the implementation is in this type.
@ -213,15 +259,20 @@ struct ParserImpl<'a> {
 }

 impl<'a> ParserImpl<'a> {
-    /// Create a new parser
+    /// Create a new `ParserImpl`.
+    ///
+    /// Requiring a `UniquePromise` to be provided guarantees only 1 `ParserImpl` can exist
+    /// on a single thread at one time.
+    #[inline]
    pub fn new(
        allocator: &'a Allocator,
        source_text: &'a str,
        source_type: SourceType,
        options: ParserOptions,
+        unique: UniquePromise,
    ) -> Self {
        Self {
-            lexer: Lexer::new(allocator, source_text, source_type),
+            lexer: Lexer::new(allocator, source_text, source_type, unique),
            source_type,
            source_text,
            errors: vec![],
@ -234,10 +285,24 @@ impl<'a> ParserImpl<'a> {
        }
    }

+    /// Backdoor to create a `ParserImpl` without holding a `UniquePromise`, for unit tests.
+    /// This function must NOT be exposed in public API as it breaks safety invariants.
+    #[cfg(test)]
+    fn new_for_tests(
+        allocator: &'a Allocator,
+        source_text: &'a str,
+        source_type: SourceType,
+        options: ParserOptions,
+    ) -> Self {
+        let unique = UniquePromise::new_for_tests();
+        Self::new(allocator, source_text, source_type, options, unique)
+    }
+
    /// Main entry point
    ///
    /// Returns an empty `Program` on unrecoverable error,
    /// Recoverable errors are stored inside `errors`.
+    #[inline]
    pub fn parse(mut self) -> ParserReturn<'a> {
        let (program, panicked) = match self.parse_program() {
            Ok(program) => (program, false),
--- a/crates/oxc_parser/src/ts/declaration.rs
+++ b/crates/oxc_parser/src/ts/declaration.rs
@ -85,7 +85,8 @@ mod test_is_declaration {
    fn run_check(source: &str, expected: bool) {
        let alloc = Allocator::default();
        let source_type = SourceType::default().with_typescript(true);
-        let mut parser = ParserImpl::new(&alloc, source, source_type, ParserOptions::default());
+        let mut parser =
+            ParserImpl::new_for_tests(&alloc, source, source_type, ParserOptions::default());
        // Get the parser to the first token.
        parser.bump_any();
        assert_eq!(expected, parser.at_start_of_ts_declaration());
--- a/tasks/benchmark/Cargo.toml
+++ b/tasks/benchmark/Cargo.toml
@ -51,7 +51,7 @@ harness = false
 oxc_allocator    = { workspace = true }
 oxc_linter       = { workspace = true }
 oxc_minifier     = { workspace = true }
-oxc_parser       = { workspace = true }
+oxc_parser       = { workspace = true, features = ["benchmarking"] }
 oxc_prettier     = { workspace = true }
 oxc_semantic     = { workspace = true }
 oxc_span         = { workspace = true }
--- a/tasks/benchmark/benches/lexer.rs
+++ b/tasks/benchmark/benches/lexer.rs
@ -1,6 +1,6 @@
 use oxc_allocator::Allocator;
 use oxc_benchmark::{criterion_group, criterion_main, BenchmarkId, Criterion};
-use oxc_parser::__lexer::{Kind, Lexer};
+use oxc_parser::lexer::{Kind, Lexer};
 use oxc_span::SourceType;
 use oxc_tasks_common::{TestFile, TestFiles};

@ -32,7 +32,7 @@ fn bench_lexer(criterion: &mut Criterion) {
                    // Otherwise the allocator will allocate huge memory chunks (by power of two) from the
                    // system allocator, which makes time measurement unequal during long runs.
                    let allocator = Allocator::default();
-                    let mut lexer = Lexer::new(&allocator, source_text, source_type);
+                    let mut lexer = Lexer::new_for_benchmarks(&allocator, source_text, source_type);
                    while lexer.next_token().kind != Kind::Eof {}
                    allocator
                });