diff --git a/crates/oxc_parser/src/diagnostics.rs b/crates/oxc_parser/src/diagnostics.rs
index 8277557a0..997fc1bd8 100644
--- a/crates/oxc_parser/src/diagnostics.rs
+++ b/crates/oxc_parser/src/diagnostics.rs
@@ -4,6 +4,11 @@ use oxc_diagnostics::{
 };
 use oxc_span::Span;
 
+#[derive(Debug, Error, Diagnostic)]
+#[error("Source length exceeds 4 GiB limit")]
+#[diagnostic()]
+pub struct OverlongSource;
+
 #[derive(Debug, Error, Diagnostic)]
 #[error("Flow is not supported")]
 #[diagnostic()]
diff --git a/crates/oxc_parser/src/lexer/mod.rs b/crates/oxc_parser/src/lexer/mod.rs
index ac74bfb38..7933e6379 100644
--- a/crates/oxc_parser/src/lexer/mod.rs
+++ b/crates/oxc_parser/src/lexer/mod.rs
@@ -32,7 +32,7 @@ use self::{
     string_builder::AutoCow,
     trivia_builder::TriviaBuilder,
 };
-use crate::diagnostics;
+use crate::{diagnostics, MAX_LEN};
 
 #[derive(Debug, Clone)]
 pub struct LexerCheckpoint<'a> {
@@ -72,6 +72,11 @@ pub struct Lexer<'a> {
 #[allow(clippy::unused_self)]
 impl<'a> Lexer<'a> {
     pub fn new(allocator: &'a Allocator, source: &'a str, source_type: SourceType) -> Self {
+        // `Token`'s start and end are `u32`s, so source can be at most `MAX_LEN` bytes long.
+        // A debug assertion is sufficient here, as `Parser::new` checks the length of the
+        // source before calling this method.
+        debug_assert!(source.len() <= MAX_LEN, "Source length exceeds MAX_LEN");
+
         let token = Token {
             // the first token is at the start of file, so is allows on a new line
             is_on_new_line: true,
diff --git a/crates/oxc_parser/src/lib.rs b/crates/oxc_parser/src/lib.rs
index 8765b43a3..73f902c03 100644
--- a/crates/oxc_parser/src/lib.rs
+++ b/crates/oxc_parser/src/lib.rs
@@ -84,6 +84,10 @@ use crate::{
     state::ParserState,
 };
 
+/// Maximum length of source text, in bytes, which can be parsed (~4 GiB).
+// `Span`'s start and end are `u32`s, so the size limit is `u32::MAX` bytes.
+pub const MAX_LEN: usize = u32::MAX as usize;
+
 /// Return value of parser consisting of AST, errors and comments
 ///
 /// The parser always return a valid AST.
@@ -135,8 +139,12 @@ pub struct Parser<'a> {
 impl<'a> Parser<'a> {
     /// Create a new parser
     pub fn new(allocator: &'a Allocator, source_text: &'a str, source_type: SourceType) -> Self {
+        // If source exceeds `MAX_LEN`, lex a short substitute source which will fail to parse.
+        // `parse()` then reports `diagnostics::OverlongSource` in place of that parse error.
+        let source_text_for_lexer = if source_text.len() > MAX_LEN { "\0" } else { source_text };
+
         Self {
-            lexer: Lexer::new(allocator, source_text, source_type),
+            lexer: Lexer::new(allocator, source_text_for_lexer, source_type),
             source_type,
             source_text,
             errors: vec![],
@@ -177,7 +185,9 @@ impl<'a> Parser<'a> {
         let (program, panicked) = match self.parse_program() {
             Ok(program) => (program, false),
             Err(error) => {
-                self.error(self.flow_error().unwrap_or(error));
+                self.error(
+                    self.flow_error().unwrap_or_else(|| self.overlong_error().unwrap_or(error)),
+                );
                 let program = self.ast.program(
                     Span::default(),
                     self.source_type,
@@ -227,6 +237,15 @@ impl<'a> Parser<'a> {
         None
     }
 
+    /// Return an `OverlongSource` error if source length exceeds `MAX_LEN`, else `None`.
+    /// In that case the parse error is not real - `Parser::new` substituted "\0" as the source.
+    fn overlong_error(&self) -> Option<Error> {
+        if self.source_text.len() > MAX_LEN {
+            return Some(diagnostics::OverlongSource.into());
+        }
+        None
+    }
+
     /// Return error info at current token
     /// # Panics
     /// * The lexer did not push a diagnostic when `Kind::Undetermined` is returned
@@ -280,4 +299,45 @@ mod test {
         assert!(ret.program.is_empty());
         assert_eq!(ret.errors.first().unwrap().to_string(), "Flow is not supported");
     }
+
+    // Source with length u32::MAX + 1 fails to parse.
+    // Skipped on non-64-bit targets, where a string this long cannot be allocated.
+    #[cfg(target_pointer_width = "64")]
+    #[test]
+    fn overlong_source() {
+        let allocator = Allocator::default();
+        let source_type = SourceType::default();
+        let source = "var x = 123456;\n".repeat(256 * 1024 * 1024);
+        assert_eq!(source.len() - 1, u32::MAX as usize);
+        let ret = Parser::new(&allocator, &source, source_type).parse();
+        assert!(ret.program.is_empty());
+        assert!(ret.panicked);
+        assert_eq!(ret.errors.len(), 1);
+        assert_eq!(ret.errors.first().unwrap().to_string(), "Source length exceeds 4 GiB limit");
+    }
+
+    // Source with length u32::MAX parses OK.
+    // This test takes over 1 minute on an M1 Macbook Pro unless compiled in release mode.
+    // `not(debug_assertions)` is a proxy for detecting release mode.
+    // Also skipped on non-64-bit targets, where a string this long cannot be allocated.
+    #[cfg(not(debug_assertions))]
+    #[cfg(target_pointer_width = "64")]
+    #[test]
+    fn legal_length_source() {
+        let allocator = Allocator::default();
+        let source_type = SourceType::default();
+
+        // Build a string u32::MAX bytes long which doesn't take too long to parse
+        let head = "const x = 1;\n/*";
+        let foot = "*/\nconst y = 2;\n";
+        let mut source = "x".repeat(u32::MAX as usize);
+        source.replace_range(..head.len(), head);
+        source.replace_range(source.len() - foot.len().., foot);
+        assert_eq!(source.len(), u32::MAX as usize);
+
+        let ret = Parser::new(&allocator, &source, source_type).parse();
+        assert!(!ret.panicked);
+        assert!(ret.errors.is_empty());
+        assert_eq!(ret.program.body.len(), 2);
+    }
 }