feat(oxc)!: add SourceType::Unambiguous; parse .js as unambiguous (#5557)

See https://babel.dev/docs/options#misc-options for background on `unambiguous`

When a source is parsed with `SourceType::Unambiguous`, the parser resolves the returned `Program::source_type` to either `module` or `script`.
This commit is contained in:
Boshen 2024-09-07 10:48:57 +00:00
parent 08d2b7d761
commit 603817bef9
8 changed files with 80 additions and 12 deletions

View file

@ -548,7 +548,10 @@ impl<'a> ParserImpl<'a> {
) -> Result<Expression<'a>> {
self.bump_any(); // bump `.`
let property = match self.cur_kind() {
Kind::Meta => self.parse_keyword_identifier(Kind::Meta),
Kind::Meta => {
self.set_source_type_to_module_if_unambiguous();
self.parse_keyword_identifier(Kind::Meta)
}
Kind::Target => self.parse_keyword_identifier(Kind::Target),
_ => self.parse_identifier_name()?,
};

View file

@ -41,6 +41,11 @@ impl<'a> ParserImpl<'a> {
break;
}
let stmt = self.parse_statement_list_item(StatementContext::StatementList)?;
if is_top_level && stmt.is_module_declaration() {
self.set_source_type_to_module_if_unambiguous();
}
// Section 11.2.1 Directive Prologue
// The only way to get a correct directive is to parse the statement first and check if it is a string literal.
// All other methods are flawed; see the test cases in [babel](https://github.com/babel/babel/blob/main/packages/babel-parser/test/fixtures/core/categorized/not-directive/input.js)

View file

@ -350,6 +350,8 @@ impl<'a> ParserImpl<'a> {
let (directives, statements) =
self.parse_directives_and_statements(/* is_top_level */ true)?;
self.set_source_type_to_script_if_unambiguous();
let span = Span::new(0, self.source_text.len() as u32);
Ok(self.ast.program(span, self.source_type, hashbang, directives, statements))
}
@ -416,6 +418,18 @@ impl<'a> ParserImpl<'a> {
/// Returns whether the parser's current source type is TypeScript,
/// as reported by `SourceType::is_typescript`.
fn ts_enabled(&self) -> bool {
self.source_type.is_typescript()
}
/// Resolves an unambiguous source type to a module.
///
/// Leaves the source type untouched when it is not unambiguous, so an
/// explicitly chosen module kind is never overridden.
fn set_source_type_to_module_if_unambiguous(&mut self) {
    let current = self.source_type;
    if !current.is_unambiguous() {
        return;
    }
    self.source_type = current.with_module(true);
}
/// Resolves an unambiguous source type to a script.
///
/// Leaves the source type untouched when it is not unambiguous, so a source
/// type that was already resolved (or chosen explicitly) is kept as is.
fn set_source_type_to_script_if_unambiguous(&mut self) {
    let current = self.source_type;
    if !current.is_unambiguous() {
        return;
    }
    self.source_type = current.with_script(true);
}
}
#[cfg(test)]
@ -511,6 +525,24 @@ mod test {
}
}
#[test]
fn unambiguous() {
    // An unambiguous source type must be resolved by the parser to either
    // module or script, depending on the syntax found in the source.
    let allocator = Allocator::default();
    let source_type = SourceType::default().with_unambiguous(true);
    assert!(source_type.is_unambiguous());

    // `import` / `export` declarations and `import.meta` are ESM syntax.
    let sources = ["import x from 'foo';", "export {x} from 'foo';", "import.meta"];
    for source in sources {
        let ret = Parser::new(&allocator, source, source_type).parse();
        // Name the offending source so a failure inside the loop is identifiable.
        assert!(ret.program.source_type.is_module(), "expected module for {source:?}");
    }

    // An empty source and a dynamic `import()` expression contain no ESM syntax.
    let sources = ["", "import('foo')"];
    for source in sources {
        let ret = Parser::new(&allocator, source, source_type).parse();
        assert!(ret.program.source_type.is_script(), "expected script for {source:?}");
    }
}
#[test]
fn memory_leak() {
let allocator = Allocator::default();

View file

@ -404,6 +404,10 @@ pub fn check_module_declaration<'a>(
let start = decl.span().start;
let span = Span::new(start, start + 6);
match ctx.source_type.module_kind() {
ModuleKind::Unambiguous => {
#[cfg(debug_assertions)]
panic!("Technically unreachable, omit to avoid panic.");
}
ModuleKind::Script => {
ctx.error(module_code(text, span));
}

View file

@ -64,7 +64,7 @@ impl SourceType {
/// Creates the `SourceType` for plain JavaScript: `Language::JavaScript`
/// with the `LanguageVariant::Standard` variant.
pub const fn js() -> Self {
Self {
language: Language::JavaScript,
// NOTE(review): `module_kind` is listed twice below (`Script`, then
// `Unambiguous`). This looks like the removed and added lines of a diff
// shown without +/- markers; presumably `Unambiguous` is the intended
// value — confirm against the actual file.
module_kind: ModuleKind::Script,
module_kind: ModuleKind::Unambiguous,
variant: LanguageVariant::Standard,
}
}
@ -159,6 +159,10 @@ impl SourceType {
self.module_kind == ModuleKind::Module
}
/// Returns `true` when the module kind is `ModuleKind::Unambiguous`.
pub fn is_unambiguous(self) -> bool {
    matches!(self.module_kind, ModuleKind::Unambiguous)
}
/// Returns the `ModuleKind` stored in this source type.
pub fn module_kind(self) -> ModuleKind {
self.module_kind
}
@ -204,6 +208,14 @@ impl SourceType {
self
}
/// Returns this source type with its module kind set to
/// `ModuleKind::Unambiguous` when `yes` is `true`.
///
/// When `yes` is `false` the value is returned unchanged.
#[must_use]
pub const fn with_unambiguous(mut self, yes: bool) -> Self {
    if !yes {
        return self;
    }
    self.module_kind = ModuleKind::Unambiguous;
    self
}
#[must_use]
pub const fn with_typescript(mut self, yes: bool) -> Self {
if yes {
@ -290,7 +302,8 @@ impl SourceType {
})?;
let (language, module_kind) = match extension {
"js" | "mjs" | "jsx" => (Language::JavaScript, ModuleKind::Module),
"js" => (Language::JavaScript, ModuleKind::Unambiguous),
"mjs" | "jsx" => (Language::JavaScript, ModuleKind::Module),
"cjs" => (Language::JavaScript, ModuleKind::Script),
"ts" if file_name.ends_with(".d.ts") => {
(Language::TypeScriptDefinition, ModuleKind::Module)
@ -417,15 +430,15 @@ mod tests {
assert!(!ty.is_typescript(), "{ty:?}");
}
assert_eq!(SourceType::js().with_jsx(true).with_module(true), js);
assert_eq!(SourceType::js().with_jsx(true).with_unambiguous(true), js);
assert_eq!(SourceType::jsx().with_module(true), jsx);
assert!(js.is_module());
assert!(js.is_unambiguous());
assert!(mjs.is_module());
assert!(cjs.is_script());
assert!(jsx.is_module());
assert!(js.is_strict());
assert!(!js.is_strict());
assert!(mjs.is_strict());
assert!(!cjs.is_strict());
assert!(jsx.is_strict());

View file

@ -43,6 +43,14 @@ pub enum ModuleKind {
Script = 0,
/// ES6 Module
Module = 1,
/// Consider the file a "module" if ESM syntax is present, or else consider it a "script".
///
/// ESM syntax includes `import` statement, `export` statement and `import.meta`.
///
/// Note: Dynamic import expression is not ESM syntax.
///
/// See <https://babel.dev/docs/options#misc-options>
Unambiguous = 2,
}
/// JSX for JavaScript and TypeScript

View file

@ -1,2 +1,4 @@
'use strict';
let.a = 1;
let()[a] = 1;

View file

@ -245,15 +245,16 @@ Negative Passed: 17/17 (100.00%)
╰────
× The keyword 'let' is reserved
╭─[misc/fail/oxc.js:1:1]
1 │ let.a = 1;
╭─[misc/fail/oxc.js:3:1]
2 │
3 │ let.a = 1;
· ───
2 │ let()[a] = 1;
4 │ let()[a] = 1;
╰────
× The keyword 'let' is reserved
╭─[misc/fail/oxc.js:2:1]
1 │ let.a = 1;
2 │ let()[a] = 1;
╭─[misc/fail/oxc.js:4:1]
3 │ let.a = 1;
4 │ let()[a] = 1;
· ───
╰────