fix(regular_expression)!: Migrate to new regexp parser API (#6741)

Follow up #6635

- [x] Remove old APIs
- [x] Update linter usage
- [x] Update parser usage
- [x] Update transformer usage
This commit is contained in:
leaysgur 2024-10-22 05:34:18 +00:00
parent 54a50322db
commit 8032813bf8
14 changed files with 187 additions and 238 deletions

View file

@ -6,11 +6,11 @@ use oxc_macros::declare_oxc_lint;
use oxc_regular_expression::{ use oxc_regular_expression::{
ast::{CapturingGroup, Character, Pattern}, ast::{CapturingGroup, Character, Pattern},
visit::{walk, Visit}, visit::{walk, Visit},
Parser, ParserOptions, ConstructorParser, Options,
}; };
use oxc_span::{GetSpan, Span}; use oxc_span::Span;
use crate::{ast_util::extract_regex_flags, context::LintContext, rule::Rule, AstNode}; use crate::{context::LintContext, rule::Rule, AstNode};
fn no_control_regex_diagnostic(count: usize, regex: &str, span: Span) -> OxcDiagnostic { fn no_control_regex_diagnostic(count: usize, regex: &str, span: Span) -> OxcDiagnostic {
debug_assert!(count > 0); debug_assert!(count > 0);
@ -82,75 +82,63 @@ impl Rule for NoControlRegex {
} }
// new RegExp() // new RegExp()
AstKind::NewExpression(expr) => { AstKind::NewExpression(expr) if expr.callee.is_specific_id("RegExp") => {
// constructor is RegExp, // note: improvements required for strings used via identifier references
if expr.callee.is_specific_id("RegExp") // Missing or non-string arguments will be runtime errors, but are not covered by this rule.
// which is provided at least 1 parameter, match (&expr.arguments.first(), &expr.arguments.get(1)) {
&& expr.arguments.len() > 0 (
{ Some(Argument::StringLiteral(pattern)),
// where the first one is a string literal Some(Argument::StringLiteral(flags)),
// note: improvements required for strings used via identifier ) => {
// references parse_and_check_regex(context, pattern.span, Some(flags.span));
if let Argument::StringLiteral(pattern) = &expr.arguments[0] {
// get pattern from arguments. Missing or non-string arguments
// will be runtime errors, but are not covered by this rule.
parse_and_check_regex(
context,
&pattern.value,
&expr.arguments,
pattern.span,
);
} }
(Some(Argument::StringLiteral(pattern)), _) => {
parse_and_check_regex(context, pattern.span, None);
}
_ => {}
} }
} }
// RegExp() // RegExp()
AstKind::CallExpression(expr) => { AstKind::CallExpression(expr) if expr.callee.is_specific_id("RegExp") => {
// constructor is RegExp, // note: improvements required for strings used via identifier references
if expr.callee.is_specific_id("RegExp") // Missing or non-string arguments will be runtime errors, but are not covered by this rule.
// which is provided at least 1 parameter, match (&expr.arguments.first(), &expr.arguments.get(1)) {
&& expr.arguments.len() > 0 (
{ Some(Argument::StringLiteral(pattern)),
// where the first one is a string literal Some(Argument::StringLiteral(flags)),
// note: improvements required for strings used via identifier ) => {
// references parse_and_check_regex(context, pattern.span, Some(flags.span));
if let Argument::StringLiteral(pattern) = &expr.arguments[0] { }
// get pattern from arguments. Missing or non-string arguments (Some(Argument::StringLiteral(pattern)), _) => {
// will be runtime errors, but are not covered by this rule. parse_and_check_regex(context, pattern.span, None);
parse_and_check_regex( }
context, _ => {}
&pattern.value,
&expr.arguments,
pattern.span,
);
}
} }
} }
_ => {} _ => {}
}; };
} }
} }
fn parse_and_check_regex<'a>( fn parse_and_check_regex(ctx: &LintContext, pattern_span: Span, flags_span: Option<Span>) {
ctx: &LintContext<'a>,
source_text: &'a str,
arguments: &oxc_allocator::Vec<'a, Argument<'a>>,
expr_span: Span,
) {
let allocator = Allocator::default(); let allocator = Allocator::default();
let flags = extract_regex_flags(arguments);
let flags_text = flags.map_or(String::new(), |f| f.to_string()); let flags_text = flags_span.map(|span| span.source_text(ctx.source_text()));
let parser = Parser::new( let parser = ConstructorParser::new(
&allocator, &allocator,
source_text, pattern_span.source_text(ctx.source_text()),
ParserOptions::default() flags_text,
.with_span_offset(arguments.first().map_or(0, |arg| arg.span().start)) Options {
.with_flags(&flags_text), pattern_span_offset: pattern_span.start,
flags_span_offset: flags_span.map_or(0, |span| span.start),
},
); );
let Ok(pattern) = parser.parse() else { let Ok(pattern) = parser.parse() else {
return; return;
}; };
check_pattern(ctx, &pattern, expr_span); check_pattern(ctx, &pattern, pattern_span);
} }
fn check_pattern(context: &LintContext, pattern: &Pattern, span: Span) { fn check_pattern(context: &LintContext, pattern: &Pattern, span: Span) {
@ -279,7 +267,6 @@ mod tests {
vec![ vec![
r"let r = /\u{0}/u", r"let r = /\u{0}/u",
r"let r = new RegExp('\\u{0}', 'u');", r"let r = new RegExp('\\u{0}', 'u');",
r"let r = new RegExp('\\u{0}', `u`);",
r"let r = /\u{c}/u", r"let r = /\u{c}/u",
r"let r = /\u{1F}/u", r"let r = /\u{1F}/u",
r"let r = new RegExp('\\u{1F}', 'u');", // flags are known & contain u r"let r = new RegExp('\\u{1F}', 'u');", // flags are known & contain u

View file

@ -2,7 +2,7 @@ use oxc_allocator::Allocator;
use oxc_ast::{ast::Argument, AstKind}; use oxc_ast::{ast::Argument, AstKind};
use oxc_diagnostics::OxcDiagnostic; use oxc_diagnostics::OxcDiagnostic;
use oxc_macros::declare_oxc_lint; use oxc_macros::declare_oxc_lint;
use oxc_regular_expression::{Parser, ParserOptions}; use oxc_regular_expression::{ConstructorParser, Options};
use oxc_span::Span; use oxc_span::Span;
use rustc_hash::FxHashSet; use rustc_hash::FxHashSet;
use serde::Deserialize; use serde::Deserialize;
@ -86,13 +86,20 @@ impl Rule for NoInvalidRegexp {
return; return;
} }
// Validate flags first if exists
if let Some((flags_span_start, flags_text)) = flags_arg {
let (mut u_flag_found, mut v_flag_found) = (false, false); let (mut u_flag_found, mut v_flag_found) = (false, false);
// Validate flags first, if they exist.
// The `oxc_regular_expression` crate is able to validate flags itself,
// but it does not accept an `allow_constructor_flags` option.
// Also, if we omitted user-defined flags here, the `Span` used for error reporting could be incorrect.
if let Some(flags_span) = flags_arg {
// Strip quotes
let flags_text =
flags_span.source_text(ctx.source_text()).trim_matches('\'').trim_matches('"');
let mut unique_flags = FxHashSet::default(); let mut unique_flags = FxHashSet::default();
for (idx, ch) in flags_text.char_indices() { for (idx, ch) in flags_text.char_indices() {
#[allow(clippy::cast_possible_truncation)] #[allow(clippy::cast_possible_truncation)]
let start = flags_span_start + idx as u32; let start = flags_span.start + 1 + idx as u32;
// Invalid combination: u+v // Invalid combination: u+v
if ch == 'u' { if ch == 'u' {
@ -128,12 +135,23 @@ impl Rule for NoInvalidRegexp {
// Pattern check is skipped when 1st argument is NOT a `StringLiteral` // Pattern check is skipped when 1st argument is NOT a `StringLiteral`
// e.g. `new RegExp(var)`, `RegExp("str" + var)` // e.g. `new RegExp(var)`, `RegExp("str" + var)`
let allocator = Allocator::default(); let allocator = Allocator::default();
if let Some((pattern_span_start, pattern_text)) = pattern_arg { if let Some(pattern_span) = pattern_arg {
let options = ParserOptions::default() let pattern_text = pattern_span.source_text(ctx.source_text());
.with_span_offset(pattern_span_start)
.with_flags(flags_arg.map_or("", |(_, flags_text)| flags_text));
match Parser::new(&allocator, pattern_text, options).parse() { let flags_text = match (u_flag_found, v_flag_found) {
(true, false) => Some("'u'"),
(_, true) => Some("'v'"),
(false, false) => None,
};
match ConstructorParser::new(
&allocator,
pattern_text,
flags_text,
Options { pattern_span_offset: pattern_span.start, flags_span_offset: 0 },
)
.parse()
{
Ok(_) => {} Ok(_) => {}
Err(diagnostic) => ctx.diagnostic(diagnostic), Err(diagnostic) => ctx.diagnostic(diagnostic),
} }
@ -141,27 +159,19 @@ impl Rule for NoInvalidRegexp {
} }
} }
/// Returns: (span_start, text)
/// span_start + 1 for opening string bracket.
type ParsedArgument<'a> = (u32, &'a str);
fn parse_arguments_to_check<'a>( fn parse_arguments_to_check<'a>(
arg1: Option<&Argument<'a>>, arg1: Option<&Argument<'a>>,
arg2: Option<&Argument<'a>>, arg2: Option<&Argument<'a>>,
) -> (Option<ParsedArgument<'a>>, Option<ParsedArgument<'a>>) { ) -> (Option<Span>, Option<Span>) {
match (arg1, arg2) { match (arg1, arg2) {
// ("pattern", "flags") // ("pattern", "flags")
(Some(Argument::StringLiteral(pattern)), Some(Argument::StringLiteral(flags))) => ( (Some(Argument::StringLiteral(pattern)), Some(Argument::StringLiteral(flags))) => {
Some((pattern.span.start + 1, pattern.value.as_str())), (Some(pattern.span), Some(flags.span))
Some((flags.span.start + 1, flags.value.as_str())), }
),
// (pattern, "flags") // (pattern, "flags")
(Some(_arg), Some(Argument::StringLiteral(flags))) => { (Some(_arg), Some(Argument::StringLiteral(flags))) => (None, Some(flags.span)),
(None, Some((flags.span.start + 1, flags.value.as_str())))
}
// ("pattern") // ("pattern")
(Some(Argument::StringLiteral(pattern)), None) => { (Some(Argument::StringLiteral(pattern)), None) => (Some(pattern.span), None),
(Some((pattern.span.start + 1, pattern.value.as_str())), None)
}
// (pattern), () // (pattern), ()
_ => (None, None), _ => (None, None),
} }
@ -172,7 +182,7 @@ fn test() {
use crate::tester::Tester; use crate::tester::Tester;
let pass = vec![ let pass = vec![
("[RegExp(''), /a/uv]", None), ("RegExp('')", None),
("RegExp()", None), ("RegExp()", None),
("RegExp('.', 'g')", None), ("RegExp('.', 'g')", None),
("new RegExp('.')", None), ("new RegExp('.')", None),

View file

@ -10,7 +10,7 @@ use oxc_macros::declare_oxc_lint;
use oxc_regular_expression::{ use oxc_regular_expression::{
ast::{Character, Pattern}, ast::{Character, Pattern},
visit::{RegExpAstKind, Visit}, visit::{RegExpAstKind, Visit},
Parser, ParserOptions, ConstructorParser, Options,
}; };
use oxc_span::Span; use oxc_span::Span;
@ -63,13 +63,13 @@ impl Rule for NoRegexSpaces {
} }
AstKind::CallExpression(expr) if Self::is_regexp_call_expression(expr) => { AstKind::CallExpression(expr) if Self::is_regexp_call_expression(expr) => {
if let Some(span) = Self::find_expr_to_report(&expr.arguments) { if let Some(span) = Self::find_expr_to_report(&expr.arguments, ctx) {
ctx.diagnostic(no_regex_spaces_diagnostic(span)); // RegExp('a b') ctx.diagnostic(no_regex_spaces_diagnostic(span)); // RegExp('a b')
} }
} }
AstKind::NewExpression(expr) if Self::is_regexp_new_expression(expr) => { AstKind::NewExpression(expr) if Self::is_regexp_new_expression(expr) => {
if let Some(span) = Self::find_expr_to_report(&expr.arguments) { if let Some(span) = Self::find_expr_to_report(&expr.arguments, ctx) {
ctx.diagnostic(no_regex_spaces_diagnostic(span)); // new RegExp('a b') ctx.diagnostic(no_regex_spaces_diagnostic(span)); // new RegExp('a b')
} }
} }
@ -90,7 +90,7 @@ impl NoRegexSpaces {
find_consecutive_spaces(pattern) find_consecutive_spaces(pattern)
} }
fn find_expr_to_report(args: &Vec<'_, Argument<'_>>) -> Option<Span> { fn find_expr_to_report(args: &Vec<'_, Argument<'_>>, ctx: &LintContext) -> Option<Span> {
if let Some(expr) = args.get(1).and_then(Argument::as_expression) { if let Some(expr) = args.get(1).and_then(Argument::as_expression) {
if !expr.is_string_literal() { if !expr.is_string_literal() {
return None; // skip on indeterminate flag, e.g. RegExp('a b', flags) return None; // skip on indeterminate flag, e.g. RegExp('a b', flags)
@ -105,10 +105,11 @@ impl NoRegexSpaces {
} }
let alloc = Allocator::default(); let alloc = Allocator::default();
let parser = Parser::new( let parser = ConstructorParser::new(
&alloc, &alloc,
pattern.value.as_str(), pattern.span.source_text(ctx.source_text()),
ParserOptions::default().with_span_offset(pattern.span.start + 1), None,
Options { pattern_span_offset: pattern.span.start, ..Options::default() },
); );
let parsed_pattern = parser.parse().ok()?; let parsed_pattern = parser.parse().ok()?;

View file

@ -104,43 +104,43 @@ source: crates/oxc_linter/src/tester.rs
╰──── ╰────
⚠ eslint(no-invalid-regexp): Invalid regular expression: Could not parse the entire pattern ⚠ eslint(no-invalid-regexp): Invalid regular expression: Could not parse the entire pattern
╭─[no_invalid_regexp.tsx:1:14] ╭─[no_invalid_regexp.tsx:1:15]
1 │ new RegExp('\\a', 'u'); 1 │ new RegExp('\\a', 'u');
· ▲ · ▲
╰──── ╰────
⚠ eslint(no-invalid-regexp): Invalid regular expression: Could not parse the entire pattern ⚠ eslint(no-invalid-regexp): Invalid regular expression: Could not parse the entire pattern
╭─[no_invalid_regexp.tsx:1:14] ╭─[no_invalid_regexp.tsx:1:15]
1 │ new RegExp('\\a', 'u'); 1 │ new RegExp('\\a', 'u');
· ▲ · ▲
╰──── ╰────
⚠ eslint(no-invalid-regexp): Invalid regular expression: Invalid braced quantifier ⚠ eslint(no-invalid-regexp): Invalid regular expression: Invalid braced quantifier
╭─[no_invalid_regexp.tsx:1:14] ╭─[no_invalid_regexp.tsx:1:15]
1 │ RegExp('\\u{0}*'); 1 │ RegExp('\\u{0}*');
· ─ · ─
╰──── ╰────
⚠ eslint(no-invalid-regexp): Invalid regular expression: Invalid braced quantifier ⚠ eslint(no-invalid-regexp): Invalid regular expression: Invalid braced quantifier
╭─[no_invalid_regexp.tsx:1:18] ╭─[no_invalid_regexp.tsx:1:19]
1 │ new RegExp('\\u{0}*'); 1 │ new RegExp('\\u{0}*');
· ─ · ─
╰──── ╰────
⚠ eslint(no-invalid-regexp): Invalid regular expression: Invalid braced quantifier ⚠ eslint(no-invalid-regexp): Invalid regular expression: Invalid braced quantifier
╭─[no_invalid_regexp.tsx:1:18] ╭─[no_invalid_regexp.tsx:1:19]
1 │ new RegExp('\\u{0}*', ''); 1 │ new RegExp('\\u{0}*', '');
· ─ · ─
╰──── ╰────
⚠ eslint(no-invalid-regexp): Invalid regular expression: Invalid braced quantifier ⚠ eslint(no-invalid-regexp): Invalid regular expression: Invalid braced quantifier
╭─[no_invalid_regexp.tsx:1:18] ╭─[no_invalid_regexp.tsx:1:19]
1 │ new RegExp('\\u{0}*', 'a'); 1 │ new RegExp('\\u{0}*', 'a');
· ─ · ─
╰──── ╰────
⚠ eslint(no-invalid-regexp): Invalid regular expression: Invalid braced quantifier ⚠ eslint(no-invalid-regexp): Invalid regular expression: Invalid braced quantifier
╭─[no_invalid_regexp.tsx:1:14] ╭─[no_invalid_regexp.tsx:1:15]
1 │ RegExp('\\u{0}*'); 1 │ RegExp('\\u{0}*');
· ─ · ─
╰──── ╰────
@ -148,7 +148,7 @@ source: crates/oxc_linter/src/tester.rs
⚠ eslint(no-invalid-regexp): Invalid regular expression: Invalid extended atom escape ⚠ eslint(no-invalid-regexp): Invalid regular expression: Invalid extended atom escape
╭─[no_invalid_regexp.tsx:1:13] ╭─[no_invalid_regexp.tsx:1:13]
1 │ new RegExp('\\'); 1 │ new RegExp('\\');
· ─ · ─
╰──── ╰────
⚠ eslint(no-invalid-regexp): Invalid regular expression: Unknown flag ⚠ eslint(no-invalid-regexp): Invalid regular expression: Unknown flag
@ -196,7 +196,7 @@ source: crates/oxc_linter/src/tester.rs
⚠ eslint(no-invalid-regexp): Invalid regular expression: Unterminated character class ⚠ eslint(no-invalid-regexp): Invalid regular expression: Unterminated character class
╭─[no_invalid_regexp.tsx:1:13] ╭─[no_invalid_regexp.tsx:1:13]
1 │ new RegExp('[[]\\u{0}*' /* valid only with `u` flag */, 'v') 1 │ new RegExp('[[]\\u{0}*' /* valid only with `u` flag */, 'v')
· ──────── · ────────
╰──── ╰────
⚠ eslint(no-invalid-regexp): Invalid regular expression: Duplicated capturing group names ⚠ eslint(no-invalid-regexp): Invalid regular expression: Duplicated capturing group names

View file

@ -128,14 +128,14 @@ source: crates/oxc_linter/src/tester.rs
help: Use a quantifier: ` {2}` help: Use a quantifier: ` {2}`
⚠ eslint(no-regex-spaces): Multiple consecutive spaces are hard to count. ⚠ eslint(no-regex-spaces): Multiple consecutive spaces are hard to count.
╭─[no_regex_spaces.tsx:1:25] ╭─[no_regex_spaces.tsx:1:26]
1 │ var foo = new RegExp('\\d ') 1 │ var foo = new RegExp('\\d ')
· ── · ──
╰──── ╰────
help: Use a quantifier: ` {2}` help: Use a quantifier: ` {2}`
⚠ eslint(no-regex-spaces): Multiple consecutive spaces are hard to count. ⚠ eslint(no-regex-spaces): Multiple consecutive spaces are hard to count.
╭─[no_regex_spaces.tsx:1:25] ╭─[no_regex_spaces.tsx:1:26]
1 │ var foo = RegExp('\\u0041 ') 1 │ var foo = RegExp('\\u0041 ')
· ─── · ───
╰──── ╰────

View file

@ -4,7 +4,7 @@ use std::{env, fs, path::Path, sync::Arc};
use oxc_allocator::Allocator; use oxc_allocator::Allocator;
use oxc_ast::{ast, AstKind, Visit}; use oxc_ast::{ast, AstKind, Visit};
use oxc_parser::{ParseOptions, Parser}; use oxc_parser::{ParseOptions, Parser};
use oxc_regular_expression::{Parser as RegExpParser, ParserOptions as RegExpParserOptions}; use oxc_regular_expression::{ConstructorParser as RegExpParser, Options as RegExpParserOptions};
use oxc_span::SourceType; use oxc_span::SourceType;
// `cargo run -p oxc_parser --example regular_expression` // `cargo run -p oxc_parser --example regular_expression`
@ -62,32 +62,24 @@ impl<'a> Visit<'a> for RegularExpressionVisitor {
{ {
println!("🍀 {}", new_expr.span.source_text(&self.source_text)); println!("🍀 {}", new_expr.span.source_text(&self.source_text));
let (pattern, pattern_span) = match new_expr.arguments.first() { let pattern_span = match new_expr.arguments.first() {
Some(ast::Argument::StringLiteral(sl)) => (&sl.value, &sl.span), Some(ast::Argument::StringLiteral(sl)) => sl.span,
Some(ast::Argument::TemplateLiteral(tl))
if tl.is_no_substitution_template() =>
{
(&tl.quasi().unwrap(), &tl.span)
}
_ => return, _ => return,
}; };
let flags = match new_expr.arguments.get(1) { let flags_span = match new_expr.arguments.get(1) {
Some(ast::Argument::StringLiteral(sl)) => &sl.value, Some(ast::Argument::StringLiteral(sl)) => Some(sl.span),
Some(ast::Argument::TemplateLiteral(tl)) _ => None,
if tl.is_no_substitution_template() =>
{
&tl.quasi().unwrap()
}
_ => "",
}; };
let parsed = RegExpParser::new( let parsed = RegExpParser::new(
&allocator, &allocator,
pattern, pattern_span.source_text(&self.source_text),
RegExpParserOptions::default() flags_span.map(|span| span.source_text(&self.source_text)),
.with_span_offset(pattern_span.start + 1) RegExpParserOptions {
.with_flags(flags), pattern_span_offset: pattern_span.start,
flags_span_offset: flags_span.map_or(0, |span| span.start),
},
) )
.parse(); .parse();

View file

@ -223,10 +223,10 @@ impl<'a> ParserImpl<'a> {
} }
/// Tell lexer to read a regex /// Tell lexer to read a regex
pub(crate) fn read_regex(&mut self) -> Result<(u32, RegExpFlags)> { pub(crate) fn read_regex(&mut self) -> Result<(u32, RegExpFlags, bool)> {
let (token, pattern_end, flags) = self.lexer.next_regex(self.cur_kind())?; let (token, pattern_end, flags, flags_error) = self.lexer.next_regex(self.cur_kind())?;
self.token = token; self.token = token;
Ok((pattern_end, flags)) Ok((pattern_end, flags, flags_error))
} }
/// Tell lexer to read a template substitution tail /// Tell lexer to read a template substitution tail

View file

@ -345,17 +345,18 @@ impl<'a> ParserImpl<'a> {
pub(crate) fn parse_literal_regexp(&mut self) -> Result<RegExpLiteral<'a>> { pub(crate) fn parse_literal_regexp(&mut self) -> Result<RegExpLiteral<'a>> {
let span = self.start_span(); let span = self.start_span();
// split out pattern // split out pattern
let (pattern_end, flags) = self.read_regex()?; let (pattern_end, flags, flags_error) = self.read_regex()?;
let pattern_start = self.cur_token().start + 1; // +1 to exclude left `/` let pattern_start = self.cur_token().start + 1; // +1 to exclude left `/`
let pattern_text = &self.source_text[pattern_start as usize..pattern_end as usize]; let pattern_text = &self.source_text[pattern_start as usize..pattern_end as usize];
let flags_start = pattern_end + 1; // +1 to include right `/` let flags_start = pattern_end + 1; // +1 to include right `/`
let flags_text = &self.source_text[flags_start as usize..self.cur_token().end as usize]; let flags_text = &self.source_text[flags_start as usize..self.cur_token().end as usize];
self.bump_any(); self.bump_any();
let pattern = self // Parse pattern if options is enabled and also flags are valid
.options let pattern = (self.options.parse_regular_expression && !flags_error)
.parse_regular_expression
.then_some(()) .then_some(())
.map(|()| self.parse_regex_pattern(pattern_start, pattern_text, flags_text)) .map(|()| {
self.parse_regex_pattern(pattern_start, pattern_text, flags_start, flags_text)
})
.map_or_else( .map_or_else(
|| RegExpPattern::Raw(pattern_text), || RegExpPattern::Raw(pattern_text),
|pat| { |pat| {
@ -367,13 +368,20 @@ impl<'a> ParserImpl<'a> {
fn parse_regex_pattern( fn parse_regex_pattern(
&mut self, &mut self,
span_offset: u32, pattern_span_offset: u32,
pattern: &'a str, pattern: &'a str,
flags_span_offset: u32,
flags: &'a str, flags: &'a str,
) -> Option<Box<'a, Pattern<'a>>> { ) -> Option<Box<'a, Pattern<'a>>> {
use oxc_regular_expression::{Parser, ParserOptions}; use oxc_regular_expression::{LiteralParser, Options};
let options = ParserOptions::default().with_span_offset(span_offset).with_flags(flags); match LiteralParser::new(
match Parser::new(self.ast.allocator, pattern, options).parse() { self.ast.allocator,
pattern,
Some(flags),
Options { pattern_span_offset, flags_span_offset },
)
.parse()
{
Ok(regular_expression) => Some(self.ast.alloc(regular_expression)), Ok(regular_expression) => Some(self.ast.alloc(regular_expression)),
Err(diagnostic) => { Err(diagnostic) => {
self.error(diagnostic); self.error(diagnostic);

View file

@ -11,21 +11,21 @@ impl<'a> Lexer<'a> {
/// where a `RegularExpressionLiteral` is permitted /// where a `RegularExpressionLiteral` is permitted
/// Which means the parser needs to re-tokenize on `PrimaryExpression`, /// Which means the parser needs to re-tokenize on `PrimaryExpression`,
/// `RegularExpressionLiteral` only appear on the right hand side of `PrimaryExpression` /// `RegularExpressionLiteral` only appear on the right hand side of `PrimaryExpression`
pub(crate) fn next_regex(&mut self, kind: Kind) -> Result<(Token, u32, RegExpFlags)> { pub(crate) fn next_regex(&mut self, kind: Kind) -> Result<(Token, u32, RegExpFlags, bool)> {
self.token.start = self.offset() self.token.start = self.offset()
- match kind { - match kind {
Kind::Slash => 1, Kind::Slash => 1,
Kind::SlashEq => 2, Kind::SlashEq => 2,
_ => unreachable!(), _ => unreachable!(),
}; };
let (pattern_end, flags) = self.read_regex()?; let (pattern_end, flags, flags_error) = self.read_regex()?;
self.lookahead.clear(); self.lookahead.clear();
let token = self.finish_next(Kind::RegExp); let token = self.finish_next(Kind::RegExp);
Ok((token, pattern_end, flags)) Ok((token, pattern_end, flags, flags_error))
} }
/// 12.9.5 Regular Expression Literals /// 12.9.5 Regular Expression Literals
fn read_regex(&mut self) -> Result<(u32, RegExpFlags)> { fn read_regex(&mut self) -> Result<(u32, RegExpFlags, bool)> {
let mut in_escape = false; let mut in_escape = false;
let mut in_character_class = false; let mut in_character_class = false;
loop { loop {
@ -55,6 +55,8 @@ impl<'a> Lexer<'a> {
let pattern_end = self.offset() - 1; // -1 to exclude `/` let pattern_end = self.offset() - 1; // -1 to exclude `/`
let mut flags = RegExpFlags::empty(); let mut flags = RegExpFlags::empty();
// Tracks invalid flags so that the parser can skip `oxc_regular_expression` parsing when the flags are invalid
let mut flags_error = false;
while let Some(b @ (b'$' | b'_' | b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9')) = while let Some(b @ (b'$' | b'_' | b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9')) =
self.peek_byte() self.peek_byte()
@ -65,6 +67,7 @@ impl<'a> Lexer<'a> {
b as char, b as char,
self.current_offset().expand_left(1), self.current_offset().expand_left(1),
)); ));
flags_error = true;
continue; continue;
}; };
if flags.contains(flag) { if flags.contains(flag) {
@ -72,11 +75,12 @@ impl<'a> Lexer<'a> {
b as char, b as char,
self.current_offset().expand_left(1), self.current_offset().expand_left(1),
)); ));
flags_error = true;
continue; continue;
} }
flags |= flag; flags |= flag;
} }
Ok((pattern_end, flags)) Ok((pattern_end, flags, flags_error))
} }
} }

View file

@ -20,87 +20,3 @@ pub use crate::{
options::Options, options::Options,
parser::{ConstructorParser, LiteralParser}, parser::{ConstructorParser, LiteralParser},
}; };
// LEGACY APIS TO BE REMOVED SOON! ============================================
/// Options accepted by the legacy `Parser`.
///
/// Every `with_*` method consumes the options and returns the updated copy,
/// so calls can be chained starting from `ParserOptions::default()`.
#[derive(Clone, Copy, Debug, Default)]
pub struct ParserOptions {
    /// Offset stored by [`ParserOptions::with_span_offset`].
    pub span_offset: u32,
    /// Set when the flags contain `u` or `v`.
    pub unicode_mode: bool,
    /// Set when the flags contain `v`.
    pub unicode_sets_mode: bool,
    /// Set by [`ParserOptions::with_parse_string_literal`].
    pub parse_string_literal: bool,
}

impl ParserOptions {
    /// Returns the options with `span_offset` replaced.
    #[must_use]
    pub fn with_span_offset(mut self, span_offset: u32) -> Self {
        self.span_offset = span_offset;
        self
    }

    /// Returns the options with both unicode modes recomputed from `flags`.
    ///
    /// Only the `u` and `v` characters are looked at; any previous values of
    /// the two mode fields are discarded.
    #[must_use]
    pub fn with_flags(mut self, flags: &str) -> Self {
        let has_v = flags.contains('v');
        // `v` implies unicode mode as well.
        self.unicode_mode = has_v || flags.contains('u');
        self.unicode_sets_mode = has_v;
        self
    }

    /// Returns the options with `parse_string_literal` switched on.
    #[must_use]
    pub fn with_parse_string_literal(mut self) -> Self {
        self.parse_string_literal = true;
        self
    }
}
/// Legacy parser entry point that forwards to `ConstructorParser` or `LiteralParser`.
pub struct Parser<'a> {
    allocator: &'a oxc_allocator::Allocator,
    source_text: &'a str,
    options: ParserOptions,
}

impl<'a> Parser<'a> {
    /// Stores the allocator, source text and options for a later [`Parser::parse`] call.
    pub fn new(
        allocator: &'a oxc_allocator::Allocator,
        source_text: &'a str,
        options: ParserOptions,
    ) -> Self {
        Self { allocator, source_text, options }
    }

    /// Parses the stored source text.
    ///
    /// When `parse_string_literal` is set the work is delegated to
    /// `ConstructorParser` with quoted flags text; otherwise `LiteralParser`
    /// is used with bare flags text.
    pub fn parse(self) -> oxc_diagnostics::Result<crate::ast::Pattern<'a>> {
        let Self { allocator, source_text, options: legacy } = self;

        let options = Options {
            pattern_span_offset: legacy.span_offset,
            flags_span_offset: 0, // Never be used
        };

        // `v` takes precedence over `u`; the `u` + `v` combination is not validated here.
        if legacy.parse_string_literal {
            let flags_text = if legacy.unicode_sets_mode {
                Some("'v'")
            } else if legacy.unicode_mode {
                Some("'u'")
            } else {
                None
            };
            ConstructorParser::new(allocator, source_text, flags_text, options).parse()
        } else {
            let flags_text = if legacy.unicode_sets_mode {
                Some("v")
            } else if legacy.unicode_mode {
                Some("u")
            } else {
                None
            };
            LiteralParser::new(allocator, source_text, flags_text, options).parse()
        }
    }
}

View file

@ -132,11 +132,23 @@ impl<'a, 'ctx> Traverse<'a> for RegExp<'a, 'ctx> {
return; return;
} }
let span = regexp.span; let literal_span = regexp.span;
let pattern = match &mut regexp.regex.pattern { let pattern = match &mut regexp.regex.pattern {
RegExpPattern::Raw(raw) => { RegExpPattern::Raw(raw) => {
#[expect(clippy::cast_possible_truncation)]
let pattern_len = raw.len() as u32;
let pattern_span_start = literal_span.start + 1; // +1 to skip the opening `/`
let flags_span_start = pattern_span_start + pattern_len + 1; // +1 to skip the closing `/`
let flags_text = Span::new(flags_span_start, literal_span.end)
.source_text(self.ctx.source_text);
// Try to parse pattern // Try to parse pattern
match try_parse_pattern(raw, span, flags, ctx) { match try_parse_pattern(
raw,
pattern_span_start,
flags_text,
flags_span_start,
ctx,
) {
Ok(pattern) => { Ok(pattern) => {
regexp.regex.pattern = RegExpPattern::Pattern(ctx.alloc(pattern)); regexp.regex.pattern = RegExpPattern::Pattern(ctx.alloc(pattern));
let RegExpPattern::Pattern(pattern) = &regexp.regex.pattern else { let RegExpPattern::Pattern(pattern) = &regexp.regex.pattern else {
@ -238,14 +250,13 @@ fn character_class_has_unicode_property_escape(character_class: &CharacterClass)
fn try_parse_pattern<'a>( fn try_parse_pattern<'a>(
raw: &'a str, raw: &'a str,
span: Span, pattern_span_offset: u32,
flags: RegExpFlags, flags_text: &'a str,
flags_span_offset: u32,
ctx: &mut TraverseCtx<'a>, ctx: &mut TraverseCtx<'a>,
) -> Result<Pattern<'a>> { ) -> Result<Pattern<'a>> {
use oxc_regular_expression::{Parser, ParserOptions}; use oxc_regular_expression::{LiteralParser, Options};
let options = ParserOptions::default() let options = Options { pattern_span_offset, flags_span_offset };
.with_span_offset(span.start + 1) // exclude `/` LiteralParser::new(ctx.ast.allocator, raw, Some(flags_text), options).parse()
.with_flags(&flags.to_string());
Parser::new(ctx.ast.allocator, raw, options).parse()
} }

View file

@ -6919,12 +6919,24 @@ Expect to Parse: tasks/coverage/babel/packages/babel-parser/test/fixtures/typesc
· ──────────────── · ────────────────
╰──── ╰────
× Invalid regular expression: Invalid unicode flags combination `u` and `v`
╭─[babel/packages/babel-parser/test/fixtures/es2024/regexp-unicode-sets/uv-error/input.js:1:6]
1 │ /a/ugv;
· ─
╰────
× The 'u' and 'v' regular expression flags cannot be enabled at the same time × The 'u' and 'v' regular expression flags cannot be enabled at the same time
╭─[babel/packages/babel-parser/test/fixtures/es2024/regexp-unicode-sets/uv-error/input.js:1:1] ╭─[babel/packages/babel-parser/test/fixtures/es2024/regexp-unicode-sets/uv-error/input.js:1:1]
1 │ /a/ugv; 1 │ /a/ugv;
· ────── · ──────
╰──── ╰────
× Invalid regular expression: Invalid unicode flags combination `u` and `v`
╭─[babel/packages/babel-parser/test/fixtures/es2024/regexp-unicode-sets/vu-error/input.js:1:5]
1 │ /a/vu;
· ─
╰────
× The 'u' and 'v' regular expression flags cannot be enabled at the same time × The 'u' and 'v' regular expression flags cannot be enabled at the same time
╭─[babel/packages/babel-parser/test/fixtures/es2024/regexp-unicode-sets/vu-error/input.js:1:1] ╭─[babel/packages/babel-parser/test/fixtures/es2024/regexp-unicode-sets/vu-error/input.js:1:1]
1 │ /a/vu; 1 │ /a/vu;

View file

@ -1404,6 +1404,13 @@ Expect Syntax Error: tasks/coverage/test262/test/language/import/import-attribut
· ── · ──
╰──── ╰────
× Invalid regular expression: Invalid unicode flags combination `u` and `v`
╭─[test262/test/built-ins/RegExp/prototype/unicodeSets/uv-flags.js:18:5]
17 │
18 │ /./uv;
· ─
╰────
× The 'u' and 'v' regular expression flags cannot be enabled at the same time × The 'u' and 'v' regular expression flags cannot be enabled at the same time
╭─[test262/test/built-ins/RegExp/prototype/unicodeSets/uv-flags.js:18:1] ╭─[test262/test/built-ins/RegExp/prototype/unicodeSets/uv-flags.js:18:1]
17 │ 17 │

View file

@ -9,7 +9,7 @@ use oxc::{
diagnostics::OxcDiagnostic, diagnostics::OxcDiagnostic,
minifier::CompressOptions, minifier::CompressOptions,
parser::{ParseOptions, ParserReturn}, parser::{ParseOptions, ParserReturn},
regular_expression::{Parser, ParserOptions}, regular_expression::{LiteralParser, Options},
semantic::{Semantic, SemanticBuilderReturn}, semantic::{Semantic, SemanticBuilderReturn},
span::{cmp::ContentEq, SourceType, Span}, span::{cmp::ContentEq, SourceType, Span},
transformer::{TransformOptions, TransformerReturn}, transformer::{TransformOptions, TransformerReturn},
@ -166,8 +166,9 @@ impl Driver {
}; };
let printed1 = pattern.to_string(); let printed1 = pattern.to_string();
let flags = literal.regex.flags.to_string(); let flags = literal.regex.flags.to_string();
let options = ParserOptions::default().with_flags(&flags); match LiteralParser::new(&allocator, &printed1, Some(&flags), Options::default())
match Parser::new(&allocator, &printed1, options).parse() { .parse()
{
Ok(pattern2) => { Ok(pattern2) => {
let printed2 = pattern2.to_string(); let printed2 = pattern2.to_string();
if !pattern2.content_eq(pattern) { if !pattern2.content_eq(pattern) {