mirror of
https://github.com/danbulant/oxc
synced 2026-05-19 04:08:41 +00:00
fix(regular_expression)!: Migrate to new regexp parser API (#6741)
Follow-up to #6635.
- [x] Remove old APIs
- [x] Update linter usage
- [x] Update parser usage
- [x] Update transformer usage
This commit is contained in:
parent
54a50322db
commit
8032813bf8
14 changed files with 187 additions and 238 deletions
|
|
@ -6,11 +6,11 @@ use oxc_macros::declare_oxc_lint;
|
|||
use oxc_regular_expression::{
|
||||
ast::{CapturingGroup, Character, Pattern},
|
||||
visit::{walk, Visit},
|
||||
Parser, ParserOptions,
|
||||
ConstructorParser, Options,
|
||||
};
|
||||
use oxc_span::{GetSpan, Span};
|
||||
use oxc_span::Span;
|
||||
|
||||
use crate::{ast_util::extract_regex_flags, context::LintContext, rule::Rule, AstNode};
|
||||
use crate::{context::LintContext, rule::Rule, AstNode};
|
||||
|
||||
fn no_control_regex_diagnostic(count: usize, regex: &str, span: Span) -> OxcDiagnostic {
|
||||
debug_assert!(count > 0);
|
||||
|
|
@ -82,75 +82,63 @@ impl Rule for NoControlRegex {
|
|||
}
|
||||
|
||||
// new RegExp()
|
||||
AstKind::NewExpression(expr) => {
|
||||
// constructor is RegExp,
|
||||
if expr.callee.is_specific_id("RegExp")
|
||||
// which is provided at least 1 parameter,
|
||||
&& expr.arguments.len() > 0
|
||||
{
|
||||
// where the first one is a string literal
|
||||
// note: improvements required for strings used via identifier
|
||||
// references
|
||||
if let Argument::StringLiteral(pattern) = &expr.arguments[0] {
|
||||
// get pattern from arguments. Missing or non-string arguments
|
||||
// will be runtime errors, but are not covered by this rule.
|
||||
parse_and_check_regex(
|
||||
context,
|
||||
&pattern.value,
|
||||
&expr.arguments,
|
||||
pattern.span,
|
||||
);
|
||||
AstKind::NewExpression(expr) if expr.callee.is_specific_id("RegExp") => {
|
||||
// note: improvements required for strings used via identifier references
|
||||
// Missing or non-string arguments will be runtime errors, but are not covered by this rule.
|
||||
match (&expr.arguments.first(), &expr.arguments.get(1)) {
|
||||
(
|
||||
Some(Argument::StringLiteral(pattern)),
|
||||
Some(Argument::StringLiteral(flags)),
|
||||
) => {
|
||||
parse_and_check_regex(context, pattern.span, Some(flags.span));
|
||||
}
|
||||
(Some(Argument::StringLiteral(pattern)), _) => {
|
||||
parse_and_check_regex(context, pattern.span, None);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
// RegExp()
|
||||
AstKind::CallExpression(expr) => {
|
||||
// constructor is RegExp,
|
||||
if expr.callee.is_specific_id("RegExp")
|
||||
// which is provided at least 1 parameter,
|
||||
&& expr.arguments.len() > 0
|
||||
{
|
||||
// where the first one is a string literal
|
||||
// note: improvements required for strings used via identifier
|
||||
// references
|
||||
if let Argument::StringLiteral(pattern) = &expr.arguments[0] {
|
||||
// get pattern from arguments. Missing or non-string arguments
|
||||
// will be runtime errors, but are not covered by this rule.
|
||||
parse_and_check_regex(
|
||||
context,
|
||||
&pattern.value,
|
||||
&expr.arguments,
|
||||
pattern.span,
|
||||
);
|
||||
AstKind::CallExpression(expr) if expr.callee.is_specific_id("RegExp") => {
|
||||
// note: improvements required for strings used via identifier references
|
||||
// Missing or non-string arguments will be runtime errors, but are not covered by this rule.
|
||||
match (&expr.arguments.first(), &expr.arguments.get(1)) {
|
||||
(
|
||||
Some(Argument::StringLiteral(pattern)),
|
||||
Some(Argument::StringLiteral(flags)),
|
||||
) => {
|
||||
parse_and_check_regex(context, pattern.span, Some(flags.span));
|
||||
}
|
||||
(Some(Argument::StringLiteral(pattern)), _) => {
|
||||
parse_and_check_regex(context, pattern.span, None);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
_ => {}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_and_check_regex<'a>(
|
||||
ctx: &LintContext<'a>,
|
||||
source_text: &'a str,
|
||||
arguments: &oxc_allocator::Vec<'a, Argument<'a>>,
|
||||
expr_span: Span,
|
||||
) {
|
||||
fn parse_and_check_regex(ctx: &LintContext, pattern_span: Span, flags_span: Option<Span>) {
|
||||
let allocator = Allocator::default();
|
||||
let flags = extract_regex_flags(arguments);
|
||||
let flags_text = flags.map_or(String::new(), |f| f.to_string());
|
||||
let parser = Parser::new(
|
||||
|
||||
let flags_text = flags_span.map(|span| span.source_text(ctx.source_text()));
|
||||
let parser = ConstructorParser::new(
|
||||
&allocator,
|
||||
source_text,
|
||||
ParserOptions::default()
|
||||
.with_span_offset(arguments.first().map_or(0, |arg| arg.span().start))
|
||||
.with_flags(&flags_text),
|
||||
pattern_span.source_text(ctx.source_text()),
|
||||
flags_text,
|
||||
Options {
|
||||
pattern_span_offset: pattern_span.start,
|
||||
flags_span_offset: flags_span.map_or(0, |span| span.start),
|
||||
},
|
||||
);
|
||||
let Ok(pattern) = parser.parse() else {
|
||||
return;
|
||||
};
|
||||
check_pattern(ctx, &pattern, expr_span);
|
||||
check_pattern(ctx, &pattern, pattern_span);
|
||||
}
|
||||
|
||||
fn check_pattern(context: &LintContext, pattern: &Pattern, span: Span) {
|
||||
|
|
@ -279,7 +267,6 @@ mod tests {
|
|||
vec![
|
||||
r"let r = /\u{0}/u",
|
||||
r"let r = new RegExp('\\u{0}', 'u');",
|
||||
r"let r = new RegExp('\\u{0}', `u`);",
|
||||
r"let r = /\u{c}/u",
|
||||
r"let r = /\u{1F}/u",
|
||||
r"let r = new RegExp('\\u{1F}', 'u');", // flags are known & contain u
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@ use oxc_allocator::Allocator;
|
|||
use oxc_ast::{ast::Argument, AstKind};
|
||||
use oxc_diagnostics::OxcDiagnostic;
|
||||
use oxc_macros::declare_oxc_lint;
|
||||
use oxc_regular_expression::{Parser, ParserOptions};
|
||||
use oxc_regular_expression::{ConstructorParser, Options};
|
||||
use oxc_span::Span;
|
||||
use rustc_hash::FxHashSet;
|
||||
use serde::Deserialize;
|
||||
|
|
@ -86,13 +86,20 @@ impl Rule for NoInvalidRegexp {
|
|||
return;
|
||||
}
|
||||
|
||||
let (mut u_flag_found, mut v_flag_found) = (false, false);
|
||||
// Validate flags first if exists
|
||||
if let Some((flags_span_start, flags_text)) = flags_arg {
|
||||
let (mut u_flag_found, mut v_flag_found) = (false, false);
|
||||
// The `oxc_regular_expression` crate has the ability to validate flags.
|
||||
// But, it does not accept any `allow_constructor_flags` option.
|
||||
// And if we omit user defined flags here, `Span` may be incorrect on error reporting.
|
||||
if let Some(flags_span) = flags_arg {
|
||||
// Strip quotes
|
||||
let flags_text =
|
||||
flags_span.source_text(ctx.source_text()).trim_matches('\'').trim_matches('"');
|
||||
|
||||
let mut unique_flags = FxHashSet::default();
|
||||
for (idx, ch) in flags_text.char_indices() {
|
||||
#[allow(clippy::cast_possible_truncation)]
|
||||
let start = flags_span_start + idx as u32;
|
||||
let start = flags_span.start + 1 + idx as u32;
|
||||
|
||||
// Invalid combination: u+v
|
||||
if ch == 'u' {
|
||||
|
|
@ -128,12 +135,23 @@ impl Rule for NoInvalidRegexp {
|
|||
// Pattern check is skipped when 1st argument is NOT a `StringLiteral`
|
||||
// e.g. `new RegExp(var)`, `RegExp("str" + var)`
|
||||
let allocator = Allocator::default();
|
||||
if let Some((pattern_span_start, pattern_text)) = pattern_arg {
|
||||
let options = ParserOptions::default()
|
||||
.with_span_offset(pattern_span_start)
|
||||
.with_flags(flags_arg.map_or("", |(_, flags_text)| flags_text));
|
||||
if let Some(pattern_span) = pattern_arg {
|
||||
let pattern_text = pattern_span.source_text(ctx.source_text());
|
||||
|
||||
match Parser::new(&allocator, pattern_text, options).parse() {
|
||||
let flags_text = match (u_flag_found, v_flag_found) {
|
||||
(true, false) => Some("'u'"),
|
||||
(_, true) => Some("'v'"),
|
||||
(false, false) => None,
|
||||
};
|
||||
|
||||
match ConstructorParser::new(
|
||||
&allocator,
|
||||
pattern_text,
|
||||
flags_text,
|
||||
Options { pattern_span_offset: pattern_span.start, flags_span_offset: 0 },
|
||||
)
|
||||
.parse()
|
||||
{
|
||||
Ok(_) => {}
|
||||
Err(diagnostic) => ctx.diagnostic(diagnostic),
|
||||
}
|
||||
|
|
@ -141,27 +159,19 @@ impl Rule for NoInvalidRegexp {
|
|||
}
|
||||
}
|
||||
|
||||
/// Returns: (span_start, text)
|
||||
/// span_start + 1 for opening string bracket.
|
||||
type ParsedArgument<'a> = (u32, &'a str);
|
||||
fn parse_arguments_to_check<'a>(
|
||||
arg1: Option<&Argument<'a>>,
|
||||
arg2: Option<&Argument<'a>>,
|
||||
) -> (Option<ParsedArgument<'a>>, Option<ParsedArgument<'a>>) {
|
||||
) -> (Option<Span>, Option<Span>) {
|
||||
match (arg1, arg2) {
|
||||
// ("pattern", "flags")
|
||||
(Some(Argument::StringLiteral(pattern)), Some(Argument::StringLiteral(flags))) => (
|
||||
Some((pattern.span.start + 1, pattern.value.as_str())),
|
||||
Some((flags.span.start + 1, flags.value.as_str())),
|
||||
),
|
||||
(Some(Argument::StringLiteral(pattern)), Some(Argument::StringLiteral(flags))) => {
|
||||
(Some(pattern.span), Some(flags.span))
|
||||
}
|
||||
// (pattern, "flags")
|
||||
(Some(_arg), Some(Argument::StringLiteral(flags))) => {
|
||||
(None, Some((flags.span.start + 1, flags.value.as_str())))
|
||||
}
|
||||
(Some(_arg), Some(Argument::StringLiteral(flags))) => (None, Some(flags.span)),
|
||||
// ("pattern")
|
||||
(Some(Argument::StringLiteral(pattern)), None) => {
|
||||
(Some((pattern.span.start + 1, pattern.value.as_str())), None)
|
||||
}
|
||||
(Some(Argument::StringLiteral(pattern)), None) => (Some(pattern.span), None),
|
||||
// (pattern), ()
|
||||
_ => (None, None),
|
||||
}
|
||||
|
|
@ -172,7 +182,7 @@ fn test() {
|
|||
use crate::tester::Tester;
|
||||
|
||||
let pass = vec![
|
||||
("[RegExp(''), /a/uv]", None),
|
||||
("RegExp('')", None),
|
||||
("RegExp()", None),
|
||||
("RegExp('.', 'g')", None),
|
||||
("new RegExp('.')", None),
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ use oxc_macros::declare_oxc_lint;
|
|||
use oxc_regular_expression::{
|
||||
ast::{Character, Pattern},
|
||||
visit::{RegExpAstKind, Visit},
|
||||
Parser, ParserOptions,
|
||||
ConstructorParser, Options,
|
||||
};
|
||||
use oxc_span::Span;
|
||||
|
||||
|
|
@ -63,13 +63,13 @@ impl Rule for NoRegexSpaces {
|
|||
}
|
||||
|
||||
AstKind::CallExpression(expr) if Self::is_regexp_call_expression(expr) => {
|
||||
if let Some(span) = Self::find_expr_to_report(&expr.arguments) {
|
||||
if let Some(span) = Self::find_expr_to_report(&expr.arguments, ctx) {
|
||||
ctx.diagnostic(no_regex_spaces_diagnostic(span)); // RegExp('a b')
|
||||
}
|
||||
}
|
||||
|
||||
AstKind::NewExpression(expr) if Self::is_regexp_new_expression(expr) => {
|
||||
if let Some(span) = Self::find_expr_to_report(&expr.arguments) {
|
||||
if let Some(span) = Self::find_expr_to_report(&expr.arguments, ctx) {
|
||||
ctx.diagnostic(no_regex_spaces_diagnostic(span)); // new RegExp('a b')
|
||||
}
|
||||
}
|
||||
|
|
@ -90,7 +90,7 @@ impl NoRegexSpaces {
|
|||
find_consecutive_spaces(pattern)
|
||||
}
|
||||
|
||||
fn find_expr_to_report(args: &Vec<'_, Argument<'_>>) -> Option<Span> {
|
||||
fn find_expr_to_report(args: &Vec<'_, Argument<'_>>, ctx: &LintContext) -> Option<Span> {
|
||||
if let Some(expr) = args.get(1).and_then(Argument::as_expression) {
|
||||
if !expr.is_string_literal() {
|
||||
return None; // skip on indeterminate flag, e.g. RegExp('a b', flags)
|
||||
|
|
@ -105,10 +105,11 @@ impl NoRegexSpaces {
|
|||
}
|
||||
|
||||
let alloc = Allocator::default();
|
||||
let parser = Parser::new(
|
||||
let parser = ConstructorParser::new(
|
||||
&alloc,
|
||||
pattern.value.as_str(),
|
||||
ParserOptions::default().with_span_offset(pattern.span.start + 1),
|
||||
pattern.span.source_text(ctx.source_text()),
|
||||
None,
|
||||
Options { pattern_span_offset: pattern.span.start, ..Options::default() },
|
||||
);
|
||||
let parsed_pattern = parser.parse().ok()?;
|
||||
|
||||
|
|
|
|||
|
|
@ -104,51 +104,51 @@ source: crates/oxc_linter/src/tester.rs
|
|||
╰────
|
||||
|
||||
⚠ eslint(no-invalid-regexp): Invalid regular expression: Could not parse the entire pattern
|
||||
╭─[no_invalid_regexp.tsx:1:14]
|
||||
╭─[no_invalid_regexp.tsx:1:15]
|
||||
1 │ new RegExp('\\a', 'u');
|
||||
· ▲
|
||||
· ▲
|
||||
╰────
|
||||
|
||||
⚠ eslint(no-invalid-regexp): Invalid regular expression: Could not parse the entire pattern
|
||||
╭─[no_invalid_regexp.tsx:1:14]
|
||||
╭─[no_invalid_regexp.tsx:1:15]
|
||||
1 │ new RegExp('\\a', 'u');
|
||||
· ▲
|
||||
· ▲
|
||||
╰────
|
||||
|
||||
⚠ eslint(no-invalid-regexp): Invalid regular expression: Invalid braced quantifier
|
||||
╭─[no_invalid_regexp.tsx:1:14]
|
||||
╭─[no_invalid_regexp.tsx:1:15]
|
||||
1 │ RegExp('\\u{0}*');
|
||||
· ─
|
||||
· ─
|
||||
╰────
|
||||
|
||||
⚠ eslint(no-invalid-regexp): Invalid regular expression: Invalid braced quantifier
|
||||
╭─[no_invalid_regexp.tsx:1:18]
|
||||
╭─[no_invalid_regexp.tsx:1:19]
|
||||
1 │ new RegExp('\\u{0}*');
|
||||
· ─
|
||||
· ─
|
||||
╰────
|
||||
|
||||
⚠ eslint(no-invalid-regexp): Invalid regular expression: Invalid braced quantifier
|
||||
╭─[no_invalid_regexp.tsx:1:18]
|
||||
╭─[no_invalid_regexp.tsx:1:19]
|
||||
1 │ new RegExp('\\u{0}*', '');
|
||||
· ─
|
||||
· ─
|
||||
╰────
|
||||
|
||||
⚠ eslint(no-invalid-regexp): Invalid regular expression: Invalid braced quantifier
|
||||
╭─[no_invalid_regexp.tsx:1:18]
|
||||
╭─[no_invalid_regexp.tsx:1:19]
|
||||
1 │ new RegExp('\\u{0}*', 'a');
|
||||
· ─
|
||||
· ─
|
||||
╰────
|
||||
|
||||
⚠ eslint(no-invalid-regexp): Invalid regular expression: Invalid braced quantifier
|
||||
╭─[no_invalid_regexp.tsx:1:14]
|
||||
╭─[no_invalid_regexp.tsx:1:15]
|
||||
1 │ RegExp('\\u{0}*');
|
||||
· ─
|
||||
· ─
|
||||
╰────
|
||||
|
||||
⚠ eslint(no-invalid-regexp): Invalid regular expression: Invalid extended atom escape
|
||||
╭─[no_invalid_regexp.tsx:1:13]
|
||||
1 │ new RegExp('\\');
|
||||
· ─
|
||||
· ──
|
||||
╰────
|
||||
|
||||
⚠ eslint(no-invalid-regexp): Invalid regular expression: Unknown flag
|
||||
|
|
@ -196,7 +196,7 @@ source: crates/oxc_linter/src/tester.rs
|
|||
⚠ eslint(no-invalid-regexp): Invalid regular expression: Unterminated character class
|
||||
╭─[no_invalid_regexp.tsx:1:13]
|
||||
1 │ new RegExp('[[]\\u{0}*' /* valid only with `u` flag */, 'v')
|
||||
· ────────
|
||||
· ─────────
|
||||
╰────
|
||||
|
||||
⚠ eslint(no-invalid-regexp): Invalid regular expression: Duplicated capturing group names
|
||||
|
|
|
|||
|
|
@ -128,16 +128,16 @@ source: crates/oxc_linter/src/tester.rs
|
|||
help: Use a quantifier: ` {2}`
|
||||
|
||||
⚠ eslint(no-regex-spaces): Multiple consecutive spaces are hard to count.
|
||||
╭─[no_regex_spaces.tsx:1:25]
|
||||
╭─[no_regex_spaces.tsx:1:26]
|
||||
1 │ var foo = new RegExp('\\d ')
|
||||
· ──
|
||||
· ──
|
||||
╰────
|
||||
help: Use a quantifier: ` {2}`
|
||||
|
||||
⚠ eslint(no-regex-spaces): Multiple consecutive spaces are hard to count.
|
||||
╭─[no_regex_spaces.tsx:1:25]
|
||||
╭─[no_regex_spaces.tsx:1:26]
|
||||
1 │ var foo = RegExp('\\u0041 ')
|
||||
· ───
|
||||
· ───
|
||||
╰────
|
||||
help: Use a quantifier: ` {3}`
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ use std::{env, fs, path::Path, sync::Arc};
|
|||
use oxc_allocator::Allocator;
|
||||
use oxc_ast::{ast, AstKind, Visit};
|
||||
use oxc_parser::{ParseOptions, Parser};
|
||||
use oxc_regular_expression::{Parser as RegExpParser, ParserOptions as RegExpParserOptions};
|
||||
use oxc_regular_expression::{ConstructorParser as RegExpParser, Options as RegExpParserOptions};
|
||||
use oxc_span::SourceType;
|
||||
|
||||
// `cargo run -p oxc_parser --example regular_expression`
|
||||
|
|
@ -62,32 +62,24 @@ impl<'a> Visit<'a> for RegularExpressionVisitor {
|
|||
{
|
||||
println!("🍀 {}", new_expr.span.source_text(&self.source_text));
|
||||
|
||||
let (pattern, pattern_span) = match new_expr.arguments.first() {
|
||||
Some(ast::Argument::StringLiteral(sl)) => (&sl.value, &sl.span),
|
||||
Some(ast::Argument::TemplateLiteral(tl))
|
||||
if tl.is_no_substitution_template() =>
|
||||
{
|
||||
(&tl.quasi().unwrap(), &tl.span)
|
||||
}
|
||||
let pattern_span = match new_expr.arguments.first() {
|
||||
Some(ast::Argument::StringLiteral(sl)) => sl.span,
|
||||
_ => return,
|
||||
};
|
||||
|
||||
let flags = match new_expr.arguments.get(1) {
|
||||
Some(ast::Argument::StringLiteral(sl)) => &sl.value,
|
||||
Some(ast::Argument::TemplateLiteral(tl))
|
||||
if tl.is_no_substitution_template() =>
|
||||
{
|
||||
&tl.quasi().unwrap()
|
||||
}
|
||||
_ => "",
|
||||
let flags_span = match new_expr.arguments.get(1) {
|
||||
Some(ast::Argument::StringLiteral(sl)) => Some(sl.span),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
let parsed = RegExpParser::new(
|
||||
&allocator,
|
||||
pattern,
|
||||
RegExpParserOptions::default()
|
||||
.with_span_offset(pattern_span.start + 1)
|
||||
.with_flags(flags),
|
||||
pattern_span.source_text(&self.source_text),
|
||||
flags_span.map(|span| span.source_text(&self.source_text)),
|
||||
RegExpParserOptions {
|
||||
pattern_span_offset: pattern_span.start,
|
||||
flags_span_offset: flags_span.map_or(0, |span| span.start),
|
||||
},
|
||||
)
|
||||
.parse();
|
||||
|
||||
|
|
|
|||
|
|
@ -223,10 +223,10 @@ impl<'a> ParserImpl<'a> {
|
|||
}
|
||||
|
||||
/// Tell lexer to read a regex
|
||||
pub(crate) fn read_regex(&mut self) -> Result<(u32, RegExpFlags)> {
|
||||
let (token, pattern_end, flags) = self.lexer.next_regex(self.cur_kind())?;
|
||||
pub(crate) fn read_regex(&mut self) -> Result<(u32, RegExpFlags, bool)> {
|
||||
let (token, pattern_end, flags, flags_error) = self.lexer.next_regex(self.cur_kind())?;
|
||||
self.token = token;
|
||||
Ok((pattern_end, flags))
|
||||
Ok((pattern_end, flags, flags_error))
|
||||
}
|
||||
|
||||
/// Tell lexer to read a template substitution tail
|
||||
|
|
|
|||
|
|
@ -345,17 +345,18 @@ impl<'a> ParserImpl<'a> {
|
|||
pub(crate) fn parse_literal_regexp(&mut self) -> Result<RegExpLiteral<'a>> {
|
||||
let span = self.start_span();
|
||||
// split out pattern
|
||||
let (pattern_end, flags) = self.read_regex()?;
|
||||
let (pattern_end, flags, flags_error) = self.read_regex()?;
|
||||
let pattern_start = self.cur_token().start + 1; // +1 to exclude left `/`
|
||||
let pattern_text = &self.source_text[pattern_start as usize..pattern_end as usize];
|
||||
let flags_start = pattern_end + 1; // +1 to include right `/`
|
||||
let flags_text = &self.source_text[flags_start as usize..self.cur_token().end as usize];
|
||||
self.bump_any();
|
||||
let pattern = self
|
||||
.options
|
||||
.parse_regular_expression
|
||||
// Parse the pattern only if the option is enabled and the flags are valid
|
||||
let pattern = (self.options.parse_regular_expression && !flags_error)
|
||||
.then_some(())
|
||||
.map(|()| self.parse_regex_pattern(pattern_start, pattern_text, flags_text))
|
||||
.map(|()| {
|
||||
self.parse_regex_pattern(pattern_start, pattern_text, flags_start, flags_text)
|
||||
})
|
||||
.map_or_else(
|
||||
|| RegExpPattern::Raw(pattern_text),
|
||||
|pat| {
|
||||
|
|
@ -367,13 +368,20 @@ impl<'a> ParserImpl<'a> {
|
|||
|
||||
fn parse_regex_pattern(
|
||||
&mut self,
|
||||
span_offset: u32,
|
||||
pattern_span_offset: u32,
|
||||
pattern: &'a str,
|
||||
flags_span_offset: u32,
|
||||
flags: &'a str,
|
||||
) -> Option<Box<'a, Pattern<'a>>> {
|
||||
use oxc_regular_expression::{Parser, ParserOptions};
|
||||
let options = ParserOptions::default().with_span_offset(span_offset).with_flags(flags);
|
||||
match Parser::new(self.ast.allocator, pattern, options).parse() {
|
||||
use oxc_regular_expression::{LiteralParser, Options};
|
||||
match LiteralParser::new(
|
||||
self.ast.allocator,
|
||||
pattern,
|
||||
Some(flags),
|
||||
Options { pattern_span_offset, flags_span_offset },
|
||||
)
|
||||
.parse()
|
||||
{
|
||||
Ok(regular_expression) => Some(self.ast.alloc(regular_expression)),
|
||||
Err(diagnostic) => {
|
||||
self.error(diagnostic);
|
||||
|
|
|
|||
|
|
@ -11,21 +11,21 @@ impl<'a> Lexer<'a> {
|
|||
/// where a `RegularExpressionLiteral` is permitted
|
||||
/// Which means the parser needs to re-tokenize on `PrimaryExpression`,
|
||||
/// `RegularExpressionLiteral` only appears on the right-hand side of `PrimaryExpression`
|
||||
pub(crate) fn next_regex(&mut self, kind: Kind) -> Result<(Token, u32, RegExpFlags)> {
|
||||
pub(crate) fn next_regex(&mut self, kind: Kind) -> Result<(Token, u32, RegExpFlags, bool)> {
|
||||
self.token.start = self.offset()
|
||||
- match kind {
|
||||
Kind::Slash => 1,
|
||||
Kind::SlashEq => 2,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let (pattern_end, flags) = self.read_regex()?;
|
||||
let (pattern_end, flags, flags_error) = self.read_regex()?;
|
||||
self.lookahead.clear();
|
||||
let token = self.finish_next(Kind::RegExp);
|
||||
Ok((token, pattern_end, flags))
|
||||
Ok((token, pattern_end, flags, flags_error))
|
||||
}
|
||||
|
||||
/// 12.9.5 Regular Expression Literals
|
||||
fn read_regex(&mut self) -> Result<(u32, RegExpFlags)> {
|
||||
fn read_regex(&mut self) -> Result<(u32, RegExpFlags, bool)> {
|
||||
let mut in_escape = false;
|
||||
let mut in_character_class = false;
|
||||
loop {
|
||||
|
|
@ -55,6 +55,8 @@ impl<'a> Lexer<'a> {
|
|||
|
||||
let pattern_end = self.offset() - 1; // -1 to exclude `/`
|
||||
let mut flags = RegExpFlags::empty();
|
||||
// To prevent parsing `oxc_regular_expression` with invalid flags in the parser
|
||||
let mut flags_error = false;
|
||||
|
||||
while let Some(b @ (b'$' | b'_' | b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9')) =
|
||||
self.peek_byte()
|
||||
|
|
@ -65,6 +67,7 @@ impl<'a> Lexer<'a> {
|
|||
b as char,
|
||||
self.current_offset().expand_left(1),
|
||||
));
|
||||
flags_error = true;
|
||||
continue;
|
||||
};
|
||||
if flags.contains(flag) {
|
||||
|
|
@ -72,11 +75,12 @@ impl<'a> Lexer<'a> {
|
|||
b as char,
|
||||
self.current_offset().expand_left(1),
|
||||
));
|
||||
flags_error = true;
|
||||
continue;
|
||||
}
|
||||
flags |= flag;
|
||||
}
|
||||
|
||||
Ok((pattern_end, flags))
|
||||
Ok((pattern_end, flags, flags_error))
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -20,87 +20,3 @@ pub use crate::{
|
|||
options::Options,
|
||||
parser::{ConstructorParser, LiteralParser},
|
||||
};
|
||||
|
||||
// LEGACY APIS TO BE REMOVED SOON! ============================================
|
||||
|
||||
#[derive(Clone, Copy, Debug, Default)]
|
||||
pub struct ParserOptions {
|
||||
pub span_offset: u32,
|
||||
pub unicode_mode: bool,
|
||||
pub unicode_sets_mode: bool,
|
||||
pub parse_string_literal: bool,
|
||||
}
|
||||
|
||||
impl ParserOptions {
|
||||
#[must_use]
|
||||
pub fn with_span_offset(self, span_offset: u32) -> Self {
|
||||
ParserOptions { span_offset, ..self }
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn with_flags(self, flags: &str) -> Self {
|
||||
let (mut unicode_mode, mut unicode_sets_mode) = (false, false);
|
||||
for ch in flags.chars() {
|
||||
if ch == 'u' {
|
||||
unicode_mode = true;
|
||||
}
|
||||
if ch == 'v' {
|
||||
unicode_mode = true;
|
||||
unicode_sets_mode = true;
|
||||
}
|
||||
}
|
||||
|
||||
ParserOptions { unicode_mode, unicode_sets_mode, ..self }
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn with_parse_string_literal(self) -> Self {
|
||||
ParserOptions { parse_string_literal: true, ..self }
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Parser<'a> {
|
||||
allocator: &'a oxc_allocator::Allocator,
|
||||
source_text: &'a str,
|
||||
options: ParserOptions,
|
||||
}
|
||||
|
||||
impl<'a> Parser<'a> {
|
||||
pub fn new(
|
||||
allocator: &'a oxc_allocator::Allocator,
|
||||
source_text: &'a str,
|
||||
options: ParserOptions,
|
||||
) -> Self {
|
||||
Self { allocator, source_text, options }
|
||||
}
|
||||
|
||||
pub fn parse(self) -> oxc_diagnostics::Result<crate::ast::Pattern<'a>> {
|
||||
let ParserOptions { unicode_mode, unicode_sets_mode, span_offset, parse_string_literal } =
|
||||
self.options;
|
||||
|
||||
let options = Options {
|
||||
pattern_span_offset: span_offset,
|
||||
flags_span_offset: 0, // Never be used
|
||||
};
|
||||
|
||||
if parse_string_literal {
|
||||
#[allow(clippy::match_same_arms)]
|
||||
let flags_text = match (unicode_mode, unicode_sets_mode) {
|
||||
(true, false) => Some("'u'"),
|
||||
(false, true) => Some("'v'"),
|
||||
(true, true) => Some("'v'"), // Do not validate this here
|
||||
(false, false) => None,
|
||||
};
|
||||
ConstructorParser::new(self.allocator, self.source_text, flags_text, options).parse()
|
||||
} else {
|
||||
#[allow(clippy::match_same_arms)]
|
||||
let flags_text = match (unicode_mode, unicode_sets_mode) {
|
||||
(true, false) => Some("u"),
|
||||
(false, true) => Some("v"),
|
||||
(true, true) => Some("v"), // Do not validate this here
|
||||
(false, false) => None,
|
||||
};
|
||||
LiteralParser::new(self.allocator, self.source_text, flags_text, options).parse()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -132,11 +132,23 @@ impl<'a, 'ctx> Traverse<'a> for RegExp<'a, 'ctx> {
|
|||
return;
|
||||
}
|
||||
|
||||
let span = regexp.span;
|
||||
let literal_span = regexp.span;
|
||||
let pattern = match &mut regexp.regex.pattern {
|
||||
RegExpPattern::Raw(raw) => {
|
||||
#[expect(clippy::cast_possible_truncation)]
|
||||
let pattern_len = raw.len() as u32;
|
||||
let pattern_span_start = literal_span.start + 1; // +1 to skip the opening `/`
|
||||
let flags_span_start = pattern_span_start + pattern_len + 1; // +1 to skip the closing `/`
|
||||
let flags_text = Span::new(flags_span_start, literal_span.end)
|
||||
.source_text(self.ctx.source_text);
|
||||
// Try to parse pattern
|
||||
match try_parse_pattern(raw, span, flags, ctx) {
|
||||
match try_parse_pattern(
|
||||
raw,
|
||||
pattern_span_start,
|
||||
flags_text,
|
||||
flags_span_start,
|
||||
ctx,
|
||||
) {
|
||||
Ok(pattern) => {
|
||||
regexp.regex.pattern = RegExpPattern::Pattern(ctx.alloc(pattern));
|
||||
let RegExpPattern::Pattern(pattern) = &regexp.regex.pattern else {
|
||||
|
|
@ -238,14 +250,13 @@ fn character_class_has_unicode_property_escape(character_class: &CharacterClass)
|
|||
|
||||
fn try_parse_pattern<'a>(
|
||||
raw: &'a str,
|
||||
span: Span,
|
||||
flags: RegExpFlags,
|
||||
pattern_span_offset: u32,
|
||||
flags_text: &'a str,
|
||||
flags_span_offset: u32,
|
||||
ctx: &mut TraverseCtx<'a>,
|
||||
) -> Result<Pattern<'a>> {
|
||||
use oxc_regular_expression::{Parser, ParserOptions};
|
||||
use oxc_regular_expression::{LiteralParser, Options};
|
||||
|
||||
let options = ParserOptions::default()
|
||||
.with_span_offset(span.start + 1) // exclude `/`
|
||||
.with_flags(&flags.to_string());
|
||||
Parser::new(ctx.ast.allocator, raw, options).parse()
|
||||
let options = Options { pattern_span_offset, flags_span_offset };
|
||||
LiteralParser::new(ctx.ast.allocator, raw, Some(flags_text), options).parse()
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6919,12 +6919,24 @@ Expect to Parse: tasks/coverage/babel/packages/babel-parser/test/fixtures/typesc
|
|||
· ────────────────
|
||||
╰────
|
||||
|
||||
× Invalid regular expression: Invalid unicode flags combination `u` and `v`
|
||||
╭─[babel/packages/babel-parser/test/fixtures/es2024/regexp-unicode-sets/uv-error/input.js:1:6]
|
||||
1 │ /a/ugv;
|
||||
· ─
|
||||
╰────
|
||||
|
||||
× The 'u' and 'v' regular expression flags cannot be enabled at the same time
|
||||
╭─[babel/packages/babel-parser/test/fixtures/es2024/regexp-unicode-sets/uv-error/input.js:1:1]
|
||||
1 │ /a/ugv;
|
||||
· ──────
|
||||
╰────
|
||||
|
||||
× Invalid regular expression: Invalid unicode flags combination `u` and `v`
|
||||
╭─[babel/packages/babel-parser/test/fixtures/es2024/regexp-unicode-sets/vu-error/input.js:1:5]
|
||||
1 │ /a/vu;
|
||||
· ─
|
||||
╰────
|
||||
|
||||
× The 'u' and 'v' regular expression flags cannot be enabled at the same time
|
||||
╭─[babel/packages/babel-parser/test/fixtures/es2024/regexp-unicode-sets/vu-error/input.js:1:1]
|
||||
1 │ /a/vu;
|
||||
|
|
|
|||
|
|
@ -1404,6 +1404,13 @@ Expect Syntax Error: tasks/coverage/test262/test/language/import/import-attribut
|
|||
· ──
|
||||
╰────
|
||||
|
||||
× Invalid regular expression: Invalid unicode flags combination `u` and `v`
|
||||
╭─[test262/test/built-ins/RegExp/prototype/unicodeSets/uv-flags.js:18:5]
|
||||
17 │
|
||||
18 │ /./uv;
|
||||
· ─
|
||||
╰────
|
||||
|
||||
× The 'u' and 'v' regular expression flags cannot be enabled at the same time
|
||||
╭─[test262/test/built-ins/RegExp/prototype/unicodeSets/uv-flags.js:18:1]
|
||||
17 │
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ use oxc::{
|
|||
diagnostics::OxcDiagnostic,
|
||||
minifier::CompressOptions,
|
||||
parser::{ParseOptions, ParserReturn},
|
||||
regular_expression::{Parser, ParserOptions},
|
||||
regular_expression::{LiteralParser, Options},
|
||||
semantic::{Semantic, SemanticBuilderReturn},
|
||||
span::{cmp::ContentEq, SourceType, Span},
|
||||
transformer::{TransformOptions, TransformerReturn},
|
||||
|
|
@ -166,8 +166,9 @@ impl Driver {
|
|||
};
|
||||
let printed1 = pattern.to_string();
|
||||
let flags = literal.regex.flags.to_string();
|
||||
let options = ParserOptions::default().with_flags(&flags);
|
||||
match Parser::new(&allocator, &printed1, options).parse() {
|
||||
match LiteralParser::new(&allocator, &printed1, Some(&flags), Options::default())
|
||||
.parse()
|
||||
{
|
||||
Ok(pattern2) => {
|
||||
let printed2 = pattern2.to_string();
|
||||
if !pattern2.content_eq(pattern) {
|
||||
|
|
|
|||
Loading…
Reference in a new issue