oxc/crates/oxc_linter/src/rules/eslint/no_invalid_regexp.rs
leaysgur 8032813bf8 fix(regular_expression)!: Migrate to new regexp parser API (#6741)
Follow up #6635

- [x] Remove old APIs
- [x] Update linter usage
- [x] Update parser usage
- [x] Update transformer usage
2024-10-22 05:34:18 +00:00

344 lines
14 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

use oxc_allocator::Allocator;
use oxc_ast::{ast::Argument, AstKind};
use oxc_diagnostics::OxcDiagnostic;
use oxc_macros::declare_oxc_lint;
use oxc_regular_expression::{ConstructorParser, Options};
use oxc_span::Span;
use rustc_hash::FxHashSet;
use serde::Deserialize;
use crate::{context::LintContext, rule::Rule, AstNode};
// Use the same message prefix as the `oxc_regular_expression` crate
/// Builds the warning emitted when a flag occurs more than once in the flags string.
///
/// `span` is attached as the label of the diagnostic.
fn duplicated_flag_diagnostic(span: Span) -> OxcDiagnostic {
    let warning = OxcDiagnostic::warn("Invalid regular expression: Duplicated flag");
    warning.with_label(span)
}
/// Builds the warning emitted when a flag is neither a built-in flag nor one allowed
/// through the rule configuration.
///
/// `span` is attached as the label of the diagnostic.
fn unknown_flag_diagnostic(span: Span) -> OxcDiagnostic {
    let warning = OxcDiagnostic::warn("Invalid regular expression: Unknown flag");
    warning.with_label(span)
}
/// Builds the warning emitted when both the `u` and the `v` flag are present.
///
/// `span` is attached as the label of the diagnostic.
fn invalid_unicode_flags_diagnostic(span: Span) -> OxcDiagnostic {
    let warning =
        OxcDiagnostic::warn("Invalid regular expression: `u` and `v` flags should be used alone");
    warning.with_label(span)
}
/// The lint rule itself; it only carries its (boxed) configuration.
#[derive(Clone, Debug, Default)]
pub struct NoInvalidRegexp(Box<NoInvalidRegexpConfig>);
declare_oxc_lint!(
/// ### What it does
///
/// Disallows invalid regular expression strings in `RegExp` constructors.
///
/// Both `RegExp(...)` and `new RegExp(...)` are checked, but only when the callee is the
/// plain identifier `RegExp` and the arguments being validated are string literals.
///
/// ### Why is this bad?
///
/// An invalid pattern in a regular expression literal is a `SyntaxError` when the code is parsed,
/// but an invalid string in a `RegExp` constructor throws a `SyntaxError` only when the code is
/// executed.
///
/// ### Examples
///
/// Examples of **incorrect** code for this rule:
/// ```js
/// RegExp('[')
/// RegExp('.', 'z')
/// new RegExp('\\')
/// ```
///
/// Examples of **correct** code for this rule:
/// ```js
/// RegExp('.')
/// new RegExp
/// this.RegExp('[')
/// ```
NoInvalidRegexp,
correctness,
);
/// User-facing options of the rule, deserialized from its JSON configuration.
#[derive(Clone, Debug, Default, Deserialize)]
struct NoInvalidRegexpConfig {
    /// Case-sensitive array of flags.
    ///
    /// Flags listed here are accepted in addition to the built-in ones.
    #[serde(rename = "allowConstructorFlags", default)]
    allow_constructor_flags: Vec<char>,
}
impl Rule for NoInvalidRegexp {
    /// Builds the rule from its JSON configuration.
    ///
    /// Only the first element of the configuration array is read. When it is missing, or when
    /// it cannot be deserialized into the config struct, the default configuration (no extra
    /// allowed flags) is used instead.
    fn from_configuration(value: serde_json::Value) -> Self {
        value
            .as_array()
            .and_then(|arr| arr.first())
            .and_then(|value| serde_json::from_value(value.clone()).ok())
            .map_or_else(Self::default, |value| Self(Box::new(value)))
    }

    /// Validates the string-literal arguments of `RegExp(...)` and `new RegExp(...)`.
    ///
    /// Flags are validated first (duplicates, unknown flags, `u` combined with `v`); the first
    /// problem found is reported and the node is not inspected any further. If the flags are
    /// fine, the pattern (when it is a string literal) is handed to `ConstructorParser` and any
    /// parse error is reported as the diagnostic.
    fn run<'a>(&self, node: &AstNode<'a>, ctx: &LintContext<'a>) {
        let (pattern_arg, flags_arg) = match node.kind() {
            AstKind::NewExpression(expr) if expr.callee.is_specific_id("RegExp") => {
                parse_arguments_to_check(expr.arguments.first(), expr.arguments.get(1))
            }
            AstKind::CallExpression(expr) if expr.callee.is_specific_id("RegExp") => {
                parse_arguments_to_check(expr.arguments.first(), expr.arguments.get(1))
            }
            // Other kinds, skip
            _ => return,
        };

        // No arguments, skip
        if pattern_arg.is_none() && flags_arg.is_none() {
            return;
        }

        let (mut u_flag_found, mut v_flag_found) = (false, false);

        // Validate flags first if they exist.
        // The `oxc_regular_expression` crate is able to validate flags,
        // but it does not accept any `allow_constructor_flags` option.
        // And if we omitted user defined flags here, `Span` may be incorrect on error reporting.
        if let Some(flags_span) = flags_arg {
            // Strip exactly one delimiter from each side. The span of a string literal covers
            // its quotes, and trimming *all* quote characters would also swallow quote
            // characters that belong to the flags themselves (e.g. `'"'` or `'"g"'`), hiding
            // invalid flags and shifting the reported offsets.
            let flags_source = flags_span.source_text(ctx.source_text());
            let flags_text =
                flags_source.get(1..flags_source.len().saturating_sub(1)).unwrap_or("");

            let mut unique_flags = FxHashSet::default();
            for (idx, ch) in flags_text.char_indices() {
                // `idx` is an offset inside a source file whose spans are already `u32`,
                // so the cast cannot truncate in practice.
                #[allow(clippy::cast_possible_truncation)]
                // `+ 1` skips the opening quote of the literal.
                let start = flags_span.start + 1 + idx as u32;

                // Invalid combination: u+v
                if ch == 'u' {
                    if v_flag_found {
                        return ctx
                            .diagnostic(invalid_unicode_flags_diagnostic(Span::new(start, start)));
                    }
                    u_flag_found = true;
                }
                if ch == 'v' {
                    if u_flag_found {
                        return ctx
                            .diagnostic(invalid_unicode_flags_diagnostic(Span::new(start, start)));
                    }
                    v_flag_found = true;
                }

                // Duplicated: user defined, invalid or valid
                if !unique_flags.insert(ch) {
                    return ctx.diagnostic(duplicated_flag_diagnostic(Span::new(start, start)));
                }

                // Unknown: not valid, not user defined
                if !(matches!(ch, 'd' | 'g' | 'i' | 'm' | 's' | 'u' | 'v' | 'y')
                    || self.0.allow_constructor_flags.contains(&ch))
                {
                    return ctx.diagnostic(unknown_flag_diagnostic(Span::new(start, start)));
                }
            }
        }

        // Then, validate pattern if exists
        // Pattern check is skipped when 1st argument is NOT a `StringLiteral`
        // e.g. `new RegExp(var)`, `RegExp("str" + var)`
        if let Some(pattern_span) = pattern_arg {
            // Only create the arena when there is a pattern to parse.
            let allocator = Allocator::default();
            let pattern_text = pattern_span.source_text(ctx.source_text());
            // Only the unicode mode is forwarded to the parser; the flags themselves were
            // already validated above. The text is quoted like a string literal's source.
            let flags_text = match (u_flag_found, v_flag_found) {
                (true, false) => Some("'u'"),
                (_, true) => Some("'v'"),
                (false, false) => None,
            };
            match ConstructorParser::new(
                &allocator,
                pattern_text,
                flags_text,
                Options { pattern_span_offset: pattern_span.start, flags_span_offset: 0 },
            )
            .parse()
            {
                Ok(_) => {}
                Err(diagnostic) => ctx.diagnostic(diagnostic),
            }
        }
    }
}
/// Picks the spans of the arguments that can be validated statically.
///
/// Returns `(pattern_span, flags_span)`:
/// - a string-literal pattern is only returned when the flags are either absent or a string
///   literal as well;
/// - string-literal flags are returned whenever a first argument exists, whatever its kind;
/// - every other combination yields `(None, None)`.
fn parse_arguments_to_check<'a>(
    arg1: Option<&Argument<'a>>,
    arg2: Option<&Argument<'a>>,
) -> (Option<Span>, Option<Span>) {
    // Without a first argument there is nothing to check at all.
    let pattern_span = match arg1 {
        Some(Argument::StringLiteral(pattern)) => Some(pattern.span),
        Some(_) => None,
        None => return (None, None),
    };

    match arg2 {
        // ("pattern", "flags") or (pattern, "flags")
        Some(Argument::StringLiteral(flags)) => (pattern_span, Some(flags.span)),
        // Flags are present but not statically known: skip the pattern as well.
        Some(_) => (None, None),
        // ("pattern") or (pattern)
        None => (pattern_span, None),
    }
}
// Table-driven test for the rule: every entry is `(source code, optional rule configuration)`.
// `pass` and `fail` are handed to the `Tester`, which is then asked to test and snapshot.
#[test]
fn test() {
use crate::tester::Tester;
// Sources in `pass` are expected to produce no diagnostic.
let pass = vec![
("RegExp('')", None),
("RegExp()", None),
("RegExp('.', 'g')", None),
("new RegExp('.')", None),
("new RegExp", None),
("new RegExp('.', 'im')", None),
("global.RegExp('\\\\')", None),
("new RegExp('.', y)", None),
("new RegExp('.', 'y')", None),
("new RegExp('.', 'u')", None),
("new RegExp('.', 'yu')", None),
("new RegExp('/', 'yu')", None),
("new RegExp('\\/', 'yu')", None),
("new RegExp('\\\\u{65}', 'u')", None),
("new RegExp('\\\\u{65}*', 'u')", None),
("new RegExp('[\\\\u{0}-\\\\u{1F}]', 'u')", None),
("new RegExp('.', 's')", None),
("new RegExp('(?<=a)b')", None),
("new RegExp('(?<!a)b')", None),
("new RegExp('(?<a>b)\\k<a>')", None),
("new RegExp('(?<a>b)\\k<a>', 'u')", None),
("new RegExp('\\\\p{Letter}', 'u')", None),
// unknown flags: the flags argument is not a string literal, so nothing is validated
("RegExp('{', flags)", None),
("new RegExp('{', flags)", None),
("RegExp('\\\\u{0}*', flags)", None),
("new RegExp('\\\\u{0}*', flags)", None),
("RegExp('{', flags)", Some(serde_json::json!([{ "allowConstructorFlags": ["u"] }]))),
(
"RegExp('\\\\u{0}*', flags)",
Some(serde_json::json!([{ "allowConstructorFlags": ["a"] }])),
),
// unknown pattern: the pattern is not a string literal, so only the flags are validated
("new RegExp(pattern, 'g')", None),
("new RegExp('.' + '', 'g')", None),
("new RegExp(pattern, '')", None),
("new RegExp(pattern)", None),
// ES2020
("new RegExp('(?<\\\\ud835\\\\udc9c>.)', 'g')", None),
("new RegExp('(?<\\\\u{1d49c}>.)', 'g')", None),
("new RegExp('(?<𝒜>.)', 'g');", None),
("new RegExp('\\\\p{Script=Nandinagari}', 'u');", None),
// ES2022
("new RegExp('a+(?<Z>z)?', 'd')", None),
("new RegExp('\\\\p{Script=Cpmn}', 'u')", None),
("new RegExp('\\\\p{Script=Cypro_Minoan}', 'u')", None),
("new RegExp('\\\\p{Script=Old_Uyghur}', 'u')", None),
("new RegExp('\\\\p{Script=Ougr}', 'u')", None),
("new RegExp('\\\\p{Script=Tangsa}', 'u')", None),
("new RegExp('\\\\p{Script=Tnsa}', 'u')", None),
("new RegExp('\\\\p{Script=Toto}', 'u')", None),
("new RegExp('\\\\p{Script=Vith}', 'u')", None),
("new RegExp('\\\\p{Script=Vithkuqi}', 'u')", None),
// ES2024
("new RegExp('[A--B]', 'v')", None),
("new RegExp('[A&&B]', 'v')", None),
("new RegExp('[A--[0-9]]', 'v')", None),
("new RegExp('[\\\\p{Basic_Emoji}--\\\\q{a|bc|def}]', 'v')", None),
("new RegExp('[A--B]', flags)", None),
("new RegExp('[[]\\\\u{0}*', flags)", None),
// ES2025
// ("new RegExp('((?<k>a)|(?<k>b))')", None),
// allowConstructorFlags: flags listed in the configuration are accepted as valid
("new RegExp('.', 'g')", Some(serde_json::json!([{ "allowConstructorFlags": [] }]))),
("new RegExp('.', 'g')", Some(serde_json::json!([{ "allowConstructorFlags": ["a"] }]))),
("new RegExp('.', 'a')", Some(serde_json::json!([{ "allowConstructorFlags": ["a"] }]))),
("new RegExp('.', 'ag')", Some(serde_json::json!([{ "allowConstructorFlags": ["a"] }]))),
("new RegExp('.', 'ga')", Some(serde_json::json!([{ "allowConstructorFlags": ["a"] }]))),
(
"new RegExp(pattern, 'ga')",
Some(serde_json::json!([{ "allowConstructorFlags": ["a"] }])),
),
(
"new RegExp('.' + '', 'ga')",
Some(serde_json::json!([{ "allowConstructorFlags": ["a"] }])),
),
(
"new RegExp('.', 'a')",
Some(serde_json::json!([{ "allowConstructorFlags": ["a", "z"] }])),
),
(
"new RegExp('.', 'z')",
Some(serde_json::json!([{ "allowConstructorFlags": ["a", "z"] }])),
),
(
"new RegExp('.', 'az')",
Some(serde_json::json!([{ "allowConstructorFlags": ["a", "z"] }])),
),
(
"new RegExp('.', 'za')",
Some(serde_json::json!([{ "allowConstructorFlags": ["a", "z"] }])),
),
(
"new RegExp('.', 'agz')",
Some(serde_json::json!([{ "allowConstructorFlags": ["a", "z"] }])),
),
];
// Sources in `fail` are expected to be reported by the rule.
let fail = vec![
("RegExp('[');", None),
("RegExp('.', 'z');", None),
("RegExp('.', 'a');", Some(serde_json::json!([{}]))),
("new RegExp('.', 'a');", Some(serde_json::json!([{ "allowConstructorFlags": [] }]))),
("new RegExp('.', 'z');", Some(serde_json::json!([{ "allowConstructorFlags": ["a"] }]))),
("RegExp('.', 'a');", Some(serde_json::json!([{ "allowConstructorFlags": ["A"] }]))),
("RegExp('.', 'A');", Some(serde_json::json!([{ "allowConstructorFlags": ["a"] }]))),
("new RegExp('.', 'az');", Some(serde_json::json!([{ "allowConstructorFlags": ["z"] }]))),
("new RegExp('.', 'aa');", Some(serde_json::json!([{ "allowConstructorFlags": ["a"] }]))),
(
"new RegExp('.', 'aa');",
Some(serde_json::json!([{ "allowConstructorFlags": ["a", "a"] }])),
),
("new RegExp('.', 'aA');", Some(serde_json::json!([{ "allowConstructorFlags": ["a"] }]))),
(
"new RegExp('.', 'aaz');",
Some(serde_json::json!([{ "allowConstructorFlags": ["a", "z"] }])),
),
(
"new RegExp('.', 'azz');",
Some(serde_json::json!([{ "allowConstructorFlags": ["a", "z"] }])),
),
("new RegExp('.', 'aga');", Some(serde_json::json!([{ "allowConstructorFlags": ["a"] }]))),
("new RegExp('.', 'uu');", Some(serde_json::json!([{ "allowConstructorFlags": ["u"] }]))),
("new RegExp('.', 'ouo');", Some(serde_json::json!([{ "allowConstructorFlags": ["u"] }]))),
("new RegExp(')');", None),
("new RegExp('\\\\a', 'u');", None),
(
"new RegExp('\\\\a', 'u');",
Some(serde_json::json!([{ "allowConstructorFlags": ["u"] }])),
),
("RegExp('\\\\u{0}*');", None),
("new RegExp('\\\\u{0}*');", None),
("new RegExp('\\\\u{0}*', '');", None),
(
"new RegExp('\\\\u{0}*', 'a');",
Some(serde_json::json!([{ "allowConstructorFlags": ["a"] }])),
),
("RegExp('\\\\u{0}*');", Some(serde_json::json!([{ "allowConstructorFlags": ["a"] }]))),
("new RegExp('\\\\');", None),
("RegExp(')' + '', 'a');", None),
(
"new RegExp('.' + '', 'az');",
Some(serde_json::json!([{ "allowConstructorFlags": ["z"] }])),
),
(
"new RegExp(pattern, 'az');",
Some(serde_json::json!([{ "allowConstructorFlags": ["a"] }])),
),
// ES2024
("new RegExp('[[]', 'v');", None),
("new RegExp('.', 'uv');", None),
("new RegExp(pattern, 'uv');", None),
("new RegExp('[A--B]' /* valid only with `v` flag */, 'u')", None),
("new RegExp('[[]\\\\u{0}*' /* valid only with `u` flag */, 'v')", None),
// ES2025
("new RegExp('(?<k>a)(?<k>b)')", None),
];
// Run both tables against the rule; presumably the reported diagnostics are compared with a
// stored snapshot (see `Tester::test_and_snapshot`).
Tester::new(NoInvalidRegexp::NAME, pass, fail).test_and_snapshot();
}