From 6d5a9f2ee06d5dacd16f163c870449e83f7c3f1b Mon Sep 17 00:00:00 2001 From: DonIsaac <22823424+DonIsaac@users.noreply.github.com> Date: Sun, 13 Oct 2024 19:34:18 +0000 Subject: [PATCH] fix(linter/no-control-regex): allow capture group references (#6529) Closes #6525. Allows `\1`, `\2`, etc. when referencing a capturing group. Examples of now-allowed patterns: ```js // both of these have a capture group with an index of 1 const r = /([a-z]+)\1/; const r = /\1([a-z]+)/; ``` Examples of still-banned patterns: ```js /([a-z])\0/; //out of range: capture groups are 1-indexed /([a-z])\2/; // there's only one capture group here ``` --- .../src/rules/eslint/no_control_regex.rs | 62 +++++++++++++++++-- ..._control_regex@capture-group-indexing.snap | 30 +++++++++ 2 files changed, 86 insertions(+), 6 deletions(-) create mode 100644 crates/oxc_linter/src/snapshots/no_control_regex@capture-group-indexing.snap diff --git a/crates/oxc_linter/src/rules/eslint/no_control_regex.rs b/crates/oxc_linter/src/rules/eslint/no_control_regex.rs index 80b610a04..0e0b0bba2 100644 --- a/crates/oxc_linter/src/rules/eslint/no_control_regex.rs +++ b/crates/oxc_linter/src/rules/eslint/no_control_regex.rs @@ -1,10 +1,11 @@ +use itertools::Itertools as _; use oxc_allocator::Allocator; use oxc_ast::{ast::Argument, AstKind}; use oxc_diagnostics::OxcDiagnostic; use oxc_macros::declare_oxc_lint; use oxc_regular_expression::{ - ast::{Character, Pattern}, - visit::Visit, + ast::{CapturingGroup, Character, Pattern}, + visit::{walk, Visit}, Parser, ParserOptions, }; use oxc_span::{GetSpan, Span}; @@ -138,20 +139,45 @@ fn parse_and_check_regex<'a>( } fn check_pattern(context: &LintContext, pattern: &Pattern, span: Span) { - let mut finder = ControlCharacterFinder { control_chars: Vec::new() }; + let mut finder = ControlCharacterFinder::default(); finder.visit_pattern(pattern); if !finder.control_chars.is_empty() { - let violations = finder.control_chars.join(", "); + let violations = finder.control_chars.into_iter().map(|c| c.to_string()).join(", "); context.diagnostic(no_control_regex_diagnostic(&violations, span)); } } +#[derive(Default)] struct ControlCharacterFinder { - control_chars: Vec, + control_chars: Vec, + num_capture_groups: u32, } impl<'a> Visit<'a> for ControlCharacterFinder { + fn visit_pattern(&mut self, it: &Pattern<'a>) { + walk::walk_pattern(self, it); + // \1, \2, etc. are sometimes valid "control" characters as they can be + // used to reference values from capturing groups. Note in this case + // they're not actually control characters. However, if there's no + // corresponding capturing group, they _are_ invalid control characters. + // + // Some important notes: + // 1. Capture groups are 1-indexed. + // 2. Capture groups can be nested. + // 3. Capture groups may be referenced before they are defined. This is + // why we need to do this check here, instead of filtering inside of + // visit_character. + if self.num_capture_groups > 0 && !self.control_chars.is_empty() { + let control_chars = std::mem::take(&mut self.control_chars); + let control_chars = control_chars + .into_iter() + .filter(|c| !(c.value > 0x01 && c.value <= self.num_capture_groups)) + .collect::>(); + self.control_chars = control_chars; + } + } + fn visit_character(&mut self, ch: &Character) { // Control characters are in the range 0x00 to 0x1F if ch.value <= 0x1F && @@ -163,9 +189,14 @@ impl<'a> Visit<'a> for ControlCharacterFinder { ch.value != 0x0D { // TODO: check if starts with \x or \u when char spans work correctly - self.control_chars.push(ch.to_string()); + self.control_chars.push(*ch); } } + + fn visit_capturing_group(&mut self, group: &CapturingGroup<'a>) { + self.num_capture_groups += 1; + walk::walk_capturing_group(self, group); + } } #[cfg(test)] @@ -239,6 +270,25 @@ mod tests { .test(); } + #[test] + fn test_capture_group_indexing() { + // https://github.com/oxc-project/oxc/issues/6525 + let pass = vec![ + r#"const filename = /filename[^;=\n]=((['"]).?\2|[^;\n]*)/;"#, + r"const r = /([a-z])\1/;", + r"const r = /\1([a-z])/;", + ]; + let fail = vec![ + r"const r = /\0/;", + r"const r = /[a-z]\1/;", + r"const r = /([a-z])\2/;", + r"const r = /([a-z])\0/;", + ]; + Tester::new(NoControlRegex::NAME, pass, fail) + .with_snapshot_suffix("capture-group-indexing") + .test_and_snapshot(); + } + #[test] fn test() { // test cases taken from eslint. See: diff --git a/crates/oxc_linter/src/snapshots/no_control_regex@capture-group-indexing.snap b/crates/oxc_linter/src/snapshots/no_control_regex@capture-group-indexing.snap new file mode 100644 index 000000000..3424f2119 --- /dev/null +++ b/crates/oxc_linter/src/snapshots/no_control_regex@capture-group-indexing.snap @@ -0,0 +1,30 @@ +--- +source: crates/oxc_linter/src/tester.rs +--- + ⚠ eslint(no-control-regex): Unexpected control character(s) + ╭─[no_control_regex.tsx:1:11] + 1 │ const r = /\0/; + · ──── + ╰──── + help: Unexpected control character(s) in regular expression: "\0" + + ⚠ eslint(no-control-regex): Unexpected control character(s) + ╭─[no_control_regex.tsx:1:11] + 1 │ const r = /[a-z]\1/; + · ───────── + ╰──── + help: Unexpected control character(s) in regular expression: "\1" + + ⚠ eslint(no-control-regex): Unexpected control character(s) + ╭─[no_control_regex.tsx:1:11] + 1 │ const r = /([a-z])\2/; + · ─────────── + ╰──── + help: Unexpected control character(s) in regular expression: "\2" + + ⚠ eslint(no-control-regex): Unexpected control character(s) + ╭─[no_control_regex.tsx:1:11] + 1 │ const r = /([a-z])\0/; + · ─────────── + ╰──── + help: Unexpected control character(s) in regular expression: "\0"