mirror of
https://github.com/danbulant/oxc
synced 2026-05-24 20:32:10 +00:00
Preparation for #6141
`oxc_regular_expression` can already parse and validate both `/regexp-literal/` and `new RegExp("string-literal")`.
However, one thing that is not yet well supported is reporting an accurate `Span` for the `RegExp("string-literal-with-\\escape")` case.
For example, these two cases produce the same `RegExp` instances in JavaScript:
- `/\d+/`
- `new RegExp("\\d+")`
For now, mainly in `oxc_linter`, the latter case is parsed with `oxc_parser` -> `ast::literal::StringLiteral` AST node -> `value` property.
At this point, escape sequences are already resolved(!), so `oxc_regular_expression` can handle the aligned `&str` as an argument without any problem in both cases.
However, in terms of `Span` representation, these cases should be handled differently because of the `\\` in string literals...
As a result, the parsed AST's `Span` for `new RegExp("string-literal")` is not accurate if it contains escape sequences.
e.g. a01a5dfdaf/crates/oxc_linter/src/snapshots/no_invalid_regexp.snap (L118-L122)
Each time the `\` appears, the subsequent position is shifted. `_` should be placed under `*` in this case.
So, to resolve this issue, we need to implement `string_literal_parser` first, and then use what it produces as the reading units of `oxc_regular_expression`.
68 lines
2 KiB
Rust
68 lines
2 KiB
Rust
#![allow(clippy::print_stdout)]
|
|
|
|
use oxc_allocator::Allocator;
|
|
use oxc_regular_expression::{LiteralParser, Options};
|
|
|
|
fn main() {
|
|
let allocator = Allocator::default();
|
|
|
|
for (pattern_text, flags_text) in [
|
|
(r"ab", ""),
|
|
(r"abc", "i"),
|
|
(r"abcd", "igv"),
|
|
(r"emo👈🏻ji", "u"),
|
|
(r"ab|c", "i"),
|
|
(r"a|b+|c", "i"),
|
|
(r"a{0}|b{1,2}|c{3,}", "i"),
|
|
(r"(?=a)|(?<=b)|(?!c)|(?<!d)", "i"),
|
|
(r"\n\cM\0\x41\.", ""),
|
|
(r"\n\cM\0\x41\u1234\.", "u"),
|
|
(r"\n\cM\0\x41\u{1f600}\.", "u"),
|
|
(r"a\k<f>x\1c", "u"),
|
|
(r"(cg)(?<n>cg)(?:g)", ""),
|
|
(r"{3}", ""), // Error
|
|
(r"Em🥹j", ""),
|
|
(r"^(?=ab)\b(?!cd)(?<=ef)\B(?<!gh)$", ""),
|
|
(r"^(?<!ab)$", ""),
|
|
(r"a)", ""), // Error
|
|
(r"c]", ""),
|
|
(r"[abc]", ""),
|
|
(r"[|\]]", ""),
|
|
(r"[a&&b]", "v"),
|
|
(r"[a--b]", "v"),
|
|
(r"[a&&&]", "v"), // Error
|
|
(r"[a---]", "v"), // Error
|
|
(r"[^a--b--c]", "v"),
|
|
(r"[a[b[c[d[e[f[g[h[i[j[k[l]]]]]]]]]]]]", "v"),
|
|
(r"[\q{abc|d|e|}]", "v"),
|
|
(r"\p{Basic_Emoji}", "v"),
|
|
(r"\p{Basic_Emoji}", "u"), // Error
|
|
(r"[[^\q{}]]", "v"), // Error
|
|
(r"(?<a>)(?<a>)", ""), // Error
|
|
(r"(?noname)", "v"), // Error
|
|
(r"[\bb]", ""),
|
|
(r"a{2,1}", "v"), // Error
|
|
] {
|
|
let parser = LiteralParser::new(
|
|
&allocator,
|
|
pattern_text,
|
|
Some(flags_text),
|
|
// +1 for added `/` in error reports
|
|
Options { pattern_span_offset: 1, ..Options::default() },
|
|
);
|
|
let ret = parser.parse();
|
|
|
|
let literal = format!("/{pattern_text}/{flags_text}");
|
|
println!("Parse: {literal}");
|
|
match ret {
|
|
Ok(pattern) => {
|
|
println!("✨ {pattern:#?}");
|
|
}
|
|
Err(error) => {
|
|
let error = error.with_source_code(literal);
|
|
println!("💥 {error:?}");
|
|
}
|
|
}
|
|
println!();
|
|
}
|
|
}
|