refactor(regular_expression): Align diagnostics (#5543)

Manage all diagnostics for LiteralParser, FlagsParser, and PatternParser in one place, using the same message format.
This commit is contained in:
leaysgur 2024-09-06 16:28:06 +00:00
parent 81a394d11d
commit dec139529d
6 changed files with 65 additions and 24 deletions

View file

@ -1,4 +1,3 @@
mod diagnostics;
mod parser;
mod reader;
mod state;

View file

@ -4,7 +4,8 @@ use oxc_span::Atom as SpanAtom;
use crate::{
ast,
body_parser::{diagnostics, reader::Reader, state::State, unicode, unicode_property},
body_parser::{reader::Reader, state::State, unicode, unicode_property},
diagnostics,
options::ParserOptions,
span::SpanFactory,
surrogate_pair,

View file

@ -3,6 +3,43 @@ use oxc_span::Span;
const PREFIX: &str = "Invalid regular expression:";
// For (Literal)Parser ---
#[cold]
pub fn unexpected_literal_char(span0: Span) -> OxcDiagnostic {
OxcDiagnostic::error(format!("{PREFIX} Unexpected literal character")).with_label(span0)
}
#[cold]
pub fn unterminated_literal(span0: Span, kind: &str) -> OxcDiagnostic {
OxcDiagnostic::error(format!("{PREFIX} Unterminated {kind}")).with_label(span0)
}
#[cold]
pub fn empty_literal(span0: Span) -> OxcDiagnostic {
OxcDiagnostic::error(format!("{PREFIX} Empty literal")).with_label(span0)
}
// For FlagsParser ---
#[cold]
pub fn duplicated_flag(span0: Span) -> OxcDiagnostic {
OxcDiagnostic::error(format!("{PREFIX} Duplicated flag")).with_label(span0)
}
#[cold]
pub fn unknown_flag(span0: Span) -> OxcDiagnostic {
OxcDiagnostic::error(format!("{PREFIX} Unknown flag")).with_label(span0)
}
#[cold]
pub fn invalid_unicode_flags(span0: Span) -> OxcDiagnostic {
OxcDiagnostic::error(format!("{PREFIX} Invalid flags, `u` and `v` should be used alone"))
.with_label(span0)
}
// For PatternParser ---
#[cold]
pub fn duplicated_capturing_group_names(spans: Vec<Span>) -> OxcDiagnostic {
OxcDiagnostic::error(format!("{PREFIX} Duplicated capturing group names")).with_labels(spans)

View file

@ -1,8 +1,8 @@
use oxc_allocator::Allocator;
use oxc_diagnostics::{OxcDiagnostic, Result};
use oxc_diagnostics::Result;
use rustc_hash::FxHashSet;
use crate::{ast, options::ParserOptions, span::SpanFactory};
use crate::{ast, diagnostics, options::ParserOptions, span::SpanFactory};
pub struct FlagsParser<'a> {
source_text: &'a str,
@ -20,8 +20,7 @@ impl<'a> FlagsParser<'a> {
}
pub fn parse(&mut self) -> Result<ast::Flags> {
let mut existing_flags = FxHashSet::default();
let span = self.span_factory.create(0, self.source_text.len());
let mut global = false;
let mut ignore_case = false;
let mut multiline = false;
@ -31,9 +30,10 @@ impl<'a> FlagsParser<'a> {
let mut has_indices = false;
let mut unicode_sets = false;
for c in self.source_text.chars() {
let mut existing_flags = FxHashSet::default();
for (idx, c) in self.source_text.char_indices() {
if !existing_flags.insert(c) {
return Err(OxcDiagnostic::error(format!("Duplicated flag `{c}`")));
return Err(diagnostics::duplicated_flag(self.span_factory.create(idx, idx)));
}
match c {
@ -45,17 +45,16 @@ impl<'a> FlagsParser<'a> {
's' => dot_all = true,
'd' => has_indices = true,
'v' => unicode_sets = true,
_ => return Err(OxcDiagnostic::error(format!("Invalid flag `{c}`"))),
_ => return Err(diagnostics::unknown_flag(self.span_factory.create(idx, idx))),
}
}
// This should be a `SyntaxError`
if unicode && unicode_sets {
return Err(OxcDiagnostic::error("Invalid regular expression flags"));
return Err(diagnostics::invalid_unicode_flags(span));
}
Ok(ast::Flags {
span: self.span_factory.create(0, self.source_text.len()),
span,
global,
ignore_case,
multiline,

View file

@ -2,6 +2,7 @@
pub mod ast;
mod body_parser;
mod diagnostics;
mod display;
mod flag_parser;
mod literal_parser;

View file

@ -1,8 +1,8 @@
use oxc_allocator::Allocator;
use oxc_diagnostics::{OxcDiagnostic, Result};
use oxc_diagnostics::Result;
use crate::{
ast, body_parser::PatternParser, flag_parser::FlagsParser, options::ParserOptions,
ast, body_parser::PatternParser, diagnostics, flag_parser::FlagsParser, options::ParserOptions,
span::SpanFactory,
};
@ -28,7 +28,7 @@ impl<'a> Parser<'a> {
// Precheck if the source text is a valid regular expression literal
// If valid, parse the pattern and flags with returned span offsets
let (body_start_offset, body_end_offset, flag_start_offset) =
parse_reg_exp_literal(self.source_text)?;
parse_reg_exp_literal(self.source_text, &self.span_factory)?;
// Parse flags first to know if unicode mode is enabled or not
let flags = FlagsParser::new(
@ -67,12 +67,15 @@ impl<'a> Parser<'a> {
/// / RegularExpressionBody / RegularExpressionFlags
/// ```
/// Returns `(body_start_offset, body_end_offset, flag_start_offset)`.
fn parse_reg_exp_literal(source_text: &str) -> Result<(usize, usize, usize)> {
fn parse_reg_exp_literal(
source_text: &str,
span_factory: &SpanFactory,
) -> Result<(usize, usize, usize)> {
let mut offset = 0;
let mut chars = source_text.chars().peekable();
let Some('/') = chars.next() else {
return Err(OxcDiagnostic::error("Invalid regular expression: Unexpected character"));
return Err(diagnostics::unexpected_literal_char(span_factory.create(offset, offset)));
};
offset += 1; // '/'
@ -84,9 +87,10 @@ fn parse_reg_exp_literal(source_text: &str) -> Result<(usize, usize, usize)> {
match chars.peek() {
// Line terminators are not allowed
Some('\u{a}' | '\u{d}' | '\u{2028}' | '\u{2029}') | None => {
let kind =
if in_character_class { "character class" } else { "regular expression" };
return Err(OxcDiagnostic::error(format!("Unterminated {kind}")));
return Err(diagnostics::unterminated_literal(
span_factory.create(body_start, offset),
if in_character_class { "character class" } else { "regular expression" },
));
}
Some(&ch) => {
if in_escape {
@ -112,12 +116,12 @@ fn parse_reg_exp_literal(source_text: &str) -> Result<(usize, usize, usize)> {
}
let Some('/') = chars.next() else {
return Err(OxcDiagnostic::error("Invalid regular expression: Unexpected character"));
return Err(diagnostics::unexpected_literal_char(span_factory.create(offset, offset)));
};
let body_end = offset;
if body_end == body_start {
return Err(OxcDiagnostic::error("Invalid regular expression: Empty"));
return Err(diagnostics::empty_literal(span_factory.create(0, body_end + 1)));
}
Ok((body_start, body_end, body_end + 1))
@ -141,7 +145,7 @@ mod test {
"/👈🏻こっち/u",
] {
let (body_start_offset, body_end_offset, flag_start_offset) =
parse_reg_exp_literal(literal_text)
parse_reg_exp_literal(literal_text, &SpanFactory::new(0))
.unwrap_or_else(|_| panic!("{literal_text} should be parsed"));
let body_text = &literal_text[body_start_offset..body_end_offset];
@ -155,7 +159,7 @@ mod test {
for literal_text in
["", "foo", ":(", "a\nb", "/", "/x", "/y\nz/", "/1[\n]/", "//", "///", "/*abc/", "/\\/"]
{
assert!(parse_reg_exp_literal(literal_text).is_err());
assert!(parse_reg_exp_literal(literal_text, &SpanFactory::new(0)).is_err());
}
}
}