mirror of
https://github.com/danbulant/oxc
synced 2026-05-24 12:21:58 +00:00
refactor(lexer): don't use lexer.current.chars directly (#2237)
This PR replaces most usages of `lexer.current.chars.next()` with `lexer.consume_char()` or a new function, `lexer.next_char()`. This is a preparatory step towards replacing the `Chars` iterator with something more flexible which can also consume bytes (not `char`s), and this PR was intended as a pure refactor. However, I was surprised to see that there is a small performance bump (no idea why!). There's an additional benefit: using `consume_char()` everywhere we believe there is definitely a char to be consumed will make logic errors produce a panic, rather than silently outputting garbage.
This commit is contained in:
parent
a79988d5e2
commit
622a2c37fa
12 changed files with 60 additions and 54 deletions
|
|
@ -242,11 +242,11 @@ ascii_byte_handler!(SLH(lexer) {
|
|||
lexer.consume_char();
|
||||
match lexer.peek() {
|
||||
Some('/') => {
|
||||
lexer.current.chars.next();
|
||||
lexer.consume_char();
|
||||
lexer.skip_single_line_comment()
|
||||
}
|
||||
Some('*') => {
|
||||
lexer.current.chars.next();
|
||||
lexer.consume_char();
|
||||
lexer.skip_multi_line_comment()
|
||||
}
|
||||
_ => {
|
||||
|
|
@ -327,7 +327,7 @@ ascii_byte_handler!(QST(lexer) {
|
|||
if lexer.peek2().is_some_and(|c| c.is_ascii_digit()) {
|
||||
Kind::Question
|
||||
} else {
|
||||
lexer.current.chars.next();
|
||||
lexer.consume_char();
|
||||
Kind::QuestionDot
|
||||
}
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ impl<'a> Lexer<'a> {
|
|||
#[allow(clippy::cast_possible_truncation)]
|
||||
pub(super) fn skip_single_line_comment(&mut self) -> Kind {
|
||||
let start = self.current.token.start;
|
||||
while let Some(c) = self.current.chars.next() {
|
||||
while let Some(c) = self.next_char() {
|
||||
if is_line_terminator(c) {
|
||||
self.current.token.is_on_new_line = true;
|
||||
self.trivia_builder
|
||||
|
|
@ -23,7 +23,7 @@ impl<'a> Lexer<'a> {
|
|||
|
||||
/// Section 12.4 Multi Line Comment
|
||||
pub(super) fn skip_multi_line_comment(&mut self) -> Kind {
|
||||
while let Some(c) = self.current.chars.next() {
|
||||
while let Some(c) = self.next_char() {
|
||||
if c == '*' && self.next_eq('/') {
|
||||
self.trivia_builder.add_multi_line_comment(self.current.token.start, self.offset());
|
||||
return Kind::Skip;
|
||||
|
|
@ -38,7 +38,7 @@ impl<'a> Lexer<'a> {
|
|||
|
||||
/// Section 12.5 Hashbang Comments
|
||||
pub(super) fn read_hashbang_comment(&mut self) -> Kind {
|
||||
while let Some(c) = self.current.chars.next().as_ref() {
|
||||
while let Some(c) = self.next_char().as_ref() {
|
||||
if is_line_terminator(*c) {
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ impl<'a> Lexer<'a> {
|
|||
pub(super) fn private_identifier(&mut self) -> Kind {
|
||||
let mut builder = AutoCow::new(self);
|
||||
let start = self.offset();
|
||||
match self.current.chars.next() {
|
||||
match self.next_char() {
|
||||
Some(c) if is_identifier_start(c) => {
|
||||
builder.push_matching(c);
|
||||
}
|
||||
|
|
@ -48,14 +48,14 @@ impl<'a> Lexer<'a> {
|
|||
while let Some(c) = self.peek() {
|
||||
if !is_identifier_part(c) {
|
||||
if c == '\\' {
|
||||
self.current.chars.next();
|
||||
self.consume_char();
|
||||
builder.force_allocation_without_current_ascii_char(self);
|
||||
self.identifier_unicode_escape_sequence(&mut builder, false);
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
self.current.chars.next();
|
||||
self.consume_char();
|
||||
builder.push_matching(c);
|
||||
}
|
||||
let has_escape = builder.has_escape();
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ impl<'a> Lexer<'a> {
|
|||
pub(super) fn read_jsx_string_literal(&mut self, delimiter: char) -> Kind {
|
||||
let mut builder = AutoCow::new(self);
|
||||
loop {
|
||||
match self.current.chars.next() {
|
||||
match self.next_char() {
|
||||
Some(c @ ('"' | '\'')) => {
|
||||
if c == delimiter {
|
||||
self.save_string(builder.has_escape(), builder.finish_without_push(self));
|
||||
|
|
@ -58,11 +58,11 @@ impl<'a> Lexer<'a> {
|
|||
fn read_jsx_child(&mut self) -> Kind {
|
||||
match self.peek() {
|
||||
Some('<') => {
|
||||
self.current.chars.next();
|
||||
self.consume_char();
|
||||
Kind::LAngle
|
||||
}
|
||||
Some('{') => {
|
||||
self.current.chars.next();
|
||||
self.consume_char();
|
||||
Kind::LCurly
|
||||
}
|
||||
Some(_) => {
|
||||
|
|
@ -74,7 +74,7 @@ impl<'a> Lexer<'a> {
|
|||
if self.peek().is_some_and(|c| c == '{' || c == '<') {
|
||||
break;
|
||||
}
|
||||
if self.current.chars.next().is_none() {
|
||||
if self.next_char().is_none() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
@ -91,10 +91,10 @@ impl<'a> Lexer<'a> {
|
|||
fn read_jsx_identifier(&mut self, _start_offset: u32) -> Kind {
|
||||
while let Some(c) = self.peek() {
|
||||
if c == '-' || is_identifier_start(c) {
|
||||
self.current.chars.next();
|
||||
self.consume_char();
|
||||
while let Some(c) = self.peek() {
|
||||
if is_identifier_part(c) {
|
||||
self.current.chars.next();
|
||||
self.consume_char();
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -212,6 +212,12 @@ impl<'a> Lexer<'a> {
|
|||
Span::new(self.current.token.start, self.offset())
|
||||
}
|
||||
|
||||
/// Consume the current char if not at EOF
|
||||
#[inline]
|
||||
fn next_char(&mut self) -> Option<char> {
|
||||
self.current.chars.next()
|
||||
}
|
||||
|
||||
/// Consume the current char
|
||||
#[inline]
|
||||
fn consume_char(&mut self) -> char {
|
||||
|
|
|
|||
|
|
@ -11,15 +11,15 @@ impl<'a> Lexer<'a> {
|
|||
Some('o' | 'O') => self.read_non_decimal(Kind::Octal),
|
||||
Some('x' | 'X') => self.read_non_decimal(Kind::Hex),
|
||||
Some('e' | 'E') => {
|
||||
self.current.chars.next();
|
||||
self.consume_char();
|
||||
self.read_decimal_exponent()
|
||||
}
|
||||
Some('.') => {
|
||||
self.current.chars.next();
|
||||
self.consume_char();
|
||||
self.decimal_literal_after_decimal_point_after_digits()
|
||||
}
|
||||
Some('n') => {
|
||||
self.current.chars.next();
|
||||
self.consume_char();
|
||||
self.check_after_numeric_literal(Kind::Decimal)
|
||||
}
|
||||
Some(n) if n.is_ascii_digit() => self.read_legacy_octal(),
|
||||
|
|
@ -40,10 +40,10 @@ impl<'a> Lexer<'a> {
|
|||
}
|
||||
|
||||
fn read_non_decimal(&mut self, kind: Kind) -> Kind {
|
||||
self.current.chars.next();
|
||||
self.consume_char();
|
||||
|
||||
if self.peek().is_some_and(|c| kind.matches_number_char(c)) {
|
||||
self.current.chars.next();
|
||||
self.consume_char();
|
||||
} else {
|
||||
self.unexpected_err();
|
||||
return Kind::Undetermined;
|
||||
|
|
@ -52,22 +52,22 @@ impl<'a> Lexer<'a> {
|
|||
while let Some(c) = self.peek() {
|
||||
match c {
|
||||
'_' => {
|
||||
self.current.chars.next();
|
||||
self.consume_char();
|
||||
if self.peek().is_some_and(|c| kind.matches_number_char(c)) {
|
||||
self.current.chars.next();
|
||||
self.consume_char();
|
||||
} else {
|
||||
self.unexpected_err();
|
||||
return Kind::Undetermined;
|
||||
}
|
||||
}
|
||||
c if kind.matches_number_char(c) => {
|
||||
self.current.chars.next();
|
||||
self.consume_char();
|
||||
}
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
if self.peek() == Some('n') {
|
||||
self.current.chars.next();
|
||||
self.consume_char();
|
||||
}
|
||||
self.check_after_numeric_literal(kind)
|
||||
}
|
||||
|
|
@ -77,10 +77,10 @@ impl<'a> Lexer<'a> {
|
|||
loop {
|
||||
match self.peek() {
|
||||
Some('0'..='7') => {
|
||||
self.current.chars.next();
|
||||
self.consume_char();
|
||||
}
|
||||
Some('8'..='9') => {
|
||||
self.current.chars.next();
|
||||
self.consume_char();
|
||||
kind = Kind::Decimal;
|
||||
}
|
||||
_ => break,
|
||||
|
|
@ -90,12 +90,12 @@ impl<'a> Lexer<'a> {
|
|||
match self.peek() {
|
||||
// allow 08.5 and 09.5
|
||||
Some('.') if kind == Kind::Decimal => {
|
||||
self.current.chars.next();
|
||||
self.consume_char();
|
||||
self.decimal_literal_after_decimal_point_after_digits()
|
||||
}
|
||||
// allow 08e1 and 09e1
|
||||
Some('e') if kind == Kind::Decimal => {
|
||||
self.current.chars.next();
|
||||
self.consume_char();
|
||||
self.read_decimal_exponent()
|
||||
}
|
||||
_ => self.check_after_numeric_literal(kind),
|
||||
|
|
@ -105,11 +105,11 @@ impl<'a> Lexer<'a> {
|
|||
fn read_decimal_exponent(&mut self) -> Kind {
|
||||
let kind = match self.peek() {
|
||||
Some('-') => {
|
||||
self.current.chars.next();
|
||||
self.consume_char();
|
||||
Kind::NegativeExponential
|
||||
}
|
||||
Some('+') => {
|
||||
self.current.chars.next();
|
||||
self.consume_char();
|
||||
Kind::PositiveExponential
|
||||
}
|
||||
_ => Kind::PositiveExponential,
|
||||
|
|
@ -120,7 +120,7 @@ impl<'a> Lexer<'a> {
|
|||
|
||||
fn read_decimal_digits(&mut self) {
|
||||
if self.peek().is_some_and(|c| c.is_ascii_digit()) {
|
||||
self.current.chars.next();
|
||||
self.consume_char();
|
||||
} else {
|
||||
self.unexpected_err();
|
||||
return;
|
||||
|
|
@ -133,16 +133,16 @@ impl<'a> Lexer<'a> {
|
|||
while let Some(c) = self.peek() {
|
||||
match c {
|
||||
'_' => {
|
||||
self.current.chars.next();
|
||||
self.consume_char();
|
||||
if self.peek().is_some_and(|c| c.is_ascii_digit()) {
|
||||
self.current.chars.next();
|
||||
self.consume_char();
|
||||
} else {
|
||||
self.unexpected_err();
|
||||
return;
|
||||
}
|
||||
}
|
||||
'0'..='9' => {
|
||||
self.current.chars.next();
|
||||
self.consume_char();
|
||||
}
|
||||
_ => break,
|
||||
}
|
||||
|
|
@ -163,7 +163,7 @@ impl<'a> Lexer<'a> {
|
|||
|
||||
fn optional_decimal_digits(&mut self) {
|
||||
if self.peek().is_some_and(|c| c.is_ascii_digit()) {
|
||||
self.current.chars.next();
|
||||
self.consume_char();
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
|
|
@ -172,7 +172,7 @@ impl<'a> Lexer<'a> {
|
|||
|
||||
fn optional_exponent(&mut self) -> Option<Kind> {
|
||||
if matches!(self.peek(), Some('e' | 'E')) {
|
||||
self.current.chars.next();
|
||||
self.consume_char();
|
||||
return Some(self.read_decimal_exponent());
|
||||
}
|
||||
None
|
||||
|
|
@ -185,10 +185,10 @@ impl<'a> Lexer<'a> {
|
|||
if c.is_none() || c.is_some_and(|ch| !ch.is_ascii_digit() && !is_identifier_start(ch)) {
|
||||
return kind;
|
||||
}
|
||||
self.current.chars.next();
|
||||
self.consume_char();
|
||||
while let Some(c) = self.peek() {
|
||||
if is_identifier_start(c) {
|
||||
self.current.chars.next();
|
||||
self.consume_char();
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,8 +4,8 @@ impl<'a> Lexer<'a> {
|
|||
/// Section 12.8 Punctuators
|
||||
pub(super) fn read_dot(&mut self) -> Kind {
|
||||
if self.peek() == Some('.') && self.peek2() == Some('.') {
|
||||
self.current.chars.next();
|
||||
self.current.chars.next();
|
||||
self.consume_char();
|
||||
self.consume_char();
|
||||
return Kind::Dot3;
|
||||
}
|
||||
if self.peek().is_some_and(|c| c.is_ascii_digit()) {
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ impl<'a> Lexer<'a> {
|
|||
let mut in_escape = false;
|
||||
let mut in_character_class = false;
|
||||
loop {
|
||||
match self.current.chars.next() {
|
||||
match self.next_char() {
|
||||
None => {
|
||||
self.error(diagnostics::UnterminatedRegExp(self.unterminated_range()));
|
||||
return (self.offset(), RegExpFlags::empty());
|
||||
|
|
@ -59,7 +59,7 @@ impl<'a> Lexer<'a> {
|
|||
let mut flags = RegExpFlags::empty();
|
||||
|
||||
while let Some(ch @ ('$' | '_' | 'a'..='z' | 'A'..='Z' | '0'..='9')) = self.peek() {
|
||||
self.current.chars.next();
|
||||
self.consume_char();
|
||||
let flag = if let Ok(flag) = RegExpFlags::try_from(ch) {
|
||||
flag
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ impl<'a> Lexer<'a> {
|
|||
pub(super) fn read_string_literal(&mut self, delimiter: char) -> Kind {
|
||||
let mut builder = AutoCow::new(self);
|
||||
loop {
|
||||
match self.current.chars.next() {
|
||||
match self.next_char() {
|
||||
None | Some('\r' | '\n') => {
|
||||
self.error(diagnostics::UnterminatedString(self.unterminated_range()));
|
||||
return Kind::Undetermined;
|
||||
|
|
|
|||
|
|
@ -15,14 +15,14 @@ impl<'a> AutoCow<'a> {
|
|||
AutoCow { start, value: None }
|
||||
}
|
||||
|
||||
// Push a char that matches lexer.chars().next()
|
||||
// Push a char that matches lexer.current.chars.next()
|
||||
pub fn push_matching(&mut self, c: char) {
|
||||
if let Some(text) = &mut self.value {
|
||||
text.push(c);
|
||||
}
|
||||
}
|
||||
|
||||
// Push a different character than lexer.chars().next().
|
||||
// Push a different character than lexer.current.chars.next().
|
||||
// force_allocation_without_current_ascii_char must be called before this.
|
||||
pub fn push_different(&mut self, c: char) {
|
||||
debug_assert!(self.value.is_some());
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ impl<'a> Lexer<'a> {
|
|||
pub(super) fn read_template_literal(&mut self, substitute: Kind, tail: Kind) -> Kind {
|
||||
let mut builder = AutoCow::new(self);
|
||||
let mut is_valid_escape_sequence = true;
|
||||
while let Some(c) = self.current.chars.next() {
|
||||
while let Some(c) = self.next_char() {
|
||||
match c {
|
||||
'$' if self.peek() == Some('{') => {
|
||||
self.save_template_string(
|
||||
|
|
@ -16,7 +16,7 @@ impl<'a> Lexer<'a> {
|
|||
builder.has_escape(),
|
||||
builder.finish_without_push(self),
|
||||
);
|
||||
self.current.chars.next();
|
||||
self.consume_char();
|
||||
return substitute;
|
||||
}
|
||||
'`' => {
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ enum SurrogatePair {
|
|||
|
||||
impl<'a> Lexer<'a> {
|
||||
pub(super) fn unicode_char_handler(&mut self) -> Kind {
|
||||
let c = self.current.chars.clone().next().unwrap();
|
||||
let c = self.peek().unwrap();
|
||||
match c {
|
||||
c if is_identifier_start_unicode(c) => {
|
||||
let mut builder = AutoCow::new(self);
|
||||
|
|
@ -55,7 +55,7 @@ impl<'a> Lexer<'a> {
|
|||
check_identifier_start: bool,
|
||||
) {
|
||||
let start = self.offset();
|
||||
if self.current.chars.next() != Some('u') {
|
||||
if self.next_char() != Some('u') {
|
||||
let range = Span::new(start, self.offset());
|
||||
self.error(diagnostics::UnicodeEscapeSequence(range));
|
||||
return;
|
||||
|
|
@ -167,7 +167,7 @@ impl<'a> Lexer<'a> {
|
|||
Some(c @ 'A'..='F') => 10 + (c as u32 - 'A' as u32),
|
||||
_ => return None,
|
||||
};
|
||||
self.current.chars.next();
|
||||
self.consume_char();
|
||||
Some(value)
|
||||
}
|
||||
|
||||
|
|
@ -196,8 +196,8 @@ impl<'a> Lexer<'a> {
|
|||
return Some(SurrogatePair::CodePoint(high));
|
||||
}
|
||||
|
||||
self.current.chars.next();
|
||||
self.current.chars.next();
|
||||
self.next_char();
|
||||
self.next_char();
|
||||
|
||||
let low = self.hex_4_digits()?;
|
||||
|
||||
|
|
@ -219,7 +219,7 @@ impl<'a> Lexer<'a> {
|
|||
in_template: bool,
|
||||
is_valid_escape_sequence: &mut bool,
|
||||
) {
|
||||
match self.current.chars.next() {
|
||||
match self.next_char() {
|
||||
None => {
|
||||
self.error(diagnostics::UnterminatedString(self.unterminated_range()));
|
||||
}
|
||||
|
|
@ -299,7 +299,7 @@ impl<'a> Lexer<'a> {
|
|||
text.push(value);
|
||||
}
|
||||
'0' if in_template && self.peek().is_some_and(|c| c.is_ascii_digit()) => {
|
||||
self.current.chars.next();
|
||||
self.consume_char();
|
||||
// error raised within the parser by `diagnostics::TemplateLiteral`
|
||||
*is_valid_escape_sequence = false;
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue