refactor(lexer): don't use lexer.current.chars directly (#2237)

This PR replaces most usages of `lexer.current.chars.next()` with
`lexer.consume_char()`, or a new function `lexer.next_char()`.

This is a preparatory step towards replacing the `Chars` iterator with
something more flexible which can also consume bytes (not `char`s).
This PR was intended as a pure refactor, but I was surprised to see
a small performance bump (no idea why!).

There's an additional benefit: using `consume_char()` wherever we
believe there is definitely a char to be consumed will make logic
errors produce a panic, rather than silently outputting garbage.
This commit is contained in:
overlookmotel 2024-01-31 13:35:46 +00:00 committed by GitHub
parent a79988d5e2
commit 622a2c37fa
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 60 additions and 54 deletions

View file

@ -242,11 +242,11 @@ ascii_byte_handler!(SLH(lexer) {
lexer.consume_char();
match lexer.peek() {
Some('/') => {
lexer.current.chars.next();
lexer.consume_char();
lexer.skip_single_line_comment()
}
Some('*') => {
lexer.current.chars.next();
lexer.consume_char();
lexer.skip_multi_line_comment()
}
_ => {
@ -327,7 +327,7 @@ ascii_byte_handler!(QST(lexer) {
if lexer.peek2().is_some_and(|c| c.is_ascii_digit()) {
Kind::Question
} else {
lexer.current.chars.next();
lexer.consume_char();
Kind::QuestionDot
}
} else {

View file

@ -8,7 +8,7 @@ impl<'a> Lexer<'a> {
#[allow(clippy::cast_possible_truncation)]
pub(super) fn skip_single_line_comment(&mut self) -> Kind {
let start = self.current.token.start;
while let Some(c) = self.current.chars.next() {
while let Some(c) = self.next_char() {
if is_line_terminator(c) {
self.current.token.is_on_new_line = true;
self.trivia_builder
@ -23,7 +23,7 @@ impl<'a> Lexer<'a> {
/// Section 12.4 Multi Line Comment
pub(super) fn skip_multi_line_comment(&mut self) -> Kind {
while let Some(c) = self.current.chars.next() {
while let Some(c) = self.next_char() {
if c == '*' && self.next_eq('/') {
self.trivia_builder.add_multi_line_comment(self.current.token.start, self.offset());
return Kind::Skip;
@ -38,7 +38,7 @@ impl<'a> Lexer<'a> {
/// Section 12.5 Hashbang Comments
pub(super) fn read_hashbang_comment(&mut self) -> Kind {
while let Some(c) = self.current.chars.next().as_ref() {
while let Some(c) = self.next_char().as_ref() {
if is_line_terminator(*c) {
break;
}

View file

@ -18,7 +18,7 @@ impl<'a> Lexer<'a> {
pub(super) fn private_identifier(&mut self) -> Kind {
let mut builder = AutoCow::new(self);
let start = self.offset();
match self.current.chars.next() {
match self.next_char() {
Some(c) if is_identifier_start(c) => {
builder.push_matching(c);
}
@ -48,14 +48,14 @@ impl<'a> Lexer<'a> {
while let Some(c) = self.peek() {
if !is_identifier_part(c) {
if c == '\\' {
self.current.chars.next();
self.consume_char();
builder.force_allocation_without_current_ascii_char(self);
self.identifier_unicode_escape_sequence(&mut builder, false);
continue;
}
break;
}
self.current.chars.next();
self.consume_char();
builder.push_matching(c);
}
let has_escape = builder.has_escape();

View file

@ -17,7 +17,7 @@ impl<'a> Lexer<'a> {
pub(super) fn read_jsx_string_literal(&mut self, delimiter: char) -> Kind {
let mut builder = AutoCow::new(self);
loop {
match self.current.chars.next() {
match self.next_char() {
Some(c @ ('"' | '\'')) => {
if c == delimiter {
self.save_string(builder.has_escape(), builder.finish_without_push(self));
@ -58,11 +58,11 @@ impl<'a> Lexer<'a> {
fn read_jsx_child(&mut self) -> Kind {
match self.peek() {
Some('<') => {
self.current.chars.next();
self.consume_char();
Kind::LAngle
}
Some('{') => {
self.current.chars.next();
self.consume_char();
Kind::LCurly
}
Some(_) => {
@ -74,7 +74,7 @@ impl<'a> Lexer<'a> {
if self.peek().is_some_and(|c| c == '{' || c == '<') {
break;
}
if self.current.chars.next().is_none() {
if self.next_char().is_none() {
break;
}
}
@ -91,10 +91,10 @@ impl<'a> Lexer<'a> {
fn read_jsx_identifier(&mut self, _start_offset: u32) -> Kind {
while let Some(c) = self.peek() {
if c == '-' || is_identifier_start(c) {
self.current.chars.next();
self.consume_char();
while let Some(c) = self.peek() {
if is_identifier_part(c) {
self.current.chars.next();
self.consume_char();
} else {
break;
}

View file

@ -212,6 +212,12 @@ impl<'a> Lexer<'a> {
Span::new(self.current.token.start, self.offset())
}
/// Consume the current char if not at EOF.
///
/// Advances `self.current.chars` by one `char` and returns it as `Some`.
/// Returns `None` once the iterator is exhausted, so callers can treat
/// end of input as an ordinary outcome (e.g. `while let Some(c) = ...`).
#[inline]
fn next_char(&mut self) -> Option<char> {
self.current.chars.next()
}
/// Consume the current char
#[inline]
fn consume_char(&mut self) -> char {

View file

@ -11,15 +11,15 @@ impl<'a> Lexer<'a> {
Some('o' | 'O') => self.read_non_decimal(Kind::Octal),
Some('x' | 'X') => self.read_non_decimal(Kind::Hex),
Some('e' | 'E') => {
self.current.chars.next();
self.consume_char();
self.read_decimal_exponent()
}
Some('.') => {
self.current.chars.next();
self.consume_char();
self.decimal_literal_after_decimal_point_after_digits()
}
Some('n') => {
self.current.chars.next();
self.consume_char();
self.check_after_numeric_literal(Kind::Decimal)
}
Some(n) if n.is_ascii_digit() => self.read_legacy_octal(),
@ -40,10 +40,10 @@ impl<'a> Lexer<'a> {
}
fn read_non_decimal(&mut self, kind: Kind) -> Kind {
self.current.chars.next();
self.consume_char();
if self.peek().is_some_and(|c| kind.matches_number_char(c)) {
self.current.chars.next();
self.consume_char();
} else {
self.unexpected_err();
return Kind::Undetermined;
@ -52,22 +52,22 @@ impl<'a> Lexer<'a> {
while let Some(c) = self.peek() {
match c {
'_' => {
self.current.chars.next();
self.consume_char();
if self.peek().is_some_and(|c| kind.matches_number_char(c)) {
self.current.chars.next();
self.consume_char();
} else {
self.unexpected_err();
return Kind::Undetermined;
}
}
c if kind.matches_number_char(c) => {
self.current.chars.next();
self.consume_char();
}
_ => break,
}
}
if self.peek() == Some('n') {
self.current.chars.next();
self.consume_char();
}
self.check_after_numeric_literal(kind)
}
@ -77,10 +77,10 @@ impl<'a> Lexer<'a> {
loop {
match self.peek() {
Some('0'..='7') => {
self.current.chars.next();
self.consume_char();
}
Some('8'..='9') => {
self.current.chars.next();
self.consume_char();
kind = Kind::Decimal;
}
_ => break,
@ -90,12 +90,12 @@ impl<'a> Lexer<'a> {
match self.peek() {
// allow 08.5 and 09.5
Some('.') if kind == Kind::Decimal => {
self.current.chars.next();
self.consume_char();
self.decimal_literal_after_decimal_point_after_digits()
}
// allow 08e1 and 09e1
Some('e') if kind == Kind::Decimal => {
self.current.chars.next();
self.consume_char();
self.read_decimal_exponent()
}
_ => self.check_after_numeric_literal(kind),
@ -105,11 +105,11 @@ impl<'a> Lexer<'a> {
fn read_decimal_exponent(&mut self) -> Kind {
let kind = match self.peek() {
Some('-') => {
self.current.chars.next();
self.consume_char();
Kind::NegativeExponential
}
Some('+') => {
self.current.chars.next();
self.consume_char();
Kind::PositiveExponential
}
_ => Kind::PositiveExponential,
@ -120,7 +120,7 @@ impl<'a> Lexer<'a> {
fn read_decimal_digits(&mut self) {
if self.peek().is_some_and(|c| c.is_ascii_digit()) {
self.current.chars.next();
self.consume_char();
} else {
self.unexpected_err();
return;
@ -133,16 +133,16 @@ impl<'a> Lexer<'a> {
while let Some(c) = self.peek() {
match c {
'_' => {
self.current.chars.next();
self.consume_char();
if self.peek().is_some_and(|c| c.is_ascii_digit()) {
self.current.chars.next();
self.consume_char();
} else {
self.unexpected_err();
return;
}
}
'0'..='9' => {
self.current.chars.next();
self.consume_char();
}
_ => break,
}
@ -163,7 +163,7 @@ impl<'a> Lexer<'a> {
fn optional_decimal_digits(&mut self) {
if self.peek().is_some_and(|c| c.is_ascii_digit()) {
self.current.chars.next();
self.consume_char();
} else {
return;
}
@ -172,7 +172,7 @@ impl<'a> Lexer<'a> {
fn optional_exponent(&mut self) -> Option<Kind> {
if matches!(self.peek(), Some('e' | 'E')) {
self.current.chars.next();
self.consume_char();
return Some(self.read_decimal_exponent());
}
None
@ -185,10 +185,10 @@ impl<'a> Lexer<'a> {
if c.is_none() || c.is_some_and(|ch| !ch.is_ascii_digit() && !is_identifier_start(ch)) {
return kind;
}
self.current.chars.next();
self.consume_char();
while let Some(c) = self.peek() {
if is_identifier_start(c) {
self.current.chars.next();
self.consume_char();
} else {
break;
}

View file

@ -4,8 +4,8 @@ impl<'a> Lexer<'a> {
/// Section 12.8 Punctuators
pub(super) fn read_dot(&mut self) -> Kind {
if self.peek() == Some('.') && self.peek2() == Some('.') {
self.current.chars.next();
self.current.chars.next();
self.consume_char();
self.consume_char();
return Kind::Dot3;
}
if self.peek().is_some_and(|c| c.is_ascii_digit()) {

View file

@ -28,7 +28,7 @@ impl<'a> Lexer<'a> {
let mut in_escape = false;
let mut in_character_class = false;
loop {
match self.current.chars.next() {
match self.next_char() {
None => {
self.error(diagnostics::UnterminatedRegExp(self.unterminated_range()));
return (self.offset(), RegExpFlags::empty());
@ -59,7 +59,7 @@ impl<'a> Lexer<'a> {
let mut flags = RegExpFlags::empty();
while let Some(ch @ ('$' | '_' | 'a'..='z' | 'A'..='Z' | '0'..='9')) = self.peek() {
self.current.chars.next();
self.consume_char();
let flag = if let Ok(flag) = RegExpFlags::try_from(ch) {
flag
} else {

View file

@ -6,7 +6,7 @@ impl<'a> Lexer<'a> {
pub(super) fn read_string_literal(&mut self, delimiter: char) -> Kind {
let mut builder = AutoCow::new(self);
loop {
match self.current.chars.next() {
match self.next_char() {
None | Some('\r' | '\n') => {
self.error(diagnostics::UnterminatedString(self.unterminated_range()));
return Kind::Undetermined;

View file

@ -15,14 +15,14 @@ impl<'a> AutoCow<'a> {
AutoCow { start, value: None }
}
// Push a char that matches lexer.chars().next()
// Push a char that matches lexer.current.chars.next()
pub fn push_matching(&mut self, c: char) {
// Only an existing owned buffer needs the char appended; while `value`
// is still `None` this call does nothing.
if let Some(text) = &mut self.value {
text.push(c);
}
}
// Push a different character than lexer.chars().next().
// Push a different character than lexer.current.chars.next().
// force_allocation_without_current_ascii_char must be called before this.
pub fn push_different(&mut self, c: char) {
debug_assert!(self.value.is_some());

View file

@ -8,7 +8,7 @@ impl<'a> Lexer<'a> {
pub(super) fn read_template_literal(&mut self, substitute: Kind, tail: Kind) -> Kind {
let mut builder = AutoCow::new(self);
let mut is_valid_escape_sequence = true;
while let Some(c) = self.current.chars.next() {
while let Some(c) = self.next_char() {
match c {
'$' if self.peek() == Some('{') => {
self.save_template_string(
@ -16,7 +16,7 @@ impl<'a> Lexer<'a> {
builder.has_escape(),
builder.finish_without_push(self),
);
self.current.chars.next();
self.consume_char();
return substitute;
}
'`' => {

View file

@ -18,7 +18,7 @@ enum SurrogatePair {
impl<'a> Lexer<'a> {
pub(super) fn unicode_char_handler(&mut self) -> Kind {
let c = self.current.chars.clone().next().unwrap();
let c = self.peek().unwrap();
match c {
c if is_identifier_start_unicode(c) => {
let mut builder = AutoCow::new(self);
@ -55,7 +55,7 @@ impl<'a> Lexer<'a> {
check_identifier_start: bool,
) {
let start = self.offset();
if self.current.chars.next() != Some('u') {
if self.next_char() != Some('u') {
let range = Span::new(start, self.offset());
self.error(diagnostics::UnicodeEscapeSequence(range));
return;
@ -167,7 +167,7 @@ impl<'a> Lexer<'a> {
Some(c @ 'A'..='F') => 10 + (c as u32 - 'A' as u32),
_ => return None,
};
self.current.chars.next();
self.consume_char();
Some(value)
}
@ -196,8 +196,8 @@ impl<'a> Lexer<'a> {
return Some(SurrogatePair::CodePoint(high));
}
self.current.chars.next();
self.current.chars.next();
self.next_char();
self.next_char();
let low = self.hex_4_digits()?;
@ -219,7 +219,7 @@ impl<'a> Lexer<'a> {
in_template: bool,
is_valid_escape_sequence: &mut bool,
) {
match self.current.chars.next() {
match self.next_char() {
None => {
self.error(diagnostics::UnterminatedString(self.unterminated_range()));
}
@ -299,7 +299,7 @@ impl<'a> Lexer<'a> {
text.push(value);
}
'0' if in_template && self.peek().is_some_and(|c| c.is_ascii_digit()) => {
self.current.chars.next();
self.consume_char();
// error raised within the parser by `diagnostics::TemplateLiteral`
*is_valid_escape_sequence = false;
}