refactor(lexer): don't use lexer.current.chars directly (#2237)

This PR replaces most usages of `lexer.current.chars.next()` with
`lexer.consume_char()`, or a new function `lexer.next_char()`.

This is a preparatory step towards replacing the `Chars` iterator with
something more flexible which can also consume bytes (not `char`s).
This PR was intended as a pure refactor, but I was surprised to see
a small performance bump (no idea why!).

There's an additional benefit: using `consume_char()` wherever we
believe there is definitely a char to be consumed will make
logic errors produce a panic, rather than silently outputting garbage.
This commit is contained in:
overlookmotel 2024-01-31 13:35:46 +00:00 committed by GitHub
parent a79988d5e2
commit 622a2c37fa
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 60 additions and 54 deletions

View file

@ -242,11 +242,11 @@ ascii_byte_handler!(SLH(lexer) {
lexer.consume_char(); lexer.consume_char();
match lexer.peek() { match lexer.peek() {
Some('/') => { Some('/') => {
lexer.current.chars.next(); lexer.consume_char();
lexer.skip_single_line_comment() lexer.skip_single_line_comment()
} }
Some('*') => { Some('*') => {
lexer.current.chars.next(); lexer.consume_char();
lexer.skip_multi_line_comment() lexer.skip_multi_line_comment()
} }
_ => { _ => {
@ -327,7 +327,7 @@ ascii_byte_handler!(QST(lexer) {
if lexer.peek2().is_some_and(|c| c.is_ascii_digit()) { if lexer.peek2().is_some_and(|c| c.is_ascii_digit()) {
Kind::Question Kind::Question
} else { } else {
lexer.current.chars.next(); lexer.consume_char();
Kind::QuestionDot Kind::QuestionDot
} }
} else { } else {

View file

@ -8,7 +8,7 @@ impl<'a> Lexer<'a> {
#[allow(clippy::cast_possible_truncation)] #[allow(clippy::cast_possible_truncation)]
pub(super) fn skip_single_line_comment(&mut self) -> Kind { pub(super) fn skip_single_line_comment(&mut self) -> Kind {
let start = self.current.token.start; let start = self.current.token.start;
while let Some(c) = self.current.chars.next() { while let Some(c) = self.next_char() {
if is_line_terminator(c) { if is_line_terminator(c) {
self.current.token.is_on_new_line = true; self.current.token.is_on_new_line = true;
self.trivia_builder self.trivia_builder
@ -23,7 +23,7 @@ impl<'a> Lexer<'a> {
/// Section 12.4 Multi Line Comment /// Section 12.4 Multi Line Comment
pub(super) fn skip_multi_line_comment(&mut self) -> Kind { pub(super) fn skip_multi_line_comment(&mut self) -> Kind {
while let Some(c) = self.current.chars.next() { while let Some(c) = self.next_char() {
if c == '*' && self.next_eq('/') { if c == '*' && self.next_eq('/') {
self.trivia_builder.add_multi_line_comment(self.current.token.start, self.offset()); self.trivia_builder.add_multi_line_comment(self.current.token.start, self.offset());
return Kind::Skip; return Kind::Skip;
@ -38,7 +38,7 @@ impl<'a> Lexer<'a> {
/// Section 12.5 Hashbang Comments /// Section 12.5 Hashbang Comments
pub(super) fn read_hashbang_comment(&mut self) -> Kind { pub(super) fn read_hashbang_comment(&mut self) -> Kind {
while let Some(c) = self.current.chars.next().as_ref() { while let Some(c) = self.next_char().as_ref() {
if is_line_terminator(*c) { if is_line_terminator(*c) {
break; break;
} }

View file

@ -18,7 +18,7 @@ impl<'a> Lexer<'a> {
pub(super) fn private_identifier(&mut self) -> Kind { pub(super) fn private_identifier(&mut self) -> Kind {
let mut builder = AutoCow::new(self); let mut builder = AutoCow::new(self);
let start = self.offset(); let start = self.offset();
match self.current.chars.next() { match self.next_char() {
Some(c) if is_identifier_start(c) => { Some(c) if is_identifier_start(c) => {
builder.push_matching(c); builder.push_matching(c);
} }
@ -48,14 +48,14 @@ impl<'a> Lexer<'a> {
while let Some(c) = self.peek() { while let Some(c) = self.peek() {
if !is_identifier_part(c) { if !is_identifier_part(c) {
if c == '\\' { if c == '\\' {
self.current.chars.next(); self.consume_char();
builder.force_allocation_without_current_ascii_char(self); builder.force_allocation_without_current_ascii_char(self);
self.identifier_unicode_escape_sequence(&mut builder, false); self.identifier_unicode_escape_sequence(&mut builder, false);
continue; continue;
} }
break; break;
} }
self.current.chars.next(); self.consume_char();
builder.push_matching(c); builder.push_matching(c);
} }
let has_escape = builder.has_escape(); let has_escape = builder.has_escape();

View file

@ -17,7 +17,7 @@ impl<'a> Lexer<'a> {
pub(super) fn read_jsx_string_literal(&mut self, delimiter: char) -> Kind { pub(super) fn read_jsx_string_literal(&mut self, delimiter: char) -> Kind {
let mut builder = AutoCow::new(self); let mut builder = AutoCow::new(self);
loop { loop {
match self.current.chars.next() { match self.next_char() {
Some(c @ ('"' | '\'')) => { Some(c @ ('"' | '\'')) => {
if c == delimiter { if c == delimiter {
self.save_string(builder.has_escape(), builder.finish_without_push(self)); self.save_string(builder.has_escape(), builder.finish_without_push(self));
@ -58,11 +58,11 @@ impl<'a> Lexer<'a> {
fn read_jsx_child(&mut self) -> Kind { fn read_jsx_child(&mut self) -> Kind {
match self.peek() { match self.peek() {
Some('<') => { Some('<') => {
self.current.chars.next(); self.consume_char();
Kind::LAngle Kind::LAngle
} }
Some('{') => { Some('{') => {
self.current.chars.next(); self.consume_char();
Kind::LCurly Kind::LCurly
} }
Some(_) => { Some(_) => {
@ -74,7 +74,7 @@ impl<'a> Lexer<'a> {
if self.peek().is_some_and(|c| c == '{' || c == '<') { if self.peek().is_some_and(|c| c == '{' || c == '<') {
break; break;
} }
if self.current.chars.next().is_none() { if self.next_char().is_none() {
break; break;
} }
} }
@ -91,10 +91,10 @@ impl<'a> Lexer<'a> {
fn read_jsx_identifier(&mut self, _start_offset: u32) -> Kind { fn read_jsx_identifier(&mut self, _start_offset: u32) -> Kind {
while let Some(c) = self.peek() { while let Some(c) = self.peek() {
if c == '-' || is_identifier_start(c) { if c == '-' || is_identifier_start(c) {
self.current.chars.next(); self.consume_char();
while let Some(c) = self.peek() { while let Some(c) = self.peek() {
if is_identifier_part(c) { if is_identifier_part(c) {
self.current.chars.next(); self.consume_char();
} else { } else {
break; break;
} }

View file

@ -212,6 +212,12 @@ impl<'a> Lexer<'a> {
Span::new(self.current.token.start, self.offset()) Span::new(self.current.token.start, self.offset())
} }
/// Consume the next char and return it, or return `None` if at EOF.
///
/// Thin wrapper around `self.current.chars.next()`: it advances the
/// underlying char iterator by one char when one is available and leaves
/// it exhausted otherwise. Use this where running out of input is an
/// expected case that the caller handles via the `Option`.
#[inline]
fn next_char(&mut self) -> Option<char> {
self.current.chars.next()
}
/// Consume the current char /// Consume the current char
#[inline] #[inline]
fn consume_char(&mut self) -> char { fn consume_char(&mut self) -> char {

View file

@ -11,15 +11,15 @@ impl<'a> Lexer<'a> {
Some('o' | 'O') => self.read_non_decimal(Kind::Octal), Some('o' | 'O') => self.read_non_decimal(Kind::Octal),
Some('x' | 'X') => self.read_non_decimal(Kind::Hex), Some('x' | 'X') => self.read_non_decimal(Kind::Hex),
Some('e' | 'E') => { Some('e' | 'E') => {
self.current.chars.next(); self.consume_char();
self.read_decimal_exponent() self.read_decimal_exponent()
} }
Some('.') => { Some('.') => {
self.current.chars.next(); self.consume_char();
self.decimal_literal_after_decimal_point_after_digits() self.decimal_literal_after_decimal_point_after_digits()
} }
Some('n') => { Some('n') => {
self.current.chars.next(); self.consume_char();
self.check_after_numeric_literal(Kind::Decimal) self.check_after_numeric_literal(Kind::Decimal)
} }
Some(n) if n.is_ascii_digit() => self.read_legacy_octal(), Some(n) if n.is_ascii_digit() => self.read_legacy_octal(),
@ -40,10 +40,10 @@ impl<'a> Lexer<'a> {
} }
fn read_non_decimal(&mut self, kind: Kind) -> Kind { fn read_non_decimal(&mut self, kind: Kind) -> Kind {
self.current.chars.next(); self.consume_char();
if self.peek().is_some_and(|c| kind.matches_number_char(c)) { if self.peek().is_some_and(|c| kind.matches_number_char(c)) {
self.current.chars.next(); self.consume_char();
} else { } else {
self.unexpected_err(); self.unexpected_err();
return Kind::Undetermined; return Kind::Undetermined;
@ -52,22 +52,22 @@ impl<'a> Lexer<'a> {
while let Some(c) = self.peek() { while let Some(c) = self.peek() {
match c { match c {
'_' => { '_' => {
self.current.chars.next(); self.consume_char();
if self.peek().is_some_and(|c| kind.matches_number_char(c)) { if self.peek().is_some_and(|c| kind.matches_number_char(c)) {
self.current.chars.next(); self.consume_char();
} else { } else {
self.unexpected_err(); self.unexpected_err();
return Kind::Undetermined; return Kind::Undetermined;
} }
} }
c if kind.matches_number_char(c) => { c if kind.matches_number_char(c) => {
self.current.chars.next(); self.consume_char();
} }
_ => break, _ => break,
} }
} }
if self.peek() == Some('n') { if self.peek() == Some('n') {
self.current.chars.next(); self.consume_char();
} }
self.check_after_numeric_literal(kind) self.check_after_numeric_literal(kind)
} }
@ -77,10 +77,10 @@ impl<'a> Lexer<'a> {
loop { loop {
match self.peek() { match self.peek() {
Some('0'..='7') => { Some('0'..='7') => {
self.current.chars.next(); self.consume_char();
} }
Some('8'..='9') => { Some('8'..='9') => {
self.current.chars.next(); self.consume_char();
kind = Kind::Decimal; kind = Kind::Decimal;
} }
_ => break, _ => break,
@ -90,12 +90,12 @@ impl<'a> Lexer<'a> {
match self.peek() { match self.peek() {
// allow 08.5 and 09.5 // allow 08.5 and 09.5
Some('.') if kind == Kind::Decimal => { Some('.') if kind == Kind::Decimal => {
self.current.chars.next(); self.consume_char();
self.decimal_literal_after_decimal_point_after_digits() self.decimal_literal_after_decimal_point_after_digits()
} }
// allow 08e1 and 09e1 // allow 08e1 and 09e1
Some('e') if kind == Kind::Decimal => { Some('e') if kind == Kind::Decimal => {
self.current.chars.next(); self.consume_char();
self.read_decimal_exponent() self.read_decimal_exponent()
} }
_ => self.check_after_numeric_literal(kind), _ => self.check_after_numeric_literal(kind),
@ -105,11 +105,11 @@ impl<'a> Lexer<'a> {
fn read_decimal_exponent(&mut self) -> Kind { fn read_decimal_exponent(&mut self) -> Kind {
let kind = match self.peek() { let kind = match self.peek() {
Some('-') => { Some('-') => {
self.current.chars.next(); self.consume_char();
Kind::NegativeExponential Kind::NegativeExponential
} }
Some('+') => { Some('+') => {
self.current.chars.next(); self.consume_char();
Kind::PositiveExponential Kind::PositiveExponential
} }
_ => Kind::PositiveExponential, _ => Kind::PositiveExponential,
@ -120,7 +120,7 @@ impl<'a> Lexer<'a> {
fn read_decimal_digits(&mut self) { fn read_decimal_digits(&mut self) {
if self.peek().is_some_and(|c| c.is_ascii_digit()) { if self.peek().is_some_and(|c| c.is_ascii_digit()) {
self.current.chars.next(); self.consume_char();
} else { } else {
self.unexpected_err(); self.unexpected_err();
return; return;
@ -133,16 +133,16 @@ impl<'a> Lexer<'a> {
while let Some(c) = self.peek() { while let Some(c) = self.peek() {
match c { match c {
'_' => { '_' => {
self.current.chars.next(); self.consume_char();
if self.peek().is_some_and(|c| c.is_ascii_digit()) { if self.peek().is_some_and(|c| c.is_ascii_digit()) {
self.current.chars.next(); self.consume_char();
} else { } else {
self.unexpected_err(); self.unexpected_err();
return; return;
} }
} }
'0'..='9' => { '0'..='9' => {
self.current.chars.next(); self.consume_char();
} }
_ => break, _ => break,
} }
@ -163,7 +163,7 @@ impl<'a> Lexer<'a> {
fn optional_decimal_digits(&mut self) { fn optional_decimal_digits(&mut self) {
if self.peek().is_some_and(|c| c.is_ascii_digit()) { if self.peek().is_some_and(|c| c.is_ascii_digit()) {
self.current.chars.next(); self.consume_char();
} else { } else {
return; return;
} }
@ -172,7 +172,7 @@ impl<'a> Lexer<'a> {
fn optional_exponent(&mut self) -> Option<Kind> { fn optional_exponent(&mut self) -> Option<Kind> {
if matches!(self.peek(), Some('e' | 'E')) { if matches!(self.peek(), Some('e' | 'E')) {
self.current.chars.next(); self.consume_char();
return Some(self.read_decimal_exponent()); return Some(self.read_decimal_exponent());
} }
None None
@ -185,10 +185,10 @@ impl<'a> Lexer<'a> {
if c.is_none() || c.is_some_and(|ch| !ch.is_ascii_digit() && !is_identifier_start(ch)) { if c.is_none() || c.is_some_and(|ch| !ch.is_ascii_digit() && !is_identifier_start(ch)) {
return kind; return kind;
} }
self.current.chars.next(); self.consume_char();
while let Some(c) = self.peek() { while let Some(c) = self.peek() {
if is_identifier_start(c) { if is_identifier_start(c) {
self.current.chars.next(); self.consume_char();
} else { } else {
break; break;
} }

View file

@ -4,8 +4,8 @@ impl<'a> Lexer<'a> {
/// Section 12.8 Punctuators /// Section 12.8 Punctuators
pub(super) fn read_dot(&mut self) -> Kind { pub(super) fn read_dot(&mut self) -> Kind {
if self.peek() == Some('.') && self.peek2() == Some('.') { if self.peek() == Some('.') && self.peek2() == Some('.') {
self.current.chars.next(); self.consume_char();
self.current.chars.next(); self.consume_char();
return Kind::Dot3; return Kind::Dot3;
} }
if self.peek().is_some_and(|c| c.is_ascii_digit()) { if self.peek().is_some_and(|c| c.is_ascii_digit()) {

View file

@ -28,7 +28,7 @@ impl<'a> Lexer<'a> {
let mut in_escape = false; let mut in_escape = false;
let mut in_character_class = false; let mut in_character_class = false;
loop { loop {
match self.current.chars.next() { match self.next_char() {
None => { None => {
self.error(diagnostics::UnterminatedRegExp(self.unterminated_range())); self.error(diagnostics::UnterminatedRegExp(self.unterminated_range()));
return (self.offset(), RegExpFlags::empty()); return (self.offset(), RegExpFlags::empty());
@ -59,7 +59,7 @@ impl<'a> Lexer<'a> {
let mut flags = RegExpFlags::empty(); let mut flags = RegExpFlags::empty();
while let Some(ch @ ('$' | '_' | 'a'..='z' | 'A'..='Z' | '0'..='9')) = self.peek() { while let Some(ch @ ('$' | '_' | 'a'..='z' | 'A'..='Z' | '0'..='9')) = self.peek() {
self.current.chars.next(); self.consume_char();
let flag = if let Ok(flag) = RegExpFlags::try_from(ch) { let flag = if let Ok(flag) = RegExpFlags::try_from(ch) {
flag flag
} else { } else {

View file

@ -6,7 +6,7 @@ impl<'a> Lexer<'a> {
pub(super) fn read_string_literal(&mut self, delimiter: char) -> Kind { pub(super) fn read_string_literal(&mut self, delimiter: char) -> Kind {
let mut builder = AutoCow::new(self); let mut builder = AutoCow::new(self);
loop { loop {
match self.current.chars.next() { match self.next_char() {
None | Some('\r' | '\n') => { None | Some('\r' | '\n') => {
self.error(diagnostics::UnterminatedString(self.unterminated_range())); self.error(diagnostics::UnterminatedString(self.unterminated_range()));
return Kind::Undetermined; return Kind::Undetermined;

View file

@ -15,14 +15,14 @@ impl<'a> AutoCow<'a> {
AutoCow { start, value: None } AutoCow { start, value: None }
} }
// Push a char that matches lexer.chars().next() // Push a char that matches lexer.current.chars().next()
pub fn push_matching(&mut self, c: char) { pub fn push_matching(&mut self, c: char) {
if let Some(text) = &mut self.value { if let Some(text) = &mut self.value {
text.push(c); text.push(c);
} }
} }
// Push a different character than lexer.chars().next(). // Push a different character than lexer.current.chars().next().
// force_allocation_without_current_ascii_char must be called before this. // force_allocation_without_current_ascii_char must be called before this.
pub fn push_different(&mut self, c: char) { pub fn push_different(&mut self, c: char) {
debug_assert!(self.value.is_some()); debug_assert!(self.value.is_some());

View file

@ -8,7 +8,7 @@ impl<'a> Lexer<'a> {
pub(super) fn read_template_literal(&mut self, substitute: Kind, tail: Kind) -> Kind { pub(super) fn read_template_literal(&mut self, substitute: Kind, tail: Kind) -> Kind {
let mut builder = AutoCow::new(self); let mut builder = AutoCow::new(self);
let mut is_valid_escape_sequence = true; let mut is_valid_escape_sequence = true;
while let Some(c) = self.current.chars.next() { while let Some(c) = self.next_char() {
match c { match c {
'$' if self.peek() == Some('{') => { '$' if self.peek() == Some('{') => {
self.save_template_string( self.save_template_string(
@ -16,7 +16,7 @@ impl<'a> Lexer<'a> {
builder.has_escape(), builder.has_escape(),
builder.finish_without_push(self), builder.finish_without_push(self),
); );
self.current.chars.next(); self.consume_char();
return substitute; return substitute;
} }
'`' => { '`' => {

View file

@ -18,7 +18,7 @@ enum SurrogatePair {
impl<'a> Lexer<'a> { impl<'a> Lexer<'a> {
pub(super) fn unicode_char_handler(&mut self) -> Kind { pub(super) fn unicode_char_handler(&mut self) -> Kind {
let c = self.current.chars.clone().next().unwrap(); let c = self.peek().unwrap();
match c { match c {
c if is_identifier_start_unicode(c) => { c if is_identifier_start_unicode(c) => {
let mut builder = AutoCow::new(self); let mut builder = AutoCow::new(self);
@ -55,7 +55,7 @@ impl<'a> Lexer<'a> {
check_identifier_start: bool, check_identifier_start: bool,
) { ) {
let start = self.offset(); let start = self.offset();
if self.current.chars.next() != Some('u') { if self.next_char() != Some('u') {
let range = Span::new(start, self.offset()); let range = Span::new(start, self.offset());
self.error(diagnostics::UnicodeEscapeSequence(range)); self.error(diagnostics::UnicodeEscapeSequence(range));
return; return;
@ -167,7 +167,7 @@ impl<'a> Lexer<'a> {
Some(c @ 'A'..='F') => 10 + (c as u32 - 'A' as u32), Some(c @ 'A'..='F') => 10 + (c as u32 - 'A' as u32),
_ => return None, _ => return None,
}; };
self.current.chars.next(); self.consume_char();
Some(value) Some(value)
} }
@ -196,8 +196,8 @@ impl<'a> Lexer<'a> {
return Some(SurrogatePair::CodePoint(high)); return Some(SurrogatePair::CodePoint(high));
} }
self.current.chars.next(); self.next_char();
self.current.chars.next(); self.next_char();
let low = self.hex_4_digits()?; let low = self.hex_4_digits()?;
@ -219,7 +219,7 @@ impl<'a> Lexer<'a> {
in_template: bool, in_template: bool,
is_valid_escape_sequence: &mut bool, is_valid_escape_sequence: &mut bool,
) { ) {
match self.current.chars.next() { match self.next_char() {
None => { None => {
self.error(diagnostics::UnterminatedString(self.unterminated_range())); self.error(diagnostics::UnterminatedString(self.unterminated_range()));
} }
@ -299,7 +299,7 @@ impl<'a> Lexer<'a> {
text.push(value); text.push(value);
} }
'0' if in_template && self.peek().is_some_and(|c| c.is_ascii_digit()) => { '0' if in_template && self.peek().is_some_and(|c| c.is_ascii_digit()) => {
self.current.chars.next(); self.consume_char();
// error raised within the parser by `diagnostics::TemplateLiteral` // error raised within the parser by `diagnostics::TemplateLiteral`
*is_valid_escape_sequence = false; *is_valid_escape_sequence = false;
} }