perf(lexer): reduce an extra branch from peek (#841)

This commit is contained in:
Boshen 2023-09-03 00:02:42 +08:00 committed by GitHub
parent 53b094e46c
commit d25355c9e8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -20,7 +20,7 @@ use oxc_span::{SourceType, Span};
use oxc_syntax::{ use oxc_syntax::{
identifier::{ identifier::{
is_identifier_part, is_identifier_start_all, is_irregular_line_terminator, is_identifier_part, is_identifier_start_all, is_irregular_line_terminator,
is_irregular_whitespace, is_line_terminator, CR, EOF, FF, LF, LS, PS, TAB, VT, is_irregular_whitespace, is_line_terminator, CR, FF, LF, LS, PS, TAB, VT,
}, },
unicode_id_start::is_id_start_unicode, unicode_id_start::is_id_start_unicode,
}; };
@ -84,7 +84,7 @@ impl<'a> Lexer<'a> {
source_type, source_type,
current, current,
errors: vec![], errors: vec![],
lookahead: VecDeque::with_capacity(4), lookahead: VecDeque::with_capacity(4), // 4 is the maximum lookahead for TypeScript
context: LexerContext::Regular, context: LexerContext::Regular,
trivia_builder: TriviaBuilder::default(), trivia_builder: TriviaBuilder::default(),
} }
@ -254,21 +254,22 @@ impl<'a> Lexer<'a> {
/// Peek the next char without advancing the position /// Peek the next char without advancing the position
#[inline] #[inline]
fn peek(&self) -> char { fn peek(&self) -> Option<char> {
self.current.chars.clone().next().unwrap_or(EOF) self.current.chars.clone().next()
} }
/// Peek the next next char without advancing the position /// Peek the next next char without advancing the position
fn peek2(&self) -> char { #[inline]
fn peek2(&self) -> Option<char> {
let mut chars = self.current.chars.clone(); let mut chars = self.current.chars.clone();
chars.next(); chars.next();
chars.next().unwrap_or(EOF) chars.next()
} }
/// Peek the next character, and advance the current position if it matches /// Peek the next character, and advance the current position if it matches
#[inline] #[inline]
fn next_eq(&mut self, c: char) -> bool { fn next_eq(&mut self, c: char) -> bool {
let matched = self.peek() == c; let matched = self.peek() == Some(c);
if matched { if matched {
self.current.chars.next(); self.current.chars.next();
} }
@ -282,11 +283,10 @@ impl<'a> Lexer<'a> {
/// Return `IllegalCharacter` Error or `UnexpectedEnd` if EOF /// Return `IllegalCharacter` Error or `UnexpectedEnd` if EOF
fn unexpected_err(&mut self) { fn unexpected_err(&mut self) {
let c = self.peek(); let offset = self.current_offset();
if c == EOF { match self.peek() {
self.error(diagnostics::UnexpectedEnd(self.current_offset())); Some(c) => self.error(diagnostics::InvalidCharacter(c, offset)),
} else { None => self.error(diagnostics::UnexpectedEnd(offset)),
self.error(diagnostics::InvalidCharacter(c, self.current_offset()));
} }
} }
@ -419,8 +419,7 @@ impl<'a> Lexer<'a> {
/// Section 12.6.1 Identifier Names /// Section 12.6.1 Identifier Names
fn identifier_tail(&mut self, mut builder: AutoCow<'a>) -> (bool, &'a str) { fn identifier_tail(&mut self, mut builder: AutoCow<'a>) -> (bool, &'a str) {
// ident tail // ident tail
loop { while let Some(c) = self.peek() {
let c = self.peek();
if !is_identifier_part(c) { if !is_identifier_part(c) {
if c == '\\' { if c == '\\' {
self.current.chars.next(); self.current.chars.next();
@ -453,12 +452,12 @@ impl<'a> Lexer<'a> {
/// Section 12.7 Punctuators /// Section 12.7 Punctuators
fn read_dot(&mut self, builder: &mut AutoCow<'a>) -> Kind { fn read_dot(&mut self, builder: &mut AutoCow<'a>) -> Kind {
if self.peek() == '.' && self.peek2() == '.' { if self.peek() == Some('.') && self.peek2() == Some('.') {
self.current.chars.next(); self.current.chars.next();
self.current.chars.next(); self.current.chars.next();
return Kind::Dot3; return Kind::Dot3;
} }
if self.peek().is_ascii_digit() { if self.peek().is_some_and(|c| c.is_ascii_digit()) {
builder.push_matching('.'); builder.push_matching('.');
self.decimal_literal_after_decimal_point(builder) self.decimal_literal_after_decimal_point(builder)
} else { } else {
@ -476,7 +475,7 @@ impl<'a> Lexer<'a> {
} }
} else if self.next_eq('=') { } else if self.next_eq('=') {
Some(Kind::LtEq) Some(Kind::LtEq)
} else if self.peek() == '!' } else if self.peek() == Some('!')
// SingleLineHTMLOpenComment `<!--` in script mode // SingleLineHTMLOpenComment `<!--` in script mode
&& self.source_type.is_script() && self.source_type.is_script()
&& self.remaining().starts_with("!--") && self.remaining().starts_with("!--")
@ -557,25 +556,25 @@ impl<'a> Lexer<'a> {
/// 12.8.3 Numeric Literals with `0` prefix /// 12.8.3 Numeric Literals with `0` prefix
fn read_zero(&mut self, builder: &mut AutoCow<'a>) -> Kind { fn read_zero(&mut self, builder: &mut AutoCow<'a>) -> Kind {
match self.peek() { match self.peek() {
'b' | 'B' => self.read_non_decimal(Kind::Binary, builder), Some('b' | 'B') => self.read_non_decimal(Kind::Binary, builder),
'o' | 'O' => self.read_non_decimal(Kind::Octal, builder), Some('o' | 'O') => self.read_non_decimal(Kind::Octal, builder),
'x' | 'X' => self.read_non_decimal(Kind::Hex, builder), Some('x' | 'X') => self.read_non_decimal(Kind::Hex, builder),
c @ ('e' | 'E') => { Some(c @ ('e' | 'E')) => {
self.current.chars.next(); self.current.chars.next();
builder.push_matching(c); builder.push_matching(c);
self.read_decimal_exponent(builder) self.read_decimal_exponent(builder)
} }
'.' => { Some('.') => {
self.current.chars.next(); self.current.chars.next();
builder.push_matching('.'); builder.push_matching('.');
self.decimal_literal_after_decimal_point_after_digits(builder) self.decimal_literal_after_decimal_point_after_digits(builder)
} }
'n' => { Some('n') => {
self.current.chars.next(); self.current.chars.next();
builder.push_matching('n'); builder.push_matching('n');
self.check_after_numeric_literal(Kind::Decimal) self.check_after_numeric_literal(Kind::Decimal)
} }
n if n.is_ascii_digit() => self.read_legacy_octal(builder), Some(n) if n.is_ascii_digit() => self.read_legacy_octal(builder),
_ => self.check_after_numeric_literal(Kind::Decimal), _ => self.check_after_numeric_literal(Kind::Decimal),
} }
} }
@ -584,7 +583,7 @@ impl<'a> Lexer<'a> {
let c = self.current.chars.next().unwrap(); let c = self.current.chars.next().unwrap();
builder.push_matching(c); builder.push_matching(c);
if kind.matches_number_char(self.peek()) { if self.peek().is_some_and(|c| kind.matches_number_char(c)) {
let c = self.current.chars.next().unwrap(); let c = self.current.chars.next().unwrap();
builder.push_matching(c); builder.push_matching(c);
} else { } else {
@ -592,14 +591,13 @@ impl<'a> Lexer<'a> {
return Kind::Undetermined; return Kind::Undetermined;
} }
loop { while let Some(c) = self.peek() {
match self.peek() { match c {
'_' => { '_' => {
self.current.chars.next(); self.current.chars.next();
builder.force_allocation_without_current_ascii_char(self); builder.force_allocation_without_current_ascii_char(self);
let c = self.peek(); if self.peek().is_some_and(|c| kind.matches_number_char(c)) {
if kind.matches_number_char(c) { let c = self.current.chars.next().unwrap();
self.current.chars.next();
builder.push_matching(c); builder.push_matching(c);
} else { } else {
self.unexpected_err(); self.unexpected_err();
@ -613,7 +611,7 @@ impl<'a> Lexer<'a> {
_ => break, _ => break,
} }
} }
if self.peek() == 'n' { if self.peek() == Some('n') {
self.current.chars.next(); self.current.chars.next();
builder.push_matching('n'); builder.push_matching('n');
} }
@ -624,10 +622,10 @@ impl<'a> Lexer<'a> {
let mut kind = Kind::Octal; let mut kind = Kind::Octal;
loop { loop {
match self.peek() { match self.peek() {
'0'..='7' => { Some('0'..='7') => {
self.current.chars.next(); self.current.chars.next();
} }
'8'..='9' => { Some('8'..='9') => {
self.current.chars.next(); self.current.chars.next();
kind = Kind::Decimal; kind = Kind::Decimal;
} }
@ -637,13 +635,13 @@ impl<'a> Lexer<'a> {
match self.peek() { match self.peek() {
// allow 08.5 and 09.5 // allow 08.5 and 09.5
'.' if kind == Kind::Decimal => { Some('.') if kind == Kind::Decimal => {
self.current.chars.next(); self.current.chars.next();
builder.push_matching('.'); builder.push_matching('.');
self.decimal_literal_after_decimal_point_after_digits(builder) self.decimal_literal_after_decimal_point_after_digits(builder)
} }
// allow 08e1 and 09e1 // allow 08e1 and 09e1
'e' if kind == Kind::Decimal => { Some('e') if kind == Kind::Decimal => {
self.current.chars.next(); self.current.chars.next();
builder.push_matching('e'); builder.push_matching('e');
self.read_decimal_exponent(builder) self.read_decimal_exponent(builder)
@ -668,12 +666,12 @@ impl<'a> Lexer<'a> {
fn read_decimal_exponent(&mut self, builder: &mut AutoCow<'a>) -> Kind { fn read_decimal_exponent(&mut self, builder: &mut AutoCow<'a>) -> Kind {
let kind = match self.peek() { let kind = match self.peek() {
'-' => { Some('-') => {
self.current.chars.next(); self.current.chars.next();
builder.push_matching('-'); builder.push_matching('-');
Kind::NegativeExponential Kind::NegativeExponential
} }
'+' => { Some('+') => {
self.current.chars.next(); self.current.chars.next();
builder.push_matching('+'); builder.push_matching('+');
Kind::PositiveExponential Kind::PositiveExponential
@ -685,7 +683,7 @@ impl<'a> Lexer<'a> {
} }
fn read_decimal_digits(&mut self, builder: &mut AutoCow<'a>) { fn read_decimal_digits(&mut self, builder: &mut AutoCow<'a>) {
if self.peek().is_ascii_digit() { if self.peek().is_some_and(|c| c.is_ascii_digit()) {
let c = self.current.chars.next().unwrap(); let c = self.current.chars.next().unwrap();
builder.push_matching(c); builder.push_matching(c);
} else { } else {
@ -697,12 +695,12 @@ impl<'a> Lexer<'a> {
} }
fn read_decimal_digits_after_first_digit(&mut self, builder: &mut AutoCow<'a>) { fn read_decimal_digits_after_first_digit(&mut self, builder: &mut AutoCow<'a>) {
loop { while let Some(c) = self.peek() {
match self.peek() { match c {
'_' => { '_' => {
self.current.chars.next(); self.current.chars.next();
builder.force_allocation_without_current_ascii_char(self); builder.force_allocation_without_current_ascii_char(self);
if self.peek().is_ascii_digit() { if self.peek().is_some_and(|c| c.is_ascii_digit()) {
let c = self.current.chars.next().unwrap(); let c = self.current.chars.next().unwrap();
builder.push_matching(c); builder.push_matching(c);
} else { } else {
@ -735,7 +733,7 @@ impl<'a> Lexer<'a> {
} }
fn optional_decimal_digits(&mut self, builder: &mut AutoCow<'a>) { fn optional_decimal_digits(&mut self, builder: &mut AutoCow<'a>) {
if self.peek().is_ascii_digit() { if self.peek().is_some_and(|c| c.is_ascii_digit()) {
let c = self.current.chars.next().unwrap(); let c = self.current.chars.next().unwrap();
builder.push_matching(c); builder.push_matching(c);
} else { } else {
@ -745,7 +743,7 @@ impl<'a> Lexer<'a> {
} }
fn optional_exponent(&mut self, builder: &mut AutoCow<'a>) -> Option<Kind> { fn optional_exponent(&mut self, builder: &mut AutoCow<'a>) -> Option<Kind> {
if matches!(self.peek(), 'e' | 'E') { if matches!(self.peek(), Some('e' | 'E')) {
let c = self.current.chars.next().unwrap(); let c = self.current.chars.next().unwrap();
builder.push_matching(c); builder.push_matching(c);
return Some(self.read_decimal_exponent(builder)); return Some(self.read_decimal_exponent(builder));
@ -756,14 +754,13 @@ impl<'a> Lexer<'a> {
fn check_after_numeric_literal(&mut self, kind: Kind) -> Kind { fn check_after_numeric_literal(&mut self, kind: Kind) -> Kind {
let offset = self.offset(); let offset = self.offset();
// The SourceCharacter immediately following a NumericLiteral must not be an IdentifierStart or DecimalDigit. // The SourceCharacter immediately following a NumericLiteral must not be an IdentifierStart or DecimalDigit.
let ch = self.peek(); let c = self.peek();
if !ch.is_ascii_digit() && !is_identifier_start_all(ch) { if c.is_none() || c.is_some_and(|ch| !ch.is_ascii_digit() && !is_identifier_start_all(ch)) {
return kind; return kind;
} }
self.current.chars.next(); self.current.chars.next();
loop { while let Some(c) = self.peek() {
let c = self.peek(); if is_identifier_start_all(c) {
if c != EOF && is_identifier_start_all(c) {
self.current.chars.next(); self.current.chars.next();
} else { } else {
break; break;
@ -843,7 +840,7 @@ impl<'a> Lexer<'a> {
let mut flags = RegExpFlags::empty(); let mut flags = RegExpFlags::empty();
while let ch @ ('$' | '_' | 'a'..='z' | 'A'..='Z' | '0'..='9') = self.peek() { while let Some(ch @ ('$' | '_' | 'a'..='z' | 'A'..='Z' | '0'..='9')) = self.peek() {
self.current.chars.next(); self.current.chars.next();
if !ch.is_ascii_lowercase() { if !ch.is_ascii_lowercase() {
self.error(diagnostics::RegExpFlag(ch, self.current_offset())); self.error(diagnostics::RegExpFlag(ch, self.current_offset()));
@ -881,7 +878,7 @@ impl<'a> Lexer<'a> {
let mut is_valid_escape_sequence = true; let mut is_valid_escape_sequence = true;
while let Some(c) = self.current.chars.next() { while let Some(c) = self.current.chars.next() {
match c { match c {
'$' if self.peek() == '{' => { '$' if self.peek() == Some('{') => {
if is_valid_escape_sequence { if is_valid_escape_sequence {
self.current.token.value = self.current.token.value =
self.string_to_token_value(builder.finish_without_push(self)); self.string_to_token_value(builder.finish_without_push(self));
@ -921,15 +918,13 @@ impl<'a> Lexer<'a> {
let prev_str = &self.source[start_offset as usize..self.offset() as usize]; let prev_str = &self.source[start_offset as usize..self.offset() as usize];
let mut builder = AutoCow::new(self); let mut builder = AutoCow::new(self);
loop { while let Some(c) = self.peek() {
let c = self.peek();
if c == '-' || is_identifier_start_all(c) { if c == '-' || is_identifier_start_all(c) {
self.current.chars.next(); self.current.chars.next();
builder.push_matching(c); builder.push_matching(c);
loop { while let Some(c) = self.peek() {
let c = self.peek();
if is_identifier_part(c) { if is_identifier_part(c) {
self.current.chars.next(); let c = self.current.chars.next().unwrap();
builder.push_matching(c); builder.push_matching(c);
} else { } else {
break; break;
@ -953,22 +948,21 @@ impl<'a> Lexer<'a> {
/// { `JSXChildExpressionopt` } /// { `JSXChildExpressionopt` }
fn read_jsx_child(&mut self) -> Kind { fn read_jsx_child(&mut self) -> Kind {
match self.peek() { match self.peek() {
'<' => { Some('<') => {
self.current.chars.next(); self.current.chars.next();
Kind::LAngle Kind::LAngle
} }
'{' => { Some('{') => {
self.current.chars.next(); self.current.chars.next();
Kind::LCurly Kind::LCurly
} }
EOF => Kind::Eof, Some(c) => {
c => {
let mut builder = AutoCow::new(self); let mut builder = AutoCow::new(self);
builder.push_matching(c); builder.push_matching(c);
loop { loop {
// `>` and `}` are errors in TypeScript but not Babel // `>` and `}` are errors in TypeScript but not Babel
// let's make this less strict so we can parse more code // let's make this less strict so we can parse more code
if matches!(self.peek(), '{' | '<') { if matches!(self.peek(), Some('{' | '<')) {
break; break;
} }
if let Some(c) = self.current.chars.next() { if let Some(c) = self.current.chars.next() {
@ -980,6 +974,7 @@ impl<'a> Lexer<'a> {
self.current.token.value = self.string_to_token_value(builder.finish(self)); self.current.token.value = self.string_to_token_value(builder.finish(self));
Kind::JSXText Kind::JSXText
} }
None => Kind::Eof,
} }
} }
@ -1034,7 +1029,7 @@ impl<'a> Lexer<'a> {
} }
let value = match self.peek() { let value = match self.peek() {
'{' => self.unicode_code_point(), Some('{') => self.unicode_code_point(),
_ => self.surrogate_pair(), _ => self.surrogate_pair(),
}; };
@ -1086,7 +1081,7 @@ impl<'a> Lexer<'a> {
is_valid_escape_sequence: &mut bool, is_valid_escape_sequence: &mut bool,
) { ) {
let value = match self.peek() { let value = match self.peek() {
'{' => self.unicode_code_point(), Some('{') => self.unicode_code_point(),
_ => self.surrogate_pair(), _ => self.surrogate_pair(),
}; };
@ -1137,9 +1132,9 @@ impl<'a> Lexer<'a> {
fn hex_digit(&mut self) -> Option<u32> { fn hex_digit(&mut self) -> Option<u32> {
let value = match self.peek() { let value = match self.peek() {
c @ '0'..='9' => c as u32 - '0' as u32, Some(c @ '0'..='9') => c as u32 - '0' as u32,
c @ 'a'..='f' => 10 + (c as u32 - 'a' as u32), Some(c @ 'a'..='f') => 10 + (c as u32 - 'a' as u32),
c @ 'A'..='F' => 10 + (c as u32 - 'A' as u32), Some(c @ 'A'..='F') => 10 + (c as u32 - 'A' as u32),
_ => return None, _ => return None,
}; };
self.current.chars.next(); self.current.chars.next();
@ -1164,7 +1159,10 @@ impl<'a> Lexer<'a> {
fn surrogate_pair(&mut self) -> Option<SurrogatePair> { fn surrogate_pair(&mut self) -> Option<SurrogatePair> {
let high = self.hex_4_digits()?; let high = self.hex_4_digits()?;
// The first code unit of a surrogate pair is always in the range from 0xD800 to 0xDBFF, and is called a high surrogate or a lead surrogate. // The first code unit of a surrogate pair is always in the range from 0xD800 to 0xDBFF, and is called a high surrogate or a lead surrogate.
if !((0xD800..=0xDBFF).contains(&high) && self.peek() == '\\' && self.peek2() == 'u') { if !((0xD800..=0xDBFF).contains(&high)
&& self.peek() == Some('\\')
&& self.peek2() == Some('u'))
{
return Some(SurrogatePair::CodePoint(high)); return Some(SurrogatePair::CodePoint(high));
} }
@ -1231,7 +1229,7 @@ impl<'a> Lexer<'a> {
self.string_unicode_escape_sequence(text, is_valid_escape_sequence); self.string_unicode_escape_sequence(text, is_valid_escape_sequence);
} }
// 0 [lookahead ∉ DecimalDigit] // 0 [lookahead ∉ DecimalDigit]
'0' if !self.peek().is_ascii_digit() => text.push('\0'), '0' if !self.peek().is_some_and(|c| c.is_ascii_digit()) => text.push('\0'),
// Section 12.8.4 String Literals // Section 12.8.4 String Literals
// LegacyOctalEscapeSequence // LegacyOctalEscapeSequence
// NonOctalDecimalEscapeSequence // NonOctalDecimalEscapeSequence
@ -1240,16 +1238,16 @@ impl<'a> Lexer<'a> {
num.push(a); num.push(a);
match a { match a {
'4'..='7' => { '4'..='7' => {
if matches!(self.peek(), '0'..='7') { if matches!(self.peek(), Some('0'..='7')) {
let b = self.current.chars.next().unwrap(); let b = self.current.chars.next().unwrap();
num.push(b); num.push(b);
} }
} }
'0'..='3' => { '0'..='3' => {
if matches!(self.peek(), '0'..='7') { if matches!(self.peek(), Some('0'..='7')) {
let b = self.current.chars.next().unwrap(); let b = self.current.chars.next().unwrap();
num.push(b); num.push(b);
if matches!(self.peek(), '0'..='7') { if matches!(self.peek(), Some('0'..='7')) {
let c = self.current.chars.next().unwrap(); let c = self.current.chars.next().unwrap();
num.push(c); num.push(c);
} }
@ -1262,7 +1260,7 @@ impl<'a> Lexer<'a> {
char::from_u32(u32::from_str_radix(num.as_str(), 8).unwrap()).unwrap(); char::from_u32(u32::from_str_radix(num.as_str(), 8).unwrap()).unwrap();
text.push(value); text.push(value);
} }
'0' if in_template && self.peek().is_ascii_digit() => { '0' if in_template && self.peek().is_some_and(|c| c.is_ascii_digit()) => {
self.current.chars.next(); self.current.chars.next();
// error raised within the parser by `diagnostics::TemplateLiteral` // error raised within the parser by `diagnostics::TemplateLiteral`
*is_valid_escape_sequence = false; *is_valid_escape_sequence = false;
@ -1464,11 +1462,11 @@ const PRD: ByteHandler = |lexer| {
const SLH: ByteHandler = |lexer| { const SLH: ByteHandler = |lexer| {
lexer.consume_char(); lexer.consume_char();
match lexer.peek() { match lexer.peek() {
'/' => { Some('/') => {
lexer.current.chars.next(); lexer.current.chars.next();
lexer.skip_single_line_comment() lexer.skip_single_line_comment()
} }
'*' => { Some('*') => {
lexer.current.chars.next(); lexer.current.chars.next();
lexer.skip_multi_line_comment() lexer.skip_multi_line_comment()
} }
@ -1553,9 +1551,9 @@ const QST: ByteHandler = |lexer| {
} else { } else {
Kind::Question2 Kind::Question2
} }
} else if lexer.peek() == '.' { } else if lexer.peek() == Some('.') {
// parse `?.1` as `?` `.1` // parse `?.1` as `?` `.1`
if lexer.peek2().is_ascii_digit() { if lexer.peek2().is_some_and(|c| c.is_ascii_digit()) {
Kind::Question Kind::Question
} else { } else {
lexer.current.chars.next(); lexer.current.chars.next();