refactor(parser): combine token kinds for skipped tokens (#2072)

Small optimization to the lexer.

Whitespace, line breaks, and comments are all skipped by
`read_next_token()`.

At present there's a different `Kind` for each, and `read_next_token()`
decides whether to skip with `matches!(kind, Kind::WhiteSpace |
Kind::NewLine | Kind::Comment | Kind::MultiLineComment)`.

These `Kind`s are used for no other purpose, so there seems little
reason to differentiate them.

This PR combines them all into a single `Kind::Skip`, so the test of
whether to skip is reduced to `kind == Kind::Skip`.

This only produces a ~0.3% performance bump on parser benchmarks. But why
not?
This commit is contained in:
overlookmotel 2024-01-18 13:14:12 +00:00 committed by GitHub
parent d7ecd21801
commit 0e32618664
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 10 additions and 20 deletions

View file

@ -8,10 +8,7 @@ pub enum Kind {
Undetermined,
#[default]
Eof,
WhiteSpace,
NewLine,
Comment,
MultiLineComment,
Skip, // Whitespace, line breaks, comments
// 12.5 Hashbang Comments
HashbangComment,
// 12.7.1 identifier
@ -482,11 +479,8 @@ impl Kind {
match self {
Undetermined => "Unknown",
Eof => "EOF",
NewLine => "\n",
Comment => "//",
MultiLineComment => "/** */",
Skip => "Skipped",
HashbangComment => "#!",
WhiteSpace => " ",
Ident => "Identifier",
Await => "await",
Break => "break",

View file

@ -383,11 +383,7 @@ impl<'a> Lexer<'a> {
// SAFETY: Check for `remaining.is_empty()` ensures not at end of file,
// and `byte` is the byte at current position of `self.current.chars`.
let kind = unsafe { handle_byte(byte, self) };
if !matches!(
kind,
Kind::WhiteSpace | Kind::NewLine | Kind::Comment | Kind::MultiLineComment
) {
if kind != Kind::Skip {
return kind;
}
}
@ -407,12 +403,12 @@ impl<'a> Lexer<'a> {
self.trivia_builder
.add_irregular_whitespace(self.current.token.start, self.offset());
self.consume_char();
Kind::WhiteSpace
Kind::Skip
}
c if is_irregular_line_terminator(c) => {
self.consume_char();
self.current.token.is_on_new_line = true;
Kind::NewLine
Kind::Skip
}
_ => {
self.consume_char();
@ -431,12 +427,12 @@ impl<'a> Lexer<'a> {
self.current.token.is_on_new_line = true;
self.trivia_builder
.add_single_line_comment(start, self.offset() - c.len_utf8() as u32);
return Kind::Comment;
return Kind::Skip;
}
}
// EOF
self.trivia_builder.add_single_line_comment(start, self.offset());
Kind::Comment
Kind::Skip
}
/// Section 12.4 Multi Line Comment
@ -444,7 +440,7 @@ impl<'a> Lexer<'a> {
while let Some(c) = self.current.chars.next() {
if c == '*' && self.next_eq('/') {
self.trivia_builder.add_multi_line_comment(self.current.token.start, self.offset());
return Kind::MultiLineComment;
return Kind::Skip;
}
if is_line_terminator(c) {
self.current.token.is_on_new_line = true;
@ -1393,14 +1389,14 @@ ascii_byte_handler!(ERR(lexer) {
// <SPACE> <TAB> <VT> <FF>
ascii_byte_handler!(SPS(lexer) {
lexer.consume_char();
Kind::WhiteSpace
Kind::Skip
});
// '\r' '\n'
ascii_byte_handler!(LIN(lexer) {
lexer.consume_char();
lexer.current.token.is_on_new_line = true;
Kind::NewLine
Kind::Skip
});
// !