mirror of
https://github.com/danbulant/oxc
synced 2026-05-24 20:32:10 +00:00
refactor(parser): combine token kinds for skipped tokens (#2072)
Small optimization to the lexer. Whitespace, line breaks, and comments are all skipped by `read_next_token()`. At present there's a different `Kind` for each, and `read_next_token()` decides whether to skip with `matches!(kind, Kind::WhiteSpace | Kind::NewLine | Kind::Comment | Kind::MultiLineComment)`. These `Kind`s are used for no other purpose, so there seems to be little reason to differentiate them. This PR combines them all into `Kind::Skip`, so the test of whether to skip is reduced to `kind == Kind::Skip`. This produces only a ~0.3% performance bump on parser benchmarks, but why not?
This commit is contained in:
parent
d7ecd21801
commit
0e32618664
2 changed files with 10 additions and 20 deletions
|
|
@ -8,10 +8,7 @@ pub enum Kind {
|
||||||
Undetermined,
|
Undetermined,
|
||||||
#[default]
|
#[default]
|
||||||
Eof,
|
Eof,
|
||||||
WhiteSpace,
|
Skip, // Whitespace, line breaks, comments
|
||||||
NewLine,
|
|
||||||
Comment,
|
|
||||||
MultiLineComment,
|
|
||||||
// 12.5 Hashbang Comments
|
// 12.5 Hashbang Comments
|
||||||
HashbangComment,
|
HashbangComment,
|
||||||
// 12.7.1 identifier
|
// 12.7.1 identifier
|
||||||
|
|
@ -482,11 +479,8 @@ impl Kind {
|
||||||
match self {
|
match self {
|
||||||
Undetermined => "Unknown",
|
Undetermined => "Unknown",
|
||||||
Eof => "EOF",
|
Eof => "EOF",
|
||||||
NewLine => "\n",
|
Skip => "Skipped",
|
||||||
Comment => "//",
|
|
||||||
MultiLineComment => "/** */",
|
|
||||||
HashbangComment => "#!",
|
HashbangComment => "#!",
|
||||||
WhiteSpace => " ",
|
|
||||||
Ident => "Identifier",
|
Ident => "Identifier",
|
||||||
Await => "await",
|
Await => "await",
|
||||||
Break => "break",
|
Break => "break",
|
||||||
|
|
|
||||||
|
|
@ -383,11 +383,7 @@ impl<'a> Lexer<'a> {
|
||||||
// SAFETY: Check for `remaining.is_empty()` ensures not at end of file,
|
// SAFETY: Check for `remaining.is_empty()` ensures not at end of file,
|
||||||
// and `byte` is the byte at current position of `self.current.chars`.
|
// and `byte` is the byte at current position of `self.current.chars`.
|
||||||
let kind = unsafe { handle_byte(byte, self) };
|
let kind = unsafe { handle_byte(byte, self) };
|
||||||
|
if kind != Kind::Skip {
|
||||||
if !matches!(
|
|
||||||
kind,
|
|
||||||
Kind::WhiteSpace | Kind::NewLine | Kind::Comment | Kind::MultiLineComment
|
|
||||||
) {
|
|
||||||
return kind;
|
return kind;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -407,12 +403,12 @@ impl<'a> Lexer<'a> {
|
||||||
self.trivia_builder
|
self.trivia_builder
|
||||||
.add_irregular_whitespace(self.current.token.start, self.offset());
|
.add_irregular_whitespace(self.current.token.start, self.offset());
|
||||||
self.consume_char();
|
self.consume_char();
|
||||||
Kind::WhiteSpace
|
Kind::Skip
|
||||||
}
|
}
|
||||||
c if is_irregular_line_terminator(c) => {
|
c if is_irregular_line_terminator(c) => {
|
||||||
self.consume_char();
|
self.consume_char();
|
||||||
self.current.token.is_on_new_line = true;
|
self.current.token.is_on_new_line = true;
|
||||||
Kind::NewLine
|
Kind::Skip
|
||||||
}
|
}
|
||||||
_ => {
|
_ => {
|
||||||
self.consume_char();
|
self.consume_char();
|
||||||
|
|
@ -431,12 +427,12 @@ impl<'a> Lexer<'a> {
|
||||||
self.current.token.is_on_new_line = true;
|
self.current.token.is_on_new_line = true;
|
||||||
self.trivia_builder
|
self.trivia_builder
|
||||||
.add_single_line_comment(start, self.offset() - c.len_utf8() as u32);
|
.add_single_line_comment(start, self.offset() - c.len_utf8() as u32);
|
||||||
return Kind::Comment;
|
return Kind::Skip;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// EOF
|
// EOF
|
||||||
self.trivia_builder.add_single_line_comment(start, self.offset());
|
self.trivia_builder.add_single_line_comment(start, self.offset());
|
||||||
Kind::Comment
|
Kind::Skip
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Section 12.4 Multi Line Comment
|
/// Section 12.4 Multi Line Comment
|
||||||
|
|
@ -444,7 +440,7 @@ impl<'a> Lexer<'a> {
|
||||||
while let Some(c) = self.current.chars.next() {
|
while let Some(c) = self.current.chars.next() {
|
||||||
if c == '*' && self.next_eq('/') {
|
if c == '*' && self.next_eq('/') {
|
||||||
self.trivia_builder.add_multi_line_comment(self.current.token.start, self.offset());
|
self.trivia_builder.add_multi_line_comment(self.current.token.start, self.offset());
|
||||||
return Kind::MultiLineComment;
|
return Kind::Skip;
|
||||||
}
|
}
|
||||||
if is_line_terminator(c) {
|
if is_line_terminator(c) {
|
||||||
self.current.token.is_on_new_line = true;
|
self.current.token.is_on_new_line = true;
|
||||||
|
|
@ -1393,14 +1389,14 @@ ascii_byte_handler!(ERR(lexer) {
|
||||||
// <SPACE> <TAB> <VT> <FF>
|
// <SPACE> <TAB> <VT> <FF>
|
||||||
ascii_byte_handler!(SPS(lexer) {
|
ascii_byte_handler!(SPS(lexer) {
|
||||||
lexer.consume_char();
|
lexer.consume_char();
|
||||||
Kind::WhiteSpace
|
Kind::Skip
|
||||||
});
|
});
|
||||||
|
|
||||||
// '\r' '\n'
|
// '\r' '\n'
|
||||||
ascii_byte_handler!(LIN(lexer) {
|
ascii_byte_handler!(LIN(lexer) {
|
||||||
lexer.consume_char();
|
lexer.consume_char();
|
||||||
lexer.current.token.is_on_new_line = true;
|
lexer.current.token.is_on_new_line = true;
|
||||||
Kind::NewLine
|
Kind::Skip
|
||||||
});
|
});
|
||||||
|
|
||||||
// !
|
// !
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue