mirror of
https://github.com/danbulant/oxc
synced 2026-05-24 12:21:58 +00:00
refactor(parser): combine token kinds for skipped tokens (#2072)
Small optimization to the lexer. Whitespace, line breaks, and comments are all skipped by `read_next_token()`. At present there's a different `Kind` for each, and `read_next_token()` decides whether to skip with `matches!(kind, Kind::WhiteSpace | Kind::NewLine | Kind::Comment | Kind::MultiLineComment)`. These `Kind`s are used for no other purpose, so there seems little reason to differentiate them. This PR combines them all into `Kind::Skip`, which reduces the test of whether to skip to `kind == Kind::Skip`. This only produces a ~0.3% performance bump on parser benchmarks. But, why not?...
This commit is contained in:
parent
d7ecd21801
commit
0e32618664
2 changed files with 10 additions and 20 deletions
|
|
@ -8,10 +8,7 @@ pub enum Kind {
|
|||
Undetermined,
|
||||
#[default]
|
||||
Eof,
|
||||
WhiteSpace,
|
||||
NewLine,
|
||||
Comment,
|
||||
MultiLineComment,
|
||||
Skip, // Whitespace, line breaks, comments
|
||||
// 12.5 Hashbang Comments
|
||||
HashbangComment,
|
||||
// 12.7.1 identifier
|
||||
|
|
@ -482,11 +479,8 @@ impl Kind {
|
|||
match self {
|
||||
Undetermined => "Unknown",
|
||||
Eof => "EOF",
|
||||
NewLine => "\n",
|
||||
Comment => "//",
|
||||
MultiLineComment => "/** */",
|
||||
Skip => "Skipped",
|
||||
HashbangComment => "#!",
|
||||
WhiteSpace => " ",
|
||||
Ident => "Identifier",
|
||||
Await => "await",
|
||||
Break => "break",
|
||||
|
|
|
|||
|
|
@ -383,11 +383,7 @@ impl<'a> Lexer<'a> {
|
|||
// SAFETY: Check for `remaining.is_empty()` ensures not at end of file,
|
||||
// and `byte` is the byte at current position of `self.current.chars`.
|
||||
let kind = unsafe { handle_byte(byte, self) };
|
||||
|
||||
if !matches!(
|
||||
kind,
|
||||
Kind::WhiteSpace | Kind::NewLine | Kind::Comment | Kind::MultiLineComment
|
||||
) {
|
||||
if kind != Kind::Skip {
|
||||
return kind;
|
||||
}
|
||||
}
|
||||
|
|
@ -407,12 +403,12 @@ impl<'a> Lexer<'a> {
|
|||
self.trivia_builder
|
||||
.add_irregular_whitespace(self.current.token.start, self.offset());
|
||||
self.consume_char();
|
||||
Kind::WhiteSpace
|
||||
Kind::Skip
|
||||
}
|
||||
c if is_irregular_line_terminator(c) => {
|
||||
self.consume_char();
|
||||
self.current.token.is_on_new_line = true;
|
||||
Kind::NewLine
|
||||
Kind::Skip
|
||||
}
|
||||
_ => {
|
||||
self.consume_char();
|
||||
|
|
@ -431,12 +427,12 @@ impl<'a> Lexer<'a> {
|
|||
self.current.token.is_on_new_line = true;
|
||||
self.trivia_builder
|
||||
.add_single_line_comment(start, self.offset() - c.len_utf8() as u32);
|
||||
return Kind::Comment;
|
||||
return Kind::Skip;
|
||||
}
|
||||
}
|
||||
// EOF
|
||||
self.trivia_builder.add_single_line_comment(start, self.offset());
|
||||
Kind::Comment
|
||||
Kind::Skip
|
||||
}
|
||||
|
||||
/// Section 12.4 Multi Line Comment
|
||||
|
|
@ -444,7 +440,7 @@ impl<'a> Lexer<'a> {
|
|||
while let Some(c) = self.current.chars.next() {
|
||||
if c == '*' && self.next_eq('/') {
|
||||
self.trivia_builder.add_multi_line_comment(self.current.token.start, self.offset());
|
||||
return Kind::MultiLineComment;
|
||||
return Kind::Skip;
|
||||
}
|
||||
if is_line_terminator(c) {
|
||||
self.current.token.is_on_new_line = true;
|
||||
|
|
@ -1393,14 +1389,14 @@ ascii_byte_handler!(ERR(lexer) {
|
|||
// <SPACE> <TAB> <VT> <FF>
|
||||
ascii_byte_handler!(SPS(lexer) {
|
||||
lexer.consume_char();
|
||||
Kind::WhiteSpace
|
||||
Kind::Skip
|
||||
});
|
||||
|
||||
// '\r' '\n'
|
||||
ascii_byte_handler!(LIN(lexer) {
|
||||
lexer.consume_char();
|
||||
lexer.current.token.is_on_new_line = true;
|
||||
Kind::NewLine
|
||||
Kind::Skip
|
||||
});
|
||||
|
||||
// !
|
||||
|
|
|
|||
Loading…
Reference in a new issue