From 85955d71473ea8aa2bc873763b7972a53d715cc4 Mon Sep 17 00:00:00 2001 From: Boshen Date: Sun, 12 Feb 2023 21:33:56 +0800 Subject: [PATCH] refactor(parser): clean up some lexer code --- crates/oxc_parser/src/cursor.rs | 8 +- crates/oxc_parser/src/lexer/constants.rs | 256 +++++++++++------------ crates/oxc_parser/src/lexer/mod.rs | 18 +- 3 files changed, 143 insertions(+), 139 deletions(-) diff --git a/crates/oxc_parser/src/cursor.rs b/crates/oxc_parser/src/cursor.rs index fe8cf023b..59fcf3fea 100644 --- a/crates/oxc_parser/src/cursor.rs +++ b/crates/oxc_parser/src/cursor.rs @@ -186,18 +186,18 @@ impl<'a> Parser<'a> { /// Expect the next next token to be a `JsxChild`, i.e. `<` or `{` or `JSXText` /// # Errors pub fn expect_jsx_child(&mut self, kind: Kind) -> Result<()> { - self.lexer.context = LexerContext::JsxChild; + self.lexer.set_context(LexerContext::JsxChild); self.expect(kind)?; - self.lexer.context = LexerContext::Regular; + self.lexer.set_context(LexerContext::Regular); Ok(()) } /// Expect the next next token to be a `JsxString` or any other token /// # Errors pub fn expect_jsx_attribute_value(&mut self, kind: Kind) -> Result<()> { - self.lexer.context = LexerContext::JsxAttributeValue; + self.lexer.set_context(LexerContext::JsxAttributeValue); self.expect(kind)?; - self.lexer.context = LexerContext::Regular; + self.lexer.set_context(LexerContext::Regular); Ok(()) } diff --git a/crates/oxc_parser/src/lexer/constants.rs b/crates/oxc_parser/src/lexer/constants.rs index a89742ecd..ce4f874e0 100644 --- a/crates/oxc_parser/src/lexer/constants.rs +++ b/crates/oxc_parser/src/lexer/constants.rs @@ -83,132 +83,132 @@ pub fn is_identifier_part(c: char) -> bool { } pub const SINGLE_CHAR_TOKENS: &[Kind; 128] = &[ - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::LParen, // 0x28 - Kind::RParen, // 0x29 - Kind::Undetermined, - Kind::Undetermined, - Kind::Comma, // 0x2C - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Colon, // 0x3A - Kind::Semicolon, // 0x3B - Kind::Undetermined, - Kind::Undetermined, - Kind::RAngle, // 0x3E - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::LBrack, // 0x5B - Kind::Undetermined, - Kind::RBrack, // 0x5D - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::Undetermined, - Kind::LCurly, // 0x7B - Kind::Undetermined, - Kind::RCurly, // 0x7D - Kind::Tilde, // 0x7E - Kind::Undetermined, + /* 0 */ Kind::Undetermined, + /* 1 */ Kind::Undetermined, + /* 2 */ Kind::Undetermined, + /* 3 */ Kind::Undetermined, + /* 4 */ Kind::Undetermined, + /* 5 */ Kind::Undetermined, + /* 6 */ Kind::Undetermined, + /* 7 */ Kind::Undetermined, + /* 8 */ Kind::Undetermined, + /* 9 */ Kind::Undetermined, + /* 10 */ Kind::Undetermined, + /* 11 */ Kind::Undetermined, + /* 12 */ Kind::Undetermined, + /* 13 */ Kind::Undetermined, + /* 14 */ Kind::Undetermined, + /* 15 */ Kind::Undetermined, + /* 16 */ Kind::Undetermined, + /* 17 */ Kind::Undetermined, + /* 18 */ Kind::Undetermined, + /* 19 */ Kind::Undetermined, + /* 20 */ Kind::Undetermined, + /* 21 */ Kind::Undetermined, + /* 22 */ Kind::Undetermined, + /* 23 */ Kind::Undetermined, + /* 24 */ Kind::Undetermined, + /* 25 */ Kind::Undetermined, + /* 26 */ Kind::Undetermined, + /* 27 */ Kind::Undetermined, + /* 28 */ Kind::Undetermined, + /* 29 */ Kind::Undetermined, + /* 30 */ Kind::Undetermined, + /* 31 */ Kind::Undetermined, + /* 32 */ Kind::Undetermined, + /* 33 */ Kind::Undetermined, + /* 34 */ Kind::Undetermined, + /* 35 */ Kind::Undetermined, + /* 36 */ Kind::Undetermined, + /* 37 */ Kind::Undetermined, + /* 38 */ Kind::Undetermined, + /* 39 */ Kind::Undetermined, + /* 40 */ Kind::LParen, // 0x28 + /* 41 */ Kind::RParen, // 0x29 + /* 42 */ Kind::Undetermined, + /* 43 */ Kind::Undetermined, + /* 44 */ Kind::Comma, // 0x2C + /* 45 */ Kind::Undetermined, + /* 46 */ Kind::Undetermined, + /* 47 */ Kind::Undetermined, + /* 48 */ Kind::Undetermined, + /* 49 */ Kind::Undetermined, + /* 50 */ Kind::Undetermined, + /* 51 */ Kind::Undetermined, + /* 52 */ Kind::Undetermined, + /* 53 */ Kind::Undetermined, + /* 54 */ Kind::Undetermined, + /* 55 */ Kind::Undetermined, + /* 56 */ Kind::Undetermined, + /* 57 */ Kind::Undetermined, + /* 58 */ Kind::Colon, // 0x3A + /* 59 */ Kind::Semicolon, // 0x3B + /* 60 */ Kind::Undetermined, + /* 61 */ Kind::Undetermined, + /* 62 */ Kind::RAngle, // 0x3E + /* 63 */ Kind::Undetermined, + /* 64 */ Kind::At, + /* 65 */ Kind::Undetermined, + /* 66 */ Kind::Undetermined, + /* 67 */ Kind::Undetermined, + /* 68 */ Kind::Undetermined, + /* 69 */ Kind::Undetermined, + /* 70 */ Kind::Undetermined, + /* 71 */ Kind::Undetermined, + /* 72 */ Kind::Undetermined, + /* 73 */ Kind::Undetermined, + /* 74 */ Kind::Undetermined, + /* 75 */ Kind::Undetermined, + /* 76 */ Kind::Undetermined, + /* 77 */ Kind::Undetermined, + /* 78 */ Kind::Undetermined, + /* 79 */ Kind::Undetermined, + /* 80 */ Kind::Undetermined, + /* 81 */ Kind::Undetermined, + /* 82 */ Kind::Undetermined, + /* 83 */ Kind::Undetermined, + /* 84 */ Kind::Undetermined, + /* 85 */ Kind::Undetermined, + /* 86 */ Kind::Undetermined, + /* 87 */ Kind::Undetermined, + /* 88 */ Kind::Undetermined, + /* 89 */ Kind::Undetermined, + /* 90 */ Kind::Undetermined, + /* 91 */ Kind::LBrack, // 0x5B + /* 92 */ Kind::Undetermined, + /* 93 */ Kind::RBrack, // 0x5D + /* 94 */ Kind::Undetermined, + /* 95 */ Kind::Undetermined, + /* 96 */ Kind::Undetermined, + /* 97 */ Kind::Undetermined, + /* 98 */ Kind::Undetermined, + /* 99 */ Kind::Undetermined, + /* 100 */ Kind::Undetermined, + /* 101 */ Kind::Undetermined, + /* 102 */ Kind::Undetermined, + /* 103 */ Kind::Undetermined, + /* 104 */ Kind::Undetermined, + /* 105 */ Kind::Undetermined, + /* 106 */ Kind::Undetermined, + /* 107 */ Kind::Undetermined, + /* 108 */ Kind::Undetermined, + /* 109 */ Kind::Undetermined, + /* 110 */ Kind::Undetermined, + /* 111 */ Kind::Undetermined, + /* 112 */ Kind::Undetermined, + /* 113 */ Kind::Undetermined, + /* 114 */ Kind::Undetermined, + /* 115 */ Kind::Undetermined, + /* 116 */ Kind::Undetermined, + /* 117 */ Kind::Undetermined, + /* 118 */ Kind::Undetermined, + /* 119 */ Kind::Undetermined, + /* 120 */ Kind::Undetermined, + /* 121 */ Kind::Undetermined, + /* 122 */ Kind::Undetermined, + /* 123 */ Kind::LCurly, // 0x7B + /* 124 */ Kind::Undetermined, + /* 125 */ Kind::RCurly, // 0x7D + /* 126 */ Kind::Tilde, // 0x7E + /* 127 */ Kind::Undetermined, ]; diff --git a/crates/oxc_parser/src/lexer/mod.rs b/crates/oxc_parser/src/lexer/mod.rs index f1fc9e500..cfd80a52f 100644 --- a/crates/oxc_parser/src/lexer/mod.rs +++ b/crates/oxc_parser/src/lexer/mod.rs @@ -28,9 +28,9 @@ pub use token::{RegExp, Token, TokenValue}; #[derive(Debug, Clone)] pub struct LexerCheckpoint<'a> { /// Remaining chars to be tokenized - pub(crate) chars: Chars<'a>, + chars: Chars<'a>, - pub(crate) token: Token, + token: Token, errors_pos: usize, } @@ -45,19 +45,19 @@ pub enum LexerContext { } pub struct Lexer<'a> { - pub(crate) allocator: &'a Allocator, + allocator: &'a Allocator, source: &'a str, source_type: SourceType, - pub(crate) current: LexerCheckpoint<'a>, + current: LexerCheckpoint<'a>, errors: Diagnostics, lookahead: VecDeque>, - pub context: LexerContext, + context: LexerContext, } #[allow(clippy::unused_self)] @@ -87,13 +87,13 @@ impl<'a> Lexer<'a> { } } + /// Remaining string from `Chars` #[must_use] pub fn remaining(&self) -> &'a str { self.current.chars.as_str() } /// Creates a checkpoint storing the current lexer state. - /// /// Use `rewind` to restore the lexer to the state stored in the checkpoint. #[must_use] pub fn checkpoint(&self) -> LexerCheckpoint<'a> { @@ -145,6 +145,11 @@ impl<'a> Lexer<'a> { &self.lookahead[n - 1].token } + /// Set context + pub fn set_context(&mut self, context: LexerContext) { + self.context = context; + } + /// Main entry point pub fn next_token(&mut self) -> Token { if let Some(checkpoint) = self.lookahead.pop_front() { @@ -414,7 +419,6 @@ impl<'a> Lexer<'a> { self.private_identifier(builder) } } - '@' => Kind::At, '\\' => { builder.force_allocation_without_current_ascii_char(self); self.identifier_unicode_escape_sequence(&mut builder, true);