refactor(parser): make is_identifier methods consistent

2026-05-21 21:29:01 +00:00 · 2024-01-22 14:28:05 +00:00 · 2024-01-22 14:28:05 +00:00 · bc7ea0bedb
commit bc7ea0bedb
parent 27aaff2bef
2 changed files with 33 additions and 23 deletions
--- a/crates/oxc_parser/src/lexer/mod.rs
+++ b/crates/oxc_parser/src/lexer/mod.rs
@@ -18,12 +18,10 @@ use oxc_allocator::{Allocator, String};
 use oxc_ast::ast::RegExpFlags;
 use oxc_diagnostics::Error;
 use oxc_span::{SourceType, Span};
-use oxc_syntax::{
+use oxc_syntax::identifier::{
-    identifier::{
+    is_identifier_part, is_identifier_start, is_identifier_start_unicode,
-        is_identifier_part, is_identifier_start_all, is_irregular_line_terminator,
+    is_irregular_line_terminator, is_irregular_whitespace, is_line_terminator, CR, FF, LF, LS, PS,
-        is_irregular_whitespace, is_line_terminator, CR, FF, LF, LS, PS, TAB, VT,
+    TAB, VT,
-    },
-    unicode_id_start::is_id_start_unicode,
 };
 pub use self::{
@@ -392,7 +390,7 @@ impl<'a> Lexer<'a> {
    fn unicode_char_handler(&mut self) -> Kind {
        let c = self.current.chars.clone().next().unwrap();
        match c {
-            c if is_id_start_unicode(c) => {
+            c if is_identifier_start_unicode(c) => {
                let mut builder = AutoCow::new(self);
                let c = self.consume_char();
                builder.push_matching(c);
@@ -571,7 +569,7 @@ impl<'a> Lexer<'a> {
        let mut builder = AutoCow::new(self);
        let start = self.offset();
        match self.current.chars.next() {
-            Some(c) if is_identifier_start_all(c) => {
+            Some(c) if is_identifier_start(c) => {
                builder.push_matching(c);
            }
            Some('\\') => {
@@ -773,12 +771,12 @@ impl<'a> Lexer<'a> {
        let offset = self.offset();
        // The SourceCharacter immediately following a NumericLiteral must not be an IdentifierStart or DecimalDigit.
        let c = self.peek();
-        if c.is_none() || c.is_some_and(|ch| !ch.is_ascii_digit() && !is_identifier_start_all(ch)) {
+        if c.is_none() || c.is_some_and(|ch| !ch.is_ascii_digit() && !is_identifier_start(ch)) {
            return kind;
        }
        self.current.chars.next();
        while let Some(c) = self.peek() {
-            if is_identifier_start_all(c) {
+            if is_identifier_start(c) {
                self.current.chars.next();
            } else {
                break;
@@ -920,7 +918,7 @@ impl<'a> Lexer<'a> {
    ///   `JSXIdentifier` [no `WhiteSpace` or Comment here] -
    fn read_jsx_identifier(&mut self, _start_offset: u32) -> Kind {
        while let Some(c) = self.peek() {
-            if c == '-' || is_identifier_start_all(c) {
+            if c == '-' || is_identifier_start(c) {
                self.current.chars.next();
                while let Some(c) = self.peek() {
                    if is_identifier_part(c) {
@@ -1049,11 +1047,8 @@ impl<'a> Lexer<'a> {
            }
        };
-        let is_valid = if check_identifier_start {
+        let is_valid =
-            is_identifier_start_all(ch)
+            if check_identifier_start { is_identifier_start(ch) } else { is_identifier_part(ch) };
-        } else {
-            is_identifier_part(ch)
-        };
        if !is_valid {
            self.error(diagnostics::InvalidCharacter(ch, self.current_offset()));
--- a/crates/oxc_syntax/src/identifier.rs
+++ b/crates/oxc_syntax/src/identifier.rs
@@ -97,17 +97,22 @@ pub static ASCII_CONTINUE: Align64<[bool; 128]> = Align64([
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, __, __, __, __, __, // 7
 ]);
 /// Section 12.7 Detect `IdentifierStartChar`
 #[inline]
 pub fn is_identifier_start(c: char) -> bool {
    if c.is_ascii() {
        return is_identifier_start_ascii(c);
    }
    is_identifier_start_unicode(c)
 }
 #[inline]
 pub fn is_identifier_start_ascii(c: char) -> bool {
    ASCII_START.0[c as usize]
 }
 /// Section 12.7 Detect `IdentifierStartChar`
 #[inline]
-pub fn is_identifier_start_all(c: char) -> bool {
+pub fn is_identifier_start_unicode(c: char) -> bool {
    if c.is_ascii() {
        return is_identifier_start_ascii(c);
    }
    is_id_start_unicode(c)
 }
@@ -116,12 +121,22 @@ pub fn is_identifier_start_all(c: char) -> bool {
 #[inline]
 pub fn is_identifier_part(c: char) -> bool {
    if c.is_ascii() {
-        return ASCII_CONTINUE.0[c as usize];
+        return is_identifier_part_ascii(c);
    }
    is_identifier_part_unicode(c)
 }
 #[inline]
 pub fn is_identifier_part_ascii(c: char) -> bool {
    ASCII_CONTINUE.0[c as usize]
 }
 #[inline]
 pub fn is_identifier_part_unicode(c: char) -> bool {
    is_id_continue_unicode(c) || c == ZWNJ || c == ZWJ
 }
 pub fn is_identifier_name(name: &str) -> bool {
    let mut chars = name.chars();
-    chars.next().is_some_and(is_identifier_start_all) && chars.all(is_identifier_part)
+    chars.next().is_some_and(is_identifier_start) && chars.all(is_identifier_part)
 }