refactor(parser): make is_identifier methods consistent

This commit is contained in:
overlookmotel 2024-01-22 14:28:05 +00:00 committed by Boshen
parent 27aaff2bef
commit bc7ea0bedb
2 changed files with 33 additions and 23 deletions

View file

@ -18,12 +18,10 @@ use oxc_allocator::{Allocator, String};
use oxc_ast::ast::RegExpFlags; use oxc_ast::ast::RegExpFlags;
use oxc_diagnostics::Error; use oxc_diagnostics::Error;
use oxc_span::{SourceType, Span}; use oxc_span::{SourceType, Span};
use oxc_syntax::{ use oxc_syntax::identifier::{
identifier::{ is_identifier_part, is_identifier_start, is_identifier_start_unicode,
is_identifier_part, is_identifier_start_all, is_irregular_line_terminator, is_irregular_line_terminator, is_irregular_whitespace, is_line_terminator, CR, FF, LF, LS, PS,
is_irregular_whitespace, is_line_terminator, CR, FF, LF, LS, PS, TAB, VT, TAB, VT,
},
unicode_id_start::is_id_start_unicode,
}; };
pub use self::{ pub use self::{
@ -392,7 +390,7 @@ impl<'a> Lexer<'a> {
fn unicode_char_handler(&mut self) -> Kind { fn unicode_char_handler(&mut self) -> Kind {
let c = self.current.chars.clone().next().unwrap(); let c = self.current.chars.clone().next().unwrap();
match c { match c {
c if is_id_start_unicode(c) => { c if is_identifier_start_unicode(c) => {
let mut builder = AutoCow::new(self); let mut builder = AutoCow::new(self);
let c = self.consume_char(); let c = self.consume_char();
builder.push_matching(c); builder.push_matching(c);
@ -571,7 +569,7 @@ impl<'a> Lexer<'a> {
let mut builder = AutoCow::new(self); let mut builder = AutoCow::new(self);
let start = self.offset(); let start = self.offset();
match self.current.chars.next() { match self.current.chars.next() {
Some(c) if is_identifier_start_all(c) => { Some(c) if is_identifier_start(c) => {
builder.push_matching(c); builder.push_matching(c);
} }
Some('\\') => { Some('\\') => {
@ -773,12 +771,12 @@ impl<'a> Lexer<'a> {
let offset = self.offset(); let offset = self.offset();
// The SourceCharacter immediately following a NumericLiteral must not be an IdentifierStart or DecimalDigit. // The SourceCharacter immediately following a NumericLiteral must not be an IdentifierStart or DecimalDigit.
let c = self.peek(); let c = self.peek();
if c.is_none() || c.is_some_and(|ch| !ch.is_ascii_digit() && !is_identifier_start_all(ch)) { if c.is_none() || c.is_some_and(|ch| !ch.is_ascii_digit() && !is_identifier_start(ch)) {
return kind; return kind;
} }
self.current.chars.next(); self.current.chars.next();
while let Some(c) = self.peek() { while let Some(c) = self.peek() {
if is_identifier_start_all(c) { if is_identifier_start(c) {
self.current.chars.next(); self.current.chars.next();
} else { } else {
break; break;
@ -920,7 +918,7 @@ impl<'a> Lexer<'a> {
/// `JSXIdentifier` [no `WhiteSpace` or Comment here] - /// `JSXIdentifier` [no `WhiteSpace` or Comment here] -
fn read_jsx_identifier(&mut self, _start_offset: u32) -> Kind { fn read_jsx_identifier(&mut self, _start_offset: u32) -> Kind {
while let Some(c) = self.peek() { while let Some(c) = self.peek() {
if c == '-' || is_identifier_start_all(c) { if c == '-' || is_identifier_start(c) {
self.current.chars.next(); self.current.chars.next();
while let Some(c) = self.peek() { while let Some(c) = self.peek() {
if is_identifier_part(c) { if is_identifier_part(c) {
@ -1049,11 +1047,8 @@ impl<'a> Lexer<'a> {
} }
}; };
let is_valid = if check_identifier_start { let is_valid =
is_identifier_start_all(ch) if check_identifier_start { is_identifier_start(ch) } else { is_identifier_part(ch) };
} else {
is_identifier_part(ch)
};
if !is_valid { if !is_valid {
self.error(diagnostics::InvalidCharacter(ch, self.current_offset())); self.error(diagnostics::InvalidCharacter(ch, self.current_offset()));

View file

@ -97,17 +97,22 @@ pub static ASCII_CONTINUE: Align64<[bool; 128]> = Align64([
XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, __, __, __, __, __, // 7 XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, __, __, __, __, __, // 7
]); ]);
/// Section 12.7 Detect `IdentifierStartChar`
///
/// ASCII characters are classified by `is_identifier_start_ascii`; every other
/// character is handed to `is_identifier_start_unicode`.
#[inline]
pub fn is_identifier_start(c: char) -> bool {
    // Non-ASCII input goes straight to the Unicode check.
    if !c.is_ascii() {
        return is_identifier_start_unicode(c);
    }
    is_identifier_start_ascii(c)
}
#[inline] #[inline]
pub fn is_identifier_start_ascii(c: char) -> bool { pub fn is_identifier_start_ascii(c: char) -> bool {
ASCII_START.0[c as usize] ASCII_START.0[c as usize]
} }
/// Section 12.7 Detect `IdentifierStartChar`
#[inline] #[inline]
pub fn is_identifier_start_all(c: char) -> bool { pub fn is_identifier_start_unicode(c: char) -> bool {
if c.is_ascii() {
return is_identifier_start_ascii(c);
}
is_id_start_unicode(c) is_id_start_unicode(c)
} }
@ -116,12 +121,22 @@ pub fn is_identifier_start_all(c: char) -> bool {
#[inline] #[inline]
pub fn is_identifier_part(c: char) -> bool { pub fn is_identifier_part(c: char) -> bool {
if c.is_ascii() { if c.is_ascii() {
return ASCII_CONTINUE.0[c as usize]; return is_identifier_part_ascii(c);
} }
is_identifier_part_unicode(c)
}
/// Looks up `c` in the `ASCII_CONTINUE` table.
///
/// The table holds 128 entries, so `c` must be an ASCII character: indexing
/// with any other character is out of bounds and panics.
#[inline]
pub fn is_identifier_part_ascii(c: char) -> bool {
    let index = c as usize;
    ASCII_CONTINUE.0[index]
}
#[inline]
pub fn is_identifier_part_unicode(c: char) -> bool {
is_id_continue_unicode(c) || c == ZWNJ || c == ZWJ is_id_continue_unicode(c) || c == ZWNJ || c == ZWJ
} }
pub fn is_identifier_name(name: &str) -> bool { pub fn is_identifier_name(name: &str) -> bool {
let mut chars = name.chars(); let mut chars = name.chars();
chars.next().is_some_and(is_identifier_start_all) && chars.all(is_identifier_part) chars.next().is_some_and(is_identifier_start) && chars.all(is_identifier_part)
} }