mirror of
https://github.com/danbulant/oxc
synced 2026-05-21 21:29:01 +00:00
refactor(parser): make is_identifier methods consistent
This commit is contained in:
parent
27aaff2bef
commit
bc7ea0bedb
2 changed files with 33 additions and 23 deletions
|
|
@ -18,12 +18,10 @@ use oxc_allocator::{Allocator, String};
|
||||||
use oxc_ast::ast::RegExpFlags;
|
use oxc_ast::ast::RegExpFlags;
|
||||||
use oxc_diagnostics::Error;
|
use oxc_diagnostics::Error;
|
||||||
use oxc_span::{SourceType, Span};
|
use oxc_span::{SourceType, Span};
|
||||||
use oxc_syntax::{
|
use oxc_syntax::identifier::{
|
||||||
identifier::{
|
is_identifier_part, is_identifier_start, is_identifier_start_unicode,
|
||||||
is_identifier_part, is_identifier_start_all, is_irregular_line_terminator,
|
is_irregular_line_terminator, is_irregular_whitespace, is_line_terminator, CR, FF, LF, LS, PS,
|
||||||
is_irregular_whitespace, is_line_terminator, CR, FF, LF, LS, PS, TAB, VT,
|
TAB, VT,
|
||||||
},
|
|
||||||
unicode_id_start::is_id_start_unicode,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
pub use self::{
|
pub use self::{
|
||||||
|
|
@ -392,7 +390,7 @@ impl<'a> Lexer<'a> {
|
||||||
fn unicode_char_handler(&mut self) -> Kind {
|
fn unicode_char_handler(&mut self) -> Kind {
|
||||||
let c = self.current.chars.clone().next().unwrap();
|
let c = self.current.chars.clone().next().unwrap();
|
||||||
match c {
|
match c {
|
||||||
c if is_id_start_unicode(c) => {
|
c if is_identifier_start_unicode(c) => {
|
||||||
let mut builder = AutoCow::new(self);
|
let mut builder = AutoCow::new(self);
|
||||||
let c = self.consume_char();
|
let c = self.consume_char();
|
||||||
builder.push_matching(c);
|
builder.push_matching(c);
|
||||||
|
|
@ -571,7 +569,7 @@ impl<'a> Lexer<'a> {
|
||||||
let mut builder = AutoCow::new(self);
|
let mut builder = AutoCow::new(self);
|
||||||
let start = self.offset();
|
let start = self.offset();
|
||||||
match self.current.chars.next() {
|
match self.current.chars.next() {
|
||||||
Some(c) if is_identifier_start_all(c) => {
|
Some(c) if is_identifier_start(c) => {
|
||||||
builder.push_matching(c);
|
builder.push_matching(c);
|
||||||
}
|
}
|
||||||
Some('\\') => {
|
Some('\\') => {
|
||||||
|
|
@ -773,12 +771,12 @@ impl<'a> Lexer<'a> {
|
||||||
let offset = self.offset();
|
let offset = self.offset();
|
||||||
// The SourceCharacter immediately following a NumericLiteral must not be an IdentifierStart or DecimalDigit.
|
// The SourceCharacter immediately following a NumericLiteral must not be an IdentifierStart or DecimalDigit.
|
||||||
let c = self.peek();
|
let c = self.peek();
|
||||||
if c.is_none() || c.is_some_and(|ch| !ch.is_ascii_digit() && !is_identifier_start_all(ch)) {
|
if c.is_none() || c.is_some_and(|ch| !ch.is_ascii_digit() && !is_identifier_start(ch)) {
|
||||||
return kind;
|
return kind;
|
||||||
}
|
}
|
||||||
self.current.chars.next();
|
self.current.chars.next();
|
||||||
while let Some(c) = self.peek() {
|
while let Some(c) = self.peek() {
|
||||||
if is_identifier_start_all(c) {
|
if is_identifier_start(c) {
|
||||||
self.current.chars.next();
|
self.current.chars.next();
|
||||||
} else {
|
} else {
|
||||||
break;
|
break;
|
||||||
|
|
@ -920,7 +918,7 @@ impl<'a> Lexer<'a> {
|
||||||
/// `JSXIdentifier` [no `WhiteSpace` or Comment here] -
|
/// `JSXIdentifier` [no `WhiteSpace` or Comment here] -
|
||||||
fn read_jsx_identifier(&mut self, _start_offset: u32) -> Kind {
|
fn read_jsx_identifier(&mut self, _start_offset: u32) -> Kind {
|
||||||
while let Some(c) = self.peek() {
|
while let Some(c) = self.peek() {
|
||||||
if c == '-' || is_identifier_start_all(c) {
|
if c == '-' || is_identifier_start(c) {
|
||||||
self.current.chars.next();
|
self.current.chars.next();
|
||||||
while let Some(c) = self.peek() {
|
while let Some(c) = self.peek() {
|
||||||
if is_identifier_part(c) {
|
if is_identifier_part(c) {
|
||||||
|
|
@ -1049,11 +1047,8 @@ impl<'a> Lexer<'a> {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
let is_valid = if check_identifier_start {
|
let is_valid =
|
||||||
is_identifier_start_all(ch)
|
if check_identifier_start { is_identifier_start(ch) } else { is_identifier_part(ch) };
|
||||||
} else {
|
|
||||||
is_identifier_part(ch)
|
|
||||||
};
|
|
||||||
|
|
||||||
if !is_valid {
|
if !is_valid {
|
||||||
self.error(diagnostics::InvalidCharacter(ch, self.current_offset()));
|
self.error(diagnostics::InvalidCharacter(ch, self.current_offset()));
|
||||||
|
|
|
||||||
|
|
@ -97,17 +97,22 @@ pub static ASCII_CONTINUE: Align64<[bool; 128]> = Align64([
|
||||||
XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, __, __, __, __, __, // 7
|
XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, __, __, __, __, __, // 7
|
||||||
]);
|
]);
|
||||||
|
|
||||||
|
/// Section 12.7 Detect `IdentifierStartChar`
|
||||||
|
#[inline]
|
||||||
|
pub fn is_identifier_start(c: char) -> bool {
|
||||||
|
if c.is_ascii() {
|
||||||
|
return is_identifier_start_ascii(c);
|
||||||
|
}
|
||||||
|
is_identifier_start_unicode(c)
|
||||||
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn is_identifier_start_ascii(c: char) -> bool {
|
pub fn is_identifier_start_ascii(c: char) -> bool {
|
||||||
ASCII_START.0[c as usize]
|
ASCII_START.0[c as usize]
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Section 12.7 Detect `IdentifierStartChar`
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn is_identifier_start_all(c: char) -> bool {
|
pub fn is_identifier_start_unicode(c: char) -> bool {
|
||||||
if c.is_ascii() {
|
|
||||||
return is_identifier_start_ascii(c);
|
|
||||||
}
|
|
||||||
is_id_start_unicode(c)
|
is_id_start_unicode(c)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -116,12 +121,22 @@ pub fn is_identifier_start_all(c: char) -> bool {
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn is_identifier_part(c: char) -> bool {
|
pub fn is_identifier_part(c: char) -> bool {
|
||||||
if c.is_ascii() {
|
if c.is_ascii() {
|
||||||
return ASCII_CONTINUE.0[c as usize];
|
return is_identifier_part_ascii(c);
|
||||||
}
|
}
|
||||||
|
is_identifier_part_unicode(c)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
pub fn is_identifier_part_ascii(c: char) -> bool {
|
||||||
|
ASCII_CONTINUE.0[c as usize]
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
pub fn is_identifier_part_unicode(c: char) -> bool {
|
||||||
is_id_continue_unicode(c) || c == ZWNJ || c == ZWJ
|
is_id_continue_unicode(c) || c == ZWNJ || c == ZWJ
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn is_identifier_name(name: &str) -> bool {
|
pub fn is_identifier_name(name: &str) -> bool {
|
||||||
let mut chars = name.chars();
|
let mut chars = name.chars();
|
||||||
chars.next().is_some_and(is_identifier_start_all) && chars.all(is_identifier_part)
|
chars.next().is_some_and(is_identifier_start) && chars.all(is_identifier_part)
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue