// Mirror of https://github.com/danbulant/oxc
// Synced 2026-05-25 12:51:57 +00:00
// 110 lines, 3.4 KiB, Rust
use unicode_id_start::{is_id_continue_unicode, is_id_start_unicode};
/// Sentinel character U+0000 (NUL).
///
/// NOTE(review): the name suggests it marks "end of file"; how callers use it
/// is not visible in this file, so confirm before relying on that meaning.
pub const EOF: char = '\0';

// 11.1 Unicode Format-Control Characters

/// U+200C ZERO WIDTH NON-JOINER, abbreviated in the spec as `<ZWNJ>`.
/// Specially permitted in identifiers.
pub const ZWNJ: char = '\u{200c}';

/// U+200D ZERO WIDTH JOINER, abbreviated in the spec as `<ZWJ>`.
/// Specially permitted in identifiers.
pub const ZWJ: char = '\u{200d}';

/// U+FEFF ZERO WIDTH NO-BREAK SPACE, abbreviated in the spec as `<ZWNBSP>`.
/// Considered a whitespace character in JS.
pub const ZWNBSP: char = '\u{feff}';

// 11.2 White Space
/// U+0009 CHARACTER TABULATION, abbreviated `<TAB>`.
pub const TAB: char = '\u{9}';

/// U+000B VERTICAL TAB, abbreviated `<VT>`.
pub const VT: char = '\u{b}';

/// U+000C FORM FEED, abbreviated `<FF>`.
pub const FF: char = '\u{c}';

/// U+00A0 NON-BREAKING SPACE, abbreviated `<NBSP>`.
pub const NBSP: char = '\u{a0}';

pub fn is_irregular_whitespace(c: char) -> bool {
|
|
matches!(
|
|
c,
|
|
VT | FF | NBSP | ZWNBSP | '\u{85}' | '\u{1680}' | '\u{2000}'
|
|
..='\u{200a}' | '\u{202f}' | '\u{205f}' | '\u{3000}'
|
|
)
|
|
}
|
|
|
|
// 11.3 Line Terminators

/// U+000A LINE FEED, abbreviated in the spec as `<LF>`.
pub const LF: char = '\u{a}';

/// U+000D CARRIAGE RETURN, abbreviated in the spec as `<CR>`.
pub const CR: char = '\u{d}';

/// U+2028 LINE SEPARATOR, abbreviated in the spec as `<LS>`.
pub const LS: char = '\u{2028}';

/// U+2029 PARAGRAPH SEPARATOR, abbreviated in the spec as `<PS>`.
pub const PS: char = '\u{2029}';

pub fn is_regular_line_terminator(c: char) -> bool {
|
|
matches!(c, LF | CR)
|
|
}
|
|
|
|
pub fn is_irregular_line_terminator(c: char) -> bool {
|
|
matches!(c, LS | PS)
|
|
}
|
|
|
|
pub fn is_line_terminator(c: char) -> bool {
|
|
is_regular_line_terminator(c) || is_irregular_line_terminator(c)
|
|
}
|
|
|
|
// One-letter aliases for `true` / `false`, used to keep the literal lookup
// tables `ASCII_START` and `ASCII_CONTINUE` at 32 entries per row.
const T: bool = true;
const F: bool = false;

/// Wrapper that gives its single field a 64-byte alignment.
///
/// Used in this file to wrap the 128-entry `bool` lookup tables.
/// NOTE(review): 64 bytes is presumably chosen to match a cache line — confirm.
#[repr(C, align(64))]
pub struct Align64<T>(pub(crate) T);

// This contains `$` (36) and `_` (95)
|
|
pub const ASCII_START: Align64<[bool; 128]> = Align64([
|
|
F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
|
F, F, F, F, T, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
|
F, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F, F, F, F, T,
|
|
F, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F, F, F, F, F,
|
|
]);
|
|
|
|
// This contains `$` (36)
|
|
pub const ASCII_CONTINUE: Align64<[bool; 128]> = Align64([
|
|
F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
|
F, F, F, F, T, F, F, F, F, F, F, F, F, F, F, F, T, T, T, T, T, T, T, T, T, T, F, F, F, F, F, F,
|
|
F, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F, F, F, F, T,
|
|
F, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F, F, F, F, F,
|
|
]);
|
|
|
|
#[inline]
|
|
pub fn is_identifier_start_ascii(c: char) -> bool {
|
|
ASCII_START.0[c as usize]
|
|
}
|
|
|
|
/// Section 12.6 Detect `IdentifierStartChar`
|
|
#[inline]
|
|
pub fn is_identifier_start_all(c: char) -> bool {
|
|
if c.is_ascii() {
|
|
return is_identifier_start_ascii(c);
|
|
}
|
|
is_id_start_unicode(c)
|
|
}
|
|
|
|
/// Section 12.6 Detect `IdentifierPartChar`
|
|
/// NOTE 2: The nonterminal `IdentifierPart` derives _ via `UnicodeIDContinue`.
|
|
#[inline]
|
|
pub fn is_identifier_part(c: char) -> bool {
|
|
if c.is_ascii() {
|
|
return ASCII_CONTINUE.0[c as usize];
|
|
}
|
|
is_id_continue_unicode(c) || c == ZWNJ || c == ZWJ
|
|
}
|