mirror of
https://github.com/danbulant/oxc
synced 2026-05-24 12:21:58 +00:00
feat(ast): add RegExpFlags bitflag for storing regex flags
This reduces `TokenValue` from 56 to 40 bytes, `Token` from 72 to 56 bytes.
This commit is contained in:
parent
a733856536
commit
f3a7d5a026
4 changed files with 90 additions and 25 deletions
|
|
@ -5,6 +5,7 @@ use std::{
|
|||
hash::{Hash, Hasher},
|
||||
};
|
||||
|
||||
use bitflags::bitflags;
|
||||
use num_bigint::BigUint;
|
||||
use ordered_float::NotNan;
|
||||
use serde::{
|
||||
|
|
@ -110,7 +111,7 @@ pub struct RegExpLiteral {
|
|||
#[derive(Debug, Clone, Serialize, PartialEq, Eq, Hash)]
|
||||
pub struct RegExp {
|
||||
pub pattern: Atom,
|
||||
pub flags: Atom,
|
||||
pub flags: RegExpFlags,
|
||||
}
|
||||
|
||||
impl fmt::Display for RegExp {
|
||||
|
|
@ -119,6 +120,59 @@ impl fmt::Display for RegExp {
|
|||
}
|
||||
}
|
||||
|
||||
bitflags! {
    /// Set of regular expression literal flags, packed one bit per flag into a single `u8`.
    ///
    /// Each constant is named after the flag letter it stands for.
    pub struct RegExpFlags: u8 {
        /// `g` flag (global)
        const G = 0b0000_0001;
        /// `i` flag (ignore case)
        const I = 0b0000_0010;
        /// `m` flag (multiline)
        const M = 0b0000_0100;
        /// `s` flag (dot-all)
        const S = 0b0000_1000;
        /// `u` flag (unicode)
        const U = 0b0001_0000;
        /// `y` flag (sticky)
        const Y = 0b0010_0000;
        /// `d` flag (has indices)
        const D = 0b0100_0000;
        /// `v` flag from `https://github.com/tc39/proposal-regexp-set-notation`
        const V = 0b1000_0000;
    }
}
|
||||
|
||||
impl fmt::Display for RegExpFlags {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
if self.contains(Self::G) {
|
||||
write!(f, "g")?;
|
||||
}
|
||||
if self.contains(Self::I) {
|
||||
write!(f, "i")?;
|
||||
}
|
||||
if self.contains(Self::M) {
|
||||
write!(f, "m")?;
|
||||
}
|
||||
if self.contains(Self::S) {
|
||||
write!(f, "s")?;
|
||||
}
|
||||
if self.contains(Self::U) {
|
||||
write!(f, "u")?;
|
||||
}
|
||||
if self.contains(Self::Y) {
|
||||
write!(f, "y")?;
|
||||
}
|
||||
if self.contains(Self::D) {
|
||||
write!(f, "d")?;
|
||||
}
|
||||
if self.contains(Self::V) {
|
||||
write!(f, "v")?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl Serialize for RegExpFlags {
|
||||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: Serializer,
|
||||
{
|
||||
serializer.serialize_str(&self.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
/// A struct with no fields.
///
/// NOTE(review): it is declared with braces rather than as a unit struct, which makes the
/// derived `Serialize` emit an empty struct instead of a unit value. This is presumably
/// intentional; confirm before changing the declaration form.
#[derive(Debug, Clone, Serialize, PartialEq, Eq, Hash)]
|
||||
pub struct EmptyObject {}
|
||||
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@ use constants::{
|
|||
pub use kind::Kind;
|
||||
use number::{parse_big_int, parse_float, parse_int};
|
||||
use oxc_allocator::{Allocator, String};
|
||||
use oxc_ast::{Atom, Node, SourceType};
|
||||
use oxc_ast::{ast::RegExpFlags, Atom, Node, SourceType};
|
||||
use oxc_diagnostics::{Diagnostic, Diagnostics};
|
||||
use simd::{SkipMultilineComment, SkipWhitespace};
|
||||
use string_builder::AutoCow;
|
||||
|
|
@ -1010,35 +1010,39 @@ impl<'a> Lexer<'a> {
|
|||
|
||||
pattern.push_str(&start[..start.len() - self.current.chars.as_str().len() - 1]);
|
||||
|
||||
let mut flags = String::new_in(self.allocator);
|
||||
while let c @ ('$' | '_' | 'a'..='z' | 'A'..='Z' | '0'..='9') = self.peek() {
|
||||
self.current.chars.next();
|
||||
flags.push(c);
|
||||
}
|
||||
let mut flags = RegExpFlags::empty();
|
||||
|
||||
// v flag from https://github.com/tc39/proposal-regexp-set-notation
|
||||
let gimsuy_mask: u32 =
|
||||
['g', 'i', 'm', 's', 'u', 'y', 'd', 'v'].iter().map(|x| 1 << ((*x as u8) - b'a')).sum();
|
||||
let mut flag_text_set: u32 = 0;
|
||||
for ch in flags.chars() {
|
||||
while let ch @ ('$' | '_' | 'a'..='z' | 'A'..='Z' | '0'..='9') = self.peek() {
|
||||
self.current.chars.next();
|
||||
// dbg!(ch);
|
||||
if !ch.is_ascii_lowercase() {
|
||||
self.error(Diagnostic::RegExpFlag(ch, self.current_offset()));
|
||||
continue;
|
||||
}
|
||||
let ch_mask = 1 << ((ch as u8) - b'a');
|
||||
if ch_mask & gimsuy_mask == 0 {
|
||||
self.error(Diagnostic::RegExpFlag(ch, self.current_offset()));
|
||||
}
|
||||
if flag_text_set & ch_mask != 0 {
|
||||
let flag = match ch {
|
||||
'g' => RegExpFlags::G,
|
||||
'i' => RegExpFlags::I,
|
||||
'm' => RegExpFlags::M,
|
||||
's' => RegExpFlags::S,
|
||||
'u' => RegExpFlags::U,
|
||||
'y' => RegExpFlags::Y,
|
||||
'd' => RegExpFlags::D,
|
||||
'v' => RegExpFlags::V,
|
||||
_ => {
|
||||
self.error(Diagnostic::RegExpFlag(ch, self.current_offset()));
|
||||
continue;
|
||||
}
|
||||
};
|
||||
if flags.contains(flag) {
|
||||
self.error(Diagnostic::RegExpFlagTwice(ch, self.current_offset()));
|
||||
continue;
|
||||
}
|
||||
flag_text_set |= ch_mask;
|
||||
flags |= flag;
|
||||
}
|
||||
|
||||
self.current.token.value = TokenValue::RegExp(RegExp {
|
||||
pattern: Atom::from(pattern.as_str()),
|
||||
flags: Atom::from(flags.as_str()),
|
||||
});
|
||||
self.current.token.value =
|
||||
TokenValue::RegExp(RegExp { pattern: Atom::from(pattern.as_str()), flags });
|
||||
|
||||
Kind::RegExp
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
//! Token
|
||||
|
||||
use num_bigint::BigUint;
|
||||
use oxc_ast::{Atom, Node};
|
||||
use oxc_ast::{ast::RegExpFlags, Atom, Node};
|
||||
|
||||
use super::kind::Kind;
|
||||
|
||||
|
|
@ -25,6 +25,13 @@ pub struct Token {
|
|||
pub value: TokenValue,
|
||||
}
|
||||
|
||||
/// Size regression guard: fails when `Token` no longer occupies exactly 56 bytes.
///
/// Only compiled for 64-bit x86, the one layout the expected size is pinned for.
#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
#[test]
fn no_bloat_token() {
    let token_size = std::mem::size_of::<Token>();
    assert_eq!(token_size, 56);
}
|
||||
|
||||
impl Token {
|
||||
#[must_use]
|
||||
pub const fn node(&self) -> Node {
|
||||
|
|
@ -44,7 +51,7 @@ pub enum TokenValue {
|
|||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct RegExp {
|
||||
pub pattern: Atom,
|
||||
pub flags: Atom,
|
||||
pub flags: RegExpFlags,
|
||||
}
|
||||
|
||||
impl Default for TokenValue {
|
||||
|
|
|
|||
|
|
@ -841,7 +841,7 @@ impl Gen for RegExpLiteral {
|
|||
p.print(b'/');
|
||||
p.print_str(self.regex.pattern.as_bytes());
|
||||
p.print(b'/');
|
||||
p.print_str(self.regex.flags.as_bytes());
|
||||
p.print_str(self.regex.flags.to_string().as_bytes());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue