feat(ast): add RegExpFlags bitflag for storing regex flags

This reduces `TokenValue` from 56 to 40 bytes, `Token` from 72 to 56 bytes.
This commit is contained in:
Boshen 2023-02-21 17:27:05 +08:00
parent a733856536
commit f3a7d5a026
4 changed files with 90 additions and 25 deletions

View file

@ -5,6 +5,7 @@ use std::{
hash::{Hash, Hasher},
};
use bitflags::bitflags;
use num_bigint::BigUint;
use ordered_float::NotNan;
use serde::{
@ -110,7 +111,7 @@ pub struct RegExpLiteral {
#[derive(Debug, Clone, Serialize, PartialEq, Eq, Hash)]
pub struct RegExp {
pub pattern: Atom,
pub flags: Atom,
pub flags: RegExpFlags,
}
impl fmt::Display for RegExp {
@ -119,6 +120,59 @@ impl fmt::Display for RegExp {
}
}
bitflags! {
    /// Set of flags attached to a regular expression literal, packed into a single byte.
    ///
    /// Every flag owns exactly one bit, so all eight bits of the `u8` are in use.
    pub struct RegExpFlags: u8 {
        /// The `g` flag (`global` in ECMAScript).
        const G = 0b0000_0001;
        /// The `i` flag (`ignoreCase` in ECMAScript).
        const I = 0b0000_0010;
        /// The `m` flag (`multiline` in ECMAScript).
        const M = 0b0000_0100;
        /// The `s` flag (`dotAll` in ECMAScript).
        const S = 0b0000_1000;
        /// The `u` flag (`unicode` in ECMAScript).
        const U = 0b0001_0000;
        /// The `y` flag (`sticky` in ECMAScript).
        const Y = 0b0010_0000;
        /// The `d` flag (`hasIndices` in ECMAScript).
        const D = 0b0100_0000;
        /// The `v` flag from `https://github.com/tc39/proposal-regexp-set-notation`.
        const V = 0b1000_0000;
    }
}
/// Renders the flags as the flag text of a regex literal.
///
/// Each flag that is set contributes its single lowercase letter, always in the fixed
/// order `g`, `i`, `m`, `s`, `u`, `y`, `d`, `v`. An empty set writes nothing.
impl fmt::Display for RegExpFlags {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Table order defines output order; it is independent of the bit positions.
        let letters = [
            (Self::G, "g"),
            (Self::I, "i"),
            (Self::M, "m"),
            (Self::S, "s"),
            (Self::U, "u"),
            (Self::Y, "y"),
            (Self::D, "d"),
            (Self::V, "v"),
        ];
        for (flag, letter) in letters {
            if self.contains(flag) {
                f.write_str(letter)?;
            }
        }
        Ok(())
    }
}
/// Serializes the flags as a string, using the same text as the `Display` impl
/// (for example `"gi"`).
impl Serialize for RegExpFlags {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        // `collect_str` serializes a `Display` value as a string. Its default implementation
        // is equivalent to `serialize_str(&self.to_string())`, but serializers may override
        // it to avoid the intermediate `String` allocation.
        serializer.collect_str(self)
    }
}
#[derive(Debug, Clone, Serialize, PartialEq, Eq, Hash)]
pub struct EmptyObject {}

View file

@ -21,7 +21,7 @@ use constants::{
pub use kind::Kind;
use number::{parse_big_int, parse_float, parse_int};
use oxc_allocator::{Allocator, String};
use oxc_ast::{Atom, Node, SourceType};
use oxc_ast::{ast::RegExpFlags, Atom, Node, SourceType};
use oxc_diagnostics::{Diagnostic, Diagnostics};
use simd::{SkipMultilineComment, SkipWhitespace};
use string_builder::AutoCow;
@ -1010,35 +1010,39 @@ impl<'a> Lexer<'a> {
pattern.push_str(&start[..start.len() - self.current.chars.as_str().len() - 1]);
let mut flags = String::new_in(self.allocator);
while let c @ ('$' | '_' | 'a'..='z' | 'A'..='Z' | '0'..='9') = self.peek() {
self.current.chars.next();
flags.push(c);
}
let mut flags = RegExpFlags::empty();
// v flag from https://github.com/tc39/proposal-regexp-set-notation
let gimsuy_mask: u32 =
['g', 'i', 'm', 's', 'u', 'y', 'd', 'v'].iter().map(|x| 1 << ((*x as u8) - b'a')).sum();
let mut flag_text_set: u32 = 0;
for ch in flags.chars() {
while let ch @ ('$' | '_' | 'a'..='z' | 'A'..='Z' | '0'..='9') = self.peek() {
self.current.chars.next();
// dbg!(ch);
if !ch.is_ascii_lowercase() {
self.error(Diagnostic::RegExpFlag(ch, self.current_offset()));
continue;
}
let ch_mask = 1 << ((ch as u8) - b'a');
if ch_mask & gimsuy_mask == 0 {
self.error(Diagnostic::RegExpFlag(ch, self.current_offset()));
}
if flag_text_set & ch_mask != 0 {
let flag = match ch {
'g' => RegExpFlags::G,
'i' => RegExpFlags::I,
'm' => RegExpFlags::M,
's' => RegExpFlags::S,
'u' => RegExpFlags::U,
'y' => RegExpFlags::Y,
'd' => RegExpFlags::D,
'v' => RegExpFlags::V,
_ => {
self.error(Diagnostic::RegExpFlag(ch, self.current_offset()));
continue;
}
};
if flags.contains(flag) {
self.error(Diagnostic::RegExpFlagTwice(ch, self.current_offset()));
continue;
}
flag_text_set |= ch_mask;
flags |= flag;
}
self.current.token.value = TokenValue::RegExp(RegExp {
pattern: Atom::from(pattern.as_str()),
flags: Atom::from(flags.as_str()),
});
self.current.token.value =
TokenValue::RegExp(RegExp { pattern: Atom::from(pattern.as_str()), flags });
Kind::RegExp
}

View file

@ -1,7 +1,7 @@
//! Token
use num_bigint::BigUint;
use oxc_ast::{Atom, Node};
use oxc_ast::{ast::RegExpFlags, Atom, Node};
use super::kind::Kind;
@ -25,6 +25,13 @@ pub struct Token {
pub value: TokenValue,
}
/// Size guard: on 64-bit x86_64 a `Token` must occupy exactly 56 bytes, so any change
/// that grows (or shrinks) the token layout fails this test and has to be acknowledged here.
#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
#[test]
fn no_bloat_token() {
    let token_size = std::mem::size_of::<Token>();
    assert_eq!(token_size, 56);
}
impl Token {
#[must_use]
pub const fn node(&self) -> Node {
@ -44,7 +51,7 @@ pub enum TokenValue {
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RegExp {
pub pattern: Atom,
pub flags: Atom,
pub flags: RegExpFlags,
}
impl Default for TokenValue {

View file

@ -841,7 +841,7 @@ impl Gen for RegExpLiteral {
p.print(b'/');
p.print_str(self.regex.pattern.as_bytes());
p.print(b'/');
p.print_str(self.regex.flags.as_bytes());
p.print_str(self.regex.flags.to_string().as_bytes());
}
}