From a347e3993e9c700c7d45db0281f3d20d4d50d357 Mon Sep 17 00:00:00 2001 From: Boshen Date: Fri, 17 Feb 2023 14:50:42 +0800 Subject: [PATCH] Revert "perf(lexer): use portable-SIMD to speed up multiline comment scanning" This reverts commit a51c7f9ba22b6c192b6de1f2a79447869ba7c65f. --- Cargo.lock | 1 - crates/oxc_parser/Cargo.toml | 1 - crates/oxc_parser/src/lexer/mod.rs | 23 ++---- crates/oxc_parser/src/lexer/simd.rs | 122 ---------------------------- crates/oxc_parser/src/lib.rs | 2 - 5 files changed, 7 insertions(+), 142 deletions(-) delete mode 100644 crates/oxc_parser/src/lexer/simd.rs diff --git a/Cargo.lock b/Cargo.lock index 15d0812d9..8c594a41b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -595,7 +595,6 @@ name = "oxc_parser" version = "0.0.0" dependencies = [ "bitflags", - "lazy_static", "num-bigint", "oxc_allocator", "oxc_ast", diff --git a/crates/oxc_parser/Cargo.toml b/crates/oxc_parser/Cargo.toml index b23c9506c..ea9da0b08 100644 --- a/crates/oxc_parser/Cargo.toml +++ b/crates/oxc_parser/Cargo.toml @@ -19,4 +19,3 @@ rustc-hash = { workspace = true } unicode-id-start = "1.0.3" num-bigint = "0.4.3" -lazy_static = "1.4.0" diff --git a/crates/oxc_parser/src/lexer/mod.rs b/crates/oxc_parser/src/lexer/mod.rs index 123973545..cfd80a52f 100644 --- a/crates/oxc_parser/src/lexer/mod.rs +++ b/crates/oxc_parser/src/lexer/mod.rs @@ -8,7 +8,6 @@ mod constants; mod kind; mod number; -mod simd; mod string_builder; mod token; @@ -23,7 +22,6 @@ use number::{parse_big_int, parse_float, parse_int}; use oxc_allocator::{Allocator, String}; use oxc_ast::{Atom, SourceType, Span}; use oxc_diagnostics::{Diagnostic, Diagnostics}; -use simd::MultiLineComment; use string_builder::AutoCow; pub use token::{RegExp, Token, TokenValue}; @@ -468,21 +466,14 @@ impl<'a> Lexer<'a> { /// Section 12.4 Multi Line Comment #[must_use] fn skip_multi_line_comment(&mut self) -> Kind { - let remaining = self.remaining().as_bytes(); - let state = MultiLineComment::new(remaining).simd(remaining); - - // 
SAFETY: offset is computed to the boundary - self.current.chars = - unsafe { std::str::from_utf8_unchecked(&remaining[state.offset..]) }.chars(); - - if state.newline { - self.current.token.is_on_new_line = state.newline; + while let Some(c) = self.current.chars.next() { + if c == '*' && self.next_eq('/') { + return Kind::MultiLineComment; + } + if is_line_terminator(c) { + self.current.token.is_on_new_line = true; + } } - - if state.found { - return Kind::MultiLineComment; - } - self.error(Diagnostic::UnterminatedMultiLineComment(self.unterminated_range())); Kind::Eof } diff --git a/crates/oxc_parser/src/lexer/simd.rs b/crates/oxc_parser/src/lexer/simd.rs deleted file mode 100644 index 452f0d965..000000000 --- a/crates/oxc_parser/src/lexer/simd.rs +++ /dev/null @@ -1,122 +0,0 @@ -//! Lexer methods using portable-SIMD -//! See: -//! * -//! * -//! * - -use std::simd::{Simd, SimdPartialEq, ToBitMask}; - -use lazy_static::lazy_static; - -const ELEMENTS: usize = 16; -type SimdVec = Simd<u8, ELEMENTS>; - -lazy_static! { - static ref STAR: SimdVec = SimdVec::splat(b'*'); - static ref SLASH: SimdVec = SimdVec::splat(b'/'); - static ref LF: SimdVec = SimdVec::splat(b'\n'); - static ref CR: SimdVec = SimdVec::splat(b'\r'); - static ref LSPS: SimdVec = SimdVec::splat(226); -} - -#[derive(Debug)] -pub struct MultiLineComment<'a> { - /// Total offset - pub offset: usize, - - /// Found multiline comment end '*/'? - pub found: bool, - - /// Found newline inside the comment? - pub newline: bool, - - /// Does the previous chunk has a '*' at the end? - /// For checking against the first '/' on the current chunk. 
- previous_star_at_end: bool, - - /// Remaining char bytes from the lexer - remaining: &'a [u8], -} - -impl<'a> MultiLineComment<'a> { - pub const fn new(remaining: &'a [u8]) -> Self { - Self { offset: 0, found: false, newline: false, previous_star_at_end: false, remaining } - } - - pub fn simd(mut self, remaining: &[u8]) -> Self { - let (chunks, remainder) = remaining.as_chunks::<ELEMENTS>(); - - for chunk in chunks { - self.check(chunk, chunk.len()); - if self.found { - return self; - } - } - - if !remainder.is_empty() { - // Align the last chunk for avoiding the use of a scalar version - let mut chunk = [0; ELEMENTS]; - let len = remainder.len(); - chunk[..len].copy_from_slice(remainder); - self.check(&chunk, len); - } - - self - } - - /// Check and compute state for a single chunk - /// `chunk_len` can be < ELEMENTS for the last chunk - fn check(&mut self, chunk: &[u8], chunk_len: usize) { - let s = SimdVec::from_slice(chunk); - - let any_star = s.simd_eq(*STAR); - let any_slash = s.simd_eq(*SLASH); - let star_mask = any_star.to_bitmask(); - let slash_mask = any_slash.to_bitmask(); - - // Get the offset of '/' if '*' is immediately followed by '/' - let star_slash_mask = (star_mask << 1) & slash_mask; - let star_slash_pos = star_slash_mask.trailing_zeros(); - - let offset_total = if star_slash_mask > 0 { - self.found = true; - star_slash_pos as usize + 1 - } else if self.previous_star_at_end && slash_mask & 1 > 0 { - // at boundary - self.found = true; - 1 - } else { - // Is '*' at the end? 
- self.previous_star_at_end = star_mask & 1 << (ELEMENTS - 1) > 0; - chunk_len - }; - - // Look for '\n' and '\r' - if !self.newline { - let any_newline = s.simd_eq(*LF) | s.simd_eq(*CR); - let newline_mask = any_newline.to_bitmask(); - self.newline = newline_mask.trailing_zeros() < star_slash_pos; - // Look for LS '\u{2028}' [226, 128, 168] and PS '\u{2029}' [226, 128, 169] - if !self.newline { - let lspf_mask = s.simd_eq(*LSPS).to_bitmask(); - if lspf_mask > 0 { - let offset_by = lspf_mask.trailing_zeros(); - if offset_by < star_slash_pos { - let second = self.offset + offset_by as usize + 1; - // Using scalar version `.get` instead of simd - // to avoid checking on the next chunk - // because this may be on the chunk boundary - if self.remaining.get(second) == Some(&128) { - let third = self.remaining.get(second + 1); - if matches!(third, Some(&168 | &169)) { - self.newline = true; - } - } - } - } - } - } - - self.offset += offset_total; - } -} diff --git a/crates/oxc_parser/src/lib.rs b/crates/oxc_parser/src/lib.rs index d0459cb0a..e52e5091d 100644 --- a/crates/oxc_parser/src/lib.rs +++ b/crates/oxc_parser/src/lib.rs @@ -1,8 +1,6 @@ //! Recursive Descent Parser for ECMAScript and TypeScript #![allow(clippy::wildcard_imports)] // allow for use `oxc_ast::ast::*` -#![feature(portable_simd)] -#![feature(slice_as_chunks)] mod cursor; mod list;