From 67225a8091c9cb3370e4f35047d331a4ed99f73f Mon Sep 17 00:00:00 2001 From: Boshen Date: Tue, 30 Apr 2024 22:49:39 +0800 Subject: [PATCH] refactor(coverage): replace yaml parsing with saphyr (#3144) closes #3128 Profiling `cargo run -p oxc_coverage -- parser` no longer shows a bottleneck in yaml parsing --- Cargo.lock | 94 +++++++++++++++++----- Cargo.toml | 2 +- tasks/coverage/Cargo.toml | 25 +++--- tasks/coverage/src/lib.rs | 1 + tasks/coverage/src/test262.rs | 78 ++---------------- tasks/coverage/src/test262_meta.rs | 125 +++++++++++++++++++++++++++++ 6 files changed, 220 insertions(+), 105 deletions(-) create mode 100644 tasks/coverage/src/test262_meta.rs diff --git a/Cargo.lock b/Cargo.lock index 1220efb27..80a94690c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,6 +17,18 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +[[package]] +name = "ahash" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", + "zerocopy", +] + [[package]] name = "aho-corasick" version = "1.1.3" @@ -44,6 +56,12 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "735d4f398ca57cfa2880225c2bf81c3b9af3be5bb22e44ae70118dad38713e84" +[[package]] +name = "arraydeque" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d902e3d592a523def97af8f317b08ce16b7ab854c1985a0c671e6f15cebc236" + [[package]] name = "assert-unchecked" version = "0.1.2" @@ -691,6 +709,19 @@ name = "hashbrown" version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash", + "allocator-api2", +] + +[[package]] +name = "hashlink" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8094feaf31ff591f651a2664fb9cfd92bba7a60ce3197265e9482ebe753c8f7" +dependencies = [ + "hashbrown", +] [[package]] name = "heck" @@ -1308,9 +1339,9 @@ dependencies = [ "project-root", "rayon", "regex", + "saphyr", "serde", "serde_json", - "serde_yaml", "similar", "tokio", "walkdir", @@ -2092,6 +2123,28 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "saphyr" +version = "0.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "80de2fc4848ba3cdd64b7127f2ffc420b8596bb10e80261a3895526c8c97f2e3" +dependencies = [ + "arraydeque", + "encoding_rs", + "hashlink", + "saphyr-parser", +] + +[[package]] +name = "saphyr-parser" +version = "0.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "886b4bb040ecd2944f54c3543e612c336396e3eba700c5063d8bad5f40bac3d7" +dependencies = [ + "arraydeque", + "hashlink", +] + [[package]] name = "scopeguard" version = "1.2.0" @@ -2175,19 +2228,6 @@ dependencies = [ "syn", ] -[[package]] -name = "serde_yaml" -version = "0.9.34+deprecated" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" -dependencies = [ - "indexmap", - "itoa", - "ryu", - "serde", - "unsafe-libyaml", -] - [[package]] name = "sha2" version = "0.10.8" @@ -2651,12 +2691,6 @@ version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "68f5e5f3158ecfd4b8ff6fe086db7c8467a2dfdac97fe420f2b7c4aa97af66d6" -[[package]] -name = "unsafe-libyaml" -version = "0.2.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" - [[package]] name = "untrusted" version = "0.9.0" @@ -2976,6 +3010,26 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" +[[package]] +name = "zerocopy" +version = "0.7.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "zeroize" version = "1.7.0" diff --git a/Cargo.toml b/Cargo.toml index 9992ca687..771e56021 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -162,10 +162,10 @@ owo-colors = "4.0.0" oxc_resolver = "1.7.0" petgraph = "0.6.4" rust-lapper = "1.1.0" -serde_yaml = "0.9.34" similar = "2.5.0" textwrap = "0.16.0" unicode-width = "0.1.12" +saphyr = "0.0.1" [workspace.metadata.cargo-shear] ignored = ["napi"] diff --git a/tasks/coverage/Cargo.toml b/tasks/coverage/Cargo.toml index 4021bb3c7..e54bc4d88 100644 --- a/tasks/coverage/Cargo.toml +++ b/tasks/coverage/Cargo.toml @@ -34,20 +34,19 @@ oxc_tasks_common = { workspace = true } oxc_sourcemap = { workspace = true } oxc_transformer = { workspace = true } -serde = { workspace = true, features = ["derive"] } -serde_json = { workspace = true } -rayon = { workspace = true } -project-root = { workspace = true } -pico-args = { workspace = true } -lazy_static = { workspace = true } -walkdir = { workspace = true } -regex = { workspace = true } -phf = { workspace = true, features = ["macros"] } -futures = { workspace = true } -tokio = { workspace = true, features = ["rt-multi-thread", "macros"] } - +serde = { workspace = true, features = ["derive"] } +serde_json = { workspace = true } +rayon = { workspace = true } +project-root = { workspace = true } +pico-args = { workspace = true } +lazy_static = { workspace = true } +walkdir = { workspace = true } +regex = { workspace = true } +phf = { workspace = true, features = ["macros"] } +futures = { workspace = true } +tokio = { workspace = true, features = ["rt-multi-thread", "macros"] } +saphyr = { workspace = true } console = { workspace = true } encoding_rs = { workspace = true } encoding_rs_io = { workspace = true } -serde_yaml = { workspace = true } similar = { workspace = true } diff --git a/tasks/coverage/src/lib.rs b/tasks/coverage/src/lib.rs index 5ff01a152..e5e5a142c 100644 --- a/tasks/coverage/src/lib.rs +++ b/tasks/coverage/src/lib.rs @@ -5,6 +5,7 @@ mod suite; mod babel; mod misc; mod test262; +mod test262_meta; mod typescript; // Tools mod codegen; diff --git a/tasks/coverage/src/test262.rs b/tasks/coverage/src/test262.rs index 90f97ce7d..1d584313b 100644 --- a/tasks/coverage/src/test262.rs +++ b/tasks/coverage/src/test262.rs @@ -1,79 +1,16 @@ -use std::{ - io, - path::{Path, PathBuf}, -}; +use std::path::{Path, PathBuf}; use oxc_span::SourceType; -use serde::Deserialize; use crate::{ project_root, suite::{Case, Suite, TestResult}, }; +pub use crate::test262_meta::{MetaData, Phase, TestFlag}; + const FIXTURES_PATH: &str = "tasks/coverage/test262/test"; -#[derive(Debug, Clone, Deserialize, Default)] -pub struct MetaData { - pub description: Box, - pub esid: Option>, - pub es5id: Option>, - pub es6id: Option>, - #[serde(default)] - pub info: Box, - #[serde(default)] - pub features: Box<[Box]>, - #[serde(default)] - pub includes: Box<[Box]>, - #[serde(default)] - pub flags: Box<[TestFlag]>, - #[serde(default)] - pub negative: Option, - #[serde(default)] - pub locale: Box<[Box]>, -} - -/// Individual test flag. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize)] -#[serde(rename_all = "camelCase")] -pub enum TestFlag { - OnlyStrict, - NoStrict, - Module, - Raw, - Async, - Generated, - #[serde(rename = "CanBlockIsFalse")] - CanBlockIsFalse, - #[serde(rename = "CanBlockIsTrue")] - CanBlockIsTrue, - #[serde(rename = "non-deterministic")] - NonDeterministic, -} - -/// Negative test information structure. -#[derive(Debug, Clone, Deserialize)] -pub struct Negative { - pub phase: Phase, - #[serde(rename = "type")] - pub error_type: Box, -} - -#[derive(Debug, Clone, Copy, Deserialize, PartialEq, Eq)] -#[serde(rename_all = "lowercase")] -pub enum Phase { - Parse, - Early, - Resolution, - Runtime, -} - -impl Phase { - pub fn is_runtime(self) -> bool { - matches!(self, Self::Runtime) - } -} - pub struct Test262Suite { test_root: PathBuf, test_cases: Vec, @@ -125,12 +62,11 @@ impl Test262Case { &self.meta } - /// # Errors /// # Panics - pub fn read_metadata(code: &str) -> io::Result { + pub fn read_metadata(code: &str) -> MetaData { let (start, end) = (code.find("/*---").unwrap(), code.find("---*/").unwrap()); - let yaml = &code[start + 5..end].replace('\r', "\n"); - serde_yaml::from_str(yaml).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e)) + let s = &code[start + 5..end].replace('\r', "\n"); + MetaData::from_str(s) } pub fn set_result(&mut self, result: TestResult) { @@ -144,7 +80,7 @@ impl Test262Case { impl Case for Test262Case { fn new(path: PathBuf, code: String) -> Self { - let meta = Self::read_metadata(&code).expect("read test262 yaml meta"); + let meta = Self::read_metadata(&code); let should_fail = Self::compute_should_fail(&meta); Self { path, code, meta, should_fail, result: TestResult::ToBeRun } } diff --git a/tasks/coverage/src/test262_meta.rs b/tasks/coverage/src/test262_meta.rs new file mode 100644 index 000000000..c662de0f3 --- /dev/null +++ b/tasks/coverage/src/test262_meta.rs @@ -0,0 +1,125 @@ +use saphyr::{Yaml, YamlLoader}; + +#[derive(Debug, Clone, Default)] +pub struct MetaData { + pub description: Box, + pub esid: Option>, + pub es5id: Option>, + pub es6id: Option>, + pub info: Box, + pub features: Box<[Box]>, + pub includes: Box<[Box]>, + pub flags: Box<[TestFlag]>, + pub negative: Option, + pub locale: Box<[Box]>, +} + +/// Individual test flag. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum TestFlag { + OnlyStrict, + NoStrict, + Module, + Raw, + Async, + Generated, + CanBlockIsFalse, + CanBlockIsTrue, + NonDeterministic, +} + +impl TestFlag { + fn from_str(s: &str) -> Self { + match s { + "onlyStrict" => Self::OnlyStrict, + "noStrict" => Self::NoStrict, + "module" => Self::Module, + "raw" => Self::Raw, + "async" => Self::Async, + "generated" => Self::Generated, + "CanBlockIsFalse" => Self::CanBlockIsFalse, + "CanBlockIsTrue" => Self::CanBlockIsTrue, + "non-deterministic" => Self::NonDeterministic, + _ => panic!("{s} not supported for TestFlag"), + } + } +} + +/// Negative test information structure. +#[derive(Debug, Clone)] +pub struct Negative { + pub phase: Phase, + pub error_type: Box, +} + +impl Negative { + fn from_yaml(yaml: &Yaml) -> Self { + Self { + phase: Phase::from_str(yaml["phase"].as_str().unwrap()), + error_type: yaml["type"].as_str().unwrap().into(), + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Phase { + Parse, + Early, + Resolution, + Runtime, +} + +impl Phase { + pub fn is_runtime(self) -> bool { + matches!(self, Self::Runtime) + } + + fn from_str(s: &str) -> Self { + match s { + "parse" => Self::Parse, + "early" => Self::Early, + "resolution" => Self::Resolution, + "runtime" => Self::Runtime, + _ => panic!("{s} not support for Phase"), + } + } +} + +impl MetaData { + pub fn from_str(s: &str) -> Self { + let yamls = YamlLoader::load_from_str(s).unwrap_or_default(); + let Some(yaml) = yamls.first() else { return Self::default() }; + Self { + description: yaml["description"].as_str().unwrap_or_default().into(), + esid: yaml["esid"].as_str().map(Into::into), + es5id: yaml["es5id"].as_str().map(Into::into), + es6id: yaml["es6id"].as_str().map(Into::into), + info: yaml["info"].as_str().unwrap_or_default().into(), + features: Self::get_vec_of_string(&yaml["features"]), + includes: Self::get_vec_of_string(&yaml["includes"]), + flags: yaml["flags"] + .as_vec() + .map_or_else(Vec::new, |a| { + a.iter() + .map(|v| v.as_str().map(TestFlag::from_str).unwrap()) + .collect::>() + }) + .into(), + negative: { + let yaml = &yaml["negative"]; + (!yaml.is_null() && !yaml.is_badvalue()).then(|| Negative::from_yaml(yaml)) + }, + locale: Self::get_vec_of_string(&yaml["locale"]), + } + } + + fn get_vec_of_string(yaml: &Yaml) -> Box<[Box]> { + yaml.as_vec() + .map_or_else(Vec::new, |a| { + a.iter() + .map(|v| v.as_str().unwrap_or_default().to_string().into_boxed_str()) + .collect::>() + }) + .into() + } +}