refactor(coverage): replace yaml parsing with saphyr (#3144)

closes #3128

Profiling `cargo run -p oxc_coverage -- parser` no longer shows a
bottleneck in YAML parsing.
This commit is contained in:
Boshen 2024-04-30 22:49:39 +08:00 committed by GitHub
parent be8fabedab
commit 67225a8091
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 220 additions and 105 deletions

94
Cargo.lock generated
View file

@ -17,6 +17,18 @@ version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
[[package]]
name = "ahash"
version = "0.8.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011"
dependencies = [
"cfg-if",
"once_cell",
"version_check",
"zerocopy",
]
[[package]]
name = "aho-corasick"
version = "1.1.3"
@ -44,6 +56,12 @@ version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "735d4f398ca57cfa2880225c2bf81c3b9af3be5bb22e44ae70118dad38713e84"
[[package]]
name = "arraydeque"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7d902e3d592a523def97af8f317b08ce16b7ab854c1985a0c671e6f15cebc236"
[[package]]
name = "assert-unchecked"
version = "0.1.2"
@ -691,6 +709,19 @@ name = "hashbrown"
version = "0.14.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
dependencies = [
"ahash",
"allocator-api2",
]
[[package]]
name = "hashlink"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8094feaf31ff591f651a2664fb9cfd92bba7a60ce3197265e9482ebe753c8f7"
dependencies = [
"hashbrown",
]
[[package]]
name = "heck"
@ -1308,9 +1339,9 @@ dependencies = [
"project-root",
"rayon",
"regex",
"saphyr",
"serde",
"serde_json",
"serde_yaml",
"similar",
"tokio",
"walkdir",
@ -2092,6 +2123,28 @@ dependencies = [
"winapi-util",
]
[[package]]
name = "saphyr"
version = "0.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "80de2fc4848ba3cdd64b7127f2ffc420b8596bb10e80261a3895526c8c97f2e3"
dependencies = [
"arraydeque",
"encoding_rs",
"hashlink",
"saphyr-parser",
]
[[package]]
name = "saphyr-parser"
version = "0.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "886b4bb040ecd2944f54c3543e612c336396e3eba700c5063d8bad5f40bac3d7"
dependencies = [
"arraydeque",
"hashlink",
]
[[package]]
name = "scopeguard"
version = "1.2.0"
@ -2175,19 +2228,6 @@ dependencies = [
"syn",
]
[[package]]
name = "serde_yaml"
version = "0.9.34+deprecated"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47"
dependencies = [
"indexmap",
"itoa",
"ryu",
"serde",
"unsafe-libyaml",
]
[[package]]
name = "sha2"
version = "0.10.8"
@ -2651,12 +2691,6 @@ version = "0.1.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68f5e5f3158ecfd4b8ff6fe086db7c8467a2dfdac97fe420f2b7c4aa97af66d6"
[[package]]
name = "unsafe-libyaml"
version = "0.2.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861"
[[package]]
name = "untrusted"
version = "0.9.0"
@ -2976,6 +3010,26 @@ version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049"
[[package]]
name = "zerocopy"
version = "0.7.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be"
dependencies = [
"zerocopy-derive",
]
[[package]]
name = "zerocopy-derive"
version = "0.7.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "zeroize"
version = "1.7.0"

View file

@ -162,10 +162,10 @@ owo-colors = "4.0.0"
oxc_resolver = "1.7.0"
petgraph = "0.6.4"
rust-lapper = "1.1.0"
serde_yaml = "0.9.34"
similar = "2.5.0"
textwrap = "0.16.0"
unicode-width = "0.1.12"
saphyr = "0.0.1"
[workspace.metadata.cargo-shear]
ignored = ["napi"]

View file

@ -34,20 +34,19 @@ oxc_tasks_common = { workspace = true }
oxc_sourcemap = { workspace = true }
oxc_transformer = { workspace = true }
serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true }
rayon = { workspace = true }
project-root = { workspace = true }
pico-args = { workspace = true }
lazy_static = { workspace = true }
walkdir = { workspace = true }
regex = { workspace = true }
phf = { workspace = true, features = ["macros"] }
futures = { workspace = true }
tokio = { workspace = true, features = ["rt-multi-thread", "macros"] }
serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true }
rayon = { workspace = true }
project-root = { workspace = true }
pico-args = { workspace = true }
lazy_static = { workspace = true }
walkdir = { workspace = true }
regex = { workspace = true }
phf = { workspace = true, features = ["macros"] }
futures = { workspace = true }
tokio = { workspace = true, features = ["rt-multi-thread", "macros"] }
saphyr = { workspace = true }
console = { workspace = true }
encoding_rs = { workspace = true }
encoding_rs_io = { workspace = true }
serde_yaml = { workspace = true }
similar = { workspace = true }

View file

@ -5,6 +5,7 @@ mod suite;
mod babel;
mod misc;
mod test262;
mod test262_meta;
mod typescript;
// Tools
mod codegen;

View file

@ -1,79 +1,16 @@
use std::{
io,
path::{Path, PathBuf},
};
use std::path::{Path, PathBuf};
use oxc_span::SourceType;
use serde::Deserialize;
use crate::{
project_root,
suite::{Case, Suite, TestResult},
};
pub use crate::test262_meta::{MetaData, Phase, TestFlag};
const FIXTURES_PATH: &str = "tasks/coverage/test262/test";
#[derive(Debug, Clone, Deserialize, Default)]
pub struct MetaData {
pub description: Box<str>,
pub esid: Option<Box<str>>,
pub es5id: Option<Box<str>>,
pub es6id: Option<Box<str>>,
#[serde(default)]
pub info: Box<str>,
#[serde(default)]
pub features: Box<[Box<str>]>,
#[serde(default)]
pub includes: Box<[Box<str>]>,
#[serde(default)]
pub flags: Box<[TestFlag]>,
#[serde(default)]
pub negative: Option<Negative>,
#[serde(default)]
pub locale: Box<[Box<str>]>,
}
/// Individual test flag.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize)]
#[serde(rename_all = "camelCase")]
pub enum TestFlag {
OnlyStrict,
NoStrict,
Module,
Raw,
Async,
Generated,
#[serde(rename = "CanBlockIsFalse")]
CanBlockIsFalse,
#[serde(rename = "CanBlockIsTrue")]
CanBlockIsTrue,
#[serde(rename = "non-deterministic")]
NonDeterministic,
}
/// Negative test information structure.
#[derive(Debug, Clone, Deserialize)]
pub struct Negative {
pub phase: Phase,
#[serde(rename = "type")]
pub error_type: Box<str>,
}
#[derive(Debug, Clone, Copy, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum Phase {
Parse,
Early,
Resolution,
Runtime,
}
impl Phase {
pub fn is_runtime(self) -> bool {
matches!(self, Self::Runtime)
}
}
pub struct Test262Suite<T: Case> {
test_root: PathBuf,
test_cases: Vec<T>,
@ -125,12 +62,11 @@ impl Test262Case {
&self.meta
}
/// # Errors
/// # Panics
pub fn read_metadata(code: &str) -> io::Result<MetaData> {
pub fn read_metadata(code: &str) -> MetaData {
let (start, end) = (code.find("/*---").unwrap(), code.find("---*/").unwrap());
let yaml = &code[start + 5..end].replace('\r', "\n");
serde_yaml::from_str(yaml).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
let s = &code[start + 5..end].replace('\r', "\n");
MetaData::from_str(s)
}
pub fn set_result(&mut self, result: TestResult) {
@ -144,7 +80,7 @@ impl Test262Case {
impl Case for Test262Case {
fn new(path: PathBuf, code: String) -> Self {
let meta = Self::read_metadata(&code).expect("read test262 yaml meta");
let meta = Self::read_metadata(&code);
let should_fail = Self::compute_should_fail(&meta);
Self { path, code, meta, should_fail, result: TestResult::ToBeRun }
}

View file

@ -0,0 +1,125 @@
use saphyr::{Yaml, YamlLoader};
/// Metadata of a test262 case, as produced by `MetaData::from_str`.
///
/// Every field is filled from the YAML key of the same name. The derived
/// `Default` value is what `MetaData::from_str` returns when the YAML text
/// fails to load or contains no document.
#[derive(Debug, Clone, Default)]
pub struct MetaData {
/// Value of the `description` key; empty when the key is absent or not a string.
pub description: Box<str>,
/// Value of the `esid` key, when it is present as a string.
pub esid: Option<Box<str>>,
/// Value of the `es5id` key, when it is present as a string.
pub es5id: Option<Box<str>>,
/// Value of the `es6id` key, when it is present as a string.
pub es6id: Option<Box<str>>,
/// Value of the `info` key; empty when the key is absent or not a string.
pub info: Box<str>,
/// Entries of the `features` list; empty when the key is absent or not a list.
pub features: Box<[Box<str>]>,
/// Entries of the `includes` list; empty when the key is absent or not a list.
pub includes: Box<[Box<str>]>,
/// Entries of the `flags` list, each converted with `TestFlag::from_str`;
/// empty when the key is absent or not a list.
pub flags: Box<[TestFlag]>,
/// Contents of the `negative` key; `None` when the key is absent or null.
pub negative: Option<Negative>,
/// Entries of the `locale` list; empty when the key is absent or not a list.
pub locale: Box<[Box<str>]>,
}
/// Individual test flag.
///
/// Each variant has exactly one textual spelling, listed in
/// `TestFlag::NAMES` and recognised by `TestFlag::from_str`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TestFlag {
    OnlyStrict,
    NoStrict,
    Module,
    Raw,
    Async,
    Generated,
    CanBlockIsFalse,
    CanBlockIsTrue,
    NonDeterministic,
}

impl TestFlag {
    /// Textual spelling of every flag, paired with the variant it denotes.
    const NAMES: [(&'static str, Self); 9] = [
        ("onlyStrict", Self::OnlyStrict),
        ("noStrict", Self::NoStrict),
        ("module", Self::Module),
        ("raw", Self::Raw),
        ("async", Self::Async),
        ("generated", Self::Generated),
        ("CanBlockIsFalse", Self::CanBlockIsFalse),
        ("CanBlockIsTrue", Self::CanBlockIsTrue),
        ("non-deterministic", Self::NonDeterministic),
    ];

    /// Converts the textual spelling of a flag into its variant.
    ///
    /// The comparison is exact (case-sensitive).
    ///
    /// # Panics
    ///
    /// Panics when `s` is not one of the spellings in `TestFlag::NAMES`.
    fn from_str(s: &str) -> Self {
        match Self::NAMES.iter().find(|(name, _)| *name == s) {
            Some(&(_, flag)) => flag,
            None => panic!("{s} not supported for TestFlag"),
        }
    }
}
/// Negative test information structure.
///
/// Built by `Negative::from_yaml` from the node stored under the `negative`
/// key of the metadata.
#[derive(Debug, Clone)]
pub struct Negative {
/// Value of the node's `phase` entry, converted with `Phase::from_str`.
pub phase: Phase,
/// Value of the node's `type` entry, kept as the raw string.
pub error_type: Box<str>,
}
impl Negative {
    /// Reads the `phase` and `type` entries of `yaml` into a [`Negative`].
    ///
    /// # Panics
    ///
    /// Panics when either entry is missing or not a string, or when the
    /// `phase` value is rejected by `Phase::from_str`.
    fn from_yaml(yaml: &Yaml) -> Self {
        // `phase` is resolved completely before `type` is looked at.
        let phase_name = yaml["phase"].as_str().unwrap();
        let phase = Phase::from_str(phase_name);
        let error_type = Box::<str>::from(yaml["type"].as_str().unwrap());
        Self { phase, error_type }
    }
}
/// Phase named by the `phase` entry of a `negative` metadata block.
///
/// The textual spellings accepted by `Phase::from_str` are the lowercase
/// variant names: `parse`, `early`, `resolution` and `runtime`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Phase {
    Parse,
    Early,
    Resolution,
    Runtime,
}

impl Phase {
    /// Returns `true` only for [`Phase::Runtime`].
    pub fn is_runtime(self) -> bool {
        matches!(self, Self::Runtime)
    }

    /// Converts the lowercase spelling of a phase into its variant.
    ///
    /// The comparison is exact (case-sensitive).
    ///
    /// # Panics
    ///
    /// Panics when `s` is not one of `parse`, `early`, `resolution` or
    /// `runtime`.
    fn from_str(s: &str) -> Self {
        match s {
            "parse" => Self::Parse,
            "early" => Self::Early,
            "resolution" => Self::Resolution,
            "runtime" => Self::Runtime,
            // Same wording as the `TestFlag` panic so failures read alike.
            _ => panic!("{s} not supported for Phase"),
        }
    }
}
impl MetaData {
pub fn from_str(s: &str) -> Self {
let yamls = YamlLoader::load_from_str(s).unwrap_or_default();
let Some(yaml) = yamls.first() else { return Self::default() };
Self {
description: yaml["description"].as_str().unwrap_or_default().into(),
esid: yaml["esid"].as_str().map(Into::into),
es5id: yaml["es5id"].as_str().map(Into::into),
es6id: yaml["es6id"].as_str().map(Into::into),
info: yaml["info"].as_str().unwrap_or_default().into(),
features: Self::get_vec_of_string(&yaml["features"]),
includes: Self::get_vec_of_string(&yaml["includes"]),
flags: yaml["flags"]
.as_vec()
.map_or_else(Vec::new, |a| {
a.iter()
.map(|v| v.as_str().map(TestFlag::from_str).unwrap())
.collect::<Vec<_>>()
})
.into(),
negative: {
let yaml = &yaml["negative"];
(!yaml.is_null() && !yaml.is_badvalue()).then(|| Negative::from_yaml(yaml))
},
locale: Self::get_vec_of_string(&yaml["locale"]),
}
}
fn get_vec_of_string(yaml: &Yaml) -> Box<[Box<str>]> {
yaml.as_vec()
.map_or_else(Vec::new, |a| {
a.iter()
.map(|v| v.as_str().unwrap_or_default().to_string().into_boxed_str())
.collect::<Vec<_>>()
})
.into()
}
}