perf: use simdutf8 to validate UTF-8 when reading files (#5196)

closes #5191

---------

Co-authored-by: overlookmotel <j@dummett.org>
This commit is contained in:
dalaoshu 2024-08-26 10:14:21 +08:00 committed by GitHub
parent fb847bd0ba
commit ce454cf426
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 32 additions and 4 deletions

9
Cargo.lock generated
View file

@ -1020,7 +1020,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4"
dependencies = [
"cfg-if",
"windows-targets 0.48.5",
"windows-targets 0.52.6",
]
[[package]]
@ -1624,6 +1624,7 @@ dependencies = [
"schemars",
"serde",
"serde_json",
"simdutf8",
"url",
]
@ -2680,6 +2681,12 @@ dependencies = [
"libc",
]
[[package]]
name = "simdutf8"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f27f6278552951f1f2b8cf9da965d10969b2efdea95a6ec47987ab46edfe263a"
[[package]]
name = "similar"
version = "2.6.0"

View file

@ -170,6 +170,7 @@ seq-macro = "0.3.5"
serde = "1.0.206"
serde_json = "1.0.124"
serde-wasm-bindgen = "0.6.5"
simdutf8 = { version = "0.1.4", features = ["aarch64_neon"] }
similar = "2.6.0"
syn = { version = "2.0.74", default-features = false }
tempfile = "3.12.0"

View file

@ -52,6 +52,7 @@ once_cell = { workspace = true }
memchr = { workspace = true }
json-strip-comments = { workspace = true }
schemars = { workspace = true, features = ["indexmap2"] }
simdutf8 = { workspace = true }
[dev-dependencies]
insta = { workspace = true }

View file

@ -17,7 +17,9 @@ pub use self::{
settings::{jsdoc::JSDocPluginSettings, OxlintSettings},
};
use crate::{
rules::RuleEnum, utils::is_jest_rule_adapted_to_vitest, AllowWarnDeny, RuleWithSeverity,
rules::RuleEnum,
utils::{is_jest_rule_adapted_to_vitest, read_to_string},
AllowWarnDeny, RuleWithSeverity,
};
/// Oxlint Configuration File
@ -68,7 +70,7 @@ impl OxlintConfig {
///
/// * Parse Failure
pub fn from_file(path: &Path) -> Result<Self, OxcDiagnostic> {
let mut string = std::fs::read_to_string(path).map_err(|e| {
let mut string = read_to_string(path).map_err(|e| {
OxcDiagnostic::error(format!("Failed to parse config {path:?} with error {e:?}"))
})?;

View file

@ -19,6 +19,7 @@ use rustc_hash::FxHashSet;
use crate::{
partial_loader::{JavaScriptSource, PartialLoader, LINT_PARTIAL_LOADER_EXT},
utils::read_to_string,
Fixer, Linter, Message,
};
@ -176,7 +177,7 @@ impl Runtime {
return None;
}
let source_type = source_type.unwrap_or_default();
let file_result = fs::read_to_string(path).map_err(|e| {
let file_result = read_to_string(path).map_err(|e| {
Error::new(OxcDiagnostic::error(format!(
"Failed to open file {path:?} with error \"{e}\""
)))

View file

@ -9,6 +9,8 @@ mod tree_shaking;
mod unicorn;
mod vitest;
use std::{io, path::Path};
pub use self::{
config::*, jest::*, jsdoc::*, nextjs::*, promise::*, react::*, react_perf::*, tree_shaking::*,
unicorn::*, vitest::*,
@ -37,3 +39,17 @@ pub fn is_jest_rule_adapted_to_vitest(rule_name: &str) -> bool {
jest_rules.contains(&rule_name)
}
/// Read the whole file at `path` into a `String`, validating it as UTF-8 with `simdutf8`.
///
/// # Errors
///
/// * Any error returned by `std::fs::read` while reading the file.
/// * An `io::ErrorKind::InvalidData` error if the file's bytes are not valid UTF-8.
pub fn read_to_string(path: &Path) -> io::Result<String> {
    let contents = std::fs::read(path)?;
    // Validate with `simdutf8`, which is faster than the `std::str::from_utf8`
    // that `fs::read_to_string` uses internally
    let is_valid_utf8 = simdutf8::basic::from_utf8(&contents).is_ok();
    if is_valid_utf8 {
        // SAFETY: `simdutf8` has just confirmed that `contents` is valid UTF-8
        Ok(unsafe { String::from_utf8_unchecked(contents) })
    } else {
        // Mirror the error that `fs::read_to_string` produces (`io::Error::INVALID_UTF8`)
        Err(io::Error::new(
            io::ErrorKind::InvalidData,
            "stream did not contain valid UTF-8",
        ))
    }
}