perf(linter): use aho-corasick instead of regex for string matching in jsx-a11y/img-redundant-alt (#5892)

Hypothesis: profiling shows that `Regex` creation takes a significant amount of time. The `regex` crate uses `aho-corasick` internally for literal string matching, which is all we need in some cases. In theory, we can save time by using that library directly, since we do not need the full regex syntax.
This commit is contained in:
camchenry 2024-09-19 22:40:01 +00:00
parent 66b468857a
commit 608d637e4d
4 changed files with 23 additions and 10 deletions

1
Cargo.lock generated
View file

@ -1627,6 +1627,7 @@ dependencies = [
name = "oxc_linter"
version = "0.9.6"
dependencies = [
"aho-corasick",
"bitflags 2.6.0",
"convert_case",
"cow-utils",

View file

@ -108,6 +108,7 @@ napi = "3.0.0-alpha.8"
napi-build = "2.1.3"
napi-derive = "3.0.0-alpha.7"
aho-corasick = "1.1.3"
allocator-api2 = "0.2.18"
assert-unchecked = "0.1.2"
base64 = "0.22.1"

View file

@ -34,6 +34,7 @@ oxc_semantic = { workspace = true }
oxc_span = { workspace = true, features = ["schemars", "serialize"] }
oxc_syntax = { workspace = true }
aho-corasick = { workspace = true }
bitflags = { workspace = true }
convert_case = { workspace = true }
cow-utils = { workspace = true }

View file

@ -1,3 +1,4 @@
use aho_corasick::AhoCorasick;
use oxc_ast::{
ast::{JSXAttributeItem, JSXAttributeName, JSXAttributeValue, JSXExpression},
AstKind,
@ -5,7 +6,6 @@ use oxc_ast::{
use oxc_diagnostics::OxcDiagnostic;
use oxc_macros::declare_oxc_lint;
use oxc_span::{CompactStr, Span};
use regex::{Regex, RegexBuilder};
use serde_json::Value;
use crate::{
@ -28,7 +28,7 @@ pub struct ImgRedundantAlt(Box<ImgRedundantAltConfig>);
/// Configuration for the `ImgRedundantAlt` rule, which stores it behind a `Box`.
// NOTE(review): this is a diff hunk rendered without +/- markers, so the pre-commit and
// post-commit declarations of `redundant_words` both appear below. Only one of them exists
// in either version of the file.
#[derive(Debug, Clone)]
pub struct ImgRedundantAltConfig {
/// Names kept as `CompactStr` values; presumably the JSX element names whose `alt` text is
/// validated — confirm against the rule body, which is not part of this hunk.
types_to_validate: Vec<CompactStr>,
// Pre-commit declaration (removed): a compiled `Regex`.
redundant_words: Regex,
// Post-commit declaration (added): an `AhoCorasick` automaton.
redundant_words: AhoCorasick,
}
impl std::ops::Deref for ImgRedundantAlt {
@ -45,21 +45,25 @@ impl Default for ImgRedundantAltConfig {
/// Builds the default configuration: `types_to_validate` holds only `"img"` and the matcher is
/// built from `REDUNDANT_WORDS` (a constant defined outside this hunk).
// NOTE(review): diff hunk rendered without +/- markers; the old and new initialisers of
// `redundant_words` are both shown.
fn default() -> Self {
Self {
types_to_validate: vec![CompactStr::new("img")],
// Pre-commit initialiser (removed): regex alternation produced by `Self::union`.
redundant_words: Self::union(&REDUNDANT_WORDS).unwrap(),
// Post-commit initialiser (added): ASCII-case-insensitive Aho-Corasick automaton.
// `expect` panics with the given message if the automaton cannot be built.
redundant_words: AhoCorasick::builder()
.ascii_case_insensitive(true)
.build(REDUNDANT_WORDS)
.expect("Could not build AhoCorasick"),
}
}
}
// NOTE(review): diff hunk rendered without +/- markers. Inside this impl the pre-commit and
// post-commit versions of `new` are interleaved, and `union` is shown although the commit
// removes it.
impl ImgRedundantAltConfig {
// Pre-commit signature (removed): construction could fail with `regex::Error`.
fn new(types_to_validate: Vec<&str>, redundant_words: &[&str]) -> Result<Self, regex::Error> {
/// Creates a configuration from caller-supplied element names and redundant words.
///
/// Post-commit signature (added): returns `aho_corasick::BuildError` when the automaton
/// cannot be built from `redundant_words`.
fn new(
types_to_validate: Vec<&str>,
redundant_words: &[&str],
) -> Result<Self, aho_corasick::BuildError> {
Ok(Self {
// Each `&str` is converted through `Into` and collected into the field's vector.
types_to_validate: types_to_validate.into_iter().map(Into::into).collect(),
// Pre-commit initialiser (removed): regex alternation produced by `Self::union`.
redundant_words: Self::union(redundant_words)?,
// Post-commit initialiser (added): ASCII-case-insensitive automaton over the words.
// The builder call carries no counterpart of the `\b` anchors used by `union` below.
redundant_words: AhoCorasick::builder()
.ascii_case_insensitive(true)
.build(redundant_words)?,
})
}
// Removed by this commit: joined `strs` with `|` into one case-insensitive alternation
// wrapped in `\b` word-boundary anchors and compiled it with `RegexBuilder`.
fn union(strs: &[&str]) -> Result<Regex, regex::Error> {
RegexBuilder::new(&format!(r"(?i)\b({})\b", strs.join("|"))).case_insensitive(true).build()
}
}
declare_oxc_lint!(
@ -191,7 +195,13 @@ impl Rule for ImgRedundantAlt {
impl ImgRedundantAlt {
#[inline]
fn is_redundant_alt_text(&self, alt_text: &str) -> bool {
self.redundant_words.is_match(alt_text)
for mat in self.redundant_words.find_iter(alt_text) {
// check if followed by space or is whole text
if mat.end() == alt_text.len() || alt_text.as_bytes()[mat.end()] == b' ' {
return true;
}
}
false
}
}