perf(regex): reduce string allocations in Display impls (#6528)

There's still room for improvement here.
This commit is contained in:
DonIsaac 2024-10-13 19:34:18 +00:00
parent 2c32dac691
commit 7c200560c7

View file

@ -1,4 +1,5 @@
use std::{ use std::{
borrow::Cow,
fmt::{self, Display}, fmt::{self, Display},
iter::Peekable, iter::Peekable,
}; };
@ -31,7 +32,7 @@ impl<'a> Display for Alternative<'a> {
write_join_with(f, "", &self.body, |iter| { write_join_with(f, "", &self.body, |iter| {
let next = iter.next()?; let next = iter.next()?;
let Some(next) = as_character(next) else { return Some(next.to_string()) }; let Some(next) = as_character(next) else { return Some(Cow::Owned(next.to_string())) };
let peek = iter.peek().and_then(|it| as_character(it)); let peek = iter.peek().and_then(|it| as_character(it));
let (result, eat) = character_to_string(next, peek); let (result, eat) = character_to_string(next, peek);
@ -106,10 +107,12 @@ impl<'a> Display for Quantifier<'a> {
(1, None) => write!(f, "+")?, (1, None) => write!(f, "+")?,
(0, Some(1)) => write!(f, "?")?, (0, Some(1)) => write!(f, "?")?,
(min, Some(max)) if min == max => write!(f, "{{{min}}}",)?, (min, Some(max)) if min == max => write!(f, "{{{min}}}",)?,
(min, max) => { (min, Some(max)) => {
let max = max.map_or_else(String::default, |it| it.to_string());
write!(f, "{{{min},{max}}}",)?; write!(f, "{{{min},{max}}}",)?;
} }
(min, None) => {
write!(f, "{{{min},}}",)?;
}
} }
if !self.greedy { if !self.greedy {
@ -194,7 +197,9 @@ impl<'a> Display for CharacterClass<'a> {
write_join_with(f, sep, &self.body, |iter| { write_join_with(f, sep, &self.body, |iter| {
let next = iter.next()?; let next = iter.next()?;
let Some(next) = as_character(next) else { return Some(next.to_string()) }; let Some(next) = as_character(next) else {
return Some(Cow::Owned(next.to_string()));
};
let peek = iter.peek().and_then(|it| as_character(it)); let peek = iter.peek().and_then(|it| as_character(it));
let (result, eat) = character_to_string(next, peek); let (result, eat) = character_to_string(next, peek);
@ -304,13 +309,13 @@ impl<'a> Display for NamedReference<'a> {
fn character_to_string( fn character_to_string(
this: &Character, this: &Character,
peek: Option<&Character>, peek: Option<&Character>,
) -> (/* result */ String, /* true if peek should be consumed */ bool) { ) -> (/* result */ Cow<'static, str>, /* true if peek should be consumed */ bool) {
let cp = this.value; let cp = this.value;
if matches!(this.kind, CharacterKind::Symbol | CharacterKind::UnicodeEscape) { if matches!(this.kind, CharacterKind::Symbol | CharacterKind::UnicodeEscape) {
// Trail only // Trail only
if is_trail_surrogate(cp) { if is_trail_surrogate(cp) {
return (format!(r"\u{cp:X}"), false); return (Cow::Owned(format!(r"\u{cp:X}")), false);
} }
if is_lead_surrogate(cp) { if is_lead_surrogate(cp) {
@ -318,62 +323,48 @@ fn character_to_string(
// Lead+Trail // Lead+Trail
let cp = combine_surrogate_pair(cp, peek.value); let cp = combine_surrogate_pair(cp, peek.value);
let ch = char::from_u32(cp).expect("Invalid surrogate pair `Character`!"); let ch = char::from_u32(cp).expect("Invalid surrogate pair `Character`!");
return (format!("{ch}"), true); return (Cow::Owned(format!("{ch}")), true);
} }
// Lead only // Lead only
return (format!(r"\u{cp:X}"), false); return (Cow::Owned(format!(r"\u{cp:X}")), false);
} }
} }
let ch = char::from_u32(cp).expect("Invalid `Character`!"); let ch = char::from_u32(cp).expect("Invalid `Character`!");
let result = match this.kind { let result = match this.kind {
// Not a surrogate, like BMP, or all units in unicode mode // Not a surrogate, like BMP, or all units in unicode mode
CharacterKind::Symbol => format!("{ch}"), CharacterKind::Symbol => Cow::Owned(ch.to_string()),
CharacterKind::ControlLetter => match ch { CharacterKind::ControlLetter => match ch {
'\n' => r"\cJ".to_string(), '\n' => Cow::Borrowed(r"\cJ"),
'\r' => r"\cM".to_string(), '\r' => Cow::Borrowed(r"\cM"),
'\t' => r"\cI".to_string(), '\t' => Cow::Borrowed(r"\cI"),
_ => format!(r"\c{ch}"), _ => Cow::Owned(format!(r"\c{ch}")),
}, },
CharacterKind::Identifier => { CharacterKind::Identifier => Cow::Owned(format!(r"\{ch}")),
format!(r"\{ch}")
}
CharacterKind::SingleEscape => match ch { CharacterKind::SingleEscape => match ch {
'\n' => String::from(r"\n"), '\n' => Cow::Borrowed(r"\n"),
'\r' => String::from(r"\r"), '\r' => Cow::Borrowed(r"\r"),
'\t' => String::from(r"\t"), '\t' => Cow::Borrowed(r"\t"),
'\u{b}' => String::from(r"\v"), '\u{b}' => Cow::Borrowed(r"\v"),
'\u{c}' => String::from(r"\f"), '\u{c}' => Cow::Borrowed(r"\f"),
'\u{8}' => String::from(r"\b"), '\u{8}' => Cow::Borrowed(r"\b"),
'\u{2D}' => String::from(r"\-"), '\u{2D}' => Cow::Borrowed(r"\-"),
_ => format!(r"\{ch}"), _ => Cow::Owned(format!(r"\{ch}")),
}, },
CharacterKind::Null => String::from(r"\0"), CharacterKind::Null => Cow::Borrowed(r"\0"),
CharacterKind::UnicodeEscape => { CharacterKind::UnicodeEscape => {
let hex = &format!("{cp:04X}"); let hex = &format!("{cp:04X}");
if hex.len() <= 4 { if hex.len() <= 4 {
format!(r"\u{hex}") Cow::Owned(format!(r"\u{hex}"))
} else { } else {
format!(r"\u{{{hex}}}") Cow::Owned(format!(r"\u{{{hex}}}"))
} }
} }
CharacterKind::HexadecimalEscape => { CharacterKind::HexadecimalEscape => Cow::Owned(format!(r"\x{cp:02X}")),
let hex = &format!("{cp:02X}"); CharacterKind::Octal1 => Cow::Owned(format!(r"\{cp:o}")),
format!(r"\x{hex}") CharacterKind::Octal2 => Cow::Owned(format!(r"\{cp:02o}")),
} CharacterKind::Octal3 => Cow::Owned(format!(r"\{cp:03o}")),
CharacterKind::Octal1 => {
let octal = format!("{cp:o}");
format!(r"\{octal}")
}
CharacterKind::Octal2 => {
let octal = format!("{cp:02o}");
format!(r"\{octal}")
}
CharacterKind::Octal3 => {
let octal = format!("{cp:03o}");
format!(r"\{octal}")
}
}; };
(result, false) (result, false)
@ -390,12 +381,18 @@ where
write_join_with(f, sep, items, |iter| iter.next().map(|it| it.to_string())) write_join_with(f, sep, items, |iter| iter.next().map(|it| it.to_string()))
} }
fn write_join_with<S, I, E, F>(f: &mut fmt::Formatter<'_>, sep: S, items: I, next: F) -> fmt::Result fn write_join_with<S, I, E, F, D>(
f: &mut fmt::Formatter<'_>,
sep: S,
items: I,
next: F,
) -> fmt::Result
where where
S: AsRef<str>, S: AsRef<str>,
E: Display, E: Display,
I: IntoIterator<Item = E>, I: IntoIterator<Item = E>,
F: Fn(&mut Peekable<I::IntoIter>) -> Option<String>, F: Fn(&mut Peekable<I::IntoIter>) -> Option<D>,
D: fmt::Display,
{ {
let sep = sep.as_ref(); let sep = sep.as_ref();
let iter = &mut items.into_iter().peekable(); let iter = &mut items.into_iter().peekable();