perf(traverse): faster string operations generating UIDs (#5626)

`get_unique_name` perform string manipulation faster, avoiding bounds checks, and use `itoa` crate for faster conversion of integers to strings for postfixes above 100. No apparent difference on benchmarks, but I imagine that's only because it's outweighed by cost of hashing strings. If measured alone, I believe it would be a perf improvement.
2026-05-19 12:19:15 +00:00 · 2024-09-10 01:12:21 +00:00 · 2024-09-10 01:12:21 +00:00 · e8013d259e
commit e8013d259e
parent 4996874dda
4 changed files with 208 additions and 41 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -1980,6 +1980,7 @@ name = "oxc_traverse"
 version = "0.27.0"
 dependencies = [
 "compact_str",
+ "itoa",
 "memoffset",
 "oxc_allocator",
 "oxc_ast",
--- a/Cargo.toml
+++ b/Cargo.toml
@ -136,6 +136,7 @@ ignore = "0.4.22"
 indexmap = "2.3.0"
 insta = "1.39.0"
 itertools = "0.13.0"
+itoa = "1.0.11"
 jemallocator = "0.5.4"
 json-strip-comments = "1.0.4"
 language-tags = "0.3.2"
--- a/crates/oxc_traverse/Cargo.toml
+++ b/crates/oxc_traverse/Cargo.toml
@ -30,5 +30,6 @@ oxc_span = { workspace = true }
 oxc_syntax = { workspace = true }

 compact_str = { workspace = true }
+itoa = { workspace = true }
 memoffset = { workspace = true }
 rustc-hash = { workspace = true }
--- a/crates/oxc_traverse/src/context/scoping.rs
+++ b/crates/oxc_traverse/src/context/scoping.rs
@ -1,6 +1,7 @@
 use std::{cell::Cell, str};

-use compact_str::{format_compact, CompactString};
+use compact_str::CompactString;
+use itoa::Buffer as ItoaBuffer;
 use rustc_hash::FxHashSet;

 #[allow(clippy::wildcard_imports)]
@ -513,6 +514,14 @@ fn get_unique_name(base: &str, uid_names: &FxHashSet<CompactStr>) -> CompactStr
    CompactStr::from(get_unique_name_impl(base, uid_names))
 }

+// TODO: We could make this function more performant, especially when it checks a lot of names
+// before it reaches one that is unused.
+// This function repeatedly creates strings which have only differ from each other by digits added on end,
+// and then hashes each of those strings to test them against the hash set `uid_names`.
+// Hashing strings is fairly expensive. As here only the end of the string changes on each iteration,
+// we could calculate an "unfinished" hash not including the last block, and then just add the final
+// block to "finish" the hash on each iteration. With `FxHash` this would be straight line code and only
+// a few operations.
 fn get_unique_name_impl(base: &str, uid_names: &FxHashSet<CompactStr>) -> CompactString {
    // Create `CompactString` prepending name with `_`, and with 1 byte excess capacity.
    // The extra byte is to avoid reallocation if need to add a digit on the end later,
@ -522,63 +531,218 @@ fn get_unique_name_impl(base: &str, uid_names: &FxHashSet<CompactStr>) -> Compac
    name.push('_');
    name.push_str(base);

-    let name_is_unique = |name: &str| !uid_names.contains(name);
-
-    // Try the name without a numerical postfix (i.e. plain `_temp`)
-    if name_is_unique(&name) {
-        return name;
-    }
-
    // It's fairly common that UIDs may need a numerical postfix, so we try to keep string
-    // operations to a minimum for postfixes up to 99 - using `replace_range` on a single
-    // `CompactStr`, rather than generating a new string on each attempt.
+    // operations to a minimum for postfixes up to 99 - reusing a single `CompactString`,
+    // rather than generating a new string on each attempt.
+    // For speed we manipulate the string as bytes.
    // Postfixes greater than 99 should be very uncommon, so don't bother optimizing.
+    //
+    // SAFETY: Only modifications to string are replacing last byte/last 2 bytes with ASCII digits.
+    // These bytes are already ASCII chars, so cannot produce an invalid UTF-8 string.
+    // Writes are always in bounds (`bytes` is redefined after string grows due to `push`).
+    unsafe {
+        let name_is_unique = |bytes: &[u8]| {
+            let name = str::from_utf8_unchecked(bytes);
+            !uid_names.contains(name)
+        };

-    // Try single-digit postfixes (i.e. `_temp2`, `_temp3` ... `_temp9`)
-    name.push('2');
-    if name_is_unique(&name) {
-        return name;
-    }
-    for c in b'3'..=b'9' {
-        name.replace_range(name.len() - 1.., str::from_utf8(&[c]).unwrap());
-        if name_is_unique(&name) {
+        // Try the name without a numerical postfix (i.e. plain `_temp`)
+        let bytes = name.as_bytes_mut();
+        if name_is_unique(bytes) {
            return name;
        }
-    }

-    // Try double-digit postfixes (i.e. `_temp10` ... `_temp99`)
-    name.replace_range(name.len() - 1.., "1");
-    name.push('0');
-    let mut c1 = b'1';
-    loop {
-        if name_is_unique(&name) {
+        // Try single-digit postfixes (i.e. `_temp2`, `_temp3` ... `_temp9`)
+        name.push('2');
+        let bytes = name.as_bytes_mut();
+        if name_is_unique(bytes) {
            return name;
        }
-        for c2 in b'1'..=b'9' {
-            name.replace_range(name.len() - 1.., str::from_utf8(&[c2]).unwrap());
-            if name_is_unique(&name) {
+
+        let last_index = bytes.len() - 1;
+        for c in b'3'..=b'9' {
+            *bytes.get_unchecked_mut(last_index) = c;
+            if name_is_unique(bytes) {
                return name;
            }
        }
-        if c1 == b'9' {
-            break;
+
+        // Try double-digit postfixes (i.e. `_temp10` ... `_temp99`)
+        *bytes.get_unchecked_mut(last_index) = b'1';
+        name.push('0');
+        let bytes = name.as_bytes_mut();
+        let last_index = last_index + 1;
+
+        let mut c1 = b'1';
+        loop {
+            if name_is_unique(bytes) {
+                return name;
+            }
+            for c2 in b'1'..=b'9' {
+                *bytes.get_unchecked_mut(last_index) = c2;
+                if name_is_unique(bytes) {
+                    return name;
+                }
+            }
+            if c1 == b'9' {
+                break;
+            }
+            c1 += 1;
+
+            let last_two: &mut [u8; 2] =
+                bytes.get_unchecked_mut(last_index - 1..=last_index).try_into().unwrap();
+            *last_two = [c1, b'0'];
        }
-        c1 += 1;
-        name.replace_range(name.len() - 2.., str::from_utf8(&[c1, b'0']).unwrap());
    }

    // Try longer postfixes (`_temp100` upwards)
-    let name_base = {
-        name.pop();
-        name.pop();
-        &*name
-    };
-    for n in 100..=usize::MAX {
-        let name = format_compact!("{}{}", name_base, n);
-        if name_is_unique(&name) {
+
+    // Reserve space for 1 more byte for the additional 3rd digit.
+    // Do this here so that `name.push_str(digits)` will never need to grow the string until it reaches
+    // `n == 1000`, which makes the branch on "is there sufficient capacity to push?" in the loop below
+    // completely predictable for `n < 1000`.
+    name.reserve(1);
+
+    // At this point, `name` has had 2 digits added on end. `base_len` is length without those 2 digits.
+    let base_len = name.len() - 2;
+
+    let mut buffer = ItoaBuffer::new();
+    for n in 100..=u32::MAX {
+        let digits = buffer.format(n);
+        // SAFETY: `base_len` is always shorter than current `name.len()`, on a UTF-8 char boundary,
+        // and `name` contains at least `base_len` initialized bytes
+        unsafe { name.set_len(base_len) };
+        name.push_str(digits);
+        if !uid_names.contains(name.as_str()) {
            return name;
        }
    }

-    panic!("Cannot generate UID");
+    // Limit for size of source text is `u32::MAX` bytes, so there cannot be `u32::MAX`
+    // identifier names in the AST. So loop above cannot fail to find an unused name.
+    unreachable!();
+}
+
+#[cfg(test)]
+#[test]
+fn test_get_unique_name() {
+    let cases: &[(&[&str], &str, &str)] = &[
+        (&[], "foo", "_foo"),
+        (&["_foo"], "foo", "_foo2"),
+        (&["_foo0", "_foo1"], "foo", "_foo"),
+        (&["_foo2", "_foo3", "_foo4"], "foo", "_foo"),
+        (&["_foo", "_foo2"], "foo", "_foo3"),
+        (&["_foo", "_foo2", "_foo4"], "foo", "_foo3"),
+        (&["_foo", "_foo2", "_foo3", "_foo4", "_foo5", "_foo6", "_foo7", "_foo8"], "foo", "_foo9"),
+        (
+            &["_foo", "_foo2", "_foo3", "_foo4", "_foo5", "_foo6", "_foo7", "_foo8", "_foo9"],
+            "foo",
+            "_foo10",
+        ),
+        (
+            &[
+                "_foo", "_foo2", "_foo3", "_foo4", "_foo5", "_foo6", "_foo7", "_foo8", "_foo9",
+                "_foo10",
+            ],
+            "foo",
+            "_foo11",
+        ),
+        (
+            &[
+                "_foo", "_foo2", "_foo3", "_foo4", "_foo5", "_foo6", "_foo7", "_foo8", "_foo9",
+                "_foo10", "_foo11",
+            ],
+            "foo",
+            "_foo12",
+        ),
+        (
+            &[
+                "_foo", "_foo2", "_foo3", "_foo4", "_foo5", "_foo6", "_foo7", "_foo8", "_foo9",
+                "_foo10", "_foo11", "_foo12", "_foo13", "_foo14", "_foo15", "_foo16", "_foo17",
+                "_foo18",
+            ],
+            "foo",
+            "_foo19",
+        ),
+        (
+            &[
+                "_foo", "_foo2", "_foo3", "_foo4", "_foo5", "_foo6", "_foo7", "_foo8", "_foo9",
+                "_foo10", "_foo11", "_foo12", "_foo13", "_foo14", "_foo15", "_foo16", "_foo17",
+                "_foo18", "_foo19",
+            ],
+            "foo",
+            "_foo20",
+        ),
+        (
+            &[
+                "_foo", "_foo2", "_foo3", "_foo4", "_foo5", "_foo6", "_foo7", "_foo8", "_foo9",
+                "_foo10", "_foo11", "_foo12", "_foo13", "_foo14", "_foo15", "_foo16", "_foo17",
+                "_foo18", "_foo19", "_foo20",
+            ],
+            "foo",
+            "_foo21",
+        ),
+        (
+            &[
+                "_foo", "_foo2", "_foo3", "_foo4", "_foo5", "_foo6", "_foo7", "_foo8", "_foo9",
+                "_foo10", "_foo11", "_foo12", "_foo13", "_foo14", "_foo15", "_foo16", "_foo17",
+                "_foo18", "_foo19", "_foo20", "_foo21", "_foo22", "_foo23", "_foo24", "_foo25",
+                "_foo26", "_foo27", "_foo28", "_foo29", "_foo30", "_foo31", "_foo32", "_foo33",
+                "_foo34", "_foo35", "_foo36", "_foo37", "_foo38", "_foo39", "_foo40", "_foo41",
+                "_foo42", "_foo43", "_foo44", "_foo45", "_foo46", "_foo47", "_foo48", "_foo49",
+                "_foo50", "_foo51", "_foo52", "_foo53", "_foo54", "_foo55", "_foo56", "_foo57",
+                "_foo58", "_foo59", "_foo60", "_foo61", "_foo62", "_foo63", "_foo64", "_foo65",
+                "_foo66", "_foo67", "_foo68", "_foo69", "_foo70", "_foo71", "_foo72", "_foo73",
+                "_foo74", "_foo75", "_foo76", "_foo77", "_foo78", "_foo79", "_foo80", "_foo81",
+                "_foo82", "_foo83", "_foo84", "_foo85", "_foo86", "_foo87", "_foo88", "_foo89",
+                "_foo90", "_foo91", "_foo92", "_foo93", "_foo94", "_foo95", "_foo96", "_foo97",
+                "_foo98",
+            ],
+            "foo",
+            "_foo99",
+        ),
+        (
+            &[
+                "_foo", "_foo2", "_foo3", "_foo4", "_foo5", "_foo6", "_foo7", "_foo8", "_foo9",
+                "_foo10", "_foo11", "_foo12", "_foo13", "_foo14", "_foo15", "_foo16", "_foo17",
+                "_foo18", "_foo19", "_foo20", "_foo21", "_foo22", "_foo23", "_foo24", "_foo25",
+                "_foo26", "_foo27", "_foo28", "_foo29", "_foo30", "_foo31", "_foo32", "_foo33",
+                "_foo34", "_foo35", "_foo36", "_foo37", "_foo38", "_foo39", "_foo40", "_foo41",
+                "_foo42", "_foo43", "_foo44", "_foo45", "_foo46", "_foo47", "_foo48", "_foo49",
+                "_foo50", "_foo51", "_foo52", "_foo53", "_foo54", "_foo55", "_foo56", "_foo57",
+                "_foo58", "_foo59", "_foo60", "_foo61", "_foo62", "_foo63", "_foo64", "_foo65",
+                "_foo66", "_foo67", "_foo68", "_foo69", "_foo70", "_foo71", "_foo72", "_foo73",
+                "_foo74", "_foo75", "_foo76", "_foo77", "_foo78", "_foo79", "_foo80", "_foo81",
+                "_foo82", "_foo83", "_foo84", "_foo85", "_foo86", "_foo87", "_foo88", "_foo89",
+                "_foo90", "_foo91", "_foo92", "_foo93", "_foo94", "_foo95", "_foo96", "_foo97",
+                "_foo98", "_foo99",
+            ],
+            "foo",
+            "_foo100",
+        ),
+        (
+            &[
+                "_foo", "_foo2", "_foo3", "_foo4", "_foo5", "_foo6", "_foo7", "_foo8", "_foo9",
+                "_foo10", "_foo11", "_foo12", "_foo13", "_foo14", "_foo15", "_foo16", "_foo17",
+                "_foo18", "_foo19", "_foo20", "_foo21", "_foo22", "_foo23", "_foo24", "_foo25",
+                "_foo26", "_foo27", "_foo28", "_foo29", "_foo30", "_foo31", "_foo32", "_foo33",
+                "_foo34", "_foo35", "_foo36", "_foo37", "_foo38", "_foo39", "_foo40", "_foo41",
+                "_foo42", "_foo43", "_foo44", "_foo45", "_foo46", "_foo47", "_foo48", "_foo49",
+                "_foo50", "_foo51", "_foo52", "_foo53", "_foo54", "_foo55", "_foo56", "_foo57",
+                "_foo58", "_foo59", "_foo60", "_foo61", "_foo62", "_foo63", "_foo64", "_foo65",
+                "_foo66", "_foo67", "_foo68", "_foo69", "_foo70", "_foo71", "_foo72", "_foo73",
+                "_foo74", "_foo75", "_foo76", "_foo77", "_foo78", "_foo79", "_foo80", "_foo81",
+                "_foo82", "_foo83", "_foo84", "_foo85", "_foo86", "_foo87", "_foo88", "_foo89",
+                "_foo90", "_foo91", "_foo92", "_foo93", "_foo94", "_foo95", "_foo96", "_foo97",
+                "_foo98", "_foo99", "_foo100",
+            ],
+            "foo",
+            "_foo101",
+        ),
+    ];
+
+    for (used, name, expected) in cases {
+        let used = used.iter().map(|name| CompactStr::from(*name)).collect();
+        assert_eq!(get_unique_name(name, &used), expected);
+    }
 }