mirror of
https://github.com/danbulant/oxc
synced 2026-05-19 12:19:15 +00:00
refactor(allocator): String type (#8568)
Wrap `bumpalo::collections::String` in a new type instead of exporting it directly. This opens the door to: 1. Replacing it with our own `String` type which wraps our `Vec` type, rather than having 2 different implementations of `Vec` (`String` is just a wrapper around `Vec`, but a *different* `Vec` implementation). 2. Adding additional methods to `String` (`String::from_utf8` added in this PR).
This commit is contained in:
parent
93df57f4c0
commit
ac05134a6d
7 changed files with 262 additions and 21 deletions
|
|
@ -49,7 +49,7 @@ impl<'a> FromIn<'a, String> for crate::String<'a> {
|
|||
impl<'a> FromIn<'a, String> for &'a str {
|
||||
#[inline(always)]
|
||||
fn from_in(value: String, allocator: &'a Allocator) -> Self {
|
||||
crate::String::from_str_in(value.as_str(), allocator).into_bump_str()
|
||||
allocator.alloc_str(value.as_str())
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -44,7 +44,6 @@ use std::{
|
|||
ops::{Deref, DerefMut},
|
||||
};
|
||||
|
||||
pub use bumpalo::collections::String;
|
||||
use bumpalo::Bump;
|
||||
|
||||
mod address;
|
||||
|
|
@ -53,6 +52,7 @@ mod boxed;
|
|||
mod clone_in;
|
||||
mod convert;
|
||||
pub mod hash_map;
|
||||
pub mod string;
|
||||
mod vec;
|
||||
|
||||
pub use address::{Address, GetAddress};
|
||||
|
|
@ -60,6 +60,7 @@ pub use boxed::Box;
|
|||
pub use clone_in::CloneIn;
|
||||
pub use convert::{FromIn, IntoIn};
|
||||
pub use hash_map::HashMap;
|
||||
pub use string::String;
|
||||
pub use vec::Vec;
|
||||
|
||||
/// A bump-allocated memory arena based on [bumpalo].
|
||||
|
|
|
|||
249
crates/oxc_allocator/src/string.rs
Normal file
249
crates/oxc_allocator/src/string.rs
Normal file
|
|
@ -0,0 +1,249 @@
|
|||
//! Arena String.
|
||||
//!
|
||||
//! See [`String`] for more details.
|
||||
|
||||
// All methods which just delegate to `bumpalo::collections::String` methods are marked `#[inline(always)]`
|
||||
#![expect(clippy::inline_always)]
|
||||
|
||||
use std::{
|
||||
fmt::{self, Debug, Display},
|
||||
hash::{Hash, Hasher},
|
||||
mem::ManuallyDrop,
|
||||
ops::{Deref, DerefMut},
|
||||
};
|
||||
|
||||
use bumpalo::collections::String as BumpaloString;
|
||||
use simdutf8::basic::from_utf8;
|
||||
pub use simdutf8::basic::Utf8Error;
|
||||
|
||||
use crate::{Allocator, Vec};
|
||||
|
||||
/// Arena String.
|
||||
///
|
||||
/// UTF-8 encoded, growable string. Identical to [`std::string::String`] except that it stores
|
||||
/// string contents in the arena allocator.
///
/// This is a newtype wrapper around `bumpalo::collections::String`. The wrapped string's
/// methods are reachable through the `Deref` / `DerefMut` implementations in this module.
|
||||
#[derive(PartialOrd, Eq, Ord)]
|
||||
pub struct String<'alloc>(BumpaloString<'alloc>);
|
||||
|
||||
impl<'alloc> String<'alloc> {
|
||||
/// Creates a new empty [`String`].
|
||||
///
|
||||
/// Given that the `String` is empty, this will not allocate any initial
|
||||
/// buffer. While that means that this initial operation is very
|
||||
/// inexpensive, it may cause excessive allocation later when you add
|
||||
/// data. If you have an idea of how much data the `String` will hold,
|
||||
/// consider the [`with_capacity_in`] method to prevent excessive
|
||||
/// re-allocation.
|
||||
///
|
||||
/// [`with_capacity_in`]: String::with_capacity_in
|
||||
#[inline(always)]
|
||||
pub fn new_in(allocator: &'alloc Allocator) -> String<'alloc> {
|
||||
Self(BumpaloString::new_in(allocator))
|
||||
}
|
||||
|
||||
/// Creates a new empty [`String`] with specified capacity.
|
||||
///
|
||||
/// `String`s have an internal buffer to hold their data. The capacity is
|
||||
/// the length of that buffer, and can be queried with the `capacity`
|
||||
/// method. This method creates an empty `String`, but one with an initial
|
||||
/// buffer that can hold `capacity` bytes. This is useful when you may be
|
||||
/// appending a bunch of data to the `String`, reducing the number of
|
||||
/// reallocations it needs to do.
|
||||
///
|
||||
/// If the given capacity is `0`, no allocation will occur, and this method
|
||||
/// is identical to the [`new_in`] method.
|
||||
///
|
||||
/// [`capacity`]: String::capacity
|
||||
/// [`new_in`]: String::new_in
|
||||
#[inline(always)]
|
||||
pub fn with_capacity_in(capacity: usize, allocator: &'alloc Allocator) -> String<'alloc> {
|
||||
Self(BumpaloString::with_capacity_in(capacity, allocator))
|
||||
}
|
||||
|
||||
/// Construct a new [`String`] from a string slice.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// use oxc_allocator::{Allocator, String};
|
||||
///
|
||||
/// let allocator = Allocator::default();
|
||||
///
|
||||
/// let s = String::from_str_in("hello", &allocator);
|
||||
/// assert_eq!(s, "hello");
|
||||
/// ```
|
||||
#[inline(always)]
|
||||
pub fn from_str_in(s: &str, allocator: &'alloc Allocator) -> String<'alloc> {
|
||||
Self(BumpaloString::from_str_in(s, allocator))
|
||||
}
|
||||
|
||||
/// Convert `Vec<u8>` into [`String`].
|
||||
///
|
||||
/// # Errors
|
||||
/// Returns [`Err`] if the `Vec` does not comprise a valid UTF-8 string.
|
||||
pub fn from_utf8(bytes: Vec<'alloc, u8>) -> Result<String<'alloc>, Utf8Error> {
|
||||
// Check vec comprises a valid UTF-8 string.
|
||||
from_utf8(&bytes)?;
|
||||
// SAFETY: We just checked it's a valid UTF-8 string
|
||||
let s = unsafe { Self::from_utf8_unchecked(bytes) };
|
||||
Ok(s)
|
||||
}
|
||||
|
||||
/// Convert `Vec<u8>` into [`String`], without checking bytes comprise a valid UTF-8 string.
|
||||
///
|
||||
/// Does not copy the contents of the `Vec`, converts in place. This is a zero-cost operation.
|
||||
///
|
||||
/// # SAFETY
|
||||
/// Caller must ensure this `Vec<u8>` comprises a valid UTF-8 string.
|
||||
//
|
||||
// `#[inline(always)]` because this is a no-op at runtime
|
||||
#[expect(clippy::missing_safety_doc, clippy::unnecessary_safety_comment)]
|
||||
#[inline(always)]
|
||||
pub unsafe fn from_utf8_unchecked(bytes: Vec<'alloc, u8>) -> String<'alloc> {
|
||||
// Cannot use `bumpalo::String::from_utf8_unchecked` because it takes a `bumpalo::collections::Vec`,
|
||||
// and our inner `Vec` type is `allocator_api2::vec::Vec`.
|
||||
// SAFETY: Conversion is safe because both types store data in arena in same way.
|
||||
// Lifetime of returned `String` is same as lifetime of original `Vec<u8>`.
|
||||
let inner = ManuallyDrop::into_inner(bytes.0);
|
||||
let (ptr, len, capacity, bump) = inner.into_raw_parts_with_alloc();
|
||||
Self(BumpaloString::from_raw_parts_in(ptr, len, capacity, bump))
|
||||
}
|
||||
|
||||
/// Creates a new [`String`] from a length, capacity, and pointer.
|
||||
///
|
||||
/// # SAFETY
|
||||
///
|
||||
/// This is highly unsafe, due to the number of invariants that aren't checked:
|
||||
///
|
||||
/// * The memory at `ptr` needs to have been previously allocated by the same [`Allocator`].
|
||||
/// * `length` needs to be less than or equal to `capacity`.
|
||||
/// * `capacity` needs to be the correct value.
|
||||
///
|
||||
/// Violating these may cause problems like corrupting the allocator's internal data structures.
|
||||
///
|
||||
/// The ownership of `ptr` is effectively transferred to the `String` which may then deallocate,
|
||||
/// reallocate or change the contents of memory pointed to by the pointer at will. Ensure that
|
||||
/// nothing else uses the pointer after calling this function.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// Basic usage:
|
||||
///
|
||||
/// ```
|
||||
/// use std::mem;
|
||||
/// use oxc_allocator::{Allocator, String};
|
||||
///
|
||||
/// let allocator = Allocator::default();
|
||||
///
|
||||
/// unsafe {
|
||||
/// let mut s = String::from_str_in("hello", &allocator);
|
||||
/// let ptr = s.as_mut_ptr();
|
||||
/// let len = s.len();
|
||||
/// let capacity = s.capacity();
|
||||
///
|
||||
/// mem::forget(s);
|
||||
///
|
||||
/// let s = String::from_raw_parts_in(ptr, len, capacity, &allocator);
|
||||
///
|
||||
/// assert_eq!(s, "hello");
|
||||
/// }
|
||||
/// ```
|
||||
#[expect(clippy::missing_safety_doc, clippy::unnecessary_safety_comment)]
|
||||
#[inline(always)]
|
||||
pub unsafe fn from_raw_parts_in(
|
||||
buf: *mut u8,
|
||||
length: usize,
|
||||
capacity: usize,
|
||||
allocator: &'alloc Allocator,
|
||||
) -> String<'alloc> {
|
||||
// SAFETY: Safety conditions of this method are the same as `BumpaloString`'s method
|
||||
Self(BumpaloString::from_raw_parts_in(buf, length, capacity, allocator))
|
||||
}
|
||||
|
||||
/// Convert this `String<'alloc>` into an `&'alloc str`. This is analogous to
|
||||
/// [`std::string::String::into_boxed_str`].
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```
|
||||
/// use oxc_allocator::{Allocator, String};
|
||||
///
|
||||
/// let allocator = Allocator::default();
|
||||
///
|
||||
/// let s = String::from_str_in("foo", &allocator);
|
||||
/// assert_eq!(s.into_bump_str(), "foo");
|
||||
/// ```
|
||||
#[inline(always)]
|
||||
pub fn into_bump_str(self) -> &'alloc str {
|
||||
self.0.into_bump_str()
|
||||
}
|
||||
}
|
||||
|
||||
// Provide access to all `bumpalo::String`'s methods via deref
|
||||
impl<'alloc> Deref for String<'alloc> {
|
||||
type Target = BumpaloString<'alloc>;
|
||||
|
||||
#[inline]
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl<'alloc> DerefMut for String<'alloc> {
|
||||
#[inline]
|
||||
fn deref_mut(&mut self) -> &mut BumpaloString<'alloc> {
|
||||
&mut self.0
|
||||
}
|
||||
}
|
||||
|
||||
// Two arena strings are equal when their string contents are equal
impl PartialEq for String<'_> {
    #[inline]
    fn eq(&self, other: &String<'_>) -> bool {
        let (lhs, rhs): (&str, &str) = (&self[..], &other[..]);
        lhs == rhs
    }
}
|
||||
|
||||
// `impl_eq!` macro copied from `bumpalo`.
// Generates `PartialEq` between two string-like types in both directions,
// comparing the full-range slices (`[..]`) of the two operands.
macro_rules! impl_eq {
    ($left:ty, $right:ty) => {
        impl<'a, 'alloc> PartialEq<$right> for $left {
            #[inline]
            fn eq(&self, other: &$right) -> bool {
                self[..] == other[..]
            }
        }

        impl<'a, 'alloc> PartialEq<$left> for $right {
            #[inline]
            fn eq(&self, other: &$left) -> bool {
                self[..] == other[..]
            }
        }
    };
}
|
||||
|
||||
// Equality between the arena `String` and other string types; each invocation
// generates the comparison in both directions.
impl_eq! { String<'alloc>, str }
|
||||
impl_eq! { String<'alloc>, &'a str }
|
||||
impl_eq! { std::borrow::Cow<'a, str>, String<'alloc> }
|
||||
impl_eq! { std::string::String, String<'alloc> }
|
||||
|
||||
impl Display for String<'_> {
|
||||
#[inline]
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
Display::fmt(self.as_str(), f)
|
||||
}
|
||||
}
|
||||
|
||||
impl Debug for String<'_> {
|
||||
#[inline]
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
Debug::fmt(self.as_str(), f)
|
||||
}
|
||||
}
|
||||
|
||||
impl Hash for String<'_> {
|
||||
#[inline]
|
||||
fn hash<H: Hasher>(&self, hasher: &mut H) {
|
||||
self.as_str().hash(hasher);
|
||||
}
|
||||
}
|
||||
|
|
@ -19,7 +19,7 @@ use allocator_api2::vec::Vec as InnerVec;
|
|||
use bumpalo::Bump;
|
||||
#[cfg(any(feature = "serialize", test))]
|
||||
use serde::{ser::SerializeSeq, Serialize, Serializer};
|
||||
use simdutf8::basic::{from_utf8, Utf8Error};
|
||||
use simdutf8::basic::Utf8Error;
|
||||
|
||||
use crate::{Allocator, Box, String};
|
||||
|
||||
|
|
@ -32,7 +32,7 @@ use crate::{Allocator, Box, String};
|
|||
/// Note: This is not a soundness issue, as Rust does not support relying on `drop`
|
||||
/// being called to guarantee soundness.
|
||||
#[derive(PartialEq, Eq)]
|
||||
pub struct Vec<'alloc, T>(ManuallyDrop<InnerVec<T, &'alloc Bump>>);
|
||||
pub struct Vec<'alloc, T>(pub(crate) ManuallyDrop<InnerVec<T, &'alloc Bump>>);
|
||||
|
||||
/// SAFETY: Not actually safe, but for enabling `Send` for downstream crates.
|
||||
unsafe impl<T> Send for Vec<'_, T> {}
|
||||
|
|
@ -190,16 +190,12 @@ impl<'alloc, T> Vec<'alloc, T> {
|
|||
}
|
||||
|
||||
impl<'alloc> Vec<'alloc, u8> {
|
||||
/// Convert `Vec<u8>` into `String`.
|
||||
/// Convert `Vec<u8>` into [`String`].
|
||||
///
|
||||
/// # Errors
|
||||
/// Returns [`Err`] if the `Vec` does not comprise a valid UTF-8 string.
|
||||
pub fn into_string(self) -> Result<String<'alloc>, Utf8Error> {
|
||||
// Check vec comprises a valid UTF-8 string.
|
||||
from_utf8(&self.0)?;
|
||||
// SAFETY: We just checked it's a valid UTF-8 string
|
||||
let s = unsafe { self.into_string_unchecked() };
|
||||
Ok(s)
|
||||
String::from_utf8(self)
|
||||
}
|
||||
|
||||
/// Convert `Vec<u8>` into [`String`], without checking bytes comprise a valid UTF-8 string.
|
||||
|
|
@ -211,13 +207,8 @@ impl<'alloc> Vec<'alloc, u8> {
|
|||
#[expect(clippy::missing_safety_doc, clippy::unnecessary_safety_comment)]
|
||||
#[inline(always)] // `#[inline(always)]` because this is a no-op at runtime
|
||||
pub unsafe fn into_string_unchecked(self) -> String<'alloc> {
|
||||
// Cannot use `bumpalo::String::from_utf8_unchecked` because it takes a `bumpalo::collections::Vec`,
|
||||
// and our inner `Vec` type is `allocator_api2::vec::Vec`.
|
||||
// SAFETY: Conversion is safe because both types store data in arena in same way.
|
||||
// Lifetime of returned `String` is same as lifetime of original `Vec<u8>`.
|
||||
let inner = ManuallyDrop::into_inner(self.0);
|
||||
let (ptr, len, cap, bump) = inner.into_raw_parts_with_alloc();
|
||||
String::from_raw_parts_in(ptr, len, cap, bump)
|
||||
// SAFETY: Caller guarantees vec comprises a valid UTF-8 string.
|
||||
String::from_utf8_unchecked(self)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@
|
|||
|
||||
use std::{borrow::Cow, mem};
|
||||
|
||||
use oxc_allocator::{Allocator, Box, FromIn, String, Vec};
|
||||
use oxc_allocator::{Allocator, Box, FromIn, Vec};
|
||||
use oxc_span::{Atom, Span, SPAN};
|
||||
use oxc_syntax::{number::NumberBase, operator::UnaryOperator, scope::ScopeId};
|
||||
|
||||
|
|
@ -78,7 +78,7 @@ impl<'a> AstBuilder<'a> {
|
|||
/// in the heap.
|
||||
#[inline]
|
||||
pub fn str(self, value: &str) -> &'a str {
|
||||
String::from_str_in(value, self.allocator).into_bump_str()
|
||||
self.allocator.alloc_str(value)
|
||||
}
|
||||
|
||||
/// Allocate an [`Atom`] from a string slice.
|
||||
|
|
|
|||
|
|
@ -47,7 +47,7 @@ macro_rules! text {
|
|||
#[macro_export]
|
||||
macro_rules! dynamic_text {
|
||||
($p:ident, $str:expr) => {{
|
||||
let s = oxc_allocator::String::from_str_in($str, $p.allocator).into_bump_str();
|
||||
let s = $p.allocator.alloc_str($str);
|
||||
$crate::ir::Doc::Str(s)
|
||||
}};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -79,7 +79,7 @@ impl<'alloc> FromIn<'alloc, &Atom<'alloc>> for Atom<'alloc> {
|
|||
|
||||
impl<'alloc> FromIn<'alloc, &str> for Atom<'alloc> {
|
||||
fn from_in(s: &str, allocator: &'alloc Allocator) -> Self {
|
||||
Self::from(oxc_allocator::String::from_str_in(s, allocator))
|
||||
Self::from(&*allocator.alloc_str(s))
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue