mirror of
https://github.com/danbulant/oxc
synced 2026-05-19 04:08:41 +00:00
feat(allocator): Vec<u8>::into_string (#8017)
Bumpalo has a method `String::from_utf8_unchecked` to convert a `Vec<u8>` to a `String`. But we can't use it, because we use Bumpalo's `String` as our arena string type but allocator_api2's `Vec` as our arena vec. Provide the same functionality in a way that works around this incompatibility. Also use the faster `simdutf8` for checking that the `Vec` contains a valid UTF-8 string.
This commit is contained in:
parent
0deb9e6c2e
commit
75b775c055
3 changed files with 36 additions and 1 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
|
@ -1452,6 +1452,7 @@ dependencies = [
|
|||
"bumpalo",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"simdutf8",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@ doctest = false
|
|||
[dependencies]
|
||||
allocator-api2 = { workspace = true }
|
||||
bumpalo = { workspace = true, features = ["allocator-api2", "collections"] }
|
||||
simdutf8 = { workspace = true }
|
||||
|
||||
serde = { workspace = true, optional = true }
|
||||
|
||||
|
|
|
|||
|
|
@ -15,8 +15,9 @@ use allocator_api2::vec;
|
|||
use bumpalo::Bump;
|
||||
#[cfg(any(feature = "serialize", test))]
|
||||
use serde::{ser::SerializeSeq, Serialize, Serializer};
|
||||
use simdutf8::basic::{from_utf8, Utf8Error};
|
||||
|
||||
use crate::{Allocator, Box};
|
||||
use crate::{Allocator, Box, String};
|
||||
|
||||
/// A `Vec` without [`Drop`], which stores its data in the arena allocator.
|
||||
///
|
||||
|
|
@ -180,6 +181,38 @@ impl<'alloc, T> Vec<'alloc, T> {
|
|||
}
|
||||
}
|
||||
|
||||
impl<'alloc> Vec<'alloc, u8> {
|
||||
/// Convert `Vec<u8>` into `String`.
|
||||
///
|
||||
/// # Errors
|
||||
/// Returns [`Err`] if the `Vec` does not comprise a valid UTF-8 string.
|
||||
pub fn into_string(self) -> Result<String<'alloc>, Utf8Error> {
|
||||
// Check vec comprises a valid UTF-8 string.
|
||||
from_utf8(&self.0)?;
|
||||
// SAFETY: We just checked it's a valid UTF-8 string
|
||||
let s = unsafe { self.into_string_unchecked() };
|
||||
Ok(s)
|
||||
}
|
||||
|
||||
/// Convert `Vec<u8>` into [`String`], without checking bytes comprise a valid UTF-8 string.
|
||||
///
|
||||
/// Does not copy the contents of the `Vec`, converts in place. This is a zero-cost operation.
|
||||
///
|
||||
/// # SAFETY
|
||||
/// Caller must ensure this `Vec<u8>` comprises a valid UTF-8 string.
|
||||
#[expect(clippy::missing_safety_doc, clippy::unnecessary_safety_comment)]
|
||||
#[inline] // `#[inline]` because this is a no-op at runtime
|
||||
pub unsafe fn into_string_unchecked(self) -> String<'alloc> {
|
||||
// Cannot use `bumpalo::String::from_utf8_unchecked` because it takes a `bumpalo::collections::Vec`,
|
||||
// and our inner `Vec` type is `allocator_api2::vec::Vec`.
|
||||
// SAFETY: Conversion is safe because both types store data in arena in same way.
|
||||
// Lifetime of returned `String` is same as lifetime of original `Vec<u8>`.
|
||||
let inner = ManuallyDrop::into_inner(self.0);
|
||||
let (ptr, len, cap, bump) = inner.into_raw_parts_with_alloc();
|
||||
String::from_raw_parts_in(ptr, len, cap, bump)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'alloc, T> ops::Deref for Vec<'alloc, T> {
|
||||
type Target = vec::Vec<T, &'alloc Bump>;
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue