feat(span): fix memory leak by implementing inlineable string for oxc_allocator (#2294)

closes #1803

This string is currently unsafe, but I want to get miri working before
introducing more changes.

I want to make a progress from memory leak to unsafe then to safety.
It's harder to do the steps in one go.
This commit is contained in:
Boshen 2024-02-04 19:28:23 +08:00 committed by GitHub
parent 1822cfe18d
commit 6002560fa1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 139 additions and 81 deletions

View file

@ -2,6 +2,17 @@ name: Miri
on:
workflow_dispatch:
pull_request:
types: [opened, synchronize]
paths:
- 'crates/oxc_parser/**'
- '.github/workflows/miri.yml'
push:
branches:
- main
paths:
- 'crates/oxc_parser/**'
- '.github/workflows/miri.yml'
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}
@ -15,6 +26,12 @@ jobs:
- name: Checkout
uses: actions/checkout@v4
- name: Install Rust
uses: ./.github/actions/rustup
with:
shared-key: miri
save-cache: ${{ github.ref_name == 'main' }}
- name: Install Miri
run: |
rustup toolchain install nightly --component miri

40
Cargo.lock generated
View file

@ -207,15 +207,6 @@ version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
[[package]]
name = "castaway"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a17ed5635fc8536268e5d4de1e22e81ac34419e5f052d4d51f4e01dcc263fcc"
dependencies = [
"rustversion",
]
[[package]]
name = "cc"
version = "1.0.83"
@ -315,20 +306,6 @@ dependencies = [
"windows-sys 0.48.0",
]
[[package]]
name = "compact_str"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f86b9c4c00838774a6d902ef931eff7470720c51d90c2e32cfe15dc304737b3f"
dependencies = [
"castaway",
"cfg-if",
"itoa",
"ryu",
"serde",
"static_assertions",
]
[[package]]
name = "console"
version = "0.15.8"
@ -862,6 +839,15 @@ dependencies = [
"serde",
]
[[package]]
name = "inlinable_string"
version = "0.1.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8fae54786f62fb2918dcfae3d568594e50eb9b5c25bf04371af6fe7516452fb"
dependencies = [
"serde",
]
[[package]]
name = "insta"
version = "1.34.0"
@ -1724,7 +1710,7 @@ dependencies = [
name = "oxc_span"
version = "0.6.0"
dependencies = [
"compact_str",
"inlinable_string",
"miette",
"serde",
"tsify",
@ -2256,12 +2242,6 @@ dependencies = [
"untrusted",
]
[[package]]
name = "rustversion"
version = "1.0.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4"
[[package]]
name = "ryu"
version = "1.0.16"

View file

@ -88,7 +88,6 @@ assert-unchecked = { version = "0.1.2" }
bpaf = { version = "0.9.9" }
bitflags = { version = "2.4.2" }
bumpalo = { version = "3.14.0" }
compact_str = { version = "0.7.1" }
convert_case = { version = "0.6.0" }
criterion = { version = "0.5.1", default-features = false }
crossbeam-channel = { version = "0.5.11" }

View file

@ -283,11 +283,10 @@ impl ModuleRecordBuilder {
};
let export_entry = ExportEntry {
export_name: ExportExportName::Default(exported_name.span()),
local_name: id
.as_ref()
.map_or(ExportLocalName::Default(exported_name.span()), |ident| {
ExportLocalName::Name(NameSpan::new(ident.name.clone(), ident.span))
}),
local_name: id.as_ref().map_or_else(
|| ExportLocalName::Default(exported_name.span()),
|ident| ExportLocalName::Name(NameSpan::new(ident.name.clone(), ident.span)),
),
span: decl.declaration.span(),
..ExportEntry::default()
};

View file

@ -18,16 +18,16 @@ workspace = true
[lib]
doctest = false
[features]
default = []
serde = ["dep:serde", "compact_str/serde"]
wasm = ["dep:tsify", "dep:wasm-bindgen"]
[dependencies]
miette = { workspace = true }
compact_str = { workspace = true }
miette = { workspace = true }
inlinable_string = "0.1.15"
tsify = { workspace = true, optional = true }
wasm-bindgen = { workspace = true, optional = true }
serde = { workspace = true, features = ["derive"], optional = true }
[features]
default = []
serde = ["dep:serde", "inlinable_string/serde"]
wasm = ["dep:tsify", "dep:wasm-bindgen"]

View file

@ -1,17 +1,15 @@
use std::{
borrow::{Borrow, Cow},
fmt,
fmt, hash,
ops::Deref,
};
use compact_str::CompactString;
#[cfg(feature = "serde")]
use serde::Serialize;
use serde::{Serialize, Serializer};
/// Newtype for [`CompactString`]
#[derive(Clone, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize))]
pub struct Atom(CompactString);
use inlinable_string::inline_string::{InlineString, INLINE_STRING_CAPACITY};
const BASE54_CHARS: &[u8; 64] = b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ$_0123456789";
#[cfg_attr(
all(feature = "serde", feature = "wasm"),
@ -22,16 +20,57 @@ const TS_APPEND_CONTENT: &'static str = r#"
export type Atom = string;
"#;
const BASE54_CHARS: &[u8; 64] = b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ$_0123456789";
/// An inlinable string for oxc_allocator.
///
/// SAFETY: It is unsafe to use this string after the allocator is dropped.
///
#[derive(Clone, Eq, Hash)]
pub struct Atom(AtomImpl);
/// Immutable Inlinable String
///
/// https://github.com/fitzgen/inlinable_string/blob/master/src/lib.rs
#[derive(Clone, Eq, PartialEq)]
enum AtomImpl {
/// A arena heap-allocated string.
Arena(&'static str),
/// A heap-allocated string.
Heap(Box<str>),
/// A small string stored inline.
Inline(InlineString),
}
#[cfg(feature = "serde")]
impl Serialize for Atom {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
serializer.serialize_str(self.as_str())
}
}
impl Atom {
pub const fn new_inline(s: &str) -> Self {
Self(CompactString::new_inline(s))
pub fn new_inline(s: &str) -> Self {
Self(AtomImpl::Inline(InlineString::from(s)))
}
#[inline]
pub fn as_str(&self) -> &str {
self.0.as_str()
match &self.0 {
AtomImpl::Arena(s) => s,
AtomImpl::Heap(s) => s,
AtomImpl::Inline(s) => s.as_ref(),
}
}
#[inline]
pub fn into_string(self) -> String {
match self.0 {
AtomImpl::Arena(s) => String::from(s),
AtomImpl::Heap(s) => s.to_string(),
AtomImpl::Inline(s) => s.to_string(),
}
}
/// Get the shortest mangled name for a given n.
@ -41,7 +80,7 @@ impl Atom {
// Base 54 at first because these are the usable first characters in JavaScript identifiers
// <https://tc39.es/ecma262/#prod-IdentifierStart>
let base = 54usize;
let mut ret = CompactString::default();
let mut ret = String::new();
ret.push(BASE54_CHARS[num % base] as char);
num /= base;
// Base 64 for the rest because after the first character we can also use 0-9 too
@ -52,7 +91,38 @@ impl Atom {
ret.push(BASE54_CHARS[num % base] as char);
num /= base;
}
Self(ret)
Self(AtomImpl::Heap(ret.into_boxed_str()))
}
}
impl<'a> From<&'a str> for Atom {
fn from(s: &'a str) -> Self {
if s.len() <= INLINE_STRING_CAPACITY {
Self(AtomImpl::Inline(InlineString::from(s)))
} else {
// SAFETY: It is unsafe to use this string after the allocator is dropped.
Self(AtomImpl::Arena(unsafe { std::mem::transmute(s) }))
}
}
}
impl From<String> for Atom {
fn from(s: String) -> Self {
if s.len() <= INLINE_STRING_CAPACITY {
Self(AtomImpl::Inline(InlineString::from(s.as_str())))
} else {
Self(AtomImpl::Heap(s.into_boxed_str()))
}
}
}
impl From<Cow<'_, str>> for Atom {
fn from(s: Cow<'_, str>) -> Self {
if s.len() <= INLINE_STRING_CAPACITY {
Self(AtomImpl::Inline(InlineString::from(s.borrow())))
} else {
Self(AtomImpl::Heap(s.into()))
}
}
}
@ -60,62 +130,55 @@ impl Deref for Atom {
type Target = str;
fn deref(&self) -> &Self::Target {
self.0.as_str()
}
}
impl<'a> From<&'a str> for Atom {
fn from(s: &'a str) -> Self {
Self(s.into())
}
}
impl From<String> for Atom {
fn from(s: String) -> Self {
Self(s.into())
}
}
impl From<Cow<'_, str>> for Atom {
fn from(s: Cow<'_, str>) -> Self {
Self(s.into())
self.as_str()
}
}
impl AsRef<str> for Atom {
#[inline]
fn as_ref(&self) -> &str {
self.0.as_str()
self.as_str()
}
}
impl Borrow<str> for Atom {
#[inline]
fn borrow(&self) -> &str {
self.0.as_str()
self.as_str()
}
}
impl<T: AsRef<str>> PartialEq<T> for Atom {
fn eq(&self, other: &T) -> bool {
self.0.as_str() == other.as_ref()
self.as_str() == other.as_ref()
}
}
impl PartialEq<Atom> for &str {
fn eq(&self, other: &Atom) -> bool {
*self == other.0.as_str()
*self == other.as_str()
}
}
impl hash::Hash for AtomImpl {
#[inline]
fn hash<H: hash::Hasher>(&self, hasher: &mut H) {
match self {
Self::Arena(s) => s.hash(hasher),
Self::Heap(s) => s.hash(hasher),
Self::Inline(s) => s.hash(hasher),
}
}
}
impl fmt::Debug for Atom {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Debug::fmt(self.0.as_str(), f)
fmt::Debug::fmt(self.as_str(), f)
}
}
impl fmt::Display for Atom {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Display::fmt(self.0.as_str(), f)
fmt::Display::fmt(self.as_str(), f)
}
}