feat(span): fix memory leak by implementing inlineable string for oxc_allocator (#2294)

closes #1803

This string is currently unsafe, but I want to get miri working before
introducing more changes.

I want to make a progress from memory leak to unsafe then to safety.
It's harder to do the steps in one go.
This commit is contained in:
Boshen 2024-02-04 19:28:23 +08:00 committed by GitHub
parent 1822cfe18d
commit 6002560fa1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 139 additions and 81 deletions

View file

@ -2,6 +2,17 @@ name: Miri
on: on:
workflow_dispatch: workflow_dispatch:
pull_request:
types: [opened, synchronize]
paths:
- 'crates/oxc_parser/**'
- '.github/workflows/miri.yml'
push:
branches:
- main
paths:
- 'crates/oxc_parser/**'
- '.github/workflows/miri.yml'
concurrency: concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }} group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}
@ -15,6 +26,12 @@ jobs:
- name: Checkout - name: Checkout
uses: actions/checkout@v4 uses: actions/checkout@v4
- name: Install Rust
uses: ./.github/actions/rustup
with:
shared-key: miri
save-cache: ${{ github.ref_name == 'main' }}
- name: Install Miri - name: Install Miri
run: | run: |
rustup toolchain install nightly --component miri rustup toolchain install nightly --component miri

40
Cargo.lock generated
View file

@ -207,15 +207,6 @@ version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
[[package]]
name = "castaway"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a17ed5635fc8536268e5d4de1e22e81ac34419e5f052d4d51f4e01dcc263fcc"
dependencies = [
"rustversion",
]
[[package]] [[package]]
name = "cc" name = "cc"
version = "1.0.83" version = "1.0.83"
@ -315,20 +306,6 @@ dependencies = [
"windows-sys 0.48.0", "windows-sys 0.48.0",
] ]
[[package]]
name = "compact_str"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f86b9c4c00838774a6d902ef931eff7470720c51d90c2e32cfe15dc304737b3f"
dependencies = [
"castaway",
"cfg-if",
"itoa",
"ryu",
"serde",
"static_assertions",
]
[[package]] [[package]]
name = "console" name = "console"
version = "0.15.8" version = "0.15.8"
@ -862,6 +839,15 @@ dependencies = [
"serde", "serde",
] ]
[[package]]
name = "inlinable_string"
version = "0.1.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8fae54786f62fb2918dcfae3d568594e50eb9b5c25bf04371af6fe7516452fb"
dependencies = [
"serde",
]
[[package]] [[package]]
name = "insta" name = "insta"
version = "1.34.0" version = "1.34.0"
@ -1724,7 +1710,7 @@ dependencies = [
name = "oxc_span" name = "oxc_span"
version = "0.6.0" version = "0.6.0"
dependencies = [ dependencies = [
"compact_str", "inlinable_string",
"miette", "miette",
"serde", "serde",
"tsify", "tsify",
@ -2256,12 +2242,6 @@ dependencies = [
"untrusted", "untrusted",
] ]
[[package]]
name = "rustversion"
version = "1.0.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4"
[[package]] [[package]]
name = "ryu" name = "ryu"
version = "1.0.16" version = "1.0.16"

View file

@ -88,7 +88,6 @@ assert-unchecked = { version = "0.1.2" }
bpaf = { version = "0.9.9" } bpaf = { version = "0.9.9" }
bitflags = { version = "2.4.2" } bitflags = { version = "2.4.2" }
bumpalo = { version = "3.14.0" } bumpalo = { version = "3.14.0" }
compact_str = { version = "0.7.1" }
convert_case = { version = "0.6.0" } convert_case = { version = "0.6.0" }
criterion = { version = "0.5.1", default-features = false } criterion = { version = "0.5.1", default-features = false }
crossbeam-channel = { version = "0.5.11" } crossbeam-channel = { version = "0.5.11" }

View file

@ -283,11 +283,10 @@ impl ModuleRecordBuilder {
}; };
let export_entry = ExportEntry { let export_entry = ExportEntry {
export_name: ExportExportName::Default(exported_name.span()), export_name: ExportExportName::Default(exported_name.span()),
local_name: id local_name: id.as_ref().map_or_else(
.as_ref() || ExportLocalName::Default(exported_name.span()),
.map_or(ExportLocalName::Default(exported_name.span()), |ident| { |ident| ExportLocalName::Name(NameSpan::new(ident.name.clone(), ident.span)),
ExportLocalName::Name(NameSpan::new(ident.name.clone(), ident.span)) ),
}),
span: decl.declaration.span(), span: decl.declaration.span(),
..ExportEntry::default() ..ExportEntry::default()
}; };

View file

@ -18,16 +18,16 @@ workspace = true
[lib] [lib]
doctest = false doctest = false
[features]
default = []
serde = ["dep:serde", "compact_str/serde"]
wasm = ["dep:tsify", "dep:wasm-bindgen"]
[dependencies] [dependencies]
miette = { workspace = true } miette = { workspace = true }
compact_str = { workspace = true } inlinable_string = "0.1.15"
tsify = { workspace = true, optional = true } tsify = { workspace = true, optional = true }
wasm-bindgen = { workspace = true, optional = true } wasm-bindgen = { workspace = true, optional = true }
serde = { workspace = true, features = ["derive"], optional = true } serde = { workspace = true, features = ["derive"], optional = true }
[features]
default = []
serde = ["dep:serde", "inlinable_string/serde"]
wasm = ["dep:tsify", "dep:wasm-bindgen"]

View file

@ -1,17 +1,15 @@
use std::{ use std::{
borrow::{Borrow, Cow}, borrow::{Borrow, Cow},
fmt, fmt, hash,
ops::Deref, ops::Deref,
}; };
use compact_str::CompactString;
#[cfg(feature = "serde")] #[cfg(feature = "serde")]
use serde::Serialize; use serde::{Serialize, Serializer};
/// Newtype for [`CompactString`] use inlinable_string::inline_string::{InlineString, INLINE_STRING_CAPACITY};
#[derive(Clone, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize))] const BASE54_CHARS: &[u8; 64] = b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ$_0123456789";
pub struct Atom(CompactString);
#[cfg_attr( #[cfg_attr(
all(feature = "serde", feature = "wasm"), all(feature = "serde", feature = "wasm"),
@ -22,16 +20,57 @@ const TS_APPEND_CONTENT: &'static str = r#"
export type Atom = string; export type Atom = string;
"#; "#;
const BASE54_CHARS: &[u8; 64] = b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ$_0123456789"; /// An inlinable string for oxc_allocator.
///
/// SAFETY: It is unsafe to use this string after the allocator is dropped.
///
#[derive(Clone, Eq, Hash)]
pub struct Atom(AtomImpl);
/// Immutable Inlinable String
///
/// https://github.com/fitzgen/inlinable_string/blob/master/src/lib.rs
#[derive(Clone, Eq, PartialEq)]
enum AtomImpl {
/// A arena heap-allocated string.
Arena(&'static str),
/// A heap-allocated string.
Heap(Box<str>),
/// A small string stored inline.
Inline(InlineString),
}
#[cfg(feature = "serde")]
impl Serialize for Atom {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
serializer.serialize_str(self.as_str())
}
}
impl Atom { impl Atom {
pub const fn new_inline(s: &str) -> Self { pub fn new_inline(s: &str) -> Self {
Self(CompactString::new_inline(s)) Self(AtomImpl::Inline(InlineString::from(s)))
} }
#[inline] #[inline]
pub fn as_str(&self) -> &str { pub fn as_str(&self) -> &str {
self.0.as_str() match &self.0 {
AtomImpl::Arena(s) => s,
AtomImpl::Heap(s) => s,
AtomImpl::Inline(s) => s.as_ref(),
}
}
#[inline]
pub fn into_string(self) -> String {
match self.0 {
AtomImpl::Arena(s) => String::from(s),
AtomImpl::Heap(s) => s.to_string(),
AtomImpl::Inline(s) => s.to_string(),
}
} }
/// Get the shortest mangled name for a given n. /// Get the shortest mangled name for a given n.
@ -41,7 +80,7 @@ impl Atom {
// Base 54 at first because these are the usable first characters in JavaScript identifiers // Base 54 at first because these are the usable first characters in JavaScript identifiers
// <https://tc39.es/ecma262/#prod-IdentifierStart> // <https://tc39.es/ecma262/#prod-IdentifierStart>
let base = 54usize; let base = 54usize;
let mut ret = CompactString::default(); let mut ret = String::new();
ret.push(BASE54_CHARS[num % base] as char); ret.push(BASE54_CHARS[num % base] as char);
num /= base; num /= base;
// Base 64 for the rest because after the first character we can also use 0-9 too // Base 64 for the rest because after the first character we can also use 0-9 too
@ -52,7 +91,38 @@ impl Atom {
ret.push(BASE54_CHARS[num % base] as char); ret.push(BASE54_CHARS[num % base] as char);
num /= base; num /= base;
} }
Self(ret) Self(AtomImpl::Heap(ret.into_boxed_str()))
}
}
impl<'a> From<&'a str> for Atom {
fn from(s: &'a str) -> Self {
if s.len() <= INLINE_STRING_CAPACITY {
Self(AtomImpl::Inline(InlineString::from(s)))
} else {
// SAFETY: It is unsafe to use this string after the allocator is dropped.
Self(AtomImpl::Arena(unsafe { std::mem::transmute(s) }))
}
}
}
impl From<String> for Atom {
fn from(s: String) -> Self {
if s.len() <= INLINE_STRING_CAPACITY {
Self(AtomImpl::Inline(InlineString::from(s.as_str())))
} else {
Self(AtomImpl::Heap(s.into_boxed_str()))
}
}
}
impl From<Cow<'_, str>> for Atom {
fn from(s: Cow<'_, str>) -> Self {
if s.len() <= INLINE_STRING_CAPACITY {
Self(AtomImpl::Inline(InlineString::from(s.borrow())))
} else {
Self(AtomImpl::Heap(s.into()))
}
} }
} }
@ -60,62 +130,55 @@ impl Deref for Atom {
type Target = str; type Target = str;
fn deref(&self) -> &Self::Target { fn deref(&self) -> &Self::Target {
self.0.as_str() self.as_str()
}
}
impl<'a> From<&'a str> for Atom {
fn from(s: &'a str) -> Self {
Self(s.into())
}
}
impl From<String> for Atom {
fn from(s: String) -> Self {
Self(s.into())
}
}
impl From<Cow<'_, str>> for Atom {
fn from(s: Cow<'_, str>) -> Self {
Self(s.into())
} }
} }
impl AsRef<str> for Atom { impl AsRef<str> for Atom {
#[inline] #[inline]
fn as_ref(&self) -> &str { fn as_ref(&self) -> &str {
self.0.as_str() self.as_str()
} }
} }
impl Borrow<str> for Atom { impl Borrow<str> for Atom {
#[inline] #[inline]
fn borrow(&self) -> &str { fn borrow(&self) -> &str {
self.0.as_str() self.as_str()
} }
} }
impl<T: AsRef<str>> PartialEq<T> for Atom { impl<T: AsRef<str>> PartialEq<T> for Atom {
fn eq(&self, other: &T) -> bool { fn eq(&self, other: &T) -> bool {
self.0.as_str() == other.as_ref() self.as_str() == other.as_ref()
} }
} }
impl PartialEq<Atom> for &str { impl PartialEq<Atom> for &str {
fn eq(&self, other: &Atom) -> bool { fn eq(&self, other: &Atom) -> bool {
*self == other.0.as_str() *self == other.as_str()
}
}
impl hash::Hash for AtomImpl {
#[inline]
fn hash<H: hash::Hasher>(&self, hasher: &mut H) {
match self {
Self::Arena(s) => s.hash(hasher),
Self::Heap(s) => s.hash(hasher),
Self::Inline(s) => s.hash(hasher),
}
} }
} }
impl fmt::Debug for Atom { impl fmt::Debug for Atom {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Debug::fmt(self.0.as_str(), f) fmt::Debug::fmt(self.as_str(), f)
} }
} }
impl fmt::Display for Atom { impl fmt::Display for Atom {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Display::fmt(self.0.as_str(), f) fmt::Display::fmt(self.as_str(), f)
} }
} }