perf(transformer): introduce NonEmptyStack (#6092)

`NonEmptyStack` is a stack structure, optimized for fast push/pop and reading/writing the last entry on the stack. By always having at least one entry, `last` and `last_mut` can be made branchless and extremely cheap (only 1 CPU op).

Use `NonEmptyStack` as one of the backing stores in `SparseStack`.
This commit is contained in:
overlookmotel 2024-09-27 04:28:53 +00:00
parent 1399d2ce1f
commit ad4ef31f9f
6 changed files with 773 additions and 37 deletions

1
Cargo.lock generated
View file

@ -1971,6 +1971,7 @@ dependencies = [
name = "oxc_transformer"
version = "0.30.1"
dependencies = [
"assert-unchecked",
"base64",
"dashmap 6.0.1",
"indexmap",

View file

@ -30,6 +30,7 @@ oxc_span = { workspace = true }
oxc_syntax = { workspace = true, features = ["to_js_string"] }
oxc_traverse = { workspace = true }
assert-unchecked = { workspace = true }
base64 = { workspace = true }
dashmap = { workspace = true }
indexmap = { workspace = true }

View file

@ -0,0 +1,143 @@
use std::mem::{align_of, size_of};
/// Capacity limits and default capacities shared by stack types.
///
/// Stack types depend on `MAX_CAPACITY` and `MAX_CAPACITY_BYTES` being computed correctly
/// for their soundness.
pub trait StackCapacity {
    /// Element type stored in the stack.
    type Item: Sized;

    /// Largest number of `Item`s a stack may hold.
    ///
    /// An allocation of this many `Item`s is always a legal allocation size, i.e. it stays
    /// within Rust's allocation size limits.
    ///
    /// Quoting [`std::alloc::Layout`]'s docs:
    /// > size, when rounded up to the nearest multiple of align, must not overflow `isize`
    /// > (i.e., the rounded value must be less than or equal to `isize::MAX`).
    const MAX_CAPACITY: usize = {
        let item_size = size_of::<Self::Item>();
        // The division below would already fail to compile for a zero-sized `Item`
        // (division by zero), but soundness depends on this, so state it explicitly.
        assert!(item_size > 0, "Zero sized types are not supported");
        // `size_of::<T>() >= align_of::<T>()` always holds and `/` rounds down,
        // so the result satisfies `Layout`'s alignment requirement.
        let max_capacity = isize::MAX as usize / item_size;
        assert!(max_capacity > 0);
        max_capacity
    };

    /// `MAX_CAPACITY` expressed in bytes.
    const MAX_CAPACITY_BYTES: usize = {
        let capacity_bytes = size_of::<Self::Item>() * Self::MAX_CAPACITY;
        // Belt and braces: re-check `Layout`'s alignment requirement on the byte count.
        assert!(capacity_bytes <= isize::MAX as usize + 1 - align_of::<Self::Item>());
        capacity_bytes
    };

    /// Number of `Item`s a stack pre-allocates space for by default.
    ///
    /// Uses the same defaults as [`std::vec::Vec`]: 8 for 1-byte items, 4 for items of
    /// up to 1024 bytes, 1 for anything larger.
    const DEFAULT_CAPACITY: usize = {
        // This can never exceed `MAX_CAPACITY` because `size_of::<T>() >= align_of::<T>()`.
        let item_size = size_of::<Self::Item>();
        if item_size == 1 {
            8
        } else if item_size <= 1024 {
            4
        } else {
            1
        }
    };

    /// `DEFAULT_CAPACITY` expressed in bytes.
    const DEFAULT_CAPACITY_BYTES: usize = size_of::<Self::Item>() * Self::DEFAULT_CAPACITY;
}
#[cfg(test)]
#[expect(clippy::assertions_on_constants)]
mod tests {
    use super::*;

    const ISIZE_MAX: usize = isize::MAX as usize;
    const ISIZE_MAX_PLUS_ONE: usize = ISIZE_MAX + 1;

    /// Declare a unit struct `TestStack` which implements `StackCapacity` with `Item = $item`.
    macro_rules! declare_test_stack {
        ($item:ty) => {
            struct TestStack;
            impl StackCapacity for TestStack {
                type Item = $item;
            }
        };
    }

    /// 1-byte item: capacity limit is `isize::MAX`, default capacity is 8.
    #[test]
    fn bool() {
        declare_test_stack!(bool);

        assert_eq!(TestStack::MAX_CAPACITY, ISIZE_MAX);
        assert_eq!(TestStack::MAX_CAPACITY_BYTES, ISIZE_MAX);
        assert_eq!(TestStack::DEFAULT_CAPACITY, 8);
        assert_eq!(TestStack::DEFAULT_CAPACITY_BYTES, 8);
    }

    /// 8-byte item with 8-byte alignment.
    #[test]
    fn u64() {
        declare_test_stack!(u64);

        assert_eq!(TestStack::MAX_CAPACITY, ISIZE_MAX / 8);
        assert_eq!(TestStack::MAX_CAPACITY_BYTES, TestStack::MAX_CAPACITY * 8);
        assert!(TestStack::MAX_CAPACITY_BYTES <= ISIZE_MAX_PLUS_ONE - 8);
        assert_eq!(TestStack::DEFAULT_CAPACITY, 4);
        assert_eq!(TestStack::DEFAULT_CAPACITY_BYTES, 32);
    }

    /// 8-byte item with 4-byte alignment.
    #[test]
    fn u32_pair() {
        declare_test_stack!([u32; 2]);

        assert_eq!(TestStack::MAX_CAPACITY, ISIZE_MAX / 8);
        assert_eq!(TestStack::MAX_CAPACITY_BYTES, TestStack::MAX_CAPACITY * 8);
        assert!(TestStack::MAX_CAPACITY_BYTES <= ISIZE_MAX_PLUS_ONE - 4);
        assert_eq!(TestStack::DEFAULT_CAPACITY, 4);
        assert_eq!(TestStack::DEFAULT_CAPACITY_BYTES, 32);
    }

    /// 12-byte item: size is not a power of 2.
    #[test]
    fn u32_triple() {
        declare_test_stack!([u32; 3]);

        assert_eq!(TestStack::MAX_CAPACITY, ISIZE_MAX / 12);
        assert_eq!(TestStack::MAX_CAPACITY_BYTES, TestStack::MAX_CAPACITY * 12);
        assert!(TestStack::MAX_CAPACITY_BYTES <= ISIZE_MAX_PLUS_ONE - 4);
        assert_eq!(TestStack::DEFAULT_CAPACITY, 4);
        assert_eq!(TestStack::DEFAULT_CAPACITY_BYTES, 48);
    }

    /// Item larger than 1024 bytes with small alignment: default capacity drops to 1.
    #[test]
    fn large_low_alignment() {
        declare_test_stack!([u16; 1000]);

        assert_eq!(TestStack::MAX_CAPACITY, ISIZE_MAX / 2000);
        assert_eq!(TestStack::MAX_CAPACITY_BYTES, TestStack::MAX_CAPACITY * 2000);
        assert!(TestStack::MAX_CAPACITY_BYTES <= ISIZE_MAX_PLUS_ONE - 2);
        assert_eq!(TestStack::DEFAULT_CAPACITY, 1);
        assert_eq!(TestStack::DEFAULT_CAPACITY_BYTES, 2000);
    }

    /// Item whose size is dictated by a large alignment (4096 bytes).
    #[test]
    fn large_high_alignment() {
        #[repr(align(4096))]
        #[expect(dead_code)]
        struct TestItem(u8);

        declare_test_stack!(TestItem);

        assert_eq!(TestStack::MAX_CAPACITY, ISIZE_MAX / 4096);
        assert_eq!(TestStack::MAX_CAPACITY_BYTES, TestStack::MAX_CAPACITY * 4096);
        assert!(TestStack::MAX_CAPACITY_BYTES <= ISIZE_MAX_PLUS_ONE - 4096);
        assert_eq!(TestStack::DEFAULT_CAPACITY, 1);
        assert_eq!(TestStack::DEFAULT_CAPACITY_BYTES, 4096);
    }
}

View file

@ -1,3 +1,7 @@
mod capacity;
mod non_empty;
mod sparse;
use capacity::StackCapacity;
pub use non_empty::NonEmptyStack;
pub use sparse::SparseStack;

View file

@ -0,0 +1,615 @@
#![expect(clippy::unnecessary_safety_comment)]
use std::{
alloc::{self, Layout},
mem::{align_of, size_of},
ptr::{self, NonNull},
};
use assert_unchecked::assert_unchecked;
use super::StackCapacity;
/// A stack which can never be empty.
///
/// `NonEmptyStack` is created initially with 1 entry, and `pop` does not allow removing it
/// (though that initial entry can be mutated with `last_mut`).
///
/// The fact that the stack is never empty makes reading the last entry infallible:
/// `last` and `last_mut` need no emptiness check and are branchless.
/// `pop` panics when only 1 entry remains, and `push` panics only if the stack cannot grow
/// beyond its maximum capacity.
///
/// The trade-off is that you cannot create a `NonEmptyStack` without allocating (unlike `Vec`).
///
/// To simplify implementation, zero size types are not supported (e.g. `NonEmptyStack<()>`).
///
/// ## Design
/// Designed for maximally efficient `push`, `pop`, and reading/writing the last value on stack.
///
/// The alternative would likely be to use a `Vec`. But `Vec` is optimized for indexing at
/// arbitrary positions, not for `push` and `pop`. `Vec` stores `len` and `capacity` as integers,
/// so requires pointer maths on every operation: `let entry_ptr = base_ptr + index * size_of::<T>();`.
///
/// In comparison, `NonEmptyStack` contains a `cursor` pointer, which always points to last entry
/// on stack, so it can be read/written with a minimum of operations.
///
/// This design is similar to `std`'s slice iterator.
///
/// Comparison to `Vec`:
/// * `last` and `last_mut` are 1 instruction, instead of `Vec`'s 4.
/// * `pop` is 1 instruction shorter than `Vec`'s equivalent.
/// * `push` is 1 instruction shorter than `Vec`'s equivalent, and uses 1 less register.
///
/// ### Possible alternative designs
/// 1. `cursor` could point to *after* last entry, rather than *to* it. This has advantage that `pop`
/// uses 1 less register, but disadvantage that `last` and `last_mut` are 2 instructions, not 1.
/// <https://godbolt.org/z/xnx7YP5de>
///
/// 2. Stack could grow downwards, like `bumpalo` allocator does. This would probably make `pop` use
/// 1 less register, but at the cost that the stack can never grow in place, which would incur more
/// memory copies when the stack grows.
///
/// ## Invariants
/// All methods maintain `start <= cursor < end`, with every entry from `start` up to and
/// including `cursor` initialized. The distances between the 3 pointers are always multiples
/// of `size_of::<T>()`.
pub struct NonEmptyStack<T> {
/// Pointer to last entry on stack.
/// Points *to* last entry, not *after* last entry.
/// Always points to an initialized `T`, because the stack is never empty.
cursor: NonNull<T>,
/// Pointer to start of allocation (first entry)
start: NonNull<T>,
/// Pointer to end of allocation (`start` + capacity in bytes, i.e. one past the last slot)
end: NonNull<T>,
}
// Only `Item` is specified here. The capacity constants (`MAX_CAPACITY`, `MAX_CAPACITY_BYTES`,
// `DEFAULT_CAPACITY`, `DEFAULT_CAPACITY_BYTES`) are the trait's provided defaults, computed for `T`.
impl<T> StackCapacity for NonEmptyStack<T> {
type Item = T;
}
impl<T> NonEmptyStack<T> {
/// Maximum capacity (maximum number of entries the stack can hold).
///
/// Exposes [`StackCapacity::MAX_CAPACITY`] for `T` as an inherent constant,
/// i.e. `isize::MAX / size_of::<T>()`.
///
/// Effectively unlimited on 64-bit systems.
pub const MAX_CAPACITY: usize = <Self as StackCapacity>::MAX_CAPACITY;
/// Create a [`NonEmptyStack`] whose single entry is `initial_value`, pre-allocating
/// the default capacity.
///
/// # Panics
/// Panics if `T` is a zero-sized type.
#[inline]
pub fn new(initial_value: T) -> Self {
    let capacity_bytes = Self::DEFAULT_CAPACITY_BYTES;
    // SAFETY: `DEFAULT_CAPACITY_BYTES` satisfies the requirements
    // (for a zero-sized `T`, the callee panics before using it)
    unsafe { Self::new_with_capacity_bytes_unchecked(capacity_bytes, initial_value) }
}
/// Create a [`NonEmptyStack`] whose single entry is `initial_value`, pre-allocating room
/// for `capacity` entries.
///
/// `capacity` cannot be 0.
///
/// # Panics
/// Panics unless all of the following hold:
/// * `T` is not a zero-sized type.
/// * `capacity` is not 0.
/// * `capacity` does not exceed [`Self::MAX_CAPACITY`].
#[inline]
#[cfg_attr(not(test), expect(dead_code))]
pub fn with_capacity(capacity: usize, initial_value: T) -> Self {
    // Validate `capacity` up front, so the unchecked constructor's contract is met
    let max_capacity = Self::MAX_CAPACITY;
    assert!(capacity > 0, "`capacity` cannot be zero");
    assert!(capacity <= max_capacity, "`capacity` must not exceed `Self::MAX_CAPACITY`");

    // SAFETY: The two assertions above are exactly the requirements on `capacity`
    unsafe { Self::with_capacity_unchecked(capacity, initial_value) }
}
/// Create a [`NonEmptyStack`] whose single entry is `initial_value`, pre-allocating room
/// for `capacity` entries, without validating `capacity`.
///
/// `capacity` cannot be 0.
///
/// # Panics
/// Panics if `T` is a zero-sized type.
///
/// # SAFETY
/// * `capacity` must not be 0.
/// * `capacity` must not exceed [`Self::MAX_CAPACITY`].
#[inline]
pub unsafe fn with_capacity_unchecked(capacity: usize, initial_value: T) -> Self {
    debug_assert!(capacity > 0);
    debug_assert!(capacity <= Self::MAX_CAPACITY);

    // Convert entry count to bytes. No overflow is possible while `capacity <= MAX_CAPACITY`.
    let item_size = size_of::<T>();
    let capacity_bytes = item_size * capacity;

    // SAFETY: Given the caller's guarantees about `capacity`, `capacity_bytes` is non-zero,
    // a multiple of `size_of::<T>()`, and within `MAX_CAPACITY_BYTES`
    Self::new_with_capacity_bytes_unchecked(capacity_bytes, initial_value)
}
/// Create new [`NonEmptyStack`] with provided capacity in bytes, and initial value `initial_value`,
/// without checks.
///
/// Allocates `capacity_bytes` bytes aligned for `T`, writes `initial_value` into the first slot,
/// and returns a stack whose `cursor` points at that first slot.
///
/// # Panics
/// Panics if `T` is a zero-sized type.
///
/// Calls [`std::alloc::handle_alloc_error`] if the allocation fails.
///
/// # SAFETY
/// * `capacity_bytes` must not be 0.
/// * `capacity_bytes` must be a multiple of `mem::size_of::<T>()`.
/// * `capacity_bytes` must not exceed [`Self::MAX_CAPACITY_BYTES`].
#[inline]
unsafe fn new_with_capacity_bytes_unchecked(capacity_bytes: usize, initial_value: T) -> Self {
// ZSTs are not supported for simplicity.
// This check comes first, before the layout is computed or anything is allocated.
assert!(size_of::<T>() > 0, "Zero sized types are not supported");
// SAFETY: Caller guarantees `capacity_bytes` satisfies requirements
let layout = Self::layout_for(capacity_bytes);
let ptr = alloc::alloc(layout);
// `alloc` signals failure by returning a null pointer
if ptr.is_null() {
alloc::handle_alloc_error(layout);
}
// `layout_for` produces a layout with `T`'s alignment, so `ptr` is aligned for `T`
let ptr = ptr.cast::<T>();
// SAFETY: We checked `ptr` is non-null
let start = NonNull::new_unchecked(ptr);
// SAFETY: We allocated `capacity_bytes` bytes, so `end` is end of allocation
// (one past the last byte), which cannot be null
let end = NonNull::new_unchecked(ptr.byte_add(capacity_bytes));
// Write initial value to start of allocation.
// SAFETY: Allocation was created with alignment of `T`, and with capacity for at least 1 entry,
// so `start` is valid for writing a `T`.
start.as_ptr().write(initial_value);
// `cursor` is positioned at start i.e. pointing at initial value
Self { cursor: start, start, end }
}
/// Build the [`Layout`] for an allocation of `capacity_bytes` bytes, aligned for `T`.
///
/// # SAFETY
/// * `capacity_bytes` must not be 0.
/// * `capacity_bytes` must be a multiple of `mem::size_of::<T>()`.
/// * `capacity_bytes` must not exceed [`Self::MAX_CAPACITY_BYTES`].
#[inline]
unsafe fn layout_for(capacity_bytes: usize) -> Layout {
    // Stack is never empty, so a zero-byte allocation is never valid
    debug_assert!(capacity_bytes > 0);
    // Must be a whole number of `T`s. Otherwise the `new_cursor == self.end` check in `push`
    // would not reliably detect that the stack is full to capacity.
    debug_assert!(capacity_bytes % size_of::<T>() == 0);
    // Anything larger than `Self::MAX_CAPACITY_BYTES` would be an illegal allocation size
    debug_assert!(capacity_bytes <= Self::MAX_CAPACITY_BYTES);

    let align = align_of::<T>();
    // SAFETY: `align_of::<T>()` trivially satisfies the alignment requirements.
    // Caller guarantees `capacity_bytes <= MAX_CAPACITY_BYTES`, and `MAX_CAPACITY_BYTES`
    // already accounts for the rounding-up-to-alignment requirement.
    Layout::from_size_align_unchecked(capacity_bytes, align)
}
/// Borrow the value on top of the stack.
///
/// Infallible, because the stack always holds at least 1 entry.
#[inline]
pub fn last(&self) -> &T {
    let entry = self.cursor.as_ptr();
    // SAFETY: Every method keeps `self.cursor` in bounds, aligned for `T`,
    // and pointing to a valid initialized `T`
    unsafe { &*entry }
}
/// Mutably borrow the value on top of the stack.
///
/// Infallible, because the stack always holds at least 1 entry.
#[inline]
pub fn last_mut(&mut self) -> &mut T {
    let entry = self.cursor.as_ptr();
    // SAFETY: Every method keeps `self.cursor` in bounds, aligned for `T`,
    // and pointing to a valid initialized `T`
    unsafe { &mut *entry }
}
/// Push value to stack.
///
/// The fast path (spare capacity available) advances `cursor` by one entry and writes `value`
/// there. When the stack is full to capacity, the work is delegated to `push_slow`,
/// which grows the allocation first.
///
/// # Panics
/// Panics if stack is already filled to maximum capacity.
#[inline]
pub fn push(&mut self, value: T) {
// Compute where the new entry would go, before deciding whether it fits.
// SAFETY: Stack is never empty and `self.cursor` is always less than `self.end`, which is end
// of allocation. So advancing by a `T` cannot be out of bounds.
// The distance between `self.cursor` and `self.end` is always a multiple of `size_of::<T>()`,
// so `==` check is sufficient to detect when full to capacity.
let new_cursor = unsafe { NonNull::new_unchecked(self.cursor.as_ptr().add(1)) };
if new_cursor == self.end {
// Needs to grow.
// `self.cursor` is left untouched here; `push_slow` repositions it after reallocating.
// SAFETY: Stack is full to capacity
unsafe { self.push_slow(value) };
} else {
// Capacity for at least 1 more entry
self.cursor = new_cursor;
// SAFETY: We checked there is capacity for 1 more entry, so `self.cursor` is in bounds.
// `self.cursor` was aligned for `T`, and we added `size_of::<T>()` to pointer.
// `size_of::<T>()` is always a multiple of `T`'s alignment, so `self.cursor` must still be
// aligned for `T`.
unsafe { self.cursor.as_ptr().write(value) };
}
}
/// Push value to stack when stack is full to capacity.
///
/// Doubles the capacity (clamped to `MAX_CAPACITY_BYTES`), reallocates, re-derives all 3 pointers
/// from the new allocation, and then writes `value` into the first newly available slot.
///
/// This is the slow branch of `push`, which is rarely taken, so marked as `#[cold]` and
/// `#[inline(never)]` to make `push` as small as possible, so it can be inlined.
///
/// # Panics
/// Panics if stack is already at maximum capacity.
///
/// Calls [`std::alloc::handle_alloc_error`] if the reallocation fails.
///
/// # SAFETY
/// Stack must be full to capacity. i.e. `self.cursor.add(1) == self.end`.
#[cold]
#[inline(never)]
unsafe fn push_slow(&mut self, value: T) {
// Get new capacity
let old_capacity_bytes = self.capacity_bytes();
// Capacity in bytes cannot be larger than `isize::MAX`, so `* 2` cannot overflow
let mut new_capacity_bytes = old_capacity_bytes * 2;
if new_capacity_bytes > Self::MAX_CAPACITY_BYTES {
// Doubling would exceed the limit. Growing is still possible as long as the stack
// is not already at the limit - in that case clamp to the limit instead.
assert!(
old_capacity_bytes < Self::MAX_CAPACITY_BYTES,
"Cannot grow beyond `Self::MAX_CAPACITY`"
);
new_capacity_bytes = Self::MAX_CAPACITY_BYTES;
}
debug_assert!(new_capacity_bytes > old_capacity_bytes);
// Reallocate.
// SAFETY:
// Stack is always allocated, and `self.start` and `self.end` are boundaries of that allocation.
// So `self.start` and `old_layout` accurately describe the current allocation.
// `old_capacity_bytes` was a multiple of `size_of::<T>()`, so double that must be too.
// `MAX_CAPACITY_BYTES` is also a multiple of `size_of::<T>()`.
// So `new_capacity_bytes` must be a multiple of `size_of::<T>()`.
// `new_capacity_bytes` is `<= MAX_CAPACITY_BYTES`, so is a legal allocation size.
// `layout_for` produces a layout with `T`'s alignment, so `new_ptr` is aligned for `T`.
let new_ptr = unsafe {
let old_ptr = self.start.as_ptr().cast::<u8>();
let old_layout = Self::layout_for(old_capacity_bytes);
let new_ptr = alloc::realloc(old_ptr, old_layout, new_capacity_bytes);
// `realloc` signals failure by returning a null pointer
if new_ptr.is_null() {
let new_layout = Self::layout_for(new_capacity_bytes);
alloc::handle_alloc_error(new_layout);
}
new_ptr.cast::<T>()
};
// Update pointers.
// Stack was full to capacity, so new last index after push is the old capacity.
// i.e. `self.cursor - self.start == old_end - old_start`.
// Note: All pointers need to be updated even if allocation grew in place.
// From docs for `GlobalAlloc::realloc`:
// "Any access to the old `ptr` is Undefined Behavior, even if the allocation remained in-place."
// <https://doc.rust-lang.org/std/alloc/trait.GlobalAlloc.html#method.realloc>
// `end` changes whatever happens, so always need to be updated.
// `cursor` needs to be derived from `start` to make `offset_from` valid, so also needs updating.
// SAFETY: We checked that `new_ptr` is non-null.
// `old_capacity_bytes` and `new_capacity_bytes` are both multiples of `size_of::<T>()`.
// `size_of::<T>()` is always a multiple of `T`'s alignment, and `new_ptr` is aligned for `T`,
// so new `self.cursor` and `self.end` are aligned for `T`.
// `old_capacity_bytes` is always `< new_capacity_bytes`, so new `self.cursor` must be in bounds.
unsafe {
self.start = NonNull::new_unchecked(new_ptr);
self.end = NonNull::new_unchecked(new_ptr.byte_add(new_capacity_bytes));
self.cursor = NonNull::new_unchecked(new_ptr.byte_add(old_capacity_bytes));
}
// Write value.
// SAFETY: We just allocated additional capacity, so `self.cursor` is in bounds.
// `self.cursor` is aligned for `T`.
unsafe { self.cursor.as_ptr().write(value) }
}
/// Remove the value on top of the stack and return it.
///
/// # Panics
/// Panics if the stack has only 1 entry on it.
#[inline]
pub fn pop(&mut self) -> T {
    // The final remaining entry may never be removed
    let has_single_entry = self.cursor == self.start;
    assert!(!has_single_entry, "Cannot pop all entries");

    // SAFETY: The assertion above guarantees at least 2 entries are on the stack
    unsafe { self.pop_unchecked() }
}
/// Remove the value on top of the stack and return it, without checking that another entry
/// remains afterwards.
///
/// # SAFETY
/// Stack must have at least 2 entries, so that after pop, it still has at least 1.
#[inline]
pub unsafe fn pop_unchecked(&mut self) -> T {
    debug_assert!(self.cursor > self.start);
    debug_assert!(self.cursor < self.end);

    let last_ptr = self.cursor.as_ptr();
    // SAFETY: Every method keeps `self.cursor` in bounds, aligned for `T`,
    // and pointing to a valid initialized `T`
    let popped = last_ptr.read();
    // SAFETY: Caller guarantees at least 2 entries are on the stack, so stepping back
    // by 1 entry stays in bounds
    self.cursor = NonNull::new_unchecked(last_ptr.sub(1));
    popped
}
/// Get number of values on stack.
///
/// Number of entries is always at least 1. Stack is never empty.
#[inline]
pub fn len(&self) -> usize {
// `offset_from` returns offset in units of `T`.
// When stack has 1 entry, `cursor - start == 0`, so add 1 to get number of entries.
// SAFETY: `self.start` and `self.cursor` are both derived from same pointer
// (in `new_with_capacity_bytes_unchecked` and `push_slow`).
// Both pointers are always within bounds of a single allocation.
// Distance between pointers is always a multiple of `size_of::<T>()`.
// Byte size of allocation cannot exceed `isize::MAX`, so `+ 1` cannot wrap around.
// `self.cursor` is always >= `self.start`, so the offset is never negative
// and the cast to `usize` loses nothing.
// `assert_unchecked!` is to help compiler to optimize.
// See: https://doc.rust-lang.org/std/primitive.pointer.html#method.sub_ptr
#[expect(clippy::cast_sign_loss)]
unsafe {
assert_unchecked!(self.cursor >= self.start);
self.cursor.as_ptr().offset_from(self.start.as_ptr()) as usize + 1
}
}
/// Get capacity, i.e. the number of entries the current allocation has room for.
#[inline]
#[cfg_attr(not(test), expect(dead_code))]
pub fn capacity(&self) -> usize {
    // SAFETY: `self.start` and `self.end` both derive from the same pointer
    // (in `new_with_capacity_bytes_unchecked` and `push_slow`), and both lie within the bounds
    // of a single allocation.
    // They are always a multiple of `size_of::<T>()` apart.
    // Stack is never empty, so `self.end` is always > `self.start`.
    // `assert_unchecked!` is to help compiler to optimize.
    // See: https://doc.rust-lang.org/std/primitive.pointer.html#method.sub_ptr
    #[expect(clippy::cast_sign_loss)]
    unsafe {
        assert_unchecked!(self.end > self.start);
        let entries = self.end.as_ptr().offset_from(self.start.as_ptr());
        entries as usize
    }
}
/// Get capacity in bytes, i.e. the size of the current allocation.
#[inline]
fn capacity_bytes(&self) -> usize {
    // SAFETY: `self.start` and `self.end` both derive from the same pointer
    // (in `new_with_capacity_bytes_unchecked` and `push_slow`), and both lie within the bounds
    // of a single allocation.
    // They are always a multiple of `size_of::<T>()` apart.
    // Stack is never empty, so `self.end` is always > `self.start`.
    // `assert_unchecked!` is to help compiler to optimize.
    // See: https://doc.rust-lang.org/std/primitive.pointer.html#method.sub_ptr
    #[expect(clippy::cast_sign_loss)]
    unsafe {
        assert_unchecked!(self.end > self.start);
        let bytes = self.end.as_ptr().byte_offset_from(self.start.as_ptr());
        bytes as usize
    }
}
}
impl<T> Drop for NonEmptyStack<T> {
fn drop(&mut self) {
// Drop contents. This block copied from `std`'s `Vec`.
// Will be optimized out if `T` is non-drop, as `drop_in_place` calls `std::mem::needs_drop`.
// SAFETY: Stack contains `self.len()` initialized entries, starting at `self.start`.
unsafe {
ptr::drop_in_place(ptr::slice_from_raw_parts_mut(self.start.as_ptr(), self.len()));
}
// Drop the memory
// SAFETY:
// Stack is always allocated, and `self.start` and `self.end` are boundaries of that allocation.
// So `self.start` and `layout` accurately describe the current allocation.
unsafe {
let layout = Self::layout_for(self.capacity_bytes());
alloc::dealloc(self.start.as_ptr().cast::<u8>(), layout);
}
}
}
#[cfg(test)]
mod tests {
use super::*;
// Check length, capacity (in entries) and last value of a stack in one go
macro_rules! assert_len_cap_last {
($stack:ident, $len:expr, $capacity:expr, $last:expr) => {
assert_eq!($stack.len(), $len);
assert_eq!($stack.capacity(), $capacity);
assert_eq!($stack.last(), $last);
};
}
// `new` pre-allocates the default capacity, which depends on size of the item type:
// 8 entries for 1-byte items, 4 entries for items up to 1024 bytes, 1 entry for larger items
#[test]
fn new() {
let stack = NonEmptyStack::new(true);
assert_len_cap_last!(stack, 1, 8, &true);
assert_eq!(stack.capacity_bytes(), 8);
let stack = NonEmptyStack::new(10u64);
assert_len_cap_last!(stack, 1, 4, &10);
assert_eq!(stack.capacity_bytes(), 32);
// 1024 bytes is the largest item size which still gets a default capacity of 4
let stack = NonEmptyStack::new([10u8; 1024]);
assert_len_cap_last!(stack, 1, 4, &[10; 1024]);
assert_eq!(stack.capacity_bytes(), 4096);
// 1 byte larger, and default capacity drops to 1
let stack = NonEmptyStack::new([10u8; 1025]);
assert_len_cap_last!(stack, 1, 1, &[10; 1025]);
assert_eq!(stack.capacity_bytes(), 1025);
}
// `with_capacity` allocates exactly the requested number of entries
#[test]
fn with_capacity() {
let stack = NonEmptyStack::with_capacity(16, 10u64);
assert_len_cap_last!(stack, 1, 16, &10);
assert_eq!(stack.capacity_bytes(), 128);
}
// Capacity of 0 is rejected, as stack must have room for the initial entry
#[test]
#[should_panic(expected = "`capacity` cannot be zero")]
fn with_capacity_zero() {
NonEmptyStack::with_capacity(0, 10u64);
}
// Push until the stack has grown twice (capacity 4 -> 8 -> 16), then pop everything
// except the initial entry. Capacity never shrinks on `pop`.
#[test]
fn push_then_pop() {
let mut stack = NonEmptyStack::new(10u64);
assert_len_cap_last!(stack, 1, 4, &10);
assert_eq!(stack.capacity_bytes(), 32);
stack.push(20);
assert_len_cap_last!(stack, 2, 4, &20);
stack.push(30);
assert_len_cap_last!(stack, 3, 4, &30);
stack.push(40);
assert_len_cap_last!(stack, 4, 4, &40);
assert_eq!(stack.capacity_bytes(), 32);
// Stack is full to capacity, so this push grows it (capacity doubles)
stack.push(50);
assert_len_cap_last!(stack, 5, 8, &50);
assert_eq!(stack.capacity_bytes(), 64);
stack.push(60);
assert_len_cap_last!(stack, 6, 8, &60);
stack.push(70);
assert_len_cap_last!(stack, 7, 8, &70);
stack.push(80);
assert_len_cap_last!(stack, 8, 8, &80);
assert_eq!(stack.capacity_bytes(), 64);
// Full to capacity again, so this push grows it a 2nd time
stack.push(90);
assert_len_cap_last!(stack, 9, 16, &90);
assert_eq!(stack.capacity_bytes(), 128);
// Values come back in reverse order of pushing, and capacity stays at 16
assert_eq!(stack.pop(), 90);
assert_len_cap_last!(stack, 8, 16, &80);
assert_eq!(stack.pop(), 80);
assert_len_cap_last!(stack, 7, 16, &70);
assert_eq!(stack.pop(), 70);
assert_len_cap_last!(stack, 6, 16, &60);
assert_eq!(stack.pop(), 60);
assert_len_cap_last!(stack, 5, 16, &50);
assert_eq!(stack.pop(), 50);
assert_len_cap_last!(stack, 4, 16, &40);
assert_eq!(stack.pop(), 40);
assert_len_cap_last!(stack, 3, 16, &30);
assert_eq!(stack.pop(), 30);
assert_len_cap_last!(stack, 2, 16, &20);
assert_eq!(stack.pop(), 20);
assert_len_cap_last!(stack, 1, 16, &10);
assert_eq!(stack.capacity_bytes(), 128);
}
// Interleave pushes and pops, including across a growth of the stack
#[test]
fn push_and_pop_mixed() {
let mut stack = NonEmptyStack::new(10u64);
assert_len_cap_last!(stack, 1, 4, &10);
assert_eq!(stack.capacity_bytes(), 32);
stack.push(20);
assert_len_cap_last!(stack, 2, 4, &20);
stack.push(30);
assert_len_cap_last!(stack, 3, 4, &30);
assert_eq!(stack.pop(), 30);
assert_len_cap_last!(stack, 2, 4, &20);
// Push into the slot which was just vacated by `pop`
stack.push(31);
assert_len_cap_last!(stack, 3, 4, &31);
stack.push(40);
assert_len_cap_last!(stack, 4, 4, &40);
// Stack is full to capacity, so this push grows it
stack.push(50);
assert_len_cap_last!(stack, 5, 8, &50);
assert_eq!(stack.pop(), 50);
assert_len_cap_last!(stack, 4, 8, &40);
assert_eq!(stack.pop(), 40);
assert_len_cap_last!(stack, 3, 8, &31);
assert_eq!(stack.pop(), 31);
assert_len_cap_last!(stack, 2, 8, &20);
stack.push(32);
assert_len_cap_last!(stack, 3, 8, &32);
assert_eq!(stack.pop(), 32);
assert_len_cap_last!(stack, 2, 8, &20);
assert_eq!(stack.pop(), 20);
assert_len_cap_last!(stack, 1, 8, &10);
}
// Popping the initial entry from a fresh stack panics
#[test]
#[should_panic(expected = "Cannot pop all entries")]
fn pop_panic() {
let mut stack = NonEmptyStack::new(10u64);
stack.pop();
}
// Popping back down to the initial entry is fine, but popping that entry too panics
#[test]
#[should_panic(expected = "Cannot pop all entries")]
fn pop_panic2() {
let mut stack = NonEmptyStack::new(10u64);
stack.push(20);
stack.push(30);
stack.pop();
stack.pop();
stack.pop();
}
// `last_mut` allows overwriting the last entry, including the initial entry
#[test]
fn last_mut() {
let mut stack = NonEmptyStack::new(10u64);
assert_len_cap_last!(stack, 1, 4, &10);
*stack.last_mut() = 11;
assert_len_cap_last!(stack, 1, 4, &11);
*stack.last_mut() = 12;
assert_len_cap_last!(stack, 1, 4, &12);
stack.push(20);
assert_len_cap_last!(stack, 2, 4, &20);
*stack.last_mut() = 21;
assert_len_cap_last!(stack, 2, 4, &21);
*stack.last_mut() = 22;
assert_len_cap_last!(stack, 2, 4, &22);
}
// Every value is dropped exactly once: popped values when the caller drops them,
// and values still on the stack (in order, first to last) when the stack is dropped
#[test]
#[expect(clippy::items_after_statements)]
fn drop() {
use std::sync::{Mutex, OnceLock};
// Log of the values whose destructors have run
static DROPS: OnceLock<Mutex<Vec<u32>>> = OnceLock::new();
DROPS.get_or_init(|| Mutex::new(vec![]));
// Take the log of dropped values, leaving the log empty
fn drops() -> Vec<u32> {
std::mem::take(DROPS.get().unwrap().lock().unwrap().as_mut())
}
// Type which records its value in `DROPS` when dropped
#[derive(PartialEq, Debug)]
struct Droppy(u32);
impl Drop for Droppy {
fn drop(&mut self) {
DROPS.get().unwrap().lock().unwrap().push(self.0);
}
}
{
let mut stack = NonEmptyStack::new(Droppy(10));
stack.push(Droppy(20));
stack.push(Droppy(30));
assert_eq!(stack.len(), 3);
assert_eq!(stack.capacity(), 4);
// Return value of `pop` is discarded, so the popped value is dropped immediately
stack.pop();
assert_eq!(drops(), &[30]);
assert!(drops().is_empty());
stack.push(Droppy(31));
stack.push(Droppy(40));
// This push grows the stack. Growing must not drop any of the entries.
stack.push(Droppy(50));
assert_eq!(stack.len(), 5);
assert_eq!(stack.capacity(), 8);
assert!(drops().is_empty());
}
// Stack went out of scope, so all entries which were still on it have been dropped
assert_eq!(drops(), &[10, 20, 31, 40, 50]);
}
}

View file

@ -1,3 +1,5 @@
use super::NonEmptyStack;
/// Stack which is sparsely filled.
///
/// Functionally equivalent to a stack implemented as `Vec<Option<T>>`, but more memory-efficient
@ -22,7 +24,7 @@
/// When the stack grows and reallocates, `SparseStack` has less memory to copy, which is a performance
/// win too.
pub struct SparseStack<T> {
has_values: Vec<bool>,
has_values: NonEmptyStack<bool>,
values: Vec<T>,
}
@ -32,7 +34,7 @@ impl<T> SparseStack<T> {
// `has_values` starts with a single empty entry, which will never be popped off.
// This means `take_last`, `last_or_init`, and `last_mut_or_init` can all be infallible,
// as there's always an entry on the stack to read.
Self { has_values: vec![false], values: vec![] }
Self { has_values: NonEmptyStack::new(false), values: vec![] }
}
/// Push an entry to the stack.
@ -53,25 +55,7 @@ impl<T> SparseStack<T> {
/// Panics if the stack has only 1 entry on it.
#[inline]
pub fn pop(&mut self) -> Option<T> {
// SAFETY: `self.has_values` starts with 1 entry. Only `pop` removes entries from it.
// We check that popping an entry does not leave the stack empty before performing the pop.
// So `self.has_values` can never be left in an empty state.
//
// This would be equivalent:
// ```
// assert!(self.has_values.len() > 1);
// self.has_values.pop().unwrap()
// ```
// But checking `original_len > 1` is 1 more CPU op than decrementing length first,
// and then checking for `new_len > 0`. https://godbolt.org/z/eqx385E5K
let has_value = unsafe {
let new_len = self.has_values.len() - 1;
assert!(new_len > 0);
let has_value = *self.has_values.get_unchecked(new_len);
self.has_values.set_len(new_len);
has_value
};
let has_value = self.has_values.pop();
if has_value {
debug_assert!(!self.values.is_empty());
// SAFETY: Last `self.has_values` is only `true` if there's a corresponding value in `self.values`.
@ -88,10 +72,7 @@ impl<T> SparseStack<T> {
/// Get value of last entry on the stack.
#[inline]
pub fn last(&self) -> Option<&T> {
debug_assert!(!self.has_values.is_empty());
// SAFETY: `self.has_values` starts with 1 entry. Only `pop` removes entries from it,
// and it ensures `self.has_values` always has at least 1 entry.
let has_value = unsafe { *self.has_values.last().unwrap_unchecked() };
let has_value = *self.has_values.last();
if has_value {
debug_assert!(!self.values.is_empty());
// SAFETY: Last `self.has_values` is only `true` if there's a corresponding value in `self.values`.
@ -106,10 +87,7 @@ impl<T> SparseStack<T> {
/// Take value from last entry on the stack, leaving last entry empty.
#[inline]
pub fn take_last(&mut self) -> Option<T> {
debug_assert!(!self.has_values.is_empty());
// SAFETY: `self.has_values` starts with 1 entry. Only `pop` removes entries from it,
// and it ensures `self.has_values` always has at least 1 entry.
let has_value = unsafe { self.has_values.last_mut().unwrap_unchecked() };
let has_value = self.has_values.last_mut();
if *has_value {
*has_value = false;
@ -129,10 +107,7 @@ impl<T> SparseStack<T> {
/// Return reference to value.
#[inline]
pub fn last_or_init<I: FnOnce() -> T>(&mut self, init: I) -> &T {
debug_assert!(!self.has_values.is_empty());
// SAFETY: `self.has_values` starts with 1 entry. Only `pop` removes entries from it,
// and it ensures `self.has_values` always has at least 1 entry.
let has_value = unsafe { self.has_values.last_mut().unwrap_unchecked() };
let has_value = self.has_values.last_mut();
if !*has_value {
*has_value = true;
self.values.push(init());
@ -150,10 +125,7 @@ impl<T> SparseStack<T> {
/// Return mutable reference to value.
#[inline]
pub fn last_mut_or_init<I: FnOnce() -> T>(&mut self, init: I) -> &mut T {
debug_assert!(!self.has_values.is_empty());
// SAFETY: `self.has_values` starts with 1 entry. Only `pop` removes entries from it,
// and it ensures `self.has_values` always has at least 1 entry.
let has_value = unsafe { self.has_values.last_mut().unwrap_unchecked() };
let has_value = self.has_values.last_mut();
if !*has_value {
*has_value = true;
self.values.push(init());