Skip to content

Implement efficient large object allocation sizing #205

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jun 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
121 changes: 119 additions & 2 deletions src/eval/memory/heap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,8 @@ pub struct HeapState {
rest: LinkedList<BumpBlock>,
/// Large object blocks - each contains single object
lobs: Vec<LargeObjectBlock>,
/// Recycled large object blocks available for reuse
recycled_lobs: Vec<LargeObjectBlock>,
}

impl Default for HeapState {
Expand Down Expand Up @@ -356,6 +358,7 @@ impl HeapState {
recycled: LinkedList::default(),
rest: LinkedList::default(),
lobs: vec![],
recycled_lobs: vec![],
}
}

Expand Down Expand Up @@ -463,12 +466,55 @@ impl HeapState {
}

/// Create and return a new large object block able to store data
/// of the specified size. Tries to reuse recycled blocks first and
/// only allocates fresh memory when nothing suitable is pooled.
pub fn lob(&mut self, size: usize) -> &mut LargeObjectBlock {
    // Prefer the best-fitting recycled block; fall back to a fresh
    // allocation only when the recycled pool has nothing that fits.
    let block = self
        .find_suitable_recycled_lob(size)
        .unwrap_or_else(|| LargeObjectBlock::new(size));
    self.lobs.push(block);
    // The push above guarantees the list is non-empty, so `last_mut`
    // cannot fail.
    self.lobs.last_mut().unwrap()
}

/// Find and remove a suitable recycled large object block for the given size.
/// Returns the block if found, or None if no suitable block exists.
///
/// "Best fit" is the smallest block that can hold the request: for a
/// fixed request, waste grows monotonically with block size, so the
/// smallest fitting block is also the least wasteful. Comparing the
/// integer sizes directly avoids the floating-point arithmetic (and
/// `f64::MAX` sentinel) a percentage-based comparison would need.
fn find_suitable_recycled_lob(&mut self, required_size: usize) -> Option<LargeObjectBlock> {
    // Locate the index of the smallest block that fits.
    let best_index = self
        .recycled_lobs
        .iter()
        .enumerate()
        .filter(|(_, lob)| lob.can_fit(required_size))
        .min_by_key(|(_, lob)| lob.allocated_size())
        .map(|(index, _)| index);

    // Detach the winner from the recycled pool and hand it over.
    best_index.map(|index| self.recycled_lobs.remove(index))
}

/// Recycle a large object block for future reuse.
/// This would typically be called during garbage collection when a
/// large object is freed.
pub fn recycle_lob(&mut self, lob: LargeObjectBlock) {
    // Simple cap to prevent unbounded growth of the recycled pool.
    const MAX_RECYCLED_LOBS: usize = 16;

    // When the pool is already full, drop the block here so its memory
    // goes straight back to the allocator.
    if self.recycled_lobs.len() >= MAX_RECYCLED_LOBS {
        return;
    }
    self.recycled_lobs.push(lob);
}

/// Look for reclaimable blocks and move to recycled list
pub fn sweep(&mut self) {
let mut unusable: LinkedList<BumpBlock> = LinkedList::default();
Expand Down Expand Up @@ -2600,4 +2646,75 @@ pub mod tests {
let _block = state.replace_head_targeted(1024);
assert!(state.head.is_some());
}

// End-to-end check that a freed LOB can be recycled and then reused by
// a subsequent allocation of a size the recycled block can hold.
#[test]
fn test_large_object_recycling() {
    let heap = Heap::new();
    // NOTE(review): forms a `&mut` into the heap's interior state cell;
    // assumed sound because this test is single-threaded — confirm.
    let state = unsafe { &mut *heap.state.get() };

    // Initially no LOBs
    assert_eq!(state.lobs.len(), 0);
    assert_eq!(state.recycled_lobs.len(), 0);

    // Allocate a large object
    let _lob1 = state.lob(100 * 1024);
    assert_eq!(state.lobs.len(), 1);

    // Simulate recycling (normally done during GC)
    let recycled_lob = state.lobs.pop().unwrap();
    state.recycle_lob(recycled_lob);
    assert_eq!(state.recycled_lobs.len(), 1);

    // Allocate another large object of similar size - should reuse recycled block
    // (the 100KB block was rounded up, so it can hold a 90KB request)
    let _lob2 = state.lob(90 * 1024);
    assert_eq!(state.lobs.len(), 1);
    assert_eq!(state.recycled_lobs.len(), 0); // Should have consumed recycled block
}

// Checks that when several recycled blocks fit, the allocator picks the
// one with the least waste (the smallest fitting block).
#[test]
fn test_large_object_best_fit_recycling() {
    let heap = Heap::new();
    // NOTE(review): forms a `&mut` into the heap's interior state cell;
    // assumed sound because this test is single-threaded — confirm.
    let state = unsafe { &mut *heap.state.get() };

    // Create and recycle blocks of different sizes
    let small_lob = LargeObjectBlock::new(50 * 1024); // ~64KB allocation
    let medium_lob = LargeObjectBlock::new(100 * 1024); // ~112KB allocation
    let large_lob = LargeObjectBlock::new(200 * 1024); // ~256KB allocation

    // Recycle deliberately out of size order to exercise the search.
    state.recycle_lob(large_lob);
    state.recycle_lob(small_lob);
    state.recycle_lob(medium_lob);
    assert_eq!(state.recycled_lobs.len(), 3);

    // Request 90KB - should get the medium block (best fit)
    let _allocated = state.lob(90 * 1024);
    assert_eq!(state.recycled_lobs.len(), 2); // Medium block should be consumed

    // Verify the remaining blocks are small and large
    let remaining_sizes: Vec<_> = state
        .recycled_lobs
        .iter()
        .map(|lob| lob.allocated_size())
        .collect();

    // Should have kept the small and large blocks
    assert!(remaining_sizes.len() == 2);
    assert!(remaining_sizes.contains(&(64 * 1024))); // Small block
    assert!(remaining_sizes.contains(&(256 * 1024))); // Large block
}

// Checks that the recycled pool is bounded: blocks past the cap are
// dropped rather than retained.
#[test]
fn test_large_object_recycling_limit() {
    let heap = Heap::new();
    // NOTE(review): forms a `&mut` into the heap's interior state cell;
    // assumed sound because this test is single-threaded — confirm.
    let state = unsafe { &mut *heap.state.get() };

    // Try to recycle more than the limit (16)
    for _ in 0..20 {
        let lob = LargeObjectBlock::new(50 * 1024);
        state.recycle_lob(lob);
    }

    // Should be capped at the limit
    // (16 mirrors MAX_RECYCLED_LOBS in `recycle_lob`.)
    assert_eq!(state.recycled_lobs.len(), 16);
}
}
216 changes: 206 additions & 10 deletions src/eval/memory/lob.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,33 +2,229 @@
//!
//! A memory region that contains a single object and header

use std::alloc::{alloc, dealloc, Layout};
use std::process::abort;

use super::block::Block;
use std::ptr::NonNull;

/// A memory allocation containing a single large object with its
/// header, this differs from Block in that it needn't be a power of
/// two.
#[derive(Debug)]
pub struct LargeObjectBlock {
    /// Pointer to memory
    ptr: NonNull<u8>,
    /// Size of allocation
    size: usize,
}

impl LargeObjectBlock {
    /// Create a new LargeObjectBlock of size sufficient to contain
    /// `required_size` bytes. Uses efficient tiered sizing to minimize
    /// waste while maintaining reasonable allocation granularity. The
    /// size of any object headers is assumed to be already included.
    ///
    /// Aborts the process if the system allocator cannot satisfy the
    /// request — an out-of-memory heap cannot recover here.
    pub fn new(required_size: usize) -> Self {
        let size = Self::efficient_size_for(required_size);
        LargeObjectBlock {
            ptr: Self::alloc_block(size).unwrap_or_else(|_| abort()),
            size,
        }
    }

    /// Allocate a block directly from the system allocator.
    ///
    /// Returns `Err(())` if the layout is invalid or the allocator
    /// returns null.
    fn alloc_block(size: usize) -> Result<NonNull<u8>, ()> {
        unsafe {
            // Use page alignment for better performance (capped at 4KB,
            // floored at 8 bytes).
            let align = std::cmp::max(size.next_power_of_two().min(4096), 8);
            let layout = Layout::from_size_align(size, align).map_err(|_| ())?;
            // SAFETY: `efficient_size_for` never returns zero, so the
            // layout has the non-zero size that `alloc` requires.
            let ptr = alloc(layout);
            if ptr.is_null() {
                Err(())
            } else {
                if cfg!(debug_assertions) {
                    // Fill memory with 0xff to aid debugging
                    let mem = std::slice::from_raw_parts_mut(ptr, size);
                    mem.fill(0xff);
                }
                Ok(NonNull::new_unchecked(ptr))
            }
        }
    }

    /// Calculate efficient allocation size that minimizes waste while
    /// maintaining reasonable granularity for the underlying allocator.
    ///
    /// Uses a tiered approach:
    /// - Up to 128KB: round up to the next 16KB boundary
    /// - Up to 1MB: round up to the next 64KB boundary
    /// - Above 1MB: round up to the next 256KB boundary
    ///
    /// The result is always at least one 16KB granule, so even a
    /// zero-byte request yields a valid (non-zero) allocation size.
    fn efficient_size_for(required_size: usize) -> usize {
        const KB: usize = 1024;
        const MB: usize = 1024 * KB;

        // Clamp to 1 so a zero request rounds up to a whole granule
        // instead of producing a zero-sized layout, which is undefined
        // behaviour when passed to `alloc`.
        let required_size = required_size.max(1);

        if required_size <= 128 * KB {
            // Round up to next 16KB boundary
            required_size.div_ceil(16 * KB) * (16 * KB)
        } else if required_size <= MB {
            // Round up to next 64KB boundary
            required_size.div_ceil(64 * KB) * (64 * KB)
        } else {
            // Round up to next 256KB boundary
            required_size.div_ceil(256 * KB) * (256 * KB)
        }
    }

    /// Pointer to the writeable memory area
    pub fn space(&self) -> *const u8 {
        self.ptr.as_ptr()
    }

    /// Get the actual allocated size of this large object block
    pub fn allocated_size(&self) -> usize {
        self.size
    }

    /// Check if this block can accommodate the requested size
    pub fn can_fit(&self, required_size: usize) -> bool {
        self.size >= required_size
    }

    /// Calculate waste percentage for a given required size.
    ///
    /// A zero-byte request is defined as 100% waste.
    pub fn waste_percentage(&self, required_size: usize) -> f64 {
        if required_size == 0 {
            100.0
        } else {
            // `saturating_sub` guards against a request larger than the
            // block, which would otherwise underflow.
            let waste = self.size.saturating_sub(required_size);
            (waste as f64 / self.size as f64) * 100.0
        }
    }
}

impl Drop for LargeObjectBlock {
    // Returns the block's memory to the system allocator.
    fn drop(&mut self) {
        // SAFETY: `ptr` was returned by `alloc_block` for exactly
        // `self.size` bytes with the identical alignment formula below,
        // so this layout matches the one used at allocation time and the
        // pointer has not been freed elsewhere.
        // NOTE(review): keep this `align` computation byte-for-byte in
        // sync with `alloc_block` — a mismatch is undefined behaviour.
        unsafe {
            let align = std::cmp::max(self.size.next_power_of_two().min(4096), 8);
            let layout = Layout::from_size_align_unchecked(self.size, align);
            dealloc(self.ptr.as_ptr(), layout);
        }
    }
}

// Unit tests for `LargeObjectBlock`: tiered sizing, fit checks and
// waste metrics.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_efficient_size_for_small_range() {
        // Test 16KB boundaries for sizes up to 128KB

        // Exact boundaries should not change
        assert_eq!(LargeObjectBlock::efficient_size_for(16 * 1024), 16 * 1024);
        assert_eq!(LargeObjectBlock::efficient_size_for(32 * 1024), 32 * 1024);
        assert_eq!(LargeObjectBlock::efficient_size_for(128 * 1024), 128 * 1024);

        // Values just above boundaries should round up
        assert_eq!(LargeObjectBlock::efficient_size_for(33 * 1024), 48 * 1024);
        assert_eq!(LargeObjectBlock::efficient_size_for(50 * 1024), 64 * 1024);
        assert_eq!(LargeObjectBlock::efficient_size_for(65 * 1024), 80 * 1024);
    }

    #[test]
    fn test_efficient_size_for_medium_range() {
        // Test 64KB boundaries for 128KB < size <= 1MB

        // Exact boundaries
        assert_eq!(LargeObjectBlock::efficient_size_for(192 * 1024), 192 * 1024);
        assert_eq!(LargeObjectBlock::efficient_size_for(256 * 1024), 256 * 1024);
        assert_eq!(
            LargeObjectBlock::efficient_size_for(1024 * 1024),
            1024 * 1024
        );

        // Round up cases
        assert_eq!(LargeObjectBlock::efficient_size_for(129 * 1024), 192 * 1024);
        assert_eq!(LargeObjectBlock::efficient_size_for(200 * 1024), 256 * 1024);
        assert_eq!(LargeObjectBlock::efficient_size_for(900 * 1024), 960 * 1024);
        // 15 * 64KB
    }

    #[test]
    fn test_efficient_size_for_large_range() {
        // Test 256KB boundaries for size > 1MB

        // Exact boundaries
        assert_eq!(
            LargeObjectBlock::efficient_size_for(1280 * 1024),
            1280 * 1024
        );
        assert_eq!(
            LargeObjectBlock::efficient_size_for(2048 * 1024),
            2048 * 1024
        );

        // Round up cases
        assert_eq!(
            LargeObjectBlock::efficient_size_for(1025 * 1024),
            1280 * 1024
        );
        assert_eq!(
            LargeObjectBlock::efficient_size_for(1500 * 1024),
            1536 * 1024
        );
    }

    #[test]
    fn test_waste_percentage_calculation() {
        let lob = LargeObjectBlock::new(50 * 1024); // Should allocate 64KB

        // Perfect fit should have some waste due to rounding
        let waste = lob.waste_percentage(50 * 1024);
        assert!(waste > 0.0);
        assert!(waste < 30.0); // Should be reasonable

        // Very small allocation in large block should have high waste
        let high_waste = lob.waste_percentage(1024);
        assert!(high_waste > 90.0);

        // Zero size should be 100% waste
        assert_eq!(lob.waste_percentage(0), 100.0);
    }

    #[test]
    fn test_can_fit() {
        let lob = LargeObjectBlock::new(50 * 1024); // Allocates 64KB

        assert!(lob.can_fit(32 * 1024));
        assert!(lob.can_fit(50 * 1024));
        assert!(lob.can_fit(64 * 1024));
        assert!(!lob.can_fit(65 * 1024));
        assert!(!lob.can_fit(100 * 1024));
    }

    #[test]
    fn test_size_efficiency_vs_power_of_two() {
        // Compare our efficient sizing vs power-of-two for various sizes
        let test_sizes = [
            33 * 1024,  // 33KB
            50 * 1024,  // 50KB
            100 * 1024, // 100KB
            200 * 1024, // 200KB
            500 * 1024, // 500KB
        ];

        for &size in &test_sizes {
            let efficient = LargeObjectBlock::efficient_size_for(size);
            let power_of_two = size.next_power_of_two();

            // Our efficient sizing should always be <= power of two
            // NOTE(review): holds for these sizes; for requests well below
            // 16KB the tiered scheme can exceed the power-of-two size.
            assert!(efficient <= power_of_two);

            // For most sizes, we should be significantly more efficient
            if size > 32 * 1024 {
                let efficient_waste = (efficient - size) as f64 / efficient as f64;
                let power_waste = (power_of_two - size) as f64 / power_of_two as f64;

                // Our algorithm should generally produce less waste
                assert!(efficient_waste <= power_waste);
            }
        }
    }
}