Skip to content

Commit

Permalink
bpf_loader: use an explicit thread-local pool for stack and heap memo…
Browse files Browse the repository at this point in the history
…ry (anza-xyz#1370)

* Rename ComputeBudget::max_invoke_stack_height to max_instruction_stack_depth

The new name is consistent with the existing
ComputeBudget::max_instruction_trace_length.

Also expose compute_budget::MAX_INSTRUCTION_STACK_DEPTH.

* bpf_loader: use an explicit thread-local pool for stack and heap memory

Use a fixed thread-local pool to hold stack and heap memory. This
mitigates the long-standing issue of jemalloc causing TLB shootdowns to
serve such frequent large allocations.

Because we need 1 stack and 1 heap region per instruction, and the
current max instruction nesting is hardcoded to 5, the pre-allocated
size is (MAX_STACK + MAX_HEAP) * 5 * NUM_THREADS. With the current
limits that's about 2.5MB per thread. Note that this is memory that
would eventually get allocated anyway; we're just pre-allocating it now.

* programs/sbf: add test for stack/heap zeroing

Add TEST_STACK_HEAP_ZEROED which tests that stack and heap regions are
zeroed across reuse from the memory pool.
  • Loading branch information
alessandrod authored and samkim-crypto committed Jul 31, 2024
1 parent 63d5e9a commit a78c562
Show file tree
Hide file tree
Showing 15 changed files with 358 additions and 83 deletions.
22 changes: 16 additions & 6 deletions compute-budget/src/compute_budget.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,16 @@ impl ::solana_frozen_abi::abi_example::AbiExample for ComputeBudget {
}
}

/// Max instruction stack depth. This is the maximum nesting of instructions that can happen during
/// a transaction.
pub const MAX_INSTRUCTION_STACK_DEPTH: usize = 5;

/// Max call depth. This is the maximum nesting of SBF-to-SBF calls that can happen within a
/// program.
pub const MAX_CALL_DEPTH: usize = 64;

/// The size of one SBF stack frame.
pub const STACK_FRAME_SIZE: usize = 4096;

#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct ComputeBudget {
/// Number of compute units that a transaction or individual instruction is
Expand All @@ -26,11 +36,11 @@ pub struct ComputeBudget {
/// Number of compute units consumed by an invoke call (not including the cost incurred by
/// the called program)
pub invoke_units: u64,
/// Maximum program instruction invocation stack height. Invocation stack
/// height starts at 1 for transaction instructions and the stack height is
/// Maximum program instruction invocation stack depth. Invocation stack
/// depth starts at 1 for transaction instructions and the stack depth is
/// incremented each time a program invokes an instruction and decremented
/// when a program returns.
pub max_invoke_stack_height: usize,
pub max_instruction_stack_depth: usize,
/// Maximum cross-program invocation and instructions per transaction
pub max_instruction_trace_length: usize,
/// Base number of compute units consumed to call SHA256
Expand Down Expand Up @@ -133,13 +143,13 @@ impl ComputeBudget {
log_64_units: 100,
create_program_address_units: 1500,
invoke_units: 1000,
max_invoke_stack_height: 5,
max_instruction_stack_depth: MAX_INSTRUCTION_STACK_DEPTH,
max_instruction_trace_length: 64,
sha256_base_cost: 85,
sha256_byte_cost: 1,
sha256_max_slices: 20_000,
max_call_depth: 64,
stack_frame_size: 4_096,
max_call_depth: MAX_CALL_DEPTH,
stack_frame_size: STACK_FRAME_SIZE,
log_pubkey_units: 100,
max_cpi_instruction_size: 1280, // IPv6 Min MTU size
cpi_bytes_per_unit: 250, // ~50MB at 200,000 units
Expand Down
14 changes: 7 additions & 7 deletions compute-budget/src/compute_budget_processor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,21 @@ use {
solana_sdk::{
borsh1::try_from_slice_unchecked,
compute_budget::{self, ComputeBudgetInstruction},
entrypoint::HEAP_LENGTH as MIN_HEAP_FRAME_BYTES,
entrypoint::HEAP_LENGTH,
fee::FeeBudgetLimits,
instruction::{CompiledInstruction, InstructionError},
pubkey::Pubkey,
transaction::TransactionError,
},
};

const MAX_HEAP_FRAME_BYTES: u32 = 256 * 1024;
/// Roughly 0.5us/page, where page is 32K; given roughly 15CU/us, the
/// default heap page cost = 0.5 * 15 ~= 8CU/page
pub const DEFAULT_HEAP_COST: u64 = 8;
pub const DEFAULT_INSTRUCTION_COMPUTE_UNIT_LIMIT: u32 = 200_000;
pub const MAX_COMPUTE_UNIT_LIMIT: u32 = 1_400_000;
pub const MAX_HEAP_FRAME_BYTES: u32 = 256 * 1024;
pub const MIN_HEAP_FRAME_BYTES: u32 = HEAP_LENGTH as u32;

/// The total accounts data a transaction can load is limited to 64MiB to not break
/// anyone in Mainnet-beta today. It can be set by set_loaded_accounts_data_size_limit instruction
Expand All @@ -33,7 +34,7 @@ pub struct ComputeBudgetLimits {
impl Default for ComputeBudgetLimits {
fn default() -> Self {
ComputeBudgetLimits {
updated_heap_bytes: u32::try_from(MIN_HEAP_FRAME_BYTES).unwrap(),
updated_heap_bytes: MIN_HEAP_FRAME_BYTES,
compute_unit_limit: MAX_COMPUTE_UNIT_LIMIT,
compute_unit_price: 0,
loaded_accounts_bytes: MAX_LOADED_ACCOUNTS_DATA_SIZE_BYTES,
Expand Down Expand Up @@ -122,7 +123,7 @@ pub fn process_compute_budget_instructions<'a>(

// sanitize limits
let updated_heap_bytes = requested_heap_size
.unwrap_or(u32::try_from(MIN_HEAP_FRAME_BYTES).unwrap()) // loader's default heap_size
.unwrap_or(MIN_HEAP_FRAME_BYTES) // loader's default heap_size
.min(MAX_HEAP_FRAME_BYTES);

let compute_unit_limit = updated_compute_unit_limit
Expand All @@ -147,8 +148,7 @@ pub fn process_compute_budget_instructions<'a>(
}

fn sanitize_requested_heap_size(bytes: u32) -> bool {
(u32::try_from(MIN_HEAP_FRAME_BYTES).unwrap()..=MAX_HEAP_FRAME_BYTES).contains(&bytes)
&& bytes % 1024 == 0
(MIN_HEAP_FRAME_BYTES..=MAX_HEAP_FRAME_BYTES).contains(&bytes) && bytes % 1024 == 0
}

#[cfg(test)]
Expand Down Expand Up @@ -377,7 +377,7 @@ mod tests {
test!(
&[
Instruction::new_with_bincode(Pubkey::new_unique(), &0_u8, vec![]),
ComputeBudgetInstruction::request_heap_frame(MIN_HEAP_FRAME_BYTES as u32),
ComputeBudgetInstruction::request_heap_frame(MIN_HEAP_FRAME_BYTES),
ComputeBudgetInstruction::request_heap_frame(MAX_HEAP_FRAME_BYTES),
],
Err(TransactionError::DuplicateInstruction(2))
Expand Down
2 changes: 1 addition & 1 deletion ledger-tool/src/program.rs
Original file line number Diff line number Diff line change
Expand Up @@ -557,7 +557,7 @@ pub fn program(ledger_path: &Path, matches: &ArgMatches<'_>) {
account_lengths,
&mut invoke_context,
);
let mut vm = vm.unwrap();
let (mut vm, _, _) = vm.unwrap();
let start_time = Instant::now();
if matches.value_of("mode").unwrap() == "debugger" {
vm.debug_port = Some(matches.value_of("port").unwrap().parse::<u16>().unwrap());
Expand Down
4 changes: 2 additions & 2 deletions program-runtime/src/invoke_context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -693,7 +693,7 @@ macro_rules! with_mock_invoke_context {
let mut $transaction_context = TransactionContext::new(
$transaction_accounts,
Rent::default(),
compute_budget.max_invoke_stack_height,
compute_budget.max_instruction_stack_depth,
compute_budget.max_instruction_trace_length,
);
let mut sysvar_cache = SysvarCache::default();
Expand Down Expand Up @@ -940,7 +940,7 @@ mod tests {
#[test]
fn test_instruction_stack_height() {
let one_more_than_max_depth = ComputeBudget::default()
.max_invoke_stack_height
.max_instruction_stack_depth
.saturating_add(1);
let mut invoke_stack = vec![];
let mut transaction_accounts = vec![];
Expand Down
1 change: 1 addition & 0 deletions program-runtime/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ pub use solana_rbpf;
pub mod invoke_context;
pub mod loaded_programs;
pub mod log_collector;
pub mod mem_pool;
pub mod stable_log;
pub mod sysvar_cache;
pub mod timings;
146 changes: 146 additions & 0 deletions program-runtime/src/mem_pool.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
use {
solana_compute_budget::{
compute_budget::{MAX_CALL_DEPTH, MAX_INSTRUCTION_STACK_DEPTH, STACK_FRAME_SIZE},
compute_budget_processor::{MAX_HEAP_FRAME_BYTES, MIN_HEAP_FRAME_BYTES},
},
solana_rbpf::{aligned_memory::AlignedMemory, ebpf::HOST_ALIGN},
std::array,
};

/// Implemented by values that can be scrubbed before being handed out again.
trait Reset {
    /// Clears whatever state the previous user left behind.
    fn reset(&mut self);
}

/// A fixed-capacity, last-in-first-out pool of `SIZE` reusable items.
///
/// Slots below `next_empty` hold an item; slots at or above it are vacant.
struct Pool<T: Reset, const SIZE: usize> {
    items: [Option<T>; SIZE],
    next_empty: usize,
}

impl<T: Reset, const SIZE: usize> Pool<T, SIZE> {
    /// Builds a pool that starts out full, owning every element of `items`.
    fn new(items: [T; SIZE]) -> Self {
        Self {
            items: items.map(Some),
            next_empty: SIZE,
        }
    }

    /// Returns the capacity of the pool, not the number of items currently available.
    fn len(&self) -> usize {
        SIZE
    }

    /// Takes the most recently stored item, or returns `None` when the pool is empty.
    fn get(&mut self) -> Option<T> {
        // `checked_sub` yields `None` exactly when there is nothing left to hand out.
        let top = self.next_empty.checked_sub(1)?;
        self.next_empty = top;
        self.items.get_mut(top)?.take()
    }

    /// Resets `value` and stores it in the next vacant slot.
    ///
    /// Returns `false` (dropping `value` without resetting it) when the pool is already full.
    fn put(&mut self, mut value: T) -> bool {
        match self.items.get_mut(self.next_empty) {
            Some(slot) => {
                value.reset();
                *slot = Some(value);
                self.next_empty = self.next_empty.saturating_add(1);
                true
            }
            // `next_empty == SIZE`: no vacant slot left.
            None => false,
        }
    }
}

/// Resetting a memory region overwrites every byte exposed by `as_slice_mut` with zero, so a
/// region put back into a pool carries no data over to its next user.
impl Reset for AlignedMemory<{ HOST_ALIGN }> {
    fn reset(&mut self) {
        let bytes = self.as_slice_mut();
        bytes.fill(0);
    }
}

pub struct VmMemoryPool {
stack: Pool<AlignedMemory<{ HOST_ALIGN }>, MAX_INSTRUCTION_STACK_DEPTH>,
heap: Pool<AlignedMemory<{ HOST_ALIGN }>, MAX_INSTRUCTION_STACK_DEPTH>,
}

impl VmMemoryPool {
pub fn new() -> Self {
Self {
stack: Pool::new(array::from_fn(|_| {
AlignedMemory::zero_filled(STACK_FRAME_SIZE * MAX_CALL_DEPTH)
})),
heap: Pool::new(array::from_fn(|_| {
AlignedMemory::zero_filled(MAX_HEAP_FRAME_BYTES as usize)
})),
}
}

pub fn stack_len(&self) -> usize {
self.stack.len()
}

pub fn heap_len(&self) -> usize {
self.heap.len()
}

pub fn get_stack(&mut self, size: usize) -> AlignedMemory<{ HOST_ALIGN }> {
debug_assert!(size == STACK_FRAME_SIZE * MAX_CALL_DEPTH);
self.stack
.get()
.unwrap_or_else(|| AlignedMemory::zero_filled(size))
}

pub fn put_stack(&mut self, stack: AlignedMemory<{ HOST_ALIGN }>) -> bool {
self.stack.put(stack)
}

pub fn get_heap(&mut self, heap_size: u32) -> AlignedMemory<{ HOST_ALIGN }> {
debug_assert!((MIN_HEAP_FRAME_BYTES..=MAX_HEAP_FRAME_BYTES).contains(&heap_size));
self.heap
.get()
.unwrap_or_else(|| AlignedMemory::zero_filled(MAX_HEAP_FRAME_BYTES as usize))
}

pub fn put_heap(&mut self, heap: AlignedMemory<{ HOST_ALIGN }>) -> bool {
let heap_size = heap.len();
debug_assert!(
heap_size >= MIN_HEAP_FRAME_BYTES as usize
&& heap_size <= MAX_HEAP_FRAME_BYTES as usize
);
self.heap.put(heap)
}
}

impl Default for VmMemoryPool {
fn default() -> Self {
Self::new()
}
}

#[cfg(test)]
mod test {
    use super::*;

    /// Pool element for tests: `.0` identifies the item, `.1` is the state cleared by `reset`.
    #[derive(Debug, Eq, PartialEq)]
    struct Item(u8, u8);

    impl Reset for Item {
        fn reset(&mut self) {
            let Item(_, state) = self;
            *state = 0;
        }
    }

    #[test]
    fn test_pool() {
        let mut pool: Pool<Item, 2> = Pool::new([Item(0, 1), Item(1, 1)]);

        // The initial items come out last-in-first-out and untouched, then the pool is empty.
        for expected in [Some(Item(1, 1)), Some(Item(0, 1)), None] {
            assert_eq!(pool.get(), expected);
        }

        // An item that is put back gets reset before it is handed out again.
        pool.put(Item(1, 1));
        assert_eq!(pool.get(), Some(Item(1, 0)));

        // Fill the pool to capacity; one more put is rejected.
        pool.put(Item(2, 2));
        pool.put(Item(3, 3));
        assert!(!pool.put(Item(4, 4)));

        // Only the accepted items come back, reset and in LIFO order.
        for expected in [Some(Item(3, 0)), Some(Item(2, 0)), None] {
            assert_eq!(pool.get(), expected);
        }
    }
}
Loading

0 comments on commit a78c562

Please sign in to comment.