Skip to content

Commit cd23c89

Browse files
Improve state cache eviction and reduce mem usage (#4762)
* Improve state cache eviction and reduce mem usage
* Fix epochs_per_state_diff tests
1 parent 1b4bc88 commit cd23c89

File tree

4 files changed

+107
-11
lines changed

4 files changed

+107
-11
lines changed

beacon_node/store/src/config.rs

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,11 @@ use serde_derive::{Deserialize, Serialize};
44
use ssz::{Decode, Encode};
55
use ssz_derive::{Decode, Encode};
66
use std::io::Write;
7+
use types::{EthSpec, Unsigned};
78
use zstd::Encoder;
89

9-
pub const DEFAULT_EPOCHS_PER_STATE_DIFF: u64 = 16;
10+
// Only used in tests. Mainnet sets a higher default on the CLI.
11+
pub const DEFAULT_EPOCHS_PER_STATE_DIFF: u64 = 8;
1012
pub const DEFAULT_BLOCK_CACHE_SIZE: usize = 64;
1113
pub const DEFAULT_STATE_CACHE_SIZE: usize = 128;
1214
pub const DEFAULT_COMPRESSION_LEVEL: i32 = 1;
@@ -64,6 +66,10 @@ pub enum StoreConfigError {
6466
config: OnDiskStoreConfig,
6567
on_disk: OnDiskStoreConfig,
6668
},
69+
InvalidEpochsPerStateDiff {
70+
epochs_per_state_diff: u64,
71+
max_supported: u64,
72+
},
6773
}
6874

6975
impl Default for StoreConfig {
@@ -107,8 +113,14 @@ impl StoreConfig {
107113
Ok(())
108114
}
109115

116+
/// Check that the configuration is valid.
117+
pub fn verify<E: EthSpec>(&self) -> Result<(), StoreConfigError> {
118+
self.verify_compression_level()?;
119+
self.verify_epochs_per_state_diff::<E>()
120+
}
121+
110122
/// Check that the compression level is valid.
111-
pub fn verify_compression_level(&self) -> Result<(), StoreConfigError> {
123+
fn verify_compression_level(&self) -> Result<(), StoreConfigError> {
112124
if zstd::compression_level_range().contains(&self.compression_level) {
113125
Ok(())
114126
} else {
@@ -118,6 +130,21 @@ impl StoreConfig {
118130
}
119131
}
120132

133+
/// Check that `epochs_per_state_diff` does not exceed `SlotsPerHistoricalRoot / slots_per_epoch`.
134+
pub fn verify_epochs_per_state_diff<E: EthSpec>(&self) -> Result<(), StoreConfigError> {
135+
// To build state diffs we need to be able to determine the previous state root from the
136+
// state itself, which requires reading back in the state_roots array.
137+
let max_supported = E::SlotsPerHistoricalRoot::to_u64() / E::slots_per_epoch();
138+
if self.epochs_per_state_diff <= max_supported {
139+
Ok(())
140+
} else {
141+
Err(StoreConfigError::InvalidEpochsPerStateDiff {
142+
epochs_per_state_diff: self.epochs_per_state_diff,
143+
max_supported,
144+
})
145+
}
146+
}
147+
121148
/// Estimate the size of `len` bytes after compression at the current compression level.
122149
pub fn estimate_compressed_size(&self, len: usize) -> usize {
123150
if self.compression_level == 0 {

beacon_node/store/src/hot_cold_store.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ impl<E: EthSpec> HotColdDB<E, MemoryStore<E>, MemoryStore<E>> {
142142
spec: ChainSpec,
143143
log: Logger,
144144
) -> Result<HotColdDB<E, MemoryStore<E>, MemoryStore<E>>, Error> {
145-
config.verify_compression_level()?;
145+
config.verify::<E>()?;
146146

147147
let hierarchy = config.hierarchy_config.to_moduli()?;
148148

@@ -189,7 +189,7 @@ impl<E: EthSpec> HotColdDB<E, LevelDB<E>, LevelDB<E>> {
189189
spec: ChainSpec,
190190
log: Logger,
191191
) -> Result<Arc<Self>, Error> {
192-
config.verify_compression_level()?;
192+
config.verify::<E>()?;
193193

194194
let hierarchy = config.hierarchy_config.to_moduli()?;
195195

beacon_node/store/src/state_cache.rs

Lines changed: 75 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,15 @@ use crate::Error;
22
use lru::LruCache;
33
use std::collections::{BTreeMap, HashMap, HashSet};
44
use std::num::NonZeroUsize;
5-
use types::{BeaconState, EthSpec, Hash256, Slot};
5+
use types::{BeaconState, Epoch, EthSpec, Hash256, Slot};
6+
7+
/// Fraction of the LRU cache to leave intact during culling.
8+
const CULL_EXEMPT_NUMERATOR: usize = 1;
9+
const CULL_EXEMPT_DENOMINATOR: usize = 10;
10+
11+
/// States that are less than or equal to this many epochs old *could* become finalized, so
12+
/// epoch-boundary states within this limit are only culled after all other candidates.
13+
const EPOCH_FINALIZATION_LIMIT: u64 = 4;
614

715
#[derive(Debug)]
816
pub struct FinalizedState<E: EthSpec> {
@@ -27,6 +35,8 @@ pub struct StateCache<E: EthSpec> {
2735
finalized_state: Option<FinalizedState<E>>,
2836
states: LruCache<Hash256, BeaconState<E>>,
2937
block_map: BlockMap,
38+
capacity: NonZeroUsize,
39+
max_epoch: Epoch,
3040
}
3141

3242
#[derive(Debug)]
@@ -42,6 +52,8 @@ impl<E: EthSpec> StateCache<E> {
4252
finalized_state: None,
4353
states: LruCache::new(capacity),
4454
block_map: BlockMap::default(),
55+
capacity,
56+
max_epoch: Epoch::new(0),
4557
}
4658
}
4759

@@ -115,6 +127,14 @@ impl<E: EthSpec> StateCache<E> {
115127
});
116128
}
117129

130+
// Update the cache's idea of the max epoch.
131+
self.max_epoch = std::cmp::max(state.current_epoch(), self.max_epoch);
132+
133+
// If the cache is full, use the custom cull routine to make room.
134+
if let Some(over_capacity) = self.len().checked_sub(self.capacity.get()) {
135+
self.cull(over_capacity + 1);
136+
}
137+
118138
// Insert the full state into the cache.
119139
self.states.put(state_root, state.clone());
120140

@@ -166,6 +186,60 @@ impl<E: EthSpec> StateCache<E> {
166186
}
167187
}
168188
}
189+
190+
/// Cull approximately `count` states from the cache.
191+
///
192+
/// States are culled LRU, with the following extra priority order imposed (first listed is culled first):
193+
///
194+
/// - Advanced states.
195+
/// - Mid-epoch unadvanced states.
196+
/// - Epoch-boundary states that are too old to be finalized.
197+
/// - Epoch-boundary states that could be finalized.
198+
pub fn cull(&mut self, count: usize) {
199+
let cull_exempt = std::cmp::max(
200+
1,
201+
self.len() * CULL_EXEMPT_NUMERATOR / CULL_EXEMPT_DENOMINATOR,
202+
);
203+
204+
// Stage 1: gather states to cull.
205+
let mut advanced_state_roots = vec![];
206+
let mut mid_epoch_state_roots = vec![];
207+
let mut old_boundary_state_roots = vec![];
208+
let mut good_boundary_state_roots = vec![];
209+
for (&state_root, state) in self.states.iter().skip(cull_exempt) {
210+
let is_advanced = state.slot() > state.latest_block_header().slot;
211+
let is_boundary = state.slot() % E::slots_per_epoch() == 0;
212+
let could_finalize =
213+
(self.max_epoch - state.current_epoch()) <= EPOCH_FINALIZATION_LIMIT;
214+
215+
if is_advanced {
216+
advanced_state_roots.push(state_root);
217+
} else if !is_boundary {
218+
mid_epoch_state_roots.push(state_root);
219+
} else if !could_finalize {
220+
old_boundary_state_roots.push(state_root);
221+
} else {
222+
good_boundary_state_roots.push(state_root);
223+
}
224+
225+
// Terminate early in the common case where we've already found enough junk to cull.
226+
if advanced_state_roots.len() == count {
227+
break;
228+
}
229+
}
230+
231+
// Stage 2: delete.
232+
// This could probably be more efficient in how it interacts with the block map.
233+
for state_root in advanced_state_roots
234+
.iter()
235+
.chain(mid_epoch_state_roots.iter())
236+
.chain(old_boundary_state_roots.iter())
237+
.chain(good_boundary_state_roots.iter())
238+
.take(count)
239+
{
240+
self.delete_state(state_root);
241+
}
242+
}
169243
}
170244

171245
impl BlockMap {

lighthouse/tests/beacon_node.rs

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1795,12 +1795,7 @@ fn epochs_per_migration_override() {
17951795
fn epochs_per_state_diff_default() {
17961796
CommandLineTest::new()
17971797
.run_with_zero_port()
1798-
.with_config(|config| {
1799-
assert_eq!(
1800-
config.store.epochs_per_state_diff,
1801-
beacon_node::beacon_chain::store::config::DEFAULT_EPOCHS_PER_STATE_DIFF
1802-
)
1803-
});
1798+
.with_config(|config| assert_eq!(config.store.epochs_per_state_diff, 16));
18041799
}
18051800
#[test]
18061801
fn epochs_per_state_diff_override() {

0 commit comments

Comments
 (0)