Auto merge of rust-lang#136115 - Mark-Simulacrum:shard-alloc-id, r=RalfJung

Shard AllocMap Lock

This improves performance on many-seed parallel (`-Zthreads=32`) miri executions: they go from managing to use ~8 cores to using 27-28 cores, which is about the same as what I see with the data structure proposed in rust-lang#136105. I haven't analyzed this in depth, but I suspect the sharding might actually work out better if we commonly insert "densely", since sharding splits up the cache lines while the OnceVec packs locks close together. Of course, we could do something similar with the bitset lock too.

Either way, this seems like a very reasonable starting point that solves the problem ~equally well on what I can test locally.
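For intuition, here is a minimal sketch of the sharding idea. This is not rustc's actual `ShardedHashMap` (which lives in `rustc_data_structures::sharded` and sizes its shard count from the thread configuration); it is a toy illustration in which every name is made up: each key hashes to one of a fixed number of independently locked shards, so threads touching different shards never contend.

```rust
use std::collections::HashMap;
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
use std::sync::Mutex;

const SHARDS: usize = 16; // fixed here; the real type sizes this dynamically

/// A toy sharded map: one lock per shard instead of one lock for everything.
struct ShardedMap<K, V> {
    shards: [Mutex<HashMap<K, V>>; SHARDS],
}

impl<K: Hash + Eq, V: Clone> ShardedMap<K, V> {
    fn new() -> Self {
        ShardedMap { shards: std::array::from_fn(|_| Mutex::new(HashMap::new())) }
    }

    /// Hash the key once to decide which shard (and thus which lock) to take.
    fn shard_for(&self, key: &K) -> &Mutex<HashMap<K, V>> {
        let mut hasher = DefaultHasher::new();
        key.hash(&mut hasher);
        &self.shards[(hasher.finish() as usize) % SHARDS]
    }

    fn insert(&self, key: K, value: V) -> Option<V> {
        self.shard_for(&key).lock().unwrap().insert(key, value)
    }

    fn get(&self, key: &K) -> Option<V> {
        self.shard_for(key).lock().unwrap().get(key).cloned()
    }
}

fn main() {
    let map = ShardedMap::new();
    map.insert("alloc-1", 0xdead_beef_u64);
    assert_eq!(map.get(&"alloc-1"), Some(0xdead_beef_u64));
}
```

This also shows the trade-off mentioned above: sharding spreads the locks (and hot entries) across cache lines, whereas a structure that packs many small locks together keeps them dense but adjacent.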

r? `@RalfJung`
bors committed Feb 4, 2025
2 parents bef3c3b + 7f1231c commit e5f11af
Showing 2 changed files with 44 additions and 26 deletions.
66 changes: 42 additions & 24 deletions compiler/rustc_middle/src/mir/interpret/mod.rs
@@ -15,7 +15,8 @@ use std::{fmt, io};
 use rustc_abi::{AddressSpace, Align, Endian, HasDataLayout, Size};
 use rustc_ast::{LitKind, Mutability};
 use rustc_data_structures::fx::FxHashMap;
-use rustc_data_structures::sync::Lock;
+use rustc_data_structures::sharded::ShardedHashMap;
+use rustc_data_structures::sync::{AtomicU64, Lock};
 use rustc_hir::def::DefKind;
 use rustc_hir::def_id::{DefId, LocalDefId};
 use rustc_macros::{HashStable, TyDecodable, TyEncodable, TypeFoldable, TypeVisitable};
@@ -389,35 +390,39 @@ pub const CTFE_ALLOC_SALT: usize = 0;
 
 pub(crate) struct AllocMap<'tcx> {
     /// Maps `AllocId`s to their corresponding allocations.
-    alloc_map: FxHashMap<AllocId, GlobalAlloc<'tcx>>,
+    // Note that this map on rustc workloads seems to be rather dense, but in miri workloads should
+    // be pretty sparse. In #136105 we considered replacing it with a (dense) Vec-based map, but
+    // since there are workloads where it can be sparse we decided to go with sharding for now. At
+    // least up to 32 cores the one workload tested didn't exhibit much difference between the two.
+    //
+    // Should be locked *after* locking dedup if locking both to avoid deadlocks.
+    to_alloc: ShardedHashMap<AllocId, GlobalAlloc<'tcx>>,
 
     /// Used to deduplicate global allocations: functions, vtables, string literals, ...
     ///
     /// The `usize` is a "salt" used by Miri to make deduplication imperfect, thus better emulating
     /// the actual guarantees.
-    dedup: FxHashMap<(GlobalAlloc<'tcx>, usize), AllocId>,
+    dedup: Lock<FxHashMap<(GlobalAlloc<'tcx>, usize), AllocId>>,
 
     /// The `AllocId` to assign to the next requested ID.
     /// Always incremented; never gets smaller.
-    next_id: AllocId,
+    next_id: AtomicU64,
 }
 
 impl<'tcx> AllocMap<'tcx> {
     pub(crate) fn new() -> Self {
         AllocMap {
-            alloc_map: Default::default(),
+            to_alloc: Default::default(),
             dedup: Default::default(),
-            next_id: AllocId(NonZero::new(1).unwrap()),
+            next_id: AtomicU64::new(1),
         }
     }
-    fn reserve(&mut self) -> AllocId {
-        let next = self.next_id;
-        self.next_id.0 = self.next_id.0.checked_add(1).expect(
-            "You overflowed a u64 by incrementing by 1... \
-             You've just earned yourself a free drink if we ever meet. \
-             Seriously, how did you do that?!",
-        );
-        next
+    fn reserve(&self) -> AllocId {
+        // Technically there is a window here where we overflow and then another thread
+        // increments `next_id` *again* and uses it before we panic and tear down the entire session.
+        // We consider this fine since such overflows cannot realistically occur.
+        let next_id = self.next_id.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
+        AllocId(NonZero::new(next_id).unwrap())
     }
 }

@@ -428,26 +433,34 @@ impl<'tcx> TyCtxt<'tcx> {
     /// Make sure to call `set_alloc_id_memory` or `set_alloc_id_same_memory` before returning such
     /// an `AllocId` from a query.
     pub fn reserve_alloc_id(self) -> AllocId {
-        self.alloc_map.lock().reserve()
+        self.alloc_map.reserve()
     }
 
     /// Reserves a new ID *if* this allocation has not been dedup-reserved before.
     /// Should not be used for mutable memory.
     fn reserve_and_set_dedup(self, alloc: GlobalAlloc<'tcx>, salt: usize) -> AllocId {
-        let mut alloc_map = self.alloc_map.lock();
         if let GlobalAlloc::Memory(mem) = alloc {
             if mem.inner().mutability.is_mut() {
                 bug!("trying to dedup-reserve mutable memory");
             }
         }
         let alloc_salt = (alloc, salt);
-        if let Some(&alloc_id) = alloc_map.dedup.get(&alloc_salt) {
+        // Locking this *before* `to_alloc` also to ensure correct lock order.
+        let mut dedup = self.alloc_map.dedup.lock();
+        if let Some(&alloc_id) = dedup.get(&alloc_salt) {
             return alloc_id;
         }
-        let id = alloc_map.reserve();
+        let id = self.alloc_map.reserve();
         debug!("creating alloc {:?} with id {id:?}", alloc_salt.0);
-        alloc_map.alloc_map.insert(id, alloc_salt.0.clone());
-        alloc_map.dedup.insert(alloc_salt, id);
+        let had_previous = self
+            .alloc_map
+            .to_alloc
+            .lock_shard_by_value(&id)
+            .insert(id, alloc_salt.0.clone())
+            .is_some();
+        // We just reserved, so should always be unique.
+        assert!(!had_previous);
+        dedup.insert(alloc_salt, id);
         id
     }

@@ -497,7 +510,7 @@ impl<'tcx> TyCtxt<'tcx> {
     /// local dangling pointers and allocations in constants/statics.
     #[inline]
     pub fn try_get_global_alloc(self, id: AllocId) -> Option<GlobalAlloc<'tcx>> {
-        self.alloc_map.lock().alloc_map.get(&id).cloned()
+        self.alloc_map.to_alloc.lock_shard_by_value(&id).get(&id).cloned()
     }
 
     #[inline]
@@ -516,16 +529,21 @@ impl<'tcx> TyCtxt<'tcx> {
     /// Freezes an `AllocId` created with `reserve` by pointing it at an `Allocation`. Trying to
     /// call this function twice, even with the same `Allocation` will ICE the compiler.
     pub fn set_alloc_id_memory(self, id: AllocId, mem: ConstAllocation<'tcx>) {
-        if let Some(old) = self.alloc_map.lock().alloc_map.insert(id, GlobalAlloc::Memory(mem)) {
+        if let Some(old) =
+            self.alloc_map.to_alloc.lock_shard_by_value(&id).insert(id, GlobalAlloc::Memory(mem))
+        {
             bug!("tried to set allocation ID {id:?}, but it was already existing as {old:#?}");
         }
     }
 
     /// Freezes an `AllocId` created with `reserve` by pointing it at a static item. Trying to
     /// call this function twice, even with the same `DefId` will ICE the compiler.
     pub fn set_nested_alloc_id_static(self, id: AllocId, def_id: LocalDefId) {
-        if let Some(old) =
-            self.alloc_map.lock().alloc_map.insert(id, GlobalAlloc::Static(def_id.to_def_id()))
+        if let Some(old) = self
+            .alloc_map
+            .to_alloc
+            .lock_shard_by_value(&id)
+            .insert(id, GlobalAlloc::Static(def_id.to_def_id()))
         {
             bug!("tried to set allocation ID {id:?}, but it was already existing as {old:#?}");
         }
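Taken together, the new shape of `AllocMap` reduces to the pattern below. This is a compressed, hypothetical sketch, not the real code: plain `Mutex`es stand in for `ShardedHashMap` and `Lock`, and `String` stands in for `GlobalAlloc<'tcx>`.

```rust
use std::collections::HashMap;
use std::num::NonZeroU64;
use std::sync::Mutex;
use std::sync::atomic::{AtomicU64, Ordering};

#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
struct AllocId(NonZeroU64);

struct AllocMap {
    /// Stand-in for the sharded `to_alloc` map; locked *after* `dedup`.
    to_alloc: Mutex<HashMap<AllocId, String>>,
    /// Deduplication table; always locked first.
    dedup: Mutex<HashMap<String, AllocId>>,
    /// Lock-free ID reservation.
    next_id: AtomicU64,
}

impl AllocMap {
    fn new() -> Self {
        AllocMap {
            to_alloc: Mutex::new(HashMap::new()),
            dedup: Mutex::new(HashMap::new()),
            next_id: AtomicU64::new(1),
        }
    }

    /// Takes `&self`, not `&mut self`: a relaxed `fetch_add` hands every
    /// caller a distinct ID without taking any lock.
    fn reserve(&self) -> AllocId {
        let next = self.next_id.fetch_add(1, Ordering::Relaxed);
        AllocId(NonZeroU64::new(next).unwrap())
    }

    /// Mirrors `reserve_and_set_dedup`: `dedup` is acquired first and held
    /// across the `to_alloc` insertion, so the lock order is always
    /// dedup -> to_alloc, matching the comment on the real struct.
    fn reserve_and_set_dedup(&self, alloc: String) -> AllocId {
        let mut dedup = self.dedup.lock().unwrap();
        if let Some(&id) = dedup.get(&alloc) {
            return id;
        }
        let id = self.reserve();
        let had_previous = self.to_alloc.lock().unwrap().insert(id, alloc.clone()).is_some();
        assert!(!had_previous); // freshly reserved IDs are unique
        dedup.insert(alloc, id);
        id
    }
}

fn main() {
    let map = AllocMap::new();
    let a = map.reserve_and_set_dedup("vtable for Foo".to_string());
    let b = map.reserve_and_set_dedup("vtable for Foo".to_string());
    assert_eq!(a, b); // deduplicated: the same allocation gets the same ID
}
```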
4 changes: 2 additions & 2 deletions compiler/rustc_middle/src/ty/context.rs
@@ -1366,7 +1366,7 @@ pub struct GlobalCtxt<'tcx> {
     pub data_layout: TargetDataLayout,
 
     /// Stores memory for globals (statics/consts).
-    pub(crate) alloc_map: Lock<interpret::AllocMap<'tcx>>,
+    pub(crate) alloc_map: interpret::AllocMap<'tcx>,
 
     current_gcx: CurrentGcx,
 }
@@ -1583,7 +1583,7 @@ impl<'tcx> TyCtxt<'tcx> {
             new_solver_evaluation_cache: Default::default(),
             canonical_param_env_cache: Default::default(),
             data_layout,
-            alloc_map: Lock::new(interpret::AllocMap::new()),
+            alloc_map: interpret::AllocMap::new(),
             current_gcx,
         });

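The `context.rs` side is the usual interior-mutability payoff: once a type synchronizes itself, an outer `Lock` around it only serializes callers. A generic illustration of the pattern (hypothetical types, not rustc code):

```rust
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::{Arc, Mutex};
use std::thread;

/// A type that synchronizes internally, like the post-patch `AllocMap`:
/// callers only need `&self`, so no outer lock is required.
struct Counter {
    next: AtomicU64,
    log: Mutex<Vec<u64>>,
}

impl Counter {
    fn bump(&self) -> u64 {
        let id = self.next.fetch_add(1, Ordering::Relaxed);
        self.log.lock().unwrap().push(id);
        id
    }
}

fn main() {
    // Shared directly via Arc with no Mutex<Counter> wrapper: threads only
    // contend on the specific field they touch, not on the whole struct.
    let ctx = Arc::new(Counter { next: AtomicU64::new(1), log: Mutex::new(Vec::new()) });
    let handles: Vec<_> = (0..4)
        .map(|_| {
            let ctx = Arc::clone(&ctx);
            thread::spawn(move || ctx.bump())
        })
        .collect();
    for h in handles {
        h.join().unwrap();
    }
    assert_eq!(ctx.log.lock().unwrap().len(), 4);
}
```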
