Skip to content

Commit

Permalink
Introduce MixedBitSet.
Browse files Browse the repository at this point in the history
It just uses `BitSet` for small/medium sizes (<= 2048 bits) and
`ChunkedBitSet` for larger sizes. This is good because `ChunkedBitSet`
is slow and memory-hungry at smaller sizes.
  • Loading branch information
nnethercote committed Dec 5, 2024
1 parent dff5ce6 commit 6ee1a7a
Show file tree
Hide file tree
Showing 4 changed files with 200 additions and 3 deletions.
155 changes: 155 additions & 0 deletions compiler/rustc_index/src/bit_set.rs
Original file line number Diff line number Diff line change
Expand Up @@ -410,6 +410,9 @@ impl<'a, T: Idx> Iterator for BitIter<'a, T> {
/// some stretches with lots of 0s and 1s mixed in a way that causes trouble
/// for `IntervalSet`.
///
/// Best used via `MixedBitSet`, rather than directly, because `MixedBitSet`
/// has better performance for small bitsets.
///
/// `T` is an index type, typically a newtyped `usize` wrapper, but it can also
/// just be `usize`.
///
Expand Down Expand Up @@ -1106,6 +1109,158 @@ where
false
}

/// A bitset with a mixed representation, using `BitSet` for small and medium
/// bitsets, and `ChunkedBitSet` for large bitsets, i.e. those with enough bits
/// for at least two chunks. This is a good choice for many bitsets that can
/// have large domain sizes (e.g. 5000+).
///
/// `T` is an index type, typically a newtyped `usize` wrapper, but it can also
/// just be `usize`.
///
/// All operations that involve an element will panic if the element is equal
/// to or greater than the domain size. All operations that involve two bitsets
/// will panic if the bitsets have differing domain sizes.
#[derive(PartialEq, Eq)]
pub enum MixedBitSet<T> {
Small(BitSet<T>),
Large(ChunkedBitSet<T>),
}

impl<T> MixedBitSet<T> {
pub fn domain_size(&self) -> usize {
match self {
MixedBitSet::Small(set) => set.domain_size(),
MixedBitSet::Large(set) => set.domain_size(),
}
}
}

impl<T: Idx> MixedBitSet<T> {
#[inline]
pub fn new_empty(domain_size: usize) -> MixedBitSet<T> {
if domain_size <= CHUNK_BITS {
MixedBitSet::Small(BitSet::new_empty(domain_size))
} else {
MixedBitSet::Large(ChunkedBitSet::new_empty(domain_size))
}
}

#[inline]
pub fn is_empty(&self) -> bool {
match self {
MixedBitSet::Small(set) => set.is_empty(),
MixedBitSet::Large(set) => set.is_empty(),
}
}

#[inline]
pub fn contains(&self, elem: T) -> bool {
match self {
MixedBitSet::Small(set) => set.contains(elem),
MixedBitSet::Large(set) => set.contains(elem),
}
}

#[inline]
pub fn insert(&mut self, elem: T) -> bool {
match self {
MixedBitSet::Small(set) => set.insert(elem),
MixedBitSet::Large(set) => set.insert(elem),
}
}

pub fn insert_all(&mut self) {
match self {
MixedBitSet::Small(set) => set.insert_all(),
MixedBitSet::Large(set) => set.insert_all(),
}
}

#[inline]
pub fn remove(&mut self, elem: T) -> bool {
match self {
MixedBitSet::Small(set) => set.remove(elem),
MixedBitSet::Large(set) => set.remove(elem),
}
}

pub fn iter(&self) -> MixedBitIter<'_, T> {
match self {
MixedBitSet::Small(set) => MixedBitIter::Small(set.iter()),
MixedBitSet::Large(set) => MixedBitIter::Large(set.iter()),
}
}

bit_relations_inherent_impls! {}
}

impl<T> Clone for MixedBitSet<T> {
fn clone(&self) -> Self {
match self {
MixedBitSet::Small(set) => MixedBitSet::Small(set.clone()),
MixedBitSet::Large(set) => MixedBitSet::Large(set.clone()),
}
}

/// WARNING: this implementation of clone_from may panic if the two
/// bitsets have different domain sizes. This constraint is not inherent to
/// `clone_from`, but it works with the existing call sites and allows a
/// faster implementation, which is important because this function is hot.
fn clone_from(&mut self, from: &Self) {
match (self, from) {
(MixedBitSet::Small(set), MixedBitSet::Small(from)) => set.clone_from(from),
(MixedBitSet::Large(set), MixedBitSet::Large(from)) => set.clone_from(from),
_ => panic!("MixedBitSet size mismatch"),
}
}
}

impl<T: Idx> BitRelations<MixedBitSet<T>> for MixedBitSet<T> {
fn union(&mut self, other: &MixedBitSet<T>) -> bool {
match (self, other) {
(MixedBitSet::Small(set), MixedBitSet::Small(other)) => set.union(other),
(MixedBitSet::Large(set), MixedBitSet::Large(other)) => set.union(other),
_ => panic!("MixedBitSet size mismatch"),
}
}

fn subtract(&mut self, other: &MixedBitSet<T>) -> bool {
match (self, other) {
(MixedBitSet::Small(set), MixedBitSet::Small(other)) => set.subtract(other),
(MixedBitSet::Large(set), MixedBitSet::Large(other)) => set.subtract(other),
_ => panic!("MixedBitSet size mismatch"),
}
}

fn intersect(&mut self, _other: &MixedBitSet<T>) -> bool {
unimplemented!("implement if/when necessary");
}
}

impl<T: Idx> fmt::Debug for MixedBitSet<T> {
fn fmt(&self, w: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
MixedBitSet::Small(set) => set.fmt(w),
MixedBitSet::Large(set) => set.fmt(w),
}
}
}

pub enum MixedBitIter<'a, T: Idx> {
Small(BitIter<'a, T>),
Large(ChunkedBitIter<'a, T>),
}

impl<'a, T: Idx> Iterator for MixedBitIter<'a, T> {
type Item = T;
fn next(&mut self) -> Option<T> {
match self {
MixedBitIter::Small(iter) => iter.next(),
MixedBitIter::Large(iter) => iter.next(),
}
}
}

/// A resizable bitset type with a dense representation.
///
/// `T` is an index type, typically a newtyped `usize` wrapper, but it can also
Expand Down
22 changes: 21 additions & 1 deletion compiler/rustc_mir_dataflow/src/framework/fmt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
use std::fmt;

use rustc_index::Idx;
use rustc_index::bit_set::{BitSet, ChunkedBitSet};
use rustc_index::bit_set::{BitSet, ChunkedBitSet, MixedBitSet};

use super::lattice::MaybeReachable;

Expand Down Expand Up @@ -127,6 +127,26 @@ where
}
}

impl<T, C> DebugWithContext<C> for MixedBitSet<T>
where
T: Idx + DebugWithContext<C>,
{
fn fmt_with(&self, ctxt: &C, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
MixedBitSet::Small(set) => set.fmt_with(ctxt, f),
MixedBitSet::Large(set) => set.fmt_with(ctxt, f),
}
}

fn fmt_diff_with(&self, old: &Self, ctxt: &C, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match (self, old) {
(MixedBitSet::Small(set), MixedBitSet::Small(old)) => set.fmt_diff_with(old, ctxt, f),
(MixedBitSet::Large(set), MixedBitSet::Large(old)) => set.fmt_diff_with(old, ctxt, f),
_ => panic!("MixedBitSet size mismatch"),
}
}
}

impl<S, C> DebugWithContext<C> for MaybeReachable<S>
where
S: DebugWithContext<C>,
Expand Down
8 changes: 7 additions & 1 deletion compiler/rustc_mir_dataflow/src/framework/lattice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
use std::iter;

use rustc_index::bit_set::{BitSet, ChunkedBitSet};
use rustc_index::bit_set::{BitSet, ChunkedBitSet, MixedBitSet};
use rustc_index::{Idx, IndexVec};

use crate::framework::BitSetExt;
Expand Down Expand Up @@ -132,6 +132,12 @@ impl<T: Idx> JoinSemiLattice for ChunkedBitSet<T> {
}
}

impl<T: Idx> JoinSemiLattice for MixedBitSet<T> {
fn join(&mut self, other: &Self) -> bool {
self.union(other)
}
}

/// Extends a type `T` with top and bottom elements to make it a partially ordered set in which no
/// value of `T` is comparable with any other.
///
Expand Down
18 changes: 17 additions & 1 deletion compiler/rustc_mir_dataflow/src/framework/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
use std::cmp::Ordering;

use rustc_data_structures::work_queue::WorkQueue;
use rustc_index::bit_set::{BitSet, ChunkedBitSet};
use rustc_index::bit_set::{BitSet, ChunkedBitSet, MixedBitSet};
use rustc_index::{Idx, IndexVec};
use rustc_middle::bug;
use rustc_middle::mir::{self, BasicBlock, CallReturnPlaces, Location, TerminatorEdges, traversal};
Expand Down Expand Up @@ -77,6 +77,12 @@ impl<T: Idx> BitSetExt<T> for ChunkedBitSet<T> {
}
}

impl<T: Idx> BitSetExt<T> for MixedBitSet<T> {
fn contains(&self, elem: T) -> bool {
self.contains(elem)
}
}

/// A dataflow problem with an arbitrarily complex transfer function.
///
/// This trait specifies the lattice on which this analysis operates (the domain), its
Expand Down Expand Up @@ -337,6 +343,16 @@ impl<T: Idx> GenKill<T> for ChunkedBitSet<T> {
}
}

impl<T: Idx> GenKill<T> for MixedBitSet<T> {
fn gen_(&mut self, elem: T) {
self.insert(elem);
}

fn kill(&mut self, elem: T) {
self.remove(elem);
}
}

impl<T, S: GenKill<T>> GenKill<T> for MaybeReachable<S> {
fn gen_(&mut self, elem: T) {
match self {
Expand Down

0 comments on commit 6ee1a7a

Please sign in to comment.