Skip to content

BTreeMap/BTreeSet::from_iter: use bulk building to improve the performance #88448

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Sep 7, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions library/alloc/benches/btree/map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,50 @@ macro_rules! map_insert_seq_bench {
};
}

// Generates a benchmark named `$name` that measures collecting `$n` randomly
// keyed pairs into a `$map`.
macro_rules! map_from_iter_rand_bench {
    ($name: ident, $n: expr, $map: ident) => {
        #[bench]
        pub fn $name(b: &mut Bencher) {
            let n: usize = $n;

            // setup: `n` pairs whose key and value are the same random number in `0..n`
            let mut rng = thread_rng();
            let pairs: Vec<(usize, usize)> = (0..n)
                .map(|_| {
                    let i = rng.gen::<usize>() % n;
                    (i, i)
                })
                .collect();

            // measure: only the construction of the map from the prepared pairs
            b.iter(|| {
                black_box(pairs.iter().copied().collect::<$map<_, _>>());
            });
        }
    };
}

// Generates a benchmark named `$name` that measures collecting `$n` pairs with
// ascending keys `0..$n` into a `$map`.
macro_rules! map_from_iter_seq_bench {
    ($name: ident, $n: expr, $map: ident) => {
        #[bench]
        pub fn $name(b: &mut Bencher) {
            let n: usize = $n;

            // setup: the pairs `(0, 0), (1, 1), ..., (n - 1, n - 1)`, already in key order
            let pairs: Vec<(usize, usize)> = (0..n).map(|i| (i, i)).collect();

            // measure: only the construction of the map from the prepared pairs
            b.iter(|| {
                black_box(pairs.iter().copied().collect::<$map<_, _>>());
            });
        }
    };
}

macro_rules! map_find_rand_bench {
($name: ident, $n: expr, $map: ident) => {
#[bench]
Expand Down Expand Up @@ -111,6 +155,12 @@ map_insert_rand_bench! {insert_rand_10_000, 10_000, BTreeMap}
map_insert_seq_bench! {insert_seq_100, 100, BTreeMap}
map_insert_seq_bench! {insert_seq_10_000, 10_000, BTreeMap}

// Collect randomly keyed pairs into a `BTreeMap`, at 100 and 10,000 elements.
map_from_iter_rand_bench! {from_iter_rand_100, 100, BTreeMap}
map_from_iter_rand_bench! {from_iter_rand_10_000, 10_000, BTreeMap}

// Collect pairs with ascending keys into a `BTreeMap`, at 100 and 10,000 elements.
map_from_iter_seq_bench! {from_iter_seq_100, 100, BTreeMap}
map_from_iter_seq_bench! {from_iter_seq_10_000, 10_000, BTreeMap}

map_find_rand_bench! {find_rand_100, 100, BTreeMap}
map_find_rand_bench! {find_rand_10_000, 10_000, BTreeMap}

Expand Down
47 changes: 47 additions & 0 deletions library/alloc/src/collections/btree/dedup_sorted_iter.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
use core::iter::Peekable;

/// An iterator adaptor that removes duplicate keys from a key-sorted iterator.
///
/// Only adjacent items are compared: each run of consecutive pairs with equal
/// keys is collapsed into one item, and of such a run only the last key-value
/// pair is yielded.
///
/// Used by [`BTreeMap::bulk_build_from_sorted_iter`].
pub struct DedupSortedIter<K, V, I>
where
    I: Iterator<Item = (K, V)>,
{
    iter: Peekable<I>,
}

impl<K, V, I> DedupSortedIter<K, V, I>
where
    I: Iterator<Item = (K, V)>,
{
    /// Wraps `iter`, making it peekable so that neighbouring keys can be compared.
    pub fn new(iter: I) -> Self {
        let iter = iter.peekable();
        Self { iter }
    }
}

impl<K, V, I> Iterator for DedupSortedIter<K, V, I>
where
    K: Eq,
    I: Iterator<Item = (K, V)>,
{
    type Item = (K, V);

    /// Returns the last pair of the next run of equal keys, or `None` once the
    /// underlying iterator is exhausted.
    fn next(&mut self) -> Option<(K, V)> {
        loop {
            let current = self.iter.next()?;

            // `current` closes its run when nothing follows it or when the
            // following key differs; otherwise it is superseded and dropped.
            let closes_run = self.iter.peek().map_or(true, |upcoming| current.0 != upcoming.0);
            if closes_run {
                return Some(current);
            }
        }
    }
}
36 changes: 31 additions & 5 deletions library/alloc/src/collections/btree/map.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use crate::vec::Vec;
use core::borrow::Borrow;
use core::cmp::Ordering;
use core::fmt::{self, Debug};
Expand All @@ -9,6 +10,7 @@ use core::ops::{Index, RangeBounds};
use core::ptr;

use super::borrow::DormantMutRef;
use super::dedup_sorted_iter::DedupSortedIter;
use super::navigate::{LazyLeafRange, LeafRange};
use super::node::{self, marker, ForceResult::*, Handle, NodeRef, Root};
use super::search::SearchResult::*;
Expand Down Expand Up @@ -1290,6 +1292,18 @@ impl<K, V> BTreeMap<K, V> {
pub fn into_values(self) -> IntoValues<K, V> {
IntoValues { inner: self.into_iter() }
}

/// Builds a `BTreeMap` from an iterator that is already sorted by key.
///
/// The iterator is wrapped in a `DedupSortedIter` before being pushed into a
/// freshly created root, so a run of adjacent equal keys contributes only its
/// last pair. The returned map always holds a root, even for an empty `iter`.
pub(crate) fn bulk_build_from_sorted_iter<I>(iter: I) -> Self
where
    K: Ord,
    I: Iterator<Item = (K, V)>,
{
    // `bulk_push` receives the counter by mutable reference — presumably it adds
    // the number of pushed pairs to it; confirm against `bulk_push`.
    let mut len = 0;
    let mut tree = Root::new();
    let deduped = DedupSortedIter::new(iter);
    tree.bulk_push(deduped, &mut len);
    BTreeMap { root: Some(tree), length: len }
}
}

#[stable(feature = "rust1", since = "1.0.0")]
Expand Down Expand Up @@ -1914,9 +1928,15 @@ impl<K, V> FusedIterator for RangeMut<'_, K, V> {}
#[stable(feature = "rust1", since = "1.0.0")]
impl<K: Ord, V> FromIterator<(K, V)> for BTreeMap<K, V> {
    /// Builds a `BTreeMap` from an iterator of key-value pairs.
    ///
    /// The pairs are buffered into a `Vec`, sorted by key, and handed to
    /// `bulk_build_from_sorted_iter` rather than being inserted one at a time.
    /// When a key occurs more than once, the pair that came last in `iter` is
    /// the one that ends up in the map.
    fn from_iter<T: IntoIterator<Item = (K, V)>>(iter: T) -> BTreeMap<K, V> {
        let mut inputs: Vec<_> = iter.into_iter().collect();

        // Return a plain empty map for empty input; the bulk builder would
        // construct a root even when it is given no pairs.
        if inputs.is_empty() {
            return BTreeMap::new();
        }

        // The sort must be stable: pairs with equal keys keep their input order,
        // so the bulk builder's dedup step (which keeps the last pair of each
        // run of equal keys) retains the value that was supplied last.
        inputs.sort_by(|a, b| a.0.cmp(&b.0));
        BTreeMap::bulk_build_from_sorted_iter(inputs.into_iter())
    }
}

Expand Down Expand Up @@ -2025,8 +2045,14 @@ impl<K: Ord, V, const N: usize> From<[(K, V); N]> for BTreeMap<K, V> {
/// let map2: BTreeMap<_, _> = [(1, 2), (3, 4)].into();
/// assert_eq!(map1, map2);
/// ```
fn from(arr: [(K, V); N]) -> Self {
core::array::IntoIter::new(arr).collect()
fn from(mut arr: [(K, V); N]) -> Self {
if N == 0 {
return BTreeMap::new();
}

// use stable sort to preserve the insertion order.
arr.sort_by(|a, b| a.0.cmp(&b.0));
BTreeMap::bulk_build_from_sorted_iter(core::array::IntoIter::new(arr))
}
}

Expand Down
1 change: 1 addition & 0 deletions library/alloc/src/collections/btree/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
mod append;
mod borrow;
mod dedup_sorted_iter;
mod fix;
pub mod map;
mod mem;
Expand Down
27 changes: 22 additions & 5 deletions library/alloc/src/collections/btree/set.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// This is pretty much entirely stolen from TreeSet, since BTreeMap has an identical interface
// to TreeMap

use crate::vec::Vec;
use core::borrow::Borrow;
use core::cmp::Ordering::{Equal, Greater, Less};
use core::cmp::{max, min};
Expand Down Expand Up @@ -1059,9 +1060,17 @@ impl<T> BTreeSet<T> {
#[stable(feature = "rust1", since = "1.0.0")]
impl<T: Ord> FromIterator<T> for BTreeSet<T> {
    /// Builds a `BTreeSet` from an iterator of elements.
    ///
    /// The elements are buffered into a `Vec`, sorted, paired with `()` and
    /// handed to `BTreeMap::bulk_build_from_sorted_iter` rather than being
    /// inserted one at a time.
    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> BTreeSet<T> {
        let mut inputs: Vec<_> = iter.into_iter().collect();

        // Return a plain empty set for empty input instead of going through the
        // bulk builder.
        if inputs.is_empty() {
            return BTreeSet::new();
        }

        // use stable sort to preserve the insertion order.
        inputs.sort();
        let iter = inputs.into_iter().map(|k| (k, ()));
        let map = BTreeMap::bulk_build_from_sorted_iter(iter);
        BTreeSet { map }
    }
}

Expand All @@ -1074,8 +1083,16 @@ impl<T: Ord, const N: usize> From<[T; N]> for BTreeSet<T> {
/// let set2: BTreeSet<_> = [1, 2, 3, 4].into();
/// assert_eq!(set1, set2);
/// ```
fn from(arr: [T; N]) -> Self {
core::array::IntoIter::new(arr).collect()
fn from(mut arr: [T; N]) -> Self {
if N == 0 {
return BTreeSet::new();
}

// use stable sort to preserve the insertion order.
arr.sort();
let iter = core::array::IntoIter::new(arr).map(|k| (k, ()));
let map = BTreeMap::bulk_build_from_sorted_iter(iter);
BTreeSet { map }
}
}

Expand Down