Skip to content

Implement sorting in place for OrderMap, OrderSet #57

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Jan 4, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 80 additions & 0 deletions benches/bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ extern crate fnv;
#[macro_use]
extern crate lazy_static;

use std::hash::Hash;
use fnv::FnvHasher;
use std::hash::BuildHasherDefault;
type FnvBuilder = BuildHasherDefault<FnvHasher>;
Expand Down Expand Up @@ -361,13 +362,16 @@ fn lookup_orderedmap_10_000_noexist(b: &mut Bencher) {
// number of items to look up
const LOOKUP_MAP_SIZE: u32 = 100_000_u32;
const LOOKUP_SAMPLE_SIZE: u32 = 5000;
// number of entries inserted into each map used by the sort benchmarks
const SORT_MAP_SIZE: usize = 10_000;


// The key set is built lazily, once, through lazy_static so that every
// comparison benchmark runs against the exact same inputs.
lazy_static! {
    static ref KEYS: Vec<u32> = shuffled_keys(0..LOOKUP_MAP_SIZE);
}

lazy_static! {
static ref HMAP_100K: HashMap<u32, u32> = {
let c = LOOKUP_MAP_SIZE;
Expand All @@ -392,6 +396,25 @@ lazy_static! {
};
}

// Input for the u32 sort benchmarks: the first SORT_MAP_SIZE entries of KEYS,
// each one inserted as both key and value.
lazy_static! {
    static ref OMAP_SORT_U32: OrderMap<u32, u32> = {
        let keys = &KEYS[..SORT_MAP_SIZE];
        let mut unsorted = OrderMap::with_capacity(SORT_MAP_SIZE);
        for k in keys {
            unsorted.insert(*k, *k);
        }
        unsorted
    };
}
// Input for the string sort benchmarks: the first SORT_MAP_SIZE entries of
// KEYS, each formatted as hex centered in a 16 character field and used as
// the key; every value is an empty string.
lazy_static! {
    static ref OMAP_SORT_S: OrderMap<String, String> = {
        let keys = &KEYS[..SORT_MAP_SIZE];
        let mut unsorted = OrderMap::with_capacity(SORT_MAP_SIZE);
        for k in keys {
            let text = format!("{:^16x}", k);
            unsorted.insert(text, String::new());
        }
        unsorted
    };
}

#[bench]
fn lookup_hashmap_100_000_multi(b: &mut Bencher) {
let map = &*HMAP_100K;
Expand Down Expand Up @@ -643,3 +666,60 @@ fn many_retain_hashmap_100_000(b: &mut Bencher) {
map
});
}


// Straightforward sort used as the comparison baseline: drain every pair out
// of the map, sort the pairs by key, then put them back with `extend`.
pub fn simple_sort<K: Ord + Hash, V>(m: &mut OrderMap<K, V>) {
    let mut pairs = m.drain(..).collect::<Vec<_>>();
    pairs.sort_by(|a, b| Ord::cmp(&a.0, &b.0));
    m.extend(pairs);
}


// Benchmark `sort_keys` on the string-keyed sort input.
#[bench]
fn ordermap_sort_s(b: &mut Bencher) {
    let template = OMAP_SORT_S.clone();

    // every iteration also pays for cloning the map, but the measurement is
    // still useful to profile
    b.iter(|| {
        let mut working = template.clone();
        working.sort_keys();
        working
    });
}

// Benchmark the `simple_sort` baseline on the string-keyed sort input.
#[bench]
fn ordermap_simple_sort_s(b: &mut Bencher) {
    let template = OMAP_SORT_S.clone();

    // every iteration also pays for cloning the map, but the measurement is
    // still useful to profile
    b.iter(|| {
        let mut working = template.clone();
        simple_sort(&mut working);
        working
    });
}

// Benchmark `sort_keys` on the u32-keyed sort input.
#[bench]
fn ordermap_sort_u32(b: &mut Bencher) {
    let template = OMAP_SORT_U32.clone();

    // every iteration also pays for cloning the map, but the measurement is
    // still useful to profile
    b.iter(|| {
        let mut working = template.clone();
        working.sort_keys();
        working
    });
}

// Benchmark the `simple_sort` baseline on the u32-keyed sort input.
#[bench]
fn ordermap_simple_sort_u32(b: &mut Bencher) {
    let template = OMAP_SORT_U32.clone();

    // every iteration also pays for cloning the map, but the measurement is
    // still useful to profile
    b.iter(|| {
        let mut working = template.clone();
        simple_sort(&mut working);
        working
    });
}
7 changes: 1 addition & 6 deletions benches/faststring.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,11 @@
#![feature(test)]
extern crate test;
extern crate rand;
extern crate fnv;
extern crate lazy_static;

use fnv::FnvHasher;
use std::hash::BuildHasherDefault;
type FnvBuilder = BuildHasherDefault<FnvHasher>;

use test::Bencher;

#[macro_use] extern crate ordermap;
extern crate ordermap;

use ordermap::OrderMap;

Expand Down
56 changes: 56 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,11 @@ fn hash_elem_using<B: BuildHasher, K: ?Sized + Hash>(build: &B, k: &K) -> HashVa
/// Newtype around a `usize`; this is the type stored in the `hash` field of
/// the map's entries.
#[derive(Copy, Debug)]
struct HashValue(usize);

impl HashValue {
    /// Return the wrapped `usize`.
    #[inline(always)]
    fn get(self) -> usize {
        let HashValue(raw) = self;
        raw
    }
}

impl Clone for HashValue {
#[inline]
fn clone(&self) -> Self { *self }
Expand Down Expand Up @@ -1044,6 +1049,57 @@ impl<K, V, S> OrderMap<K, V, S>
}
}

/// Sort the key-value pairs of the map in place, ordered by `K`'s `Ord`
/// implementation on the keys.
///
/// This forwards to `sort_by` with a comparison that ignores the values;
/// see `sort_by` for details.
pub fn sort_keys(&mut self)
    where K: Ord,
{
    self.sort_by(|lhs, _, rhs, _| lhs.cmp(rhs))
}

/// Sort the map’s key-value pairs in place using the comparison
/// function `compare`.
///
/// The comparison function receives two key and value pairs to compare (you
/// can sort by keys or values or their combination as needed).
///
/// Computes in **O(n log n)** time and **O(n)** space. The sort is stable.
pub fn sort_by<F>(&mut self, mut compare: F)
where F: FnMut(&K, &V, &K, &V) -> Ordering,
{
// here we temporarily use the hash field in a bucket to store the old
// index instead.
//
// Save the old hash values in `side_index`.
// Then we can sort `self.entries` in place.
let mut side_index = Vec::from_iter(enumerate(&mut self.entries).map(|(i, elt)| {
replace(&mut elt.hash, HashValue(i)).get()
}));

self.entries.sort_by(move |ei, ej| compare(&ei.key, &ei.value, &ej.key, &ej.value));

// Here we write back the hash values from side_index and fill
// in side_index with a mapping from the old to the new index instead.
for (i, ent) in enumerate(&mut self.entries) {
let old_index = ent.hash.get();
ent.hash = HashValue(replace(&mut side_index[old_index], i));
}

// Apply new index to self.indices
dispatch_32_vs_64!(self.apply_new_index(&side_index));
}

/// Update the positions stored in `self.indices` after the entries moved.
///
/// `new_index` is a lookup table from old entry index to new entry index.
/// Every slot for which `resolve` yields an index is given the
/// corresponding new index; slots that resolve to `None` are left alone.
fn apply_new_index<Sz>(&mut self, new_index: &[usize])
    where Sz: Size
{
    for slot in self.indices.iter_mut() {
        match slot.resolve::<Sz>() {
            Some((old_pos, _)) => {
                slot.set_pos::<Sz>(new_index[old_pos]);
            }
            None => {}
        }
    }
}

/// Sort the key-value pairs of the map and return a by value iterator of
/// the key-value pairs with the result.
///
Expand Down
18 changes: 18 additions & 0 deletions src/set.rs
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,24 @@ impl<T, S> OrderSet<T, S>
self.map.retain(move |x, &mut ()| keep(x))
}

/// Sort the values of the set in place by `T`'s `Ord` implementation.
///
/// This delegates to `sort_keys` on the underlying map; see `sort_by` for
/// details.
pub fn sort(&mut self)
    where T: Ord,
{
    let inner = &mut self.map;
    inner.sort_keys()
}

/// Sort the values of the set in place, ordering them with the comparison
/// function `compare`.
///
/// The work is delegated to `sort_by` on the underlying map, passing only
/// the two keys on to `compare`.
///
/// Computes in **O(n log n)** time and **O(n)** space. The sort is stable.
pub fn sort_by<F>(&mut self, mut compare: F)
    where F: FnMut(&T, &T) -> Ordering,
{
    self.map.sort_by(move |lhs, _, rhs, _| {
        compare(lhs, rhs)
    });
}

/// Sort the values of the set and return a by value iterator of
/// the values with the result.
///
Expand Down
43 changes: 43 additions & 0 deletions tests/quick.rs
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,49 @@ quickcheck! {
// check the order
itertools::assert_equal(map.keys(), initial_map.keys().filter(|&k| !remove_map.contains_key(k)));
}

fn sort_1(keyvals: Large<Vec<(i8, i8)>>) -> () {
let mut map: OrderMap<_, _> = OrderMap::from_iter(keyvals.to_vec());
let mut answer = keyvals.0;
answer.sort_by_key(|t| t.0);

// reverse dedup: Because OrderMap::from_iter keeps the last value for
// identical keys
answer.reverse();
answer.dedup_by_key(|t| t.0);
answer.reverse();

map.sort_by(|k1, _, k2, _| Ord::cmp(k1, k2));

// check it contains all the values it should
for &(key, val) in &answer {
assert_eq!(map[&key], val);
}

// check the order

let mapv = Vec::from_iter(map);
assert_eq!(answer, mapv);

}

fn sort_2(keyvals: Large<Vec<(i8, i8)>>) -> () {
    // Property: sorting with a comparison on the values leaves the map
    // ordered by value.
    let pairs = keyvals.to_vec();
    let mut by_value: OrderMap<_, _> = OrderMap::from_iter(pairs);
    by_value.sort_by(|_, lhs, _, rhs| Ord::cmp(lhs, rhs));
    assert_sorted_by_key(by_value, |pair| pair.1);
}
}

/// Assert that the items yielded by `iterable` are already ordered by `key`.
///
/// The items are collected into a vector, a clone of that vector is sorted
/// with `sort_by_key(key)`, and the two vectors are compared with
/// `assert_eq!`; the assertion panics if sorting changed the sequence.
fn assert_sorted_by_key<I, Key, X>(iterable: I, key: Key)
    where I: IntoIterator,
          I::Item: Ord + Clone + Debug,
          Key: Fn(&I::Item) -> X,
          X: Ord,
{
    let actual: Vec<I::Item> = iterable.into_iter().collect();
    let expected = {
        let mut items = actual.clone();
        items.sort_by_key(key);
        items
    };
    assert_eq!(actual, expected);
}

#[derive(Clone, Debug, Hash, PartialEq, Eq)]
Expand Down