Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Generalize flat container implementations to less specific regions #518

Draft
wants to merge 7 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ timely = {workspace = true}

[workspace.dependencies]
#timely = { version = "0.12", default-features = false }
timely = { git = "https://github.com/TimelyDataflow/timely-dataflow", default-features = false }
timely = { git = "https://github.com/antiguru/timely-dataflow", branch = "flatcontainer_storage", default-features = false }
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

#timely = { path = "../timely-dataflow/timely/", default-features = false }

[features]
Expand Down
66 changes: 39 additions & 27 deletions src/consolidation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@ use std::cmp::Ordering;
use std::collections::VecDeque;
use timely::Container;
use timely::container::{ContainerBuilder, PushInto, SizableContainer};
use timely::container::flatcontainer::{FlatStack, Push, Region};
use timely::container::flatcontainer::impls::tuple::{TupleABCRegion, TupleABRegion};
use crate::Data;
use crate::difference::{IsZero, Semigroup};
use crate::trace::cursor::IntoOwned;
Expand Down Expand Up @@ -156,7 +154,7 @@ where
// TODO: Can we replace `multiple` by a bool?
#[cold]
fn consolidate_and_flush_through(&mut self, multiple: usize) {
let preferred_capacity = <Vec<(D,T,R)>>::preferred_capacity();
let preferred_capacity = <Vec<(D,T,R)> as SizableContainer>::preferred_capacity();
consolidate_updates(&mut self.current);
let mut drain = self.current.drain(..(self.current.len()/multiple)*multiple).peekable();
while drain.peek().is_some() {
Expand All @@ -180,7 +178,7 @@ where
/// Precondition: `current` is not allocated or has space for at least one element.
#[inline]
fn push_into(&mut self, item: P) {
let preferred_capacity = <Vec<(D,T,R)>>::preferred_capacity();
let preferred_capacity = <Vec<(D,T,R)> as SizableContainer>::preferred_capacity();
if self.current.capacity() < preferred_capacity * 2 {
self.current.reserve(preferred_capacity * 2 - self.current.capacity());
}
Expand Down Expand Up @@ -280,31 +278,45 @@ where
}
}

impl<K, V, T, R> ConsolidateLayout for FlatStack<TupleABCRegion<TupleABRegion<K, V>, T, R>>
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change is important.

where
for<'a> K: Region + Push<<K as Region>::ReadItem<'a>> + Clone + 'static,
for<'a> K::ReadItem<'a>: Ord + Copy,
for<'a> V: Region + Push<<V as Region>::ReadItem<'a>> + Clone + 'static,
for<'a> V::ReadItem<'a>: Ord + Copy,
for<'a> T: Region + Push<<T as Region>::ReadItem<'a>> + Clone + 'static,
for<'a> T::ReadItem<'a>: Ord + Copy,
R: Region + Push<<R as Region>::Owned> + Clone + 'static,
for<'a> R::Owned: Semigroup<R::ReadItem<'a>>,
{
type Key<'a> = (K::ReadItem<'a>, V::ReadItem<'a>, T::ReadItem<'a>) where Self: 'a;
type Diff<'a> = R::ReadItem<'a> where Self: 'a;
type DiffOwned = R::Owned;

fn into_parts(((key, val), time, diff): Self::Item<'_>) -> (Self::Key<'_>, Self::Diff<'_>) {
((key, val, time), diff)
}
mod flatcontainer {
use std::cmp::Ordering;

use timely::container::flatcontainer::{FlatStack, Push, Region};
use timely::container::flatcontainer::impls::index::IndexContainer;

use crate::consolidation::ConsolidateLayout;
use crate::difference::Semigroup;
use crate::trace::implementations::merge_batcher_flat::RegionUpdate;

impl<R, S> ConsolidateLayout for FlatStack<R, S>
where
R: RegionUpdate
+ Region
+ Clone
+ for<'a> Push<((R::Key<'a>, R::Val<'a>), R::Time<'a>, R::DiffOwned)>
+ 'static,
for<'a> R::DiffOwned: Semigroup<R::Diff<'a>>,
for<'a> R::ReadItem<'a>: Copy,
S: IndexContainer<R::Index> + Clone + 'static,
{
type Key<'a> = (R::Key<'a>, R::Val<'a>, R::Time<'a>) where Self: 'a;
type Diff<'a> = R::Diff<'a> where Self: 'a;
type DiffOwned = R::DiffOwned;

fn into_parts(item: Self::Item<'_>) -> (Self::Key<'_>, Self::Diff<'_>) {
let (key, val, time, diff) = R::into_parts(item);
((key, val, time), diff)
}

fn cmp<'a>(((key1, val1), time1, _diff1): &Self::Item<'_>, ((key2, val2), time2, _diff2): &Self::Item<'_>) -> Ordering {
(K::reborrow(*key1), V::reborrow(*val1), T::reborrow(*time1)).cmp(&(K::reborrow(*key2), V::reborrow(*val2), T::reborrow(*time2)))
}
fn cmp<'a>(item1: &Self::Item<'_>, item2: &Self::Item<'_>) -> Ordering {
let (key1, val1, time1, _diff1) = R::into_parts(*item1);
let (key2, val2, time2, _diff2) = R::into_parts(*item2);
(R::reborrow_key(key1), R::reborrow_val(val1), R::reborrow_time(time1)).cmp(&(R::reborrow_key(key2), R::reborrow_val(val2), R::reborrow_time(time2)))
}

fn push_with_diff(&mut self, (key, value, time): Self::Key<'_>, diff: Self::DiffOwned) {
self.copy(((key, value), time, diff));
fn push_with_diff(&mut self, (key, value, time): Self::Key<'_>, diff: Self::DiffOwned) {
self.copy(((key, value), time, diff));
}
}
}

Expand Down
17 changes: 8 additions & 9 deletions src/trace/implementations/chunker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ use std::collections::VecDeque;
use timely::communication::message::RefOrMut;
use timely::Container;
use timely::container::columnation::{Columnation, TimelyStack};
use timely::container::{ContainerBuilder, PushInto, SizableContainer};
use timely::container::{CapacityContainer, ContainerBuilder, PushInto};

use crate::consolidation::{consolidate_updates, consolidate_container, ConsolidateLayout};
use crate::difference::Semigroup;

Expand Down Expand Up @@ -291,21 +292,19 @@ where
impl<'a, Input, Output> PushInto<RefOrMut<'a, Input>> for ContainerChunker<Output>
where
Input: Container,
Output: SizableContainer
+ ConsolidateLayout
Output: ConsolidateLayout
+ CapacityContainer
+ PushInto<Input::Item<'a>>
+ PushInto<Input::ItemRef<'a>>,
{
fn push_into(&mut self, container: RefOrMut<'a, Input>) {
if self.pending.capacity() < Output::preferred_capacity() {
self.pending.reserve(Output::preferred_capacity() - self.pending.len());
}
self.pending.ensure_preferred_capacity();
let form_batch = |this: &mut Self| {
if this.pending.len() == this.pending.capacity() {
if this.pending.len() >= Output::preferred_capacity() {
consolidate_container(&mut this.pending, &mut this.empty);
std::mem::swap(&mut this.pending, &mut this.empty);
this.empty.clear();
if this.pending.len() > this.pending.capacity() / 2 {
if this.pending.len() > Output::preferred_capacity() / 2 {
// Note that we're pushing non-full containers, which is a deviation from
// other implementation. The reason for this is that we cannot extract
// partial data from `this.pending`. We should revisit this in the future.
Expand All @@ -332,7 +331,7 @@ where

impl<Output> ContainerBuilder for ContainerChunker<Output>
where
Output: SizableContainer + ConsolidateLayout,
Output: ConsolidateLayout,
{
type Container = Output;

Expand Down
Loading