celerity
diff --git a/src/algorithms/fill.h b/src/algorithms/fill.h
Lines changed: 2 additions & 2 deletions
diff --git a/src/algorithms/for_each.h b/src/algorithms/for_each.h
Lines changed: 2 additions & 2 deletions
diff --git a/src/algorithms/generate.h b/src/algorithms/generate.h
Lines changed: 10 additions & 12 deletions
diff --git a/src/algorithms/transform.h b/src/algorithms/transform.h
Lines changed: 12 additions & 10 deletions
diff --git a/src/fusion.h b/src/fusion.h
Lines changed: 51 additions & 53 deletions
@@ -23,8 +23,8 @@ auto fill_impl(IteratorType<T, Rank> beg, IteratorType<T, Rank> end, const T &va
     return [=](celerity::handler &cgh) {
         auto out_acc = get_access<policy_type, mode::write, one_to_one>(cgh, beg, end);
 
-        return [=](item_context<Rank, T> &ctx) {
-            out_acc[ctx[0]] = value;
+        return [=](item_context<Rank, T(void)> &ctx) {
+            out_acc[ctx.get_out()] = value;
         };
     };
 }
 
@@ -26,8 +26,8 @@ auto for_each_impl(InIterator<T, Rank> beg, InIterator<T, Rank> end, const F &f)
     return [=](celerity::handler &cgh) {
         auto in_acc = get_access<policy_type, cl::sycl::access::mode::read, accessor_type>(cgh, beg, end);
 
-        return [=](item_context<Rank, T> &ctx) {
-            f(ctx[0], in_acc[ctx[0]]);
+        return [=](item_context<Rank, void(T)> &ctx) {
+            f(ctx.get_item(), in_acc[ctx.get_in()]);
         };
     };
 }
 
@@ -25,18 +25,16 @@ auto generate_impl(IteratorType<T, Rank> beg, IteratorType<T, Rank> end, const F
     return [=](celerity::handler &cgh) {
         auto out_acc = get_access<policy_type, mode::write, one_to_one>(cgh, beg, end);
 
-        if constexpr (traits::arity_v<F> == 1)
-        {
-            return [=](item_context<Rank, T> &ctx) {
-                out_acc[ctx[0]] = f(ctx[0]);
-            };
-        }
-        else
-        {
-            return [=](item_context<Rank, T> &ctx) {
-                out_acc[ctx[0]] = f();
-            };
-        }
+        return [=](item_context<Rank, T()> &ctx) {
+            if constexpr (traits::arity_v<F> == 1)
+            {
+                out_acc[ctx.get_out()] = f(ctx.get_item());
+            }
+            else
+            {
+                out_acc[ctx.get_out()] = f();
+            }
+        };
     };
 }
 
 
@@ -28,8 +28,8 @@ auto transform_impl(InIterator<T, Rank> beg, InIterator<T, Rank> end, OutIterato
         auto in_acc = get_access<policy_type, mode::read, accessor_type>(cgh, beg, end);
         auto out_acc = get_access<policy_type, mode::discard_write, one_to_one>(cgh, out, out);
 
-        return [=](item_context<Rank, T> &ctx) {
-            out_acc[ctx[0]] = f(in_acc[ctx[0]]);
+        return [=](item_context<Rank, U(T)> &ctx) {
+            out_acc[ctx.get_out()] = f(in_acc[ctx.get_in()]);
         };
     };
 }
@@ -48,8 +48,8 @@ auto transform_impl(InIterator<T, Rank> beg, InIterator<T, Rank> end, OutIterato
         auto in_acc = get_access<policy_type, mode::read, accessor_type>(cgh, beg, end);
         auto out_acc = get_access<policy_type, mode::write, one_to_one>(cgh, out, out);
 
-        return [=](item_context<Rank, T> &ctx) {
-            out_acc[ctx[0]] = f(ctx.get_item(), in_acc[ctx[0]]);
+        return [=](item_context<Rank, U(T)> &ctx) {
+            out_acc[ctx.get_out()] = f(ctx.get_item(), in_acc[ctx.get_in()]);
         };
     };
 }
@@ -61,12 +61,13 @@ template <typename ExecutionPolicy,
           typename F,
           typename T,
           typename U,
+          typename V,
           int Rank,
           require<traits::function_traits<F>::arity == 2> = yes>
 auto transform_impl(FirstInputIteratorType<T, Rank> beg,
                     FirstInputIteratorType<T, Rank> end,
                     SecondInputIteratorType<U, Rank> beg2,
-                    OutputIteratorType<T, Rank> out,
+                    OutputIteratorType<V, Rank> out,
                     const F &f)
 {
     using namespace traits;
@@ -82,8 +83,8 @@ auto transform_impl(FirstInputIteratorType<T, Rank> beg,
         auto out_acc = get_access<policy_type, mode::discard_write, one_to_one>(cgh, out, out);
 
         // TODO: item_context needs to fit for both T and U
-        return [=](item_context<Rank, T> &ctx) {
-            out_acc[ctx[0]] = f(first_in_acc[ctx[0]], second_in_acc[ctx[1]]);
+        return [=](item_context<Rank, V(T, U)> &ctx) {
+            out_acc[ctx.get_out()] = f(first_in_acc[ctx.template get_in<0>()], second_in_acc[ctx.template get_in<1>()]);
         };
     };
 }
@@ -95,12 +96,13 @@ template <typename ExecutionPolicy,
           typename F,
           typename T,
           typename U,
+          typename V,
           int Rank,
           require<traits::function_traits<F>::arity == 3> = yes>
 auto transform_impl(FirstInputIteratorType<T, Rank> beg,
                     FirstInputIteratorType<T, Rank> end,
                     SecondInputIteratorType<U, Rank> beg2,
-                    OutputIteratorType<T, Rank> out, const F &f)
+                    OutputIteratorType<V, Rank> out, const F &f)
 {
     using namespace traits;
     using namespace cl::sycl::access;
@@ -115,8 +117,8 @@ auto transform_impl(FirstInputIteratorType<T, Rank> beg,
         auto out_acc = get_access<policy_type, mode::discard_write, one_to_one>(cgh, out, out);
 
         // TODO: item_context needs to fit for both T and U
-        return [=](item_context<Rank, T> &ctx) {
-            out_acc[ctx[0]] = f(ctx.get_item(), first_in_acc[ctx[0]], second_in_acc[ctx[1]]);
+        return [=](item_context<Rank, V(T, U)> &ctx) {
+            out_acc[ctx.get_out()] = f(ctx.get_item(), first_in_acc[ctx.template get_in<0>()], second_in_acc[ctx.template get_in<1>()]);
         };
     };
 }
 
@@ -27,16 +27,28 @@ auto fuse(task_t<ExecutionPolicyA, KernelA> a, task_t<ExecutionPolicyB, KernelB>
     using new_execution_policy = named_distributed_execution_policy<
         indexed_kernel_name_t<fused<ExecutionPolicyA, ExecutionPolicyB>>>;
 
-    using kernel_type = std::invoke_result_t<decltype(a.get_sequence()), handler &>;
-    using item_type = traits::arg_type_t<kernel_type, 0>;
+    using kernel_a_type = std::invoke_result_t<decltype(a.get_sequence()), handler &>;
+    using context_a_type = std::decay_t<traits::arg_type_t<kernel_a_type, 0>>;
 
-    auto seq = a.get_sequence() | b.get_sequence();
+    using kernel_b_type = std::invoke_result_t<decltype(b.get_sequence()), handler &>;
+    using context_b_type = std::decay_t<traits::arg_type_t<kernel_b_type, 0>>;
+
+    using combined_context_type = combined_context_t<context_a_type, context_b_type>;
 
     auto f = [=](handler &cgh) {
-        const auto kernels = sequence(std::invoke(seq, cgh));
+        const auto kernels_a = sequence(std::invoke(a.get_sequence(), cgh));
+        const auto kernels_b = sequence(std::invoke(b.get_sequence(), cgh));
+
+        return [=](combined_context_type &ctx) {
+            context_a_type ctx_a(ctx.get_item());
+            ctx_a.copy_in(ctx);
+
+            kernels_a(ctx_a);
 
-        return [=](item_type item) {
-            kernels(item);
+            context_b_type ctx_b{ctx_a, ctx};
+            kernels_b(ctx_b);
+
+            ctx.copy_out(ctx_b);
         };
     };
 
@@ -60,8 +72,16 @@ auto fuse(task_t<ExecutionPolicyA, KernelA> a,
     using new_execution_policy = named_distributed_execution_policy<
         indexed_kernel_name_t<fused<fused<ExecutionPolicyA, ExecutionPolicyB>, ExecutionPolicyC>>>;
 
-    using kernel_type = std::invoke_result_t<decltype(a.get_sequence()), handler &>;
-    using item_type = traits::arg_type_t<kernel_type, 0>;
+    using kernel_a_type = std::invoke_result_t<decltype(a.get_sequence()), handler &>;
+    using context_a_type = std::decay_t<traits::arg_type_t<kernel_a_type, 0>>;
+
+    using kernel_b_type = std::invoke_result_t<decltype(b.get_sequence()), handler &>;
+    using context_b_type = std::decay_t<traits::arg_type_t<kernel_b_type, 0>>;
+
+    using kernel_c_type = std::invoke_result_t<decltype(c.get_sequence()), handler &>;
+    using context_c_type = std::decay_t<traits::arg_type_t<kernel_c_type, 0>>;
+
+    using combined_context_type = combined_context_t<context_a_type, context_c_type>;
 
     auto seq_a = a.get_sequence();
     auto seq_b = b.get_sequence();
@@ -72,29 +92,18 @@ auto fuse(task_t<ExecutionPolicyA, KernelA> a,
         const auto kernels_b = sequence(std::invoke(seq_b, cgh));
         const auto kernels_c = sequence(std::invoke(seq_c, cgh));
 
-        return [=](item_type item) {
-            kernels_a(item);
-            // data[0] = result of a
-            // data[1] = empty
-
-            // switch item context so that
-            // the b-kernels write to the
-            // second data store
-            item.switch_data();
-            // data[0] = empty
-            // data[1] = result of a
-
-            kernels_b(item);
-            // data[0] = result of b
-            // data[1] = result of a
-
-            // switch back to normal
-            // data[0] = result of a
-            // data[1] = result of b
-            item.switch_data();
-
-            kernels_c(item);
-            // result of c written to buffer
+        return [=](combined_context_type &ctx) {
+            context_a_type ctx_a{ctx.get_item()};
+            ctx_a.copy_in(ctx);
+            kernels_a(ctx_a);
+
+            context_b_type ctx_b{ctx.get_item()};
+            kernels_b(ctx_b);
+
+            context_c_type ctx_c{ctx_a, ctx_b};
+            kernels_c(ctx_c);
+
+            ctx.copy_out(ctx_c);
         };
     };
 
@@ -116,8 +125,11 @@ auto fuse_right(task_t<ExecutionPolicyB, KernelB> b,
     using new_execution_policy = named_distributed_execution_policy<
         indexed_kernel_name_t<fused<ExecutionPolicyB, ExecutionPolicyC>>>;
 
-    using kernel_type = std::invoke_result_t<decltype(b.get_sequence()), handler &>;
-    using item_type = traits::arg_type_t<kernel_type, 0>;
+    using kernel_b_type = std::invoke_result_t<decltype(b.get_sequence()), handler &>;
+    using context_b_type = std::decay_t<traits::arg_type_t<kernel_b_type, 0>>;
+
+    using kernel_c_type = std::invoke_result_t<decltype(c.get_sequence()), handler &>;
+    using context_c_type = std::decay_t<traits::arg_type_t<kernel_c_type, 0>>;
 
     auto seq_b = b.get_sequence();
     auto seq_c = c.get_sequence();
@@ -126,26 +138,12 @@ auto fuse_right(task_t<ExecutionPolicyB, KernelB> b,
         const auto kernels_b = sequence(std::invoke(seq_b, cgh));
         const auto kernels_c = sequence(std::invoke(seq_c, cgh));
 
-        return [=](item_type item) {
-            // switch item context so that
-            // the b-kernels write to the
-            // second data store
-            item.switch_data();
-            // data[0] = empty
-            // data[1] = empty
-
-            kernels_b(item);
-            // data[0] = result of b
-            // data[1] = empty
-
-            // switch back to normal
-            // data[0] = empty
-            // data[1] = result of b
-            item.switch_data();
-
-            kernels_c(item);
-            // data[0] = result of c
-            // data[1] = empty
+        return [=](context_c_type &ctx_c) {
+            context_b_type ctx_b{ctx_c.get_item()};
+            kernels_b(ctx_b);
+
+            ctx_c.template get_in<1>() = ctx_b.get_out();
+            kernels_c(ctx_c);
         };
     };