diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index b03fab12..647fea7c 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -6,4 +6,7 @@ add_executable(internal-iteration-benchmark internal_iteration_benchmark.cpp) target_link_libraries(internal-iteration-benchmark PUBLIC nanobench flux) add_executable(sort-benchmark sort_benchmark.cpp) -target_link_libraries(sort-benchmark PUBLIC nanobench flux) \ No newline at end of file +target_link_libraries(sort-benchmark PUBLIC nanobench flux) + +add_executable(multidimensional-memset-benchmark multidimensional_memset_benchmark.cpp multidimensional_memset_benchmark_kernels.cpp) +target_link_libraries(multidimensional-memset-benchmark PUBLIC nanobench flux) diff --git a/benchmark/multidimensional_memset_benchmark.cpp b/benchmark/multidimensional_memset_benchmark.cpp new file mode 100644 index 00000000..5174bc22 --- /dev/null +++ b/benchmark/multidimensional_memset_benchmark.cpp @@ -0,0 +1,93 @@ + +// Copyright (c) 2021 Barry Revzin +// Copyright (c) 2023 NVIDIA Corporation (reply-to: brycelelbach@gmail.com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include + +#include + +#include +#include +#include + +namespace an = ankerl::nanobench; + +// Kernels are placed in a separate translation unit to prevent compilers from +// optimizing them based on the input that we'll be giving them and to make it +// easier to study their compiled assembly. 
+extern void memset_2d_reference(double* A, flux::distance_t N, flux::distance_t M); +extern void memset_2d_std_cartesian_product_iota(double* A, flux::distance_t N, flux::distance_t M); +extern void memset_2d_flux_cartesian_product_iota(double* A, flux::distance_t N, flux::distance_t M); +extern void memset_diagonal_2d_reference(double* A, flux::distance_t N, flux::distance_t M); +extern void memset_diagonal_2d_std_cartesian_product_iota_filter(double* A, flux::distance_t N, flux::distance_t M); +extern void memset_diagonal_2d_flux_cartesian_product_iota_filter(double* A, flux::distance_t N, flux::distance_t M); + +int main(int argc, char** argv) +{ + int const n_iters = argc > 1 ? std::atoi(argv[1]) : 40; + + constexpr flux::distance_t N = 1024; + constexpr flux::distance_t M = 2048; + std::vector A(N * M); + + const auto run_benchmark = + [] (auto& bench, auto& A, auto N, auto M, auto name, auto func, auto check) { + std::iota(A.begin(), A.end(), 0); + bench.run(name, [&] { func(A.data(), N, M); }); + check(A, N, M); + }; + + { + const auto check_2d = [] (auto& A, auto N, auto M) { + const auto it = std::ranges::find_if_not(A, [&] (auto e) { return e == 0.0; }); + if (it != A.end()) + throw false; + }; + + auto bench = an::Bench() + .minEpochIterations(n_iters) + .relative(true) + .performanceCounters(false); + + const auto run_2d_benchmark_impl = [&] (auto name, auto func) { + run_benchmark(bench, A, N, M, name, func, check_2d); + }; + + #define run_2d_benchmark(func) run_2d_benchmark_impl(#func, func) + + run_2d_benchmark(memset_2d_reference); + run_2d_benchmark(memset_2d_std_cartesian_product_iota); + run_2d_benchmark(memset_2d_flux_cartesian_product_iota); + } + + { + const auto check_diagonal_2d = [] (auto& A, auto N, auto M) { + for (auto i : std::views::iota(0, N)) + for (auto j : std::views::iota(0, M)) { + if (i == j) { + if (A[i * M + j] != 0.0) throw false; + } else { + if (A[i * M + j] != i * M + j) throw false; + } + } + }; + + auto bench = 
an::Bench() + .minEpochIterations(n_iters) + .relative(true) + .performanceCounters(false); + + const auto run_diagonal_2d_benchmark_impl = [&] (auto name, auto func) { + run_benchmark(bench, A, N, M, name, func, check_diagonal_2d); + }; + + #define run_diagonal_2d_benchmark(func) run_diagonal_2d_benchmark_impl(#func, func) + + run_diagonal_2d_benchmark(memset_diagonal_2d_reference); + run_diagonal_2d_benchmark(memset_diagonal_2d_std_cartesian_product_iota_filter); + run_diagonal_2d_benchmark(memset_diagonal_2d_flux_cartesian_product_iota_filter); + } +} diff --git a/benchmark/multidimensional_memset_benchmark_kernels.cpp b/benchmark/multidimensional_memset_benchmark_kernels.cpp new file mode 100644 index 00000000..5399ab91 --- /dev/null +++ b/benchmark/multidimensional_memset_benchmark_kernels.cpp @@ -0,0 +1,68 @@ + +// Copyright (c) 2023 NVIDIA Corporation (reply-to: brycelelbach@gmail.com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include + +#include "ranges_cartesian_product.hpp" + +#include +#include + +void memset_2d_reference(double* A, flux::distance_t N, flux::distance_t M) +{ + for (flux::distance_t i = 0; i != N; ++i) + for (flux::distance_t j = 0; j != M; ++j) + A[i * M + j] = 0.0; +} + +void memset_2d_std_cartesian_product_iota(double* A, flux::distance_t N, flux::distance_t M) +{ + std::ranges::for_each( + std::views::cartesian_product(std::views::iota(0, N), std::views::iota(0, M)), + flux::unpack([&] (auto i, auto j) { + A[i * M + j] = 0.0; + })); +} + +void memset_2d_flux_cartesian_product_iota(double* A, flux::distance_t N, flux::distance_t M) +{ + flux::for_each( + flux::cartesian_product(flux::ints(0, N), flux::ints(0, M)), + flux::unpack([&] (auto i, auto j) { + A[i * M + j] = 0.0; + })); +} + +void memset_diagonal_2d_reference(double* A, flux::distance_t N, flux::distance_t M) +{ + for 
(flux::distance_t i = 0; i != N; ++i) + for (flux::distance_t j = 0; j != M; ++j) + if (i == j) A[i * M + j] = 0.0; +} + +void memset_diagonal_2d_std_cartesian_product_iota_filter(double* A, flux::distance_t N, flux::distance_t M) +{ + std::ranges::for_each( + std::views::cartesian_product(std::views::iota(0, N), std::views::iota(0, M)) + | std::views::filter(flux::unpack([] (auto i, auto j) { return i == j; })), + flux::unpack([&] (auto i, auto j) { + A[i * M + j] = 0.0; + })); +} + +void memset_diagonal_2d_flux_cartesian_product_iota_filter(double* A, flux::distance_t N, flux::distance_t M) +{ + flux::for_each( + flux::cartesian_product(flux::ints(0, N), flux::ints(0, M)) + .filter(flux::unpack([] (auto i, auto j) { return i == j; })), + flux::unpack([&] (auto i, auto j) { + A[i * M + j] = 0.0; + })); +} + diff --git a/benchmark/ranges_cartesian_product.hpp b/benchmark/ranges_cartesian_product.hpp new file mode 100644 index 00000000..430bf529 --- /dev/null +++ b/benchmark/ranges_cartesian_product.hpp @@ -0,0 +1,423 @@ + +// Copyright (c) 2015-2017 Bryce Adelstein Lelbach +// Copyright (c) 2020-2023 Corentin Jabot +// Copyright (c) 2017-2023 NVIDIA Corporation (reply-to: brycelelbach@gmail.com) +// +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#pragma once + +#include +#include + +namespace std::ranges { + +namespace detail { + +template +constexpr bool valid_cartesian_product_pack + = std::ranges::input_range && (std::ranges::forward_range && ...); + +template +concept cartesian_product_simple_view + = std::ranges::view &&std::ranges::range + && std::same_as, std::ranges::iterator_t> + && std::same_as, std::ranges::sentinel_t>; + +} // namespace detail + +template + requires(sizeof...(Ranges) == 0) || detail::valid_cartesian_product_pack +struct cartesian_product_view + : std::ranges::view_interface> +{ +private: + std::tuple bases; + +public: + constexpr cartesian_product_view() = default; + + constexpr cartesian_product_view(Ranges... base_) + : bases(std::move(base_)...) {} + + template + struct sentinel; + + template + struct iterator + { + private: + using parent = std::conditional_t< + IsConst, cartesian_product_view const, cartesian_product_view + >; + + parent* view = nullptr; + std::tuple...> its; + + template + friend struct cartesian_product_view::sentinel; + + static constexpr auto iterator_category_impl() { + if constexpr ((std::ranges::random_access_range && ...)) + return std::random_access_iterator_tag{}; + else if constexpr ((std::ranges::bidirectional_range && ...)) + return std::bidirectional_iterator_tag{}; + else if constexpr ((std::ranges::forward_range && ...)) + return std::forward_iterator_tag{}; + else if constexpr ((std::ranges::input_range && ...)) + return std::input_iterator_tag{}; + else + return std::output_iterator_tag{}; + } + + public: + using iterator_category = decltype(iterator_category_impl()); + using reference = std::tuple...>; + using value_type = std::tuple...>; + using difference_type = std::common_type_t...>; + + constexpr iterator() = default; + constexpr explicit iterator( + parent* view_, std::ranges::iterator_t... 
its_ + ) + : view(view_), its(std::move(its_)...) {} + + constexpr auto operator*() const + { + return std::apply( + [&](auto const&... args) { return reference{*(args)...}; } + , its + ); + } + + constexpr iterator operator++(int) + { + if constexpr ((std::ranges::forward_range && ...)) { + auto tmp = *this; + ++*this; + return tmp; + } + ++*this; + } + + constexpr iterator& operator++() + { + next(); + return *this; + } + + constexpr iterator& operator--() + requires(std::ranges::bidirectional_range && ...) + { + prev(); + return *this; + } + + constexpr iterator operator--(int) + requires(std::ranges::bidirectional_range && ...) + { + auto tmp = *this; + --*this; + return tmp; + } + + constexpr iterator& operator+=(difference_type n) + requires(std::ranges::random_access_range && ...) + { + advance(n); + return *this; + } + + constexpr iterator &operator-=(difference_type n) + requires(std::ranges::random_access_range && ...) + { + advance(-n); + return *this; + } + + friend constexpr iterator operator+(iterator i, difference_type n) + requires(std::ranges::random_access_range && ...) + { + return {i + n}; + } + + friend constexpr iterator operator+(difference_type n, iterator i) + requires(std::ranges::random_access_range && ...) + { + return {i + n}; + } + + friend constexpr iterator operator-(iterator i, difference_type n) + requires(std::ranges::random_access_range && ...) + { + return {i - n}; + } + + friend constexpr difference_type operator-( + iterator const& x, iterator const& y + ) + requires(std::ranges::random_access_range && ...) + { + return y.distance(x); + } + + constexpr decltype(auto) operator[](difference_type n) const + requires(std::ranges::random_access_range && ...) 
+ { + return *iterator{*this + n}; + } + + constexpr bool operator==(iterator const& other) const + { + if (at_end() && other.at_end()) + return true; + return eq(*this, other); + } + + friend constexpr auto operator<=>(iterator const& x, iterator const& y) + requires( + (std::ranges::random_access_range && ...) && + (std::three_way_comparable> && ...) + ) + { + return compare(x, y); + } + + friend constexpr bool operator==(const iterator &i, sentinel const&) + { + return i.at_end(); + } + friend constexpr bool operator==(const iterator &i, sentinel const&) + { + return i.at_end(); + } + + private: + constexpr bool at_end() const + { + auto const& v = std::get<0>(view->bases); + return std::end(v) == std::get<0>(its); + } + + template + constexpr static auto compare(iterator const& a, iterator const& b) + -> std::strong_ordering + { + auto cmp = std::get(a.its) <=> std::get(b.its); + if constexpr (N + 1 < sizeof...(Ranges)) { + if (cmp == 0) + return compare(a, b); + } + return cmp; + } + + template + constexpr static bool eq(iterator const& a, iterator const& b) + { + if (std::get(a.its) != std::get(b.its)) + return false; + if constexpr (N > 0) + return eq(a, b); + return true; + } + + template + constexpr void next() + { + const auto &v = std::get(view->bases); + auto &it = std::get(its); + if (++it == std::end(v)) { + if constexpr (N != 0) { + it = std::ranges::begin(v); + next(); + } + } + } + + template + constexpr void prev() + { + const auto &v = std::get(view->bases); + auto &it = std::get(its); + if (it == std::ranges::begin(v)) + { + std::ranges::advance(it, std::ranges::end(v)); + if constexpr (N > 0) + prev(); + } + --it; + } + + template + constexpr difference_type distance(iterator const& other) const + { + if constexpr (N == 0) { + return std::get<0>(other.its) - std::get<0>(its); + } else { + const auto d = this->distance(other); + auto const scale = std::ranges::distance(std::get(view->bases)); + auto const increment = std::get(other.its) - 
std::get(its); + + return difference_type{(d * scale) + increment}; + } + } + + template + void advance(difference_type n) + { + if (n == 0) + return; + + auto &i = std::get(its); + auto const size = static_cast( + std::ranges::size(std::get(view->bases)) + ); + auto const first = std::ranges::begin(std::get(view->bases)); + + auto const idx = static_cast(i - first); + n += idx; + + auto div = size ? n / size : 0; + auto mod = size ? n % size : 0; + + if constexpr (N != 0) { + if (mod < 0) { + mod += size; + div--; + } + advance(div); + } else { + if (div > 0) { + mod = size; + } + } + using D = std::iter_difference_t; + i = first + static_cast(mod); + } + }; + + template + struct sentinel + { + private: + friend iterator; + friend iterator; + + using parent = std::conditional_t< + IsConst, cartesian_product_view const, cartesian_product_view + >; + + parent* view = nullptr; + std::tuple...> end; + + public: + sentinel() = default; + + constexpr explicit sentinel( + parent* view_, std::ranges::sentinel_t... end_ + ) + : view(view_), end(std::move(end_)...) { + } + + constexpr sentinel(sentinel other) + requires( + IsConst + && (std::convertible_to< + std::ranges::sentinel_t + , std::ranges::sentinel_t + > && ...) + ) + : view(other.view_), end(other.end_) {} + }; + +public: + constexpr auto size() const + requires(std::ranges::sized_range && ...) + { + return std::apply( + [] (Args const&... args) + { + using Size = std::common_type_t...>; + return (Size(std::ranges::size(args)) * ...); + } + , bases); + } + + constexpr auto begin() + requires(!detail::cartesian_product_simple_view || ...) + { + return std::apply( + [&] (auto&... args) + { + using std::ranges::begin; + return iterator{this, begin(args)...}; + } + , bases + ); + } + + constexpr auto begin() const + requires(detail::cartesian_product_simple_view && ...) + { + return std::apply( + [&] (auto const&... 
args) + { + using std::ranges::begin; + return iterator{this, begin(args)...}; + } + , bases + ); + } + + constexpr auto end() const + requires(std::ranges::common_range && ...) + { + return std::apply( + [&] (auto const& first, auto const&... args) + { + using std::ranges::end; + using std::ranges::begin; + return iterator(this, end(first), begin(args)...); + }, + bases + ); + } + + constexpr auto end() const + requires(!std::ranges::common_range || ...) + { + return std::apply( + [&] (auto const&... args) { + return sentinel(this, std::end(args)...); + } + , bases + ); + } +}; + +template +cartesian_product_view(Ranges&&...) -> + cartesian_product_view...>; + +namespace detail { + +struct cartesian_product_fn +{ + template + constexpr auto operator()(Ranges&&... ranges) const + { + return cartesian_product_view((Ranges&&)ranges...); + } +}; + +} // namespace detail + +namespace views { + +inline constexpr detail::cartesian_product_fn cartesian_product{}; + +} // namespace views + +} // namespace std::ranges + diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt index 830e9d77..546c58e2 100644 --- a/example/CMakeLists.txt +++ b/example/CMakeLists.txt @@ -2,7 +2,9 @@ function(ADD_EXAMPLE NAME SOURCE) add_executable(${NAME} ${SOURCE}) target_link_libraries(${NAME} flux) - add_test(NAME ${NAME} COMMAND ${NAME}) + if(NOT ${${NAME}_SKIP_TEST}) + add_test(NAME ${NAME} COMMAND ${NAME} ${${NAME}_ARGS}) + endif() endfunction() add_example(example-config-parser config_parser.cpp) @@ -10,6 +12,7 @@ add_example(example-calendar calendar.cpp) add_example(example-merge-intervals merge_intervals.cpp) add_example(example-histogram histogram.cpp) add_example(example-word-count word_count.cpp) +set(example-word-count_SKIP_TEST TRUE) add_example(example-prime-numbers prime_numbers.cpp) add_example(example-shortest-path shortest_path.cpp) add_example(example-moving-average moving_average.cpp) @@ -35,4 +38,4 @@ add_example(example-docs-set-symmetric-difference 
docs/set_symmetric_difference.
 add_example(example-docs-set-union docs/set_union.cpp)
 add_example(example-docs-scan-first docs/scan_first.cpp)
 add_example(example-docs-starts-with docs/starts_with.cpp)
-add_example(example-docs-unfold docs/unfold.cpp)
\ No newline at end of file
+add_example(example-docs-unfold docs/unfold.cpp)
diff --git a/include/flux/core/config.hpp b/include/flux/core/config.hpp
index 14e6c701..d81f0903 100644
--- a/include/flux/core/config.hpp
+++ b/include/flux/core/config.hpp
@@ -19,6 +19,9 @@
 #define FLUX_OVERFLOW_POLICY_WRAP 11
 #define FLUX_OVERFLOW_POLICY_IGNORE 12
 
+#define FLUX_DIVIDE_BY_ZERO_POLICY_ERROR 100
+#define FLUX_DIVIDE_BY_ZERO_POLICY_IGNORE 101
+
 // Default error policy is terminate
 #define FLUX_DEFAULT_ERROR_POLICY FLUX_ERROR_POLICY_TERMINATE
 
@@ -29,6 +32,13 @@
 # define FLUX_DEFAULT_OVERFLOW_POLICY FLUX_OVERFLOW_POLICY_ERROR
 #endif // NDEBUG
 
+// Default divide by zero policy is error in debug builds, ignore in release builds
+#ifdef NDEBUG
+# define FLUX_DEFAULT_DIVIDE_BY_ZERO_POLICY FLUX_DIVIDE_BY_ZERO_POLICY_IGNORE
+#else
+# define FLUX_DEFAULT_DIVIDE_BY_ZERO_POLICY FLUX_DIVIDE_BY_ZERO_POLICY_ERROR
+#endif // NDEBUG
+
 // Select which error policy to use
 #if defined(FLUX_TERMINATE_ON_ERROR)
 # define FLUX_ERROR_POLICY FLUX_ERROR_POLICY_TERMINATE
@@ -63,6 +73,15 @@
 # define FLUX_OVERFLOW_POLICY FLUX_DEFAULT_OVERFLOW_POLICY
 #endif // FLUX_ERROR_ON_OVERFLOW
 
+// Select which divide by zero policy to use: an explicit FLUX_ERROR_ON_DIVIDE_BY_ZERO or FLUX_IGNORE_DIVIDE_BY_ZERO wins over the build-type default
+#if defined(FLUX_ERROR_ON_DIVIDE_BY_ZERO)
+# define FLUX_DIVIDE_BY_ZERO_POLICY FLUX_DIVIDE_BY_ZERO_POLICY_ERROR
+#elif defined(FLUX_IGNORE_DIVIDE_BY_ZERO)
+# define FLUX_DIVIDE_BY_ZERO_POLICY FLUX_DIVIDE_BY_ZERO_POLICY_IGNORE
+#else
+# define FLUX_DIVIDE_BY_ZERO_POLICY FLUX_DEFAULT_DIVIDE_BY_ZERO_POLICY
+#endif // FLUX_ERROR_ON_DIVIDE_BY_ZERO
+
 // Default int_t is ptrdiff_t
 #define FLUX_DEFAULT_INT_TYPE std::ptrdiff_t
 
@@ -86,6 +105,11 @@ enum class overflow_policy {
     error = FLUX_OVERFLOW_POLICY_ERROR
 };
 
+enum class divide_by_zero_policy {  // Enumerator values mirror the FLUX_DIVIDE_BY_ZERO_POLICY_* macros.
+    ignore = FLUX_DIVIDE_BY_ZERO_POLICY_IGNORE,  // perform the division without testing the divisor
+    error = FLUX_DIVIDE_BY_ZERO_POLICY_ERROR  // test the divisor and report a runtime error when it is zero
+};
+
 namespace config {
 
 FLUX_EXPORT
@@ -99,6 +123,9 @@ inline constexpr error_policy on_error = static_cast(FLUX_ERROR_PO
 FLUX_EXPORT
 inline constexpr overflow_policy on_overflow = static_cast(FLUX_OVERFLOW_POLICY);
 
+FLUX_EXPORT
+inline constexpr divide_by_zero_policy on_divide_by_zero = static_cast(FLUX_DIVIDE_BY_ZERO_POLICY);  // policy chosen by the macros above, as a typed constant
+
 FLUX_EXPORT
 inline constexpr bool print_error_on_terminate = FLUX_PRINT_ERROR_ON_TERMINATE;
 
diff --git a/include/flux/core/numeric.hpp b/include/flux/core/numeric.hpp
index 0fd4d310..f339edbe 100644
--- a/include/flux/core/numeric.hpp
+++ b/include/flux/core/numeric.hpp
@@ -171,6 +171,44 @@ inline constexpr auto checked_mul =
     }
 };
 
+inline constexpr auto checked_div =  // lhs / rhs, testing for a zero divisor when config::on_divide_by_zero is `error`
+    [](T lhs, T rhs,
+       std::source_location loc = std::source_location::current())  // defaults to the caller's location, which is what gets reported
+    -> T
+{
+    if (std::is_constant_evaluated()) {  // no explicit test during constant evaluation
+        return lhs / rhs;
+    } else {
+        if constexpr (config::on_divide_by_zero == divide_by_zero_policy::ignore) {
+            return lhs / rhs;  // policy `ignore`: divide without testing rhs
+        } else {
+            if (rhs == 0) {
+                runtime_error("divide by zero", loc);
+            }
+            return lhs / rhs;  // NOTE(review): lhs == minimum with rhs == -1 is not tested here — confirm whether the overflow policy should cover it
+        }
+    }
+};
+
+inline constexpr auto checked_mod =  // lhs % rhs, with the same zero-divisor handling as checked_div
+    [](T lhs, T rhs,
+       std::source_location loc = std::source_location::current())  // defaults to the caller's location, which is what gets reported
+    -> T
+{
+    if (std::is_constant_evaluated()) {  // no explicit test during constant evaluation
+        return lhs % rhs;
+    } else {
+        if constexpr (config::on_divide_by_zero == divide_by_zero_policy::ignore) {
+            return lhs % rhs;  // policy `ignore`: take the remainder without testing rhs
+        } else {
+            if (rhs == 0) {
+                runtime_error("divide by zero", loc);
+            }
+            return lhs % rhs;
+        }
+    }
+};
+
 } // namespace flux::num
 
 #endif
diff --git a/include/flux/op/cartesian_product.hpp b/include/flux/op/cartesian_product.hpp
index 82be248c..c40a22e0 100644
--- a/include/flux/op/cartesian_product.hpp
+++ b/include/flux/op/cartesian_product.hpp
@@ -1,5 +1,7 @@
 
 // Copyright (c) 2022 Tristan Brindle (tcbrindle at gmail dot com)
+// Copyright (c) 2023 NVIDIA Corporation (reply-to: brycelelbach@gmail.com)
+//
 //
Distributed under the Boost Software License, Version 1.0. (See accompanying // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -7,6 +9,7 @@ #define FLUX_OP_CARTESIAN_PRODUCT_HPP_INCLUDED #include +#include #include #include @@ -88,24 +91,37 @@ struct cartesian_product_traits_base { template static constexpr auto ra_inc_impl(Self& self, cursor_type& cur, distance_t offset) - -> cursor_type& + -> cursor_type& { - auto& base = std::get(self.bases_); - - auto this_sz = flux::size(base); - auto this_offset = offset % this_sz; - auto next_offset = offset/this_sz; + if (offset == 0) + return cur; - // Adjust this cursor by the corrected offset - flux::inc(base, std::get(cur), this_offset); + auto& base = std::get(self.bases_); + const auto this_index = flux::distance(base, flux::first(base), std::get(cur)); + auto new_index = num::checked_add(this_index, offset); + auto this_size = flux::size(base); + + // If the new index overflows the maximum or underflows zero, calculate the carryover and fix it. + if (new_index < 0 || new_index >= this_size) { + offset = num::checked_div(new_index, this_size); + new_index = num::checked_mod(new_index, this_size); + + // Correct for negative index which may happen when underflowing. + if (new_index < 0) { + new_index = num::checked_add(new_index, this_size); + offset = num::checked_sub(offset, flux::distance_t(1)); + } - // Call the next level down if necessary - if constexpr (I > 0) { - if (next_offset != 0) { - ra_inc_impl(self, cur, next_offset); + // Call the next level down if necessary. + if constexpr (I > 0) { + if (offset != 0) { + ra_inc_impl(self, cur, offset); + } } } + flux::inc(base, std::get(cur), num::checked_sub(new_index, this_index)); + return cur; } @@ -157,25 +173,25 @@ struct cartesian_product_traits_base { } template - requires (bidirectional_sequence> && ...) && - (bounded_sequence> && ...) + requires (bidirectional_sequence> && ...) && + (bounded_sequence> && ...) 
static constexpr auto dec(Self& self, cursor_type& cur) -> cursor_type& { return dec_impl(self, cur); } template - requires (random_access_sequence> && ...) && - (sized_sequence> && ...) + requires (random_access_sequence> && ...) && + (sized_sequence> && ...) static constexpr auto inc(Self& self, cursor_type& cur, distance_t offset) - -> cursor_type& + -> cursor_type& { return ra_inc_impl(self, cur, offset); } template - requires (random_access_sequence> && ...) && - (sized_sequence> && ...) + requires (random_access_sequence> && ...) && + (sized_sequence> && ...) static constexpr auto distance(Self& self, cursor_type const& from, cursor_type const& to) -> distance_t @@ -193,7 +209,6 @@ struct cartesian_product_traits_base { } }; - } // end namespace detail template @@ -216,6 +231,33 @@ struct sequence_traits> }(std::index_sequence_for{}); } + template + static constexpr void for_each_while_impl(Self& self, + bool& keep_going, + cursor_t& cur, + Function&& func, + PartialElements&&... partial_elements) + { + // We need to iterate right to left. 
+ if constexpr (I == sizeof...(Bases) - 1) { + std::get(cur) = flux::for_each_while(std::get(self.bases_), + [&](auto&& elem) { + keep_going = std::invoke(func, + element_t(FLUX_FWD(partial_elements)..., FLUX_FWD(elem))); + return keep_going; + }); + } else { + std::get(cur) = flux::for_each_while(std::get(self.bases_), + [&](auto&& elem) { + for_each_while_impl( + self, keep_going, cur, + func, FLUX_FWD(partial_elements)..., FLUX_FWD(elem)); + return keep_going; + }); + } + } + public: using value_type = std::tuple...>; @@ -242,6 +284,16 @@ struct sequence_traits> { return read_(flux::move_at_unchecked, self, cur); } + + template + static constexpr auto for_each_while(Self& self, Function&& func) + -> cursor_t + { + bool keep_going = true; + cursor_t cur; + for_each_while_impl<0>(self, keep_going, cur, FLUX_FWD(func)); + return cur; + } }; FLUX_EXPORT inline constexpr auto cartesian_product = detail::cartesian_product_fn{}; diff --git a/test/test_cartesian_product.cpp b/test/test_cartesian_product.cpp index ea0aa152..d22723d2 100644 --- a/test/test_cartesian_product.cpp +++ b/test/test_cartesian_product.cpp @@ -1,8 +1,15 @@ +// Copyright (c) 2022 Tristan Brindle (tcbrindle at gmail dot com) +// Copyright (c) 2023 NVIDIA Corporation (reply-to: brycelelbach@gmail.com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + #include "catch.hpp" #include #include +#include #include #include #include @@ -17,9 +24,58 @@ namespace { constexpr bool test_cartesian_product() { + // 1D `cartesian_product`. 
+ { + auto cart = flux::cartesian_product(std::array{100, 200, 300}); + + using C = decltype(cart); + + static_assert(flux::sequence); + static_assert(flux::multipass_sequence); + static_assert(flux::bidirectional_sequence); + static_assert(flux::random_access_sequence); + static_assert(not flux::contiguous_sequence); + static_assert(flux::bounded_sequence); + static_assert(flux::sized_sequence); + + static_assert(flux::sequence); + static_assert(flux::multipass_sequence); + static_assert(flux::bidirectional_sequence); + static_assert(flux::random_access_sequence); + static_assert(not flux::contiguous_sequence); + static_assert(flux::bounded_sequence); + static_assert(flux::sized_sequence); + + static_assert(std::same_as, std::tuple>); + static_assert(std::same_as, std::tuple>); + static_assert(std::same_as, std::tuple>); + + static_assert(std::same_as, std::tuple>); + static_assert(std::same_as, std::tuple>); + static_assert(std::same_as, std::tuple>); + + STATIC_CHECK(flux::size(cart) == 3); + + STATIC_CHECK(check_equal(cart, {std::tuple{100}, std::tuple{200}, std::tuple{300}})); + + STATIC_CHECK(flux::distance(cart, cart.first(), cart.last()) == 3); + + { + auto cur = flux::next(cart, cart.first(), 2); + STATIC_CHECK(cart[cur] == std::tuple{300}); + flux::inc(cart, cur, -2); + STATIC_CHECK(cart[cur] == std::tuple{100}); + } + + int sum = 0; + cart.for_each(flux::unpack([&] (int i) { sum += i; })); + STATIC_CHECK(sum == 100 + 200 + 300); + } + + // 2D `cartesian_product` with lvalue references. 
{ std::array arr1{100, 200}; - std::array arr2{1.0f, 2.0f}; + std::array arr2{true, false}; auto cart = flux::cartesian_product(flux::mut_ref(arr1), flux::mut_ref(arr2)); @@ -41,32 +97,42 @@ constexpr bool test_cartesian_product() static_assert(flux::bounded_sequence); static_assert(flux::sized_sequence); - static_assert(std::same_as, std::tuple>); - static_assert(std::same_as, std::tuple>); - static_assert(std::same_as, std::tuple>); + static_assert(std::same_as, std::tuple>); + static_assert(std::same_as, std::tuple>); + static_assert(std::same_as, std::tuple>); - static_assert(std::same_as, std::tuple>); - static_assert(std::same_as, std::tuple>); - static_assert(std::same_as, std::tuple>); + static_assert(std::same_as, std::tuple>); + static_assert(std::same_as, std::tuple>); + static_assert(std::same_as, std::tuple>); STATIC_CHECK(flux::size(cart) == 2 * 2); STATIC_CHECK(check_equal(cart, { - std::tuple{100, 1.0f}, std::tuple{100, 2.0f}, - std::tuple{200, 1.0f}, std::tuple{200, 2.0f} })); + std::tuple{100, true}, std::tuple{100, false}, + std::tuple{200, true}, std::tuple{200, false} })); STATIC_CHECK(flux::distance(cart, cart.first(), cart.last()) == 2 * 2); { auto cur = flux::next(cart, cart.first(), 2); - STATIC_CHECK(cart[cur] == std::tuple{200, 1.0f}); + STATIC_CHECK(cart[cur] == std::tuple{200, true}); flux::inc(cart, cur, -2); - STATIC_CHECK(cart[cur] == std::tuple{100, 1.0f}); + STATIC_CHECK(cart[cur] == std::tuple{100, true}); } + + int sum_i = 0; + int sum_j = 0; + cart.for_each(flux::unpack([&] (int i, bool j) { + sum_i += i; + sum_j += j; + })); + STATIC_CHECK(sum_i == 2 * (100 + 200)); + STATIC_CHECK(sum_j == 2); } + // 2D `cartesian_product` with rvalue references and temporaries. 
{ - auto cart = flux::cartesian_product(std::array{100, 200}, std::array{1.0f, 2.0f}); + auto cart = flux::cartesian_product(std::array{100, 200}, std::array{true, false}); using C = decltype(cart); @@ -86,31 +152,340 @@ constexpr bool test_cartesian_product() static_assert(flux::bounded_sequence); static_assert(flux::sized_sequence); - static_assert(std::same_as, std::tuple>); - static_assert(std::same_as, std::tuple>); - static_assert(std::same_as, std::tuple>); + static_assert(std::same_as, std::tuple>); + static_assert(std::same_as, std::tuple>); + static_assert(std::same_as, std::tuple>); - static_assert(std::same_as, std::tuple>); - static_assert(std::same_as, std::tuple>); - static_assert(std::same_as, std::tuple>); + static_assert(std::same_as, std::tuple>); + static_assert(std::same_as, std::tuple>); + static_assert(std::same_as, std::tuple>); STATIC_CHECK(flux::size(cart) == 2 * 2); STATIC_CHECK(check_equal(cart, { - std::tuple{100, 1.0f}, std::tuple{100, 2.0f}, - std::tuple{200, 1.0f}, std::tuple{200, 2.0f} })); + std::tuple{100, true}, std::tuple{100, false}, + std::tuple{200, true}, std::tuple{200, false} })); STATIC_CHECK(flux::distance(cart, cart.first(), cart.last()) == 2 * 2); { auto cur = flux::next(cart, cart.first(), 2); - STATIC_CHECK(cart[cur] == std::tuple{200, 1.0f}); + STATIC_CHECK(cart[cur] == std::tuple{200, true}); flux::inc(cart, cur, -2); - STATIC_CHECK(cart[cur] == std::tuple{100, 1.0f}); + STATIC_CHECK(cart[cur] == std::tuple{100, true}); + } + + int sum_i = 0; + int sum_j = 0; + cart.for_each(flux::unpack([&] (int i, bool j) { + sum_i += i; + sum_j += j; + })); + STATIC_CHECK(sum_i == 2 * (100 + 200)); + STATIC_CHECK(sum_j == 2); + } + + // 3D `cartesian_product`. 
+ { + std::array arr1{100, 200}; + std::array arr2{true, false, true, false}; + std::array arr3{1ULL, 2ULL, 4ULL}; + + auto cart = flux::cartesian_product(flux::mut_ref(arr1), flux::mut_ref(arr2), flux::mut_ref(arr3)); + + using C = decltype(cart); + + static_assert(flux::sequence); + static_assert(flux::multipass_sequence); + static_assert(flux::bidirectional_sequence); + static_assert(flux::random_access_sequence); + static_assert(not flux::contiguous_sequence); + static_assert(flux::bounded_sequence); + static_assert(flux::sized_sequence); + + static_assert(flux::sequence); + static_assert(flux::multipass_sequence); + static_assert(flux::bidirectional_sequence); + static_assert(flux::random_access_sequence); + static_assert(not flux::contiguous_sequence); + static_assert(flux::bounded_sequence); + static_assert(flux::sized_sequence); + + static_assert(std::same_as, std::tuple>); + static_assert(std::same_as, std::tuple>); + static_assert(std::same_as, std::tuple>); + + static_assert(std::same_as, std::tuple>); + static_assert(std::same_as, std::tuple>); + static_assert(std::same_as, std::tuple>); + + STATIC_CHECK(flux::size(cart) == 2 * 4 * 3); + + STATIC_CHECK(check_equal(cart, { + std::tuple{100, true, 1ULL}, + std::tuple{100, true, 2ULL}, + std::tuple{100, true, 4ULL}, + std::tuple{100, false, 1ULL}, + std::tuple{100, false, 2ULL}, + std::tuple{100, false, 4ULL}, + std::tuple{100, true, 1ULL}, + std::tuple{100, true, 2ULL}, + std::tuple{100, true, 4ULL}, + std::tuple{100, false, 1ULL}, + std::tuple{100, false, 2ULL}, + std::tuple{100, false, 4ULL}, + std::tuple{200, true, 1ULL}, + std::tuple{200, true, 2ULL}, + std::tuple{200, true, 4ULL}, + std::tuple{200, false, 1ULL}, + std::tuple{200, false, 2ULL}, + std::tuple{200, false, 4ULL}, + std::tuple{200, true, 1ULL}, + std::tuple{200, true, 2ULL}, + std::tuple{200, true, 4ULL}, + std::tuple{200, false, 1ULL}, + std::tuple{200, false, 2ULL}, + std::tuple{200, false, 4ULL} + })); + + 
STATIC_CHECK(flux::distance(cart, cart.first(), cart.last()) == 2 * 4 * 3); + + { + auto cur = flux::next(cart, cart.first(), 3); + STATIC_CHECK(cart[cur] == std::tuple{100, false, 1ULL}); + flux::inc(cart, cur, -3); + STATIC_CHECK(cart[cur] == std::tuple{100, true, 1ULL}); + } + + int sum_i = 0; + int sum_j = 0; + unsigned long long sum_k = 0; + cart.for_each(flux::unpack([&] (int i, bool j, unsigned long long k) { + sum_i += i; + sum_j += j; + sum_k += k; + })); + STATIC_CHECK(sum_i == 12 * (100 + 200)); + STATIC_CHECK(sum_j == 12); + STATIC_CHECK(sum_k == 8ULL * (1ULL + 2ULL + 4ULL)); + } + + // Higher dimension `cartesian_product`. + { + std::array arr{100, 200}; + + auto cart = flux::cartesian_product( + flux::mut_ref(arr), + flux::mut_ref(arr), + flux::mut_ref(arr), + flux::mut_ref(arr), + flux::mut_ref(arr), + flux::mut_ref(arr) + ); + + using C = decltype(cart); + + static_assert(flux::sequence); + static_assert(flux::multipass_sequence); + static_assert(flux::bidirectional_sequence); + static_assert(flux::random_access_sequence); + static_assert(not flux::contiguous_sequence); + static_assert(flux::bounded_sequence); + static_assert(flux::sized_sequence); + + static_assert(flux::sequence); + static_assert(flux::multipass_sequence); + static_assert(flux::bidirectional_sequence); + static_assert(flux::random_access_sequence); + static_assert(not flux::contiguous_sequence); + static_assert(flux::bounded_sequence); + static_assert(flux::sized_sequence); + + static_assert(std::same_as, std::tuple>); + static_assert(std::same_as, std::tuple>); + static_assert(std::same_as, std::tuple>); + + static_assert(std::same_as, std::tuple>); + static_assert(std::same_as, std::tuple>); + static_assert(std::same_as, std::tuple>); + + STATIC_CHECK(flux::size(cart) == 2 * 2 * 2 * 2 * 2 * 2); + + STATIC_CHECK(flux::distance(cart, cart.first(), cart.last()) == 2 * 2 * 2 * 2 * 2 * 2); + + { + auto cur = flux::next(cart, cart.first(), 3); + STATIC_CHECK(cart[cur] == 
std::tuple{100, 100, 100, 100, 200, 200}); + flux::inc(cart, cur, -3); + STATIC_CHECK(cart[cur] == std::tuple{100, 100, 100, 100, 100, 100}); + } + + int sum_a = 0; + int sum_b = 0; + int sum_c = 0; + int sum_d = 0; + int sum_e = 0; + int sum_f = 0; + cart.for_each(flux::unpack([&] (int a, int b, int c, int d, int e, int f) { + sum_a += a; + sum_b += b; + sum_c += c; + sum_d += d; + sum_e += e; + sum_f += f; + })); + STATIC_CHECK(sum_a == 32 * (100 + 200)); + STATIC_CHECK(sum_b == 32 * (100 + 200)); + STATIC_CHECK(sum_c == 32 * (100 + 200)); + STATIC_CHECK(sum_d == 32 * (100 + 200)); + STATIC_CHECK(sum_e == 32 * (100 + 200)); + STATIC_CHECK(sum_f == 32 * (100 + 200)); + } + + // `cartesian_product` of `iota`/`ints`. + { + auto cart = flux::cartesian_product(flux::ints(0, 4), flux::ints(0, 2), flux::ints(0, 3)); + + using C = decltype(cart); + + static_assert(flux::sequence); + static_assert(flux::multipass_sequence); + static_assert(flux::bidirectional_sequence); + static_assert(flux::random_access_sequence); + static_assert(not flux::contiguous_sequence); + static_assert(flux::bounded_sequence); + static_assert(flux::sized_sequence); + + static_assert(flux::sequence); + static_assert(flux::multipass_sequence); + static_assert(flux::bidirectional_sequence); + static_assert(flux::random_access_sequence); + static_assert(not flux::contiguous_sequence); + static_assert(flux::bounded_sequence); + static_assert(flux::sized_sequence); + + static_assert(std::same_as, std::tuple>); + static_assert(std::same_as, std::tuple>); + static_assert(std::same_as, std::tuple>); + + static_assert(std::same_as, std::tuple>); + static_assert(std::same_as, std::tuple>); + static_assert(std::same_as, std::tuple>); + + STATIC_CHECK(flux::size(cart) == 4 * 2 * 3); + + STATIC_CHECK(check_equal(cart, { + std::tuple{0, 0, 0}, + std::tuple{0, 0, 1}, + std::tuple{0, 0, 2}, + std::tuple{0, 1, 0}, + std::tuple{0, 1, 1}, + std::tuple{0, 1, 2}, + std::tuple{1, 0, 0}, + std::tuple{1, 0, 1}, + 
std::tuple{1, 0, 2}, + std::tuple{1, 1, 0}, + std::tuple{1, 1, 1}, + std::tuple{1, 1, 2}, + std::tuple{2, 0, 0}, + std::tuple{2, 0, 1}, + std::tuple{2, 0, 2}, + std::tuple{2, 1, 0}, + std::tuple{2, 1, 1}, + std::tuple{2, 1, 2}, + std::tuple{3, 0, 0}, + std::tuple{3, 0, 1}, + std::tuple{3, 0, 2}, + std::tuple{3, 1, 0}, + std::tuple{3, 1, 1}, + std::tuple{3, 1, 2}, + })); + + STATIC_CHECK(flux::distance(cart, cart.first(), cart.last()) == 4 * 2 * 3); + + { + STATIC_CHECK(flux::next(cart, cart.first(), 6) == std::tuple{1, 0, 0}); + STATIC_CHECK(flux::next(cart, flux::next(cart, cart.first(), 6), 1) == std::tuple{1, 0, 1}); + STATIC_CHECK(flux::next(cart, flux::next(cart, cart.first(), 6), 2) == std::tuple{1, 0, 2}); + STATIC_CHECK(flux::next(cart, flux::next(cart, cart.first(), 6), 3) == std::tuple{1, 1, 0}); + STATIC_CHECK(flux::next(cart, flux::next(cart, cart.first(), 6), 4) == std::tuple{1, 1, 1}); + STATIC_CHECK(flux::next(cart, flux::next(cart, cart.first(), 6), 5) == std::tuple{1, 1, 2}); + STATIC_CHECK(flux::next(cart, flux::next(cart, cart.first(), 6), -1) == std::tuple{0, 1, 2}); + STATIC_CHECK(flux::next(cart, flux::next(cart, cart.first(), 6), -2) == std::tuple{0, 1, 1}); + STATIC_CHECK(flux::next(cart, flux::next(cart, cart.first(), 6), -3) == std::tuple{0, 1, 0}); + STATIC_CHECK(flux::next(cart, flux::next(cart, cart.first(), 6), -4) == std::tuple{0, 0, 2}); + STATIC_CHECK(flux::next(cart, flux::next(cart, cart.first(), 6), -5) == std::tuple{0, 0, 1}); + + STATIC_CHECK(flux::next(cart, cart.first(), 11) == std::tuple{1, 1, 2}); + STATIC_CHECK(flux::next(cart, flux::next(cart, cart.first(), 11), 1) == std::tuple{2, 0, 0}); + STATIC_CHECK(flux::next(cart, flux::next(cart, cart.first(), 11), 2) == std::tuple{2, 0, 1}); + STATIC_CHECK(flux::next(cart, flux::next(cart, cart.first(), 11), 3) == std::tuple{2, 0, 2}); + STATIC_CHECK(flux::next(cart, flux::next(cart, cart.first(), 11), 4) == std::tuple{2, 1, 0}); + STATIC_CHECK(flux::next(cart, flux::next(cart, 
cart.first(), 11), 5) == std::tuple{2, 1, 1}); + STATIC_CHECK(flux::next(cart, flux::next(cart, cart.first(), 11), -1) == std::tuple{1, 1, 1}); + STATIC_CHECK(flux::next(cart, flux::next(cart, cart.first(), 11), -2) == std::tuple{1, 1, 0}); + STATIC_CHECK(flux::next(cart, flux::next(cart, cart.first(), 11), -3) == std::tuple{1, 0, 2}); + STATIC_CHECK(flux::next(cart, flux::next(cart, cart.first(), 11), -4) == std::tuple{1, 0, 1}); + STATIC_CHECK(flux::next(cart, flux::next(cart, cart.first(), 11), -5) == std::tuple{1, 0, 0}); } + + flux::distance_t sum_i = 0; + flux::distance_t sum_j = 0; + flux::distance_t sum_k = 0; + cart.for_each(flux::unpack([&] (flux::distance_t i, flux::distance_t j, flux::distance_t k) { + sum_i += i; + sum_j += j; + sum_k += k; + })); + constexpr auto triangular_number = [] (auto n) { return (n * (n + 1)) / 2; }; + STATIC_CHECK(sum_i == triangular_number(4 - 1) * 2 * 3); + STATIC_CHECK(sum_j == 4 * triangular_number(2 - 1) * 3); + STATIC_CHECK(sum_k == 4 * 2 * triangular_number(3 - 1)); } - // Test unpack() + // `cartesian_product` `for_each` element type. + { + struct T {}; + + auto cart = flux::cartesian_product(std::array{100, 200}, std::array{T{}, T{}}); + + int sum_i = 0; + int count_j = 0; + cart.for_each(flux::unpack([&] (int i, T) { + sum_i += i; + count_j += 1; + })); + STATIC_CHECK(sum_i == 2 * (100 + 200)); + STATIC_CHECK(count_j == 4); + } + + // `cartesian_product` `for_each_while` short circuits. + { + auto cart = flux::cartesian_product(std::array{100, 200}, std::array{300, 0}); + + int count = 0; + cart.for_each_while(flux::unpack([&] (auto, auto j) { + ++count; + return j != 0; + })); + STATIC_CHECK(count == 2); + } + + // `cartesian_product` with a zero-sized sequence produces an empty sequence. 
+ { + auto cart = flux::cartesian_product(std::array{1, 2, 3, 4, 5}, + flux::empty); + + static_assert(flux::bidirectional_sequence); + + STATIC_CHECK(cart.is_empty()); + + int s = 0; + cart.for_each(flux::unpack([&s](int i, int) { s += i; })); + STATIC_CHECK(s == 0); + } + + // `cartesian_product` with `unpack`. { int vals[3][3] = {}; @@ -124,7 +499,6 @@ constexpr bool test_cartesian_product() STATIC_CHECK(vals[i][j] == 100); } } - } return true; @@ -136,4 +510,4 @@ static_assert(test_cartesian_product()); TEST_CASE("cartesian_product") { REQUIRE(test_cartesian_product()); -} \ No newline at end of file +} diff --git a/test/test_utils.hpp b/test/test_utils.hpp index 7c2e82a7..cc69b79c 100644 --- a/test/test_utils.hpp +++ b/test/test_utils.hpp @@ -1,5 +1,7 @@ // Copyright (c) 2022 Tristan Brindle (tcbrindle at gmail dot com) +// Copyright (c) 2023 NVIDIA Corporation (reply-to: brycelelbach@gmail.com) +// // Distributed under the Boost Software License, Version 1.0. (See accompanying // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -9,7 +11,7 @@ #include #include -#define STATIC_CHECK(...) if (!(__VA_ARGS__)) return false +#define STATIC_CHECK(...) if (!(__VA_ARGS__)) throw false inline namespace test_utils {