From 36459f42a84207330eae706c47e6fab712e6a149 Mon Sep 17 00:00:00 2001 From: Daniel Anderson Date: Fri, 16 Feb 2024 22:18:42 -0500 Subject: [PATCH] Rewrite relocation to be compliant with P1144 (#67) [Relocation](https://quuxplusone.github.io/blog/2018/07/18/announcing-trivially-relocatable/) now mostly follows the API proposed in [P1144](https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2023/p1144r9.html). Parlay will also defer the implementation of relocation operations to the compiler/library if they are present, which currently works on Arthur O'Dwyer's LLVM fork [here](https://github.com/Quuxplusone/llvm-project). --- CMakeLists.txt | 3 +- benchmark/bench_sequence.cpp | 58 +++ include/parlay/alloc.h | 7 +- include/parlay/internal/bucket_sort.h | 4 +- include/parlay/internal/collect_reduce.h | 2 +- include/parlay/internal/counting_sort.h | 2 +- include/parlay/internal/delayed/filter.h | 2 +- include/parlay/internal/delayed/filter_op.h | 2 +- include/parlay/internal/integer_sort.h | 14 +- include/parlay/internal/merge_sort.h | 2 +- include/parlay/internal/sample_sort.h | 2 +- include/parlay/internal/sequence_base.h | 11 +- .../parlay/internal/uninitialized_iterator.h | 159 +++++++ .../parlay/internal/uninitialized_sequence.h | 2 +- .../parlay/internal/uninitialized_storage.h | 2 +- include/parlay/primitives.h | 4 +- include/parlay/relocation.h | 205 +++++----- include/parlay/sequence.h | 312 ++++++-------- include/parlay/type_traits.h | 236 +++++++++-- include/parlay/utilities.h | 92 ++++- test/test_counting_sort.cpp | 7 - test/test_integer_sort.cpp | 31 +- test/test_relocate.cpp | 387 ++++++------------ test/test_sample_sort.cpp | 6 - test/test_sequence.cpp | 29 +- 25 files changed, 922 insertions(+), 659 deletions(-) create mode 100644 include/parlay/internal/uninitialized_iterator.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 3b9a2cab..59388488 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,7 +6,8 @@ # 
------------------------------------------------------------------- cmake_minimum_required(VERSION 3.14) -project(PARLAY VERSION 2.2.4 + +project(PARLAY VERSION 2.3.1 DESCRIPTION "A collection of parallel algorithms and other support for parallelism in C++" LANGUAGES CXX) diff --git a/benchmark/bench_sequence.cpp b/benchmark/bench_sequence.cpp index 6b6f052a..91533eb9 100644 --- a/benchmark/bench_sequence.cpp +++ b/benchmark/bench_sequence.cpp @@ -27,6 +27,61 @@ static void bench_short_subscript(benchmark::State& state) { } } +static void bench_grow_int64(benchmark::State& state) { + parlay::sequence s; + for (auto _ : state) { + state.PauseTiming(); + s = parlay::sequence(10000000); + state.ResumeTiming(); + s.reserve(s.capacity() + 1); // Trigger grow + } +} + +// No annotation needed since this one should be detectable +struct Relocatable { + std::unique_ptr x; + Relocatable() = default; + Relocatable(int x_) : x(std::make_unique(x_)) { } +}; + +#if defined(PARLAY_MUST_SPECIALIZE_IS_TRIVIALLY_RELOCATABLE) +namespace parlay { +template<> +PARLAY_ASSUME_TRIVIALLY_RELOCATABLE(Relocatable); +} +#endif + +static_assert(parlay::is_trivially_relocatable_v); + +struct NotRelocatable { + std::unique_ptr x; + NotRelocatable() = default; + NotRelocatable(int x_) : x(std::make_unique(x_)) { } + NotRelocatable(NotRelocatable&& other) noexcept : x(std::move(other.x)) { } + ~NotRelocatable() { } +}; +static_assert(!parlay::is_trivially_relocatable_v); + +static void bench_grow_relocatable(benchmark::State& state) { + parlay::sequence s; + for (auto _ : state) { + state.PauseTiming(); + s = parlay::sequence(10000000); + state.ResumeTiming(); + s.reserve(s.capacity() + 1); // Trigger grow + } +} + +static void bench_grow_nonrelocatable(benchmark::State& state) { + parlay::sequence s; + for (auto _ : state) { + state.PauseTiming(); + s = parlay::sequence(10000000); + state.ResumeTiming(); + s.reserve(s.capacity() + 1); // Trigger grow + } +} + // ------------------------- 
Registration ------------------------------- #define BENCH(NAME) BENCHMARK(bench_ ## NAME) \ @@ -35,3 +90,6 @@ static void bench_short_subscript(benchmark::State& state) { BENCH(subscript); BENCH(short_subscript); +BENCH(grow_int64); +BENCH(grow_relocatable); +BENCH(grow_nonrelocatable); diff --git a/include/parlay/alloc.h b/include/parlay/alloc.h index 68c37e9d..7d919814 100644 --- a/include/parlay/alloc.h +++ b/include/parlay/alloc.h @@ -183,8 +183,11 @@ struct allocator { template /* implicit */ constexpr allocator(const allocator&) noexcept { } }; -template -struct is_trivially_relocatable> : std::true_type {}; +// Allocator should be trivially copyable since it is stateless and has no user-provided copy +// constructor. This should guarantee that it is also trivially relocatable. +static_assert(std::is_trivially_copyable_v>); +static_assert(is_trivially_relocatable_v>); + template bool operator==(const allocator&, const allocator&) { return true; } diff --git a/include/parlay/internal/bucket_sort.h b/include/parlay/internal/bucket_sort.h index 3665748d..8700fe52 100644 --- a/include/parlay/internal/bucket_sort.h +++ b/include/parlay/internal/bucket_sort.h @@ -42,7 +42,7 @@ void radix_step_(slice A, for (size_t j = n; j > 0; j--) { auto x = --counts[keys[j-1]]; - uninitialized_relocate(&B[x], &A[j-1]); + relocate_at(&A[j - 1], &B[x]); } } @@ -128,7 +128,7 @@ void base_sort(slice in, else { quicksort(in.begin(), in.size(), f); if (!inplace) { - uninitialized_relocate_n(out.begin(), in.begin(), in.size()); + parlay::uninitialized_relocate(in.begin(), in.end(), out.begin()); } } } diff --git a/include/parlay/internal/collect_reduce.h b/include/parlay/internal/collect_reduce.h index 48f27ae4..49ea7f90 100644 --- a/include/parlay/internal/collect_reduce.h +++ b/include/parlay/internal/collect_reduce.h @@ -271,7 +271,7 @@ auto seq_collect_reduce_sparse(Slice A, Helper const &helper) { auto r = r_s.begin(); size_t j = 0; for (size_t i = 0; i < table_size; i++) - if 
(flags[i]) uninitialized_relocate(&r[j++], &table[i]); + if (flags[i]) relocate_at(&table[i], &r[j++]); assert(j == count); return r_s; } diff --git a/include/parlay/internal/counting_sort.h b/include/parlay/internal/counting_sort.h index 28d1e5a0..e7466ea7 100644 --- a/include/parlay/internal/counting_sort.h +++ b/include/parlay/internal/counting_sort.h @@ -320,7 +320,7 @@ auto count_sort_inplace(slice In, KeyS const& Keys, size using value_type = typename slice::value_type; auto Tmp = uninitialized_sequence(In.size()); auto a = count_sort(In, make_slice(Tmp), make_slice(Keys), num_buckets); - uninitialized_relocate_n(In.begin(), Tmp.begin(), In.size()); + parlay::uninitialized_relocate(Tmp.begin(), Tmp.end(), In.begin()); return a.first; } diff --git a/include/parlay/internal/delayed/filter.h b/include/parlay/internal/delayed/filter.h index 5be7832a..f73b50cd 100644 --- a/include/parlay/internal/delayed/filter.h +++ b/include/parlay/internal/delayed/filter.h @@ -96,7 +96,7 @@ struct block_delayed_filter_t : } } auto res = sequence::uninitialized(n); - uninitialized_relocate_n(res.begin(), temp.begin(), n); + parlay::uninitialized_relocate_n(temp.begin(), n, res.begin()); return res; } diff --git a/include/parlay/internal/delayed/filter_op.h b/include/parlay/internal/delayed/filter_op.h index e8336fb7..4884bd77 100644 --- a/include/parlay/internal/delayed/filter_op.h +++ b/include/parlay/internal/delayed/filter_op.h @@ -85,7 +85,7 @@ struct block_delayed_filter_op_t : } } auto res = sequence::uninitialized(n); - uninitialized_relocate_n(res.begin(), temp.begin(), n); + parlay::uninitialized_relocate_n(temp.begin(), n, res.begin()); return res; } diff --git a/include/parlay/internal/integer_sort.h b/include/parlay/internal/integer_sort.h index cc8c67fb..f2fd4529 100644 --- a/include/parlay/internal/integer_sort.h +++ b/include/parlay/internal/integer_sort.h @@ -80,10 +80,10 @@ void seq_radix_sort_(slice In, } if (swapped && inplace) { - 
uninitialized_relocate_n(In.begin(), Out.begin(), In.size()); + parlay::uninitialized_relocate(Out.begin(), Out.end(), In.begin()); } else if (!swapped && !inplace) { - uninitialized_relocate_n(Out.begin(), In.begin(), Out.size()); + parlay::uninitialized_relocate(In.begin(), In.end(), Out.begin()); } } @@ -105,10 +105,10 @@ void seq_radix_sort(slice In, size_t n = In.size(); if (odd) { // We could just use assign_dispatch(Tmp[i], In[i]) for each i, but we - // can optimize better by calling destructive_move_slice, since this + // can optimize better by calling uninitialized_relocate, since this // has the ability to memcpy multiple elements at once if constexpr (std::is_same_v) { - uninitialized_relocate_n(Tmp.begin(), In.begin(), Tmp.size()); + parlay::uninitialized_relocate(In.begin(), In.end(), Tmp.begin()); } else { for (size_t i = 0; i < n; i++) @@ -117,7 +117,7 @@ void seq_radix_sort(slice In, seq_radix_sort_(Tmp, Out, g, key_bits, false); } else { if constexpr (std::is_same_v) { - uninitialized_relocate_n(Out.begin(), In.begin(), Out.size()); + parlay::uninitialized_relocate(In.begin(), In.end(), Out.begin()); } else { for (size_t i = 0; i < n; i++) @@ -219,7 +219,7 @@ sequence integer_sort_r(slice In, // uninitialized_relocate_n, which can memcpy multiple elements at a time // to save on performing every copy individually. 
if constexpr (std::is_same_v) { - uninitialized_relocate_n(Out.begin(), In.begin(), Out.size()); + parlay::uninitialized_relocate(In.begin(), In.end(), Out.begin()); } else { parallel_for(0, In.size(), [&](size_t i) { @@ -248,7 +248,7 @@ sequence integer_sort_r(slice In, if constexpr (inplace_tag::value == true) { if (!one_bucket) { - uninitialized_relocate_n(In.begin(), Out.begin(), In.size()); + parlay::uninitialized_relocate(Out.begin(), Out.end(), In.begin()); } } diff --git a/include/parlay/internal/merge_sort.h b/include/parlay/internal/merge_sort.h index 629f7086..87162ec5 100644 --- a/include/parlay/internal/merge_sort.h +++ b/include/parlay/internal/merge_sort.h @@ -28,7 +28,7 @@ void merge_sort_(slice In, insertion_sort(In.begin(), In.size(), f); if (!inplace) { for (size_t i = 0; i < In.size(); i++) { - uninitialized_relocate(&Out[i], &In[i]); + relocate_at(&In[i], &Out[i]); } } } diff --git a/include/parlay/internal/sample_sort.h b/include/parlay/internal/sample_sort.h index 59175e0d..06c9893c 100644 --- a/include/parlay/internal/sample_sort.h +++ b/include/parlay/internal/sample_sort.h @@ -178,7 +178,7 @@ void sample_sort_inplace_(slice In, // Sample block is already sorted, so we don't need to sort it again. 
// We can just move it straight over into the other sorted blocks - uninitialized_relocate_n(Tmp.begin(), sample_set.begin(), sample_set_size); + parlay::uninitialized_relocate(sample_set.begin(), sample_set.end(), Tmp.begin()); // move data from blocks to buckets auto bucket_offsets = diff --git a/include/parlay/internal/sequence_base.h b/include/parlay/internal/sequence_base.h index 49732af9..1f85a472 100644 --- a/include/parlay/internal/sequence_base.h +++ b/include/parlay/internal/sequence_base.h @@ -561,7 +561,16 @@ struct alignas(uint64_t) sequence_base { auto n = size(); auto dest_buffer = new_buffer.data(); auto current_buffer = data(); - uninitialized_relocate_n_a(dest_buffer, current_buffer, n, *this); + + if constexpr (is_trivial_allocator_v) { + parlay::uninitialized_relocate_n(current_buffer, n, dest_buffer); + } + else { + parallel_for(0, n, [&](size_t i){ + std::allocator_traits::construct(alloc, std::addressof(dest_buffer[i]), std::move(current_buffer[i])); + std::allocator_traits::destroy(alloc, std::addressof(current_buffer[i])); + }); + } // Destroy the old stuff if (!is_small()) { diff --git a/include/parlay/internal/uninitialized_iterator.h b/include/parlay/internal/uninitialized_iterator.h new file mode 100644 index 00000000..408ccd86 --- /dev/null +++ b/include/parlay/internal/uninitialized_iterator.h @@ -0,0 +1,159 @@ + +#ifndef PARLAY_INTERNAL_UNINITIALIZED_ITERATOR_H_ +#define PARLAY_INTERNAL_UNINITIALIZED_ITERATOR_H_ + +#include +#include + +#include "../range.h" +#include "../type_traits.h" + +namespace parlay { +namespace internal { + +// Given a container of uninitialized, you can wrap its iterators with +// uninitialized_iterator_adaptor to get an iterator whose value type is T! +// +// The resulting iterator will have the same iterator category as Iterator. 
+template +class uninitialized_iterator_adaptor { + public: + using iterator_category = parlay::iterator_category_t; + using difference_type = parlay::iterator_difference_type_t; + using value_type = decltype(std::declval>().value); + using reference = std::add_lvalue_reference_t; + using pointer = std::add_pointer_t; + + explicit uninitialized_iterator_adaptor(Iterator it_) : it(it_) {} + + reference operator*() const { return it->value; } + + pointer operator->() const { return std::addressof(it->value); } + + uninitialized_iterator_adaptor& operator++() { + ++it; + return *this; + } + + friend void swap(uninitialized_iterator_adaptor& left, uninitialized_iterator_adaptor& right) noexcept { + std::swap(left.it, right.it); + } + + // ------------------------ Enabled if input iterator ------------------------ + + template + auto operator++(int) + -> std::enable_if_t, uninitialized_iterator_adaptor> { + auto tmp = *this; + ++(*this); + return tmp; + } + + template + auto operator==(const uninitialized_iterator_adaptor& other) const + -> std::enable_if_t, bool> { + return it == other.it; + } + + template + auto operator!=(const uninitialized_iterator_adaptor& other) const + -> std::enable_if_t, bool> { + return it != other.it; + } + + // ------------------------ Enabled if forward iterator ------------------------ + + // Can't SFINAE special member functions so this is close enough until C++20 + template, int> = 0> + uninitialized_iterator_adaptor() : it{} {} + + // ------------------------ Enabled if bidirectional iterator ------------------------ + + template + auto operator--() -> std::enable_if_t, uninitialized_iterator_adaptor&> { + it--; + return *this; + } + + template + auto operator--(int) -> std::enable_if_t, uninitialized_iterator_adaptor> { + auto tmp = *this; + --(*this); + return tmp; + } + + // ------------------------ Enabled if random-access iterator ------------------------ + + template + auto operator+=(difference_type diff) + -> std::enable_if_t, 
uninitialized_iterator_adaptor&> { + it += diff; + return *this; + } + + template + auto operator+(difference_type diff) const + -> std::enable_if_t, uninitialized_iterator_adaptor> { + auto result = *this; + result += diff; + return result; + } + + template + auto operator-=(difference_type diff) + -> std::enable_if_t, uninitialized_iterator_adaptor&> { + it -= diff; + return *this; + } + + template + auto operator-(difference_type diff) const + -> std::enable_if_t, uninitialized_iterator_adaptor> { + auto result = *this; + result -= diff; + return result; + } + + template + auto operator-(const uninitialized_iterator_adaptor& other) const + -> std::enable_if_t, difference_type> { + return it - other.it; + } + + template + auto operator[](std::size_t p) const -> std::enable_if_t, reference> { + return it[p].value; + } + + template + auto operator<(const uninitialized_iterator_adaptor& other) const + -> std::enable_if_t, bool> { + return it < other.it; + } + + template + auto operator<=(const uninitialized_iterator_adaptor& other) const + -> std::enable_if_t, bool> { + return it <= other.it; + } + + template + auto operator>(const uninitialized_iterator_adaptor& other) const + -> std::enable_if_t, bool> { + return it > other.it; + } + + template + auto operator>=(const uninitialized_iterator_adaptor& other) const + -> std::enable_if_t, bool> { + return it >= other.it; + } + + private: + Iterator it; +}; + +} // namespace internal +} // namespace parlay + +#endif // PARLAY_INTERNAL_UNINITIALIZED_ITERATOR_H_ diff --git a/include/parlay/internal/uninitialized_sequence.h b/include/parlay/internal/uninitialized_sequence.h index dcfc9c8b..1d24560b 100644 --- a/include/parlay/internal/uninitialized_sequence.h +++ b/include/parlay/internal/uninitialized_sequence.h @@ -152,7 +152,7 @@ class uninitialized_sequence { const_reverse_iterator crbegin() const { return std::make_reverse_iterator(cend()); } const_reverse_iterator crend() const { return 
std::make_reverse_iterator(cbegin()); } - void swap(uninitialized_sequence& other) { + void swap(uninitialized_sequence& other) noexcept { std::swap(impl.n, other.impl.n); std::swap(impl.data, other.impl.data); } diff --git a/include/parlay/internal/uninitialized_storage.h b/include/parlay/internal/uninitialized_storage.h index 0359ae25..17bed4f6 100644 --- a/include/parlay/internal/uninitialized_storage.h +++ b/include/parlay/internal/uninitialized_storage.h @@ -18,7 +18,7 @@ namespace internal { template class uninitialized_storage { using value_type = T; - typename std::aligned_storage::type storage; + alignas(T) char storage [sizeof(T)]; public: uninitialized_storage() { diff --git a/include/parlay/primitives.h b/include/parlay/primitives.h index 5e5aa35f..962f5576 100644 --- a/include/parlay/primitives.h +++ b/include/parlay/primitives.h @@ -1011,7 +1011,7 @@ auto flatten(sequence>&& r) { size_t len = internal::scan_inplace(make_slice(offsets), plus()); auto res = sequence::uninitialized(len); parallel_for(0, parlay::size(r), [&, it = std::begin(r)](size_t i) { - uninitialized_relocate_n(std::begin(res)+offsets[i], std::begin(it[i]), it[i].size()); + parlay::uninitialized_relocate(std::begin(it[i]), std::end(it[i]), std::begin(res)+offsets[i]); clear_relocated(it[i]); }); r.clear(); @@ -1279,7 +1279,7 @@ auto kth_smallest_copy(Range&& in, size_t k, Compare&& less = {}) { auto [offsets, total] = parlay::scan(sums); assert(total == n); - auto id = static_cast(std::upper_bound(offsets.begin(), offsets.end(), k) - offsets.begin() - 1); + auto id = static_cast(std::upper_bound(offsets.begin(), offsets.end(), k) - offsets.begin()) - 1; auto bucket_length = ((id == offsets.size() - 1) ? total : offsets[id+1]) - offsets[id]; // Grab the contents of the bucket containing the k'th element. 
Exclude the pivot diff --git a/include/parlay/relocation.h b/include/parlay/relocation.h index c6e3fe31..08038278 100644 --- a/include/parlay/relocation.h +++ b/include/parlay/relocation.h @@ -20,146 +20,133 @@ namespace parlay { /* - Relocation (a.k.a. "destructive move") - - The relocation of object a into memory b is equivalent to a move - construction of a into b, followed by the destruction of what - remains at a. In other words, it is - - new (b) T(std::move(*a)); - a->~T(); - - For many types, however, this can be optimized by replacing it with just - - std::memcpy(b, a, sizeof(T)); - - We call any such types trivially relocatable. This is clearly - true for any trivial type, but it also turns out to be true - for most other standard types, such as vectors, unique_ptrs, - and more. The key observation is that the only reason that - the move operations of these types is non-trivial is because - they must clear out the source object after moving from it. - If, however, the source object is treated as uninitialized - memory after relocation, then it does not have to be cleared - out, since its destructor will not run. + Range-based Relocation A strong motivating use case for relocation is in dynamically sized containers (e.g. vector, or parlay::sequence). 
When - performing a resize operation, one has to move all of the - contents of the old buffer into the new one, and destroy - the contents of the old buffer, like so (ignoring allocator - details) + performing a resize operation, one has to move the contents + of the old buffer into the new one, and destroy the contents + of the old buffer, like so (ignoring allocator details) - parallel_for(0, n, [&](size_t i) { - new (&new_buffer[i]) std::move(current_buffer[i])); - current_buffer[i].~value_type(); - }); + parallel_for(0, n, [&](size_t i) { + ::new (voidify(new_buffer[i])) std::move(current_buffer[i])); + std::destroy_at(std::addressof(current_buffer[i])); + }); - This can be replaced with + If current_buffer and new_buffer contain the same type (should + always be true for a sequence container resize operation), then + this can be replaced with - parallel_for(0, n, [&](size_t i) { - uninitialized_relocate(&new_buffer[i], &current_buffer[i]); - }); + parallel_for(0, n, [&](size_t i) { + relocate_at(std::addressof(current_buffer[i]), std::addressof(new_buffer[i])); + }); - or even, for better performance yet + However, it may be even more efficient to move chunks of objects in parallel, + so for better performance, you can write - uninitialized_relocate_n(new_buffer, current_buffer, n); + uninitialized_relocate_n(current_buffer, n, new_buffer); The uninitialized_relocate functions will use the optimized memcpy-based approach for any types for which it is suitable, and otherwise, will fall back to the generic approach. */ -// Relocate the given range of n elements [from, from + n) into uninitialized -// memory at [to, to + n), such that both the source and destination memory -// were allocated by the given allocator. 
-template -inline void uninitialized_relocate_n_a(It1 to, It2 from, size_t n, Alloc& alloc) { - using T = typename std::iterator_traits::value_type; - static_assert(std::is_same_v::value_type, T>); - static_assert(std::is_same_v::value_type, T>); - - constexpr bool trivially_relocatable = is_trivially_relocatable_v; - constexpr bool trivial_alloc = is_trivial_allocator_v; - constexpr bool contiguous = is_contiguous_iterator_v && is_contiguous_iterator_v; - constexpr bool random_access = is_random_access_iterator_v && is_random_access_iterator_v; - - // The most efficient scenario -- The objects are trivially relocatable, the allocator - // has no special behaviour, and the iterators point to contiguous memory so we can - // memcpy chunks of more than one T object at a time. - if constexpr (trivially_relocatable && contiguous && trivial_alloc) { +// Relocate from source to dest, which may be of different +// (but compatible) types, which is equivalent to +// +// ::new (voidify(dest)) T(std::move(source)); +// std::destroy_at(std::addressof(source)); +// +template +inline void relocate_or_move_and_destroy(T& source, U& dest) { + static_assert(std::is_constructible_v>); + static_assert(std::is_destructible_v); + + if constexpr (std::is_same_v) { + relocate_at(std::addressof(source), std::addressof(dest)); + } + else { + PARLAY_ASSERT_UNINITIALIZED(dest); + ::new (voidify(dest)) T(std::move(source)); + std::destroy_at(std::addressof(source)); + PARLAY_ASSERT_UNINITIALIZED(source); + } +} + +// Relocate the given range of n elements [first, first + n) into uninitialized +// memory at [result, result + n). 
+template +inline std::pair uninitialized_relocate_n( + InputIterator first, Size n, NoThrowForwardIterator result) { + + static_assert(is_input_iterator_v); + static_assert(is_forward_iterator_v); + + using T = iterator_value_type_t; + + constexpr bool trivially_relocatable = is_trivially_relocatable_v && + std::is_same_v, T>; + constexpr bool contiguous = is_contiguous_iterator_v && + is_contiguous_iterator_v; + constexpr bool random_access = is_random_access_iterator_v && + is_random_access_iterator_v; + + // The most efficient scenario -- The objects are trivially relocatable and the iterators + // point to contiguous memory so, we can memcpy chunks of more than one T object at a time. + if constexpr (contiguous && trivially_relocatable) { constexpr size_t chunk_size = 1024 * sizeof(size_t) / sizeof(T); const size_t n_chunks = (n + chunk_size - 1) / chunk_size; parallel_for(0, n_chunks, [&](size_t i) { size_t n_objects = (std::min)(chunk_size, n - i * chunk_size); +#if defined(__cpp_lib_trivially_relocatable) + std::uninitialized_relocate_n(first + i * chunk_size, n_objects, result + i * chunk_size); +#else size_t n_bytes = sizeof(T) * n_objects; - void* src = static_cast(std::addressof(*(from + i * chunk_size))); - void* dest = static_cast(std::addressof(*(to + i * chunk_size))); + void* src = voidify(*(first + i * chunk_size)); + void* dest = voidify(*(result + i * chunk_size)); std::memcpy(dest, src, n_bytes); +#endif }, 1); - // The next best thing -- If the objects are trivially relocatable and the allocator - // has no special behaviour, so long as the iterators are random access, we can still - // relocate everything in parallel, just not by memcpying multiple objects at a time - } else if constexpr (trivially_relocatable && random_access && trivial_alloc) { - constexpr size_t chunk_size = 1024 * sizeof(size_t) / sizeof(T); - const size_t n_chunks = (n + chunk_size - 1) / chunk_size; - parallel_for(0, n_chunks, [&](size_t i) { - for (size_t j = 0; j < 
chunk_size && (j + i *chunk_size < n); j++) { - void* src = static_cast(std::addressof(from[j + i * chunk_size])); - void* dest = static_cast(std::addressof(to[j + i * chunk_size])); - std::memcpy(dest, src, sizeof(T)); - } - }, 1); - } - // The iterators are not random access, but we can still relocate, just not in parallel - else if constexpr (trivially_relocatable && trivial_alloc) { - for (size_t i = 0; i < n; i++) { - std::memcpy(std::addressof(*to), std::addressof(*from), sizeof(T)); - to++; - from++; - } + return {first + n, result + n}; } - // After this point, the object can not be trivially relocated, either because it is not - // trivially relocatable, or because the allocator has specialized behaviour. We now fall - // back to just doing a "destructive move" manually. + // The next best thing -- if the iterators are random access we can still relocate everything in + // parallel, just not by memcpying multiple objects at a time else if constexpr (random_access) { - static_assert(std::is_move_constructible::value); - static_assert(std::is_destructible::value); - parallel_for(0, n, [&](size_t i) { - PARLAY_ASSERT_UNINITIALIZED(to[i]); - std::allocator_traits::construct(alloc, std::addressof(to[i]), std::move(from[i])); - std::allocator_traits::destroy(alloc, std::addressof(from[i])); - PARLAY_ASSERT_UNINITIALIZED(from[i]); + parallel_for(0, n, [&](size_t i){ + relocate_or_move_and_destroy(first[i], result[i]); }); + return {first + n, result + n}; } - // The worst case. No parallelism and no fast relocation. + // No parallelism allowed! 
else { - static_assert(std::is_move_constructible::value); - static_assert(std::is_destructible::value); - for (size_t i = 0; i < n; i++) { - PARLAY_ASSERT_UNINITIALIZED(*to); - std::allocator_traits::construct(alloc, std::addressof(*to), std::move(*from)); - std::allocator_traits::destroy(alloc, std::addressof(*from)); - PARLAY_ASSERT_UNINITIALIZED(*from); - to++; - from++; + for (; n > 0; ++result, (void)++first, --n) { + // Note: Dereferencing result is UB since it points to uninitialized memory, but + // I don't think that is avoidable until we get C++20 with std::to_address. + relocate_or_move_and_destroy(*first, *result); } + return {first, result}; } } -// Relocate the given range of n elements [from, from + n) into uninitialized -// memory at [to, to + n). Relocation is done as if the memory was allocated -// by a standard allocator (e.g. std::allocator, parlay::allocator, malloc) -// -// For an allocator-aware version that respects the construction and destruction -// behaviour of the allocator, use uninitialized_relocate_n_a. -template -inline void uninitialized_relocate_n(Iterator1 to, Iterator2 from, size_t n) { - using T = typename std::iterator_traits::value_type; - std::allocator a; - uninitialized_relocate_n_a(to, from, n, a); -} +template +NoThrowForwardIterator uninitialized_relocate(InputIterator first, InputIterator last, NoThrowForwardIterator result) { + static_assert(parlay::is_input_iterator_v); + static_assert(parlay::is_forward_iterator_v); + + if constexpr (is_random_access_iterator_v) { + return parlay::uninitialized_relocate_n(first, std::distance(first, last), result).second; + } + else { + for (; first != last; ++result, (void)++first) { + // Note: Dereferencing result is UB since it points to uninitialized memory, but + // I don't think that is avoidable until we get C++20 with std::to_address. 
+ relocate_or_move_and_destroy(*first, *result); + } + return result; + } } +} // namespace parlay + #endif // PARLAY_RELOCATION_H_ diff --git a/include/parlay/sequence.h b/include/parlay/sequence.h index 03bd1903..4970214f 100644 --- a/include/parlay/sequence.h +++ b/include/parlay/sequence.h @@ -67,7 +67,7 @@ using sequence_default_allocator = std::allocator; // EnableSSO: true to enable small-size optimization // template, bool EnableSSO = std::is_same::value> -class sequence : protected sequence_internal::sequence_base { +class PARLAY_TRIVIALLY_RELOCATABLE sequence : protected sequence_internal::sequence_base { static_assert(std::is_same_v, T>, "sequences must have a non-const, non-volatile value_type"); static_assert(std::is_same_v, T>, "sequences must not have an array, reference, or function value_type"); @@ -152,7 +152,11 @@ class sequence : protected sequence_internal::sequence_base - sequence(Iterator_ i, Iterator_ j) : sequence_base_type() { - initialize_dispatch(i, j, std::is_integral()); + // Constructs a sequence consisting of the elements in the given iterator range + template, int> = 0> + sequence(Iterator_ first, Iterator_ last) : sequence_base_type() { + initialize_range(first, last); } // Constructs a sequence from the elements of the given initializer list // // Note: cppcheck flags all implicit constructors. This one is okay since // we want to convert initializer lists into sequences. 
- sequence(std::initializer_list l) : - sequence_base_type() { // cppcheck-suppress noExplicitConstructor - initialize_range(std::begin(l), std::end(l), - typename std::iterator_traits::iterator_category()); + sequence(std::initializer_list l) : sequence_base_type() { // cppcheck-suppress noExplicitConstructor + initialize_range(std::begin(l), std::end(l)); } sequence_type& operator=(std::initializer_list l) { storage.clear(); - initialize_range(std::begin(l), std::end(l), - typename std::iterator_traits::iterator_category()); + initialize_range(std::begin(l), std::end(l)); return *this; } @@ -237,13 +238,12 @@ class sequence : protected sequence_internal::sequence_base(args)...); - move_append(the_tail); + append(std::move(the_tail)); return it; } } @@ -260,40 +260,53 @@ class sequence : protected sequence_internal::sequence_base + iterator append(size_t n, const value_type& v) { + return append_n(n, v); + } + + template, int> = 0> iterator append(Iterator_ first, Iterator_ last) { - return append_dispatch(first, last, std::is_integral()); + return append_range(first, last); } - template + template, int> = 0> iterator append(R&& r) { - return append(std::begin(r), std::end(r)); + return append_range(std::begin(r), std::end(r)); } - // Append the given sequence r. Since r is an rvalue, we can - // move its elements instead of copying them. Furthermore, if - // the current sequence is empty and doesn't own a large buffer, - // we can simply move assign the entire sequence r + // Append the given sequence r. Since r is an rvalue, we can relocate its elements directly + // instead of copying them. Furthermore, if the current sequence is empty and doesn't own a + // large buffer, we can simply move assign the entire sequence r iterator append(sequence_type&& r) { + // Note: We check the capacity because an append should never cause the capacity to decrease, + // since this could throw away a large reserve and lead to unexpected reallocations. 
if (empty() && capacity() <= r.size()) { *this = std::move(r); return begin(); } else { - return append(std::make_move_iterator(std::begin(r)), std::make_move_iterator(std::end(r))); + auto new_size = size() + r.size(); + storage.ensure_capacity(new_size); + auto append_begin = end(); + parlay::uninitialized_relocate(r.begin(), r.end(), append_begin); + clear_relocated(r); // Ditch the buffer without destruction since relocation + assert(r.empty()); // leaves the elements in a destroyed state. + storage.set_size(new_size); + return append_begin; } } - iterator append(size_t n, const value_type& t) { return append_n(n, t); } - iterator insert(iterator p, const value_type& t) { return emplace(p, t); } iterator insert(iterator p, value_type&& rv) { return emplace(p, std::move(rv)); } iterator insert(iterator p, size_t n, const value_type& t) { return insert_n(p, n, t); } - template - iterator insert(iterator p, Iterator_ i, Iterator_ j) { - return insert_dispatch(p, i, j, std::is_integral()); + template, int> = 0> + iterator insert(iterator p, Iterator_ first, Iterator_ last) { + return insert_range(p, first, last); } template && std::is_constructible_v>, int> = 0> iterator insert(iterator p, Range&& r) { - return insert(p, std::begin(r), std::end(r)); + return insert_range(p, std::begin(r), std::end(r)); } iterator insert(iterator p, sequence_type&& r) { - return insert(p, std::make_move_iterator(std::begin(r)), std::make_move_iterator(std::end(r))); + auto idx = p - begin(); + auto the_tail = pop_tail(p); + append(std::move(r)); + append(std::move(the_tail)); + return begin() + idx; // p might be invalidated since append could reallocate } iterator insert(iterator p, std::initializer_list l) { @@ -320,7 +337,7 @@ class sequence : protected sequence_internal::sequence_base - void assign(Iterator_ i, Iterator_ j) { + template, int> = 0> + void assign(Iterator_ first, Iterator_ last) { storage.clear(); - initialize_dispatch(i, j, std::is_integral()); + 
initialize_range(first, last); } void assign(size_t n, const value_type& v) { @@ -427,17 +445,14 @@ class sequence : protected sequence_internal::sequence_base) { - parallel_for(0, end() - p, [&](size_t i) { storage.destroy(&p[i]); }); - } + auto the_tail = sequence_type::uninitialized(end() - p); + parlay::uninitialized_relocate(p, end(), the_tail.begin()); storage.set_size(p - begin()); return the_tail; } @@ -526,16 +541,14 @@ class sequence : protected sequence_internal::sequence_base - void initialize_range(InputIterator_ first, InputIterator_ last, std::input_iterator_tag) { - for (; first != last; ++first) { - push_back(*first); - } - } - - template - void initialize_range(ForwardIterator_ first, ForwardIterator_ last, std::forward_iterator_tag) { - auto n = std::distance(first, last); - storage.initialize_capacity(n); - auto buffer = storage.data(); - for (size_t i = 0; first != last; i++, ++first) { - storage.initialize(buffer + i, *first); - } - storage.set_size(n); - } - - template - void initialize_range(RandomAccessIterator_ first, RandomAccessIterator_ last, std::random_access_iterator_tag) { - auto n = std::distance(first, last); - storage.initialize_capacity(n); - auto buffer = storage.data(); - parallel_for(0, n, [&](size_t i) { - storage.initialize(buffer + i, first[i]); - }, copy_granularity(n)); - storage.set_size(n); - } - - // Use tag dispatch to distinguish between the (n, value) - // constructor and the iterator pair constructor - - template - void initialize_dispatch(Integer_ n, Integer_ v, std::true_type) { - initialize_fill(n, v); - } - template - void initialize_dispatch(Iterator_ first, Iterator_ last, std::false_type) { - initialize_range(first, last, typename std::iterator_traits::iterator_category()); - } - - // Use tag dispatch to distinguish between the (n, value) - // append operation and the iterator pair append operation + void initialize_range(Iterator_ first, Iterator_ last) { + static_assert(is_input_iterator_v); - template - 
iterator append_dispatch(Integer_ first, Integer_ last, std::true_type) { - return append_n(first, last); - } - - template - iterator append_dispatch(Iterator_ first, Iterator_ last, std::false_type) { - return append_range(first, last, typename std::iterator_traits::iterator_category()); + if constexpr (is_random_access_iterator_v) { + auto n = std::distance(first, last); + storage.initialize_capacity(n); + auto buffer = storage.data(); + parallel_for(0, n, [&](size_t i) { + storage.initialize(buffer + i, first[i]); + }, copy_granularity(n)); + storage.set_size(n); + } + else if (is_forward_iterator_v) { + auto n = std::distance(first, last); + storage.initialize_capacity(n); + std::uninitialized_copy(first, last, storage.data()); + storage.set_size(n); + } + else { + for (; first != last; ++first) { + push_back(*first); + } + } } iterator append_n(size_t n, const value_type& t) { @@ -613,52 +597,36 @@ class sequence : protected sequence_internal::sequence_base - iterator append_range(InputIterator_ first, InputIterator_ last, std::input_iterator_tag) { - size_t n = 0; - for (; first != last; first++, n++) { - push_back(*first); - } - return end() - n; - } - - template - iterator append_range(ForwardIterator_ first, ForwardIterator_ last, std::forward_iterator_tag) { - auto n = std::distance(first, last); - storage.ensure_capacity(size() + n); - auto it = end(); - std::uninitialized_copy(first, last, it); - storage.set_size(size() + n); - return it; - } - - template - iterator append_range(RandomAccessIterator_ first, RandomAccessIterator_ last, std::random_access_iterator_tag) { - auto n = std::distance(first, last); - storage.ensure_capacity(size() + n); - auto it = end(); - parallel_for(0, n, [&](size_t i) { - storage.initialize(it + i, first[i]); - }, copy_granularity(n)); - storage.set_size(size() + n); - return it; - } - - // Use tag dispatch to distinguish between the (n, value) - // insert operation and the iterator pair insert operation - - template - 
iterator insert_dispatch(iterator p, Integer_ n, Integer_ v, std::true_type) { - return insert_n(p, n, v); - } template - iterator insert_dispatch(iterator p, Iterator_ first, Iterator_ last, std::false_type) { - return insert_range(p, first, last); + iterator append_range(Iterator_ first, Iterator_ last) { + static_assert(is_input_iterator_v); + + if constexpr (is_random_access_iterator_v) { + auto n = std::distance(first, last); + storage.ensure_capacity(size() + n); + auto it = end(); + parallel_for(0, n, [&](size_t i) { + storage.initialize(it + i, first[i]); + }, copy_granularity(n)); + storage.set_size(size() + n); + return it; + } + else if constexpr (is_forward_iterator_v) { + auto n = std::distance(first, last); + storage.ensure_capacity(size() + n); + auto it = end(); + std::uninitialized_copy(first, last, it); + storage.set_size(size() + n); + return it; + } + else { + size_t n = 0; + for (; first != last; first++, n++) { + push_back(*first); + } + return end() - n; + } } iterator insert_n(iterator p, size_t n, const value_type& v) { @@ -668,31 +636,26 @@ class sequence : protected sequence_internal::sequence_base iterator insert_range(iterator p, Iterator_ first, Iterator_ last) { + static_assert(is_input_iterator_v); + auto the_tail = pop_tail(p); auto it = append(first, last); auto pos = it - begin(); - move_append(the_tail); + append(std::move(the_tail)); return begin() + pos; } - // Append the given range, moving its elements into this sequence - template - void move_append(R&& r) { - append(std::make_move_iterator(std::begin(r)), std::make_move_iterator(std::end(r))); - } - // Return true if this sequence compares equal to the sequence // beginning at other. The sequence beginning at other must be // of at least the same length as this sequence. 
@@ -723,6 +686,20 @@ class sequence : protected sequence_internal::sequence_base +PARLAY_ASSUME_TRIVIALLY_RELOCATABLE_IF((is_trivially_relocatable_v), parlay::sequence); + +#endif + // A short_sequence is a dynamic array supporting parallel modification operations // that may also perform small-size optimization. For sequences of trivial types // whose elements fit in 15 bytes or fewer, the sequence will be stored inline and @@ -787,28 +764,11 @@ inline auto to_short_sequence(R&& r) -> short_sequence { return {std::begin(r), std::end(r)}; } -// Mark sequences as trivially relocatable. A sequence is always -// trivially relocatable as long as the allocator is, because: -// 1) Sequences only use small-size optimization when the element -// type is trivial, so the buffer of trivial elements is -// trivially relocatable. -// 2) Sequences that are not small-size optimized are just a -// pointer/length pair, which are trivially relocatable -template -struct is_trivially_relocatable> - : std::bool_constant> {}; - } // namespace parlay namespace std { -// exchange the values of a and b -template -inline void swap(parlay::sequence& a, parlay::sequence& b) { - a.swap(b); -} - // compute a suitable hash value for a sequence template struct hash> { diff --git a/include/parlay/type_traits.h b/include/parlay/type_traits.h index a8b68eaa..a75586ee 100644 --- a/include/parlay/type_traits.h +++ b/include/parlay/type_traits.h @@ -1,15 +1,10 @@ // Useful type traits used mostly internally by Parlay // -// Many inspired by this video, and the following standards -// proposals: +// Many inspired by this video: // - https://www.youtube.com/watch?v=MWBfmmg8-Yo -// - http://open-std.org/JTC1/SC22/WG21/docs/papers/2014/n4034.pdf -// - https://quuxplusone.github.io/blog/code/object-relocation-in-terms-of-move-plus-destroy-draft-7.html // -// Includes: -// - priority_tag -// - is_trivial_allocator -// - is_trivially_relocatable / is_nothrow_relocatable +// Notable inclusions: +// - 
is_trivially_relocatable // #ifndef PARLAY_TYPE_TRAITS_H_ @@ -17,11 +12,24 @@ #include +#include +#include #include +#include +#include +#include #include #include +#include +#include +#include +#include #include +#include +#include +#include #include // IWYU pragma: keep +#include // IWYU pragma: no_include @@ -37,6 +45,14 @@ struct type_identity { template using type_identity_t = typename type_identity::type; +// Provides the member type equivalent to T with its cv-ref qualifiers removed +template +struct remove_cvref : type_identity>> { }; + +// Equal to T with its cv-ref qualifiers removed +template +using remove_cvref_t = typename remove_cvref::type; + // Given a pointer-to-member (object or function), returns // the type of the class in which the member lives template @@ -230,56 +246,198 @@ inline constexpr bool is_trivial_allocator_v = is_trivial_allocator::v template struct is_trivial_allocator, T> : std::true_type {}; + /* ----------------- Trivially relocatable. --------------------- - A type T is called trivially relocatable if, given a pointer - p to an object of type T, and a pointer q to unintialized - memory large enough for an object of type T, then - new (q) T(std::move(*p)); - p->~T(); + A type T is called trivially relocatable if, given a pointer p + to an object of type T, and a pointer q to uninitialized memory + large enough for an object of type T, then + + ::new (voidify(*q)) T(std::move(*p)); + std::destroy_at(p); is equivalent to - std::memcpy(p, q, sizeof(T)); + std::memcpy(p, q, sizeof(T)); - Any type that is trivially move constructible and trivially - destructible is therefore trivially relocatable. User-defined - types that are not obviously trivially relocatable can be - annotated as such by specializing the is_trivially_relocatable - type. + Any type that is trivially copyable is trivially relocatable, but many types + that are not trivially copyable are trivially relocatable. 
User-defined types + that are not obviously trivially relocatable can be annotated as such (with + care). This is used to optimize parlay::sequence so that it can reallocate + much faster, and is also used by sorting algorithms for faster swapping of + ranges of elements and individual elements. - See proposal D1144R0 for copious details: - https://quuxplusone.github.io/blog/code/object-relocation-in-terms-of-move-plus-destroy-draft-7.html + See proposal P1144 for copious details: + https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2023/p1144r9.html */ -template -struct is_trivially_relocatable : - std::bool_constant::value && - std::is_trivially_destructible::value> { }; +// Standard language support for trivially relocatable (currently P1144) +#if defined(__cpp_lib_trivially_relocatable) + +template +using is_trivially_relocatable = std::is_trivially_relocatable; + +// Clang's builtin with some (very limited) support for trivial relocatability +#elif defined(__has_builtin) +#if __has_builtin(__is_trivially_relocatable) + +template +struct is_trivially_relocatable : std::bool_constant<__is_trivially_relocatable(T)> { }; + +#define PARLAY_MUST_SPECIALIZE_IS_TRIVIALLY_RELOCATABLE + +#else + +template +struct is_trivially_relocatable : std::is_trivially_copyable { }; + +#define PARLAY_MUST_SPECIALIZE_IS_TRIVIALLY_RELOCATABLE + +#endif +#else + +template +struct is_trivially_relocatable : std::is_trivially_copyable { }; + +#define PARLAY_MUST_SPECIALIZE_IS_TRIVIALLY_RELOCATABLE -template struct is_nothrow_relocatable : - std::bool_constant::value || - (std::is_nothrow_move_constructible::value && - std::is_nothrow_destructible::value)> { }; +#endif template inline constexpr bool is_trivially_relocatable_v = is_trivially_relocatable::value; +#if defined(__cpp_impl_trivially_relocatable) +#define PARLAY_TRIVIALLY_RELOCATABLE [[trivially_relocatable]] +#else +#define PARLAY_TRIVIALLY_RELOCATABLE +#endif + + +#if defined(PARLAY_MUST_SPECIALIZE_IS_TRIVIALLY_RELOCATABLE) 
+ +#define PARLAY_ASSUME_TRIVIALLY_RELOCATABLE(...) \ + struct is_trivially_relocatable<__VA_ARGS__> : std::true_type {}; + +#define PARLAY_ASSUME_TRIVIALLY_RELOCATABLE_IF(cond, ...) \ + struct is_trivially_relocatable<__VA_ARGS__> : std::bool_constant {}; + + +// We list all the types that ought to be trivially relocatable under any +// sensible implementation. It is possible that some of these are wrong. + +template +PARLAY_ASSUME_TRIVIALLY_RELOCATABLE(std::allocator); + +template +PARLAY_ASSUME_TRIVIALLY_RELOCATABLE(std::shared_ptr); + template -inline constexpr bool is_nothrow_relocatable_v = is_nothrow_relocatable::value; +PARLAY_ASSUME_TRIVIALLY_RELOCATABLE(std::weak_ptr); + +template +PARLAY_ASSUME_TRIVIALLY_RELOCATABLE_IF((is_trivially_relocatable_v), std::optional); + +template +PARLAY_ASSUME_TRIVIALLY_RELOCATABLE_IF((is_trivially_relocatable_v && + is_trivially_relocatable_v::pointer>), std::unique_ptr); + +template +PARLAY_ASSUME_TRIVIALLY_RELOCATABLE_IF((is_trivially_relocatable_v && + is_trivially_relocatable_v), std::pair); + +template +PARLAY_ASSUME_TRIVIALLY_RELOCATABLE_IF((is_trivially_relocatable_v), std::array); + +template +PARLAY_ASSUME_TRIVIALLY_RELOCATABLE_IF((is_trivially_relocatable_v && ...), std::tuple); + +template +PARLAY_ASSUME_TRIVIALLY_RELOCATABLE_IF((is_trivially_relocatable_v && ...), std::variant); + + +// Container implementations can differ widely by vendor, so we don't want to specialize these +// with a broad brush. Instead, only specialize them for stdlibs that we are confident about, +// or, more specifically, stdlibs that Arthur is confident about :D +// +// https://quuxplusone.github.io/blog/2019/02/20/p1144-what-types-are-relocatable/ -// The standard allocator is stateless, so it is trivially relocatable, -// but unfortunately it is not detected as such, so we mark it manually. -// This is important because parlay::sequence is only trivially -// relocatable when its allocator is trivially relocatable. 
+// Specializations for libc++. +#if defined(_LIBCPP_VERSION) template -struct is_trivially_relocatable> : std::true_type {}; +PARLAY_ASSUME_TRIVIALLY_RELOCATABLE(std::deque); + +template +PARLAY_ASSUME_TRIVIALLY_RELOCATABLE(std::forward_list); + +// std::vector and std::string are not trivially relocatable in libc++ debug mode +#if !defined(_LIBCPP_DEBUG_MODE) + +template +PARLAY_ASSUME_TRIVIALLY_RELOCATABLE(std::vector); + +template<> +PARLAY_ASSUME_TRIVIALLY_RELOCATABLE(std::string); + +#endif + +template +PARLAY_ASSUME_TRIVIALLY_RELOCATABLE(std::unordered_set); + +template +PARLAY_ASSUME_TRIVIALLY_RELOCATABLE(std::unordered_map); + +template +PARLAY_ASSUME_TRIVIALLY_RELOCATABLE(std::unordered_multiset); + +template +PARLAY_ASSUME_TRIVIALLY_RELOCATABLE(std::unordered_multimap); + +template +PARLAY_ASSUME_TRIVIALLY_RELOCATABLE(std::stack); + +template +PARLAY_ASSUME_TRIVIALLY_RELOCATABLE(std::queue); + +template +PARLAY_ASSUME_TRIVIALLY_RELOCATABLE(std::priority_queue); + + +// Specializations for GCC. +#elif defined(__GLIBCXX__) + +// GCC std::string is *not* trivially relocatable because when the string is +// in short-size mode, the data pointer points into itself :'( + +template +PARLAY_ASSUME_TRIVIALLY_RELOCATABLE(std::deque); + +template +PARLAY_ASSUME_TRIVIALLY_RELOCATABLE(std::forward_list); + +template +PARLAY_ASSUME_TRIVIALLY_RELOCATABLE(std::vector); + + +// Specializations for Microsoft STL. 
+#elif defined(_MSC_VER) + +template +PARLAY_ASSUME_TRIVIALLY_RELOCATABLE(std::deque); + +template +PARLAY_ASSUME_TRIVIALLY_RELOCATABLE(std::forward_list); + +template<> +PARLAY_ASSUME_TRIVIALLY_RELOCATABLE(std::string); + +template +PARLAY_ASSUME_TRIVIALLY_RELOCATABLE(std::vector); + +#endif + -template -struct is_trivially_relocatable> : - std::bool_constant::value && - is_trivially_relocatable::value> {}; +#endif } // namespace parlay diff --git a/include/parlay/utilities.h b/include/parlay/utilities.h index 720c2b8a..319d1b44 100644 --- a/include/parlay/utilities.h +++ b/include/parlay/utilities.h @@ -26,10 +26,31 @@ namespace parlay { +struct empty {}; + +template +struct uninitialized { + union { + empty empty_; + T value; + }; + uninitialized() : empty_{} { } + ~uninitialized() { } +}; + +static_assert(std::is_default_constructible_v>>); +static_assert(std::is_destructible_v>>); + +// Returns a raw void* to the given object, stripping cv qualifiers. +template +PARLAY_INLINE constexpr void* voidify(T& obj) noexcept { + return const_cast(static_cast(std::addressof(obj))); +} + // Obtains a pointer to an object of type T located at the address represented // by p. Essentially performs std::launder(reinterpret_cast(p)). 
template -[[nodiscard]] constexpr T* from_bytes(std::byte* p) noexcept { +[[nodiscard]] PARLAY_INLINE constexpr T* from_bytes(std::byte* p) noexcept { // std::launder not available on older compilers #ifdef __cpp_lib_launder return std::launder(reinterpret_cast(p)); @@ -39,11 +60,6 @@ template } -template -size_t log2_up(T); - -struct empty {}; - typedef uint32_t flags; const flags no_flag = 0; const flags fl_sequential = 1; @@ -53,39 +69,79 @@ const flags fl_conservative = 8; const flags fl_inplace = 16; template -inline void assign_uninitialized(T& a, const type_identity_t& b) { +PARLAY_INLINE void assign_uninitialized(T& a, const type_identity_t& b) { PARLAY_ASSERT_UNINITIALIZED(a); new (static_cast(std::addressof(a))) T(b); } template -inline auto assign_uninitialized(T& a, type_identity_t&& b) { +PARLAY_INLINE auto assign_uninitialized(T& a, type_identity_t&& b) { PARLAY_ASSERT_UNINITIALIZED(a); new (static_cast(std::addressof(a))) T(std::move(b)); } template -inline void move_uninitialized(T& a, type_identity_t& b) { +PARLAY_INLINE void move_uninitialized(T& a, type_identity_t& b) { PARLAY_ASSERT_UNINITIALIZED(a); new (static_cast(std::addressof(a))) T(std::move(b)); } -// Relocate a single object into uninitialized memory, leaving -// the source memory uninitialized afterwards. +// Relocate the object located at source into dest (required to be uninitialized memory), +// leaving source uninitialized. +// +// Effects: Equivalent to: +// +// struct guard { T *t; ~guard() { destroy_at(t); } } g{source}; +// return ::new (voidify(*dest)) T(std::move(*source)); + +// except that if T is trivially relocatable, side effects associated with the relocation +// of the value of *source might not happen. 
+// +#if defined(__cpp_lib_trivially_relocatable) +using std::relocate_at; +#else template -inline void uninitialized_relocate(T* to, T* from) noexcept(is_nothrow_relocatable::value) { +PARLAY_INLINE T* relocate_at(T* source, T* dest) + noexcept(is_trivially_relocatable_v || std::is_nothrow_move_constructible_v) { if constexpr (is_trivially_relocatable::value) { - std::memcpy(static_cast(to), static_cast(from), sizeof(T)); + std::memcpy(voidify(*dest), voidify(*source), sizeof(T)); + return dest; } else { static_assert(std::is_move_constructible::value); static_assert(std::is_destructible::value); - PARLAY_ASSERT_UNINITIALIZED(*to); - ::new (to) T(std::move(*from)); - from->~T(); - PARLAY_ASSERT_UNINITIALIZED(*from); + PARLAY_ASSERT_UNINITIALIZED(*dest); + struct guard { T *t; ~guard() { std::destroy_at(t); PARLAY_ASSERT_UNINITIALIZED(*t); } } g{source}; + return ::new (voidify(*dest)) T(std::move(*source)); } } +#endif + +// Relocate the object located at source into the return value, leaving source uninitialized. +// +// Effects: Equivalent to: +// +// remove_cv_t t = std::move(source); +// destroy_at(source); +// return t; + +// except that if T is trivially relocatable, side effects associated with the relocation +// of the object’s value might not happen. 
+// +#if defined(__cpp_lib_trivially_relocatable) +using std::relocate; +#else +template +[[nodiscard]] PARLAY_INLINE std::remove_cv_t relocate(T* source) + noexcept(std::is_nothrow_move_constructible_v) { + + static_assert(std::is_move_constructible::value); + static_assert(std::is_destructible::value); + std::remove_cv_t t = std::move(*source); + std::destroy_at(source); + return t; +} +#endif /* Hashing functions for various integer types */ @@ -352,7 +408,7 @@ void assign_dispatch(T& dest, const type_identity_t& val, uninitialized_copy_ // Uninitialized relocate dispatch -- destructively move val into dest template void assign_dispatch(T& dest, T& val, uninitialized_relocate_tag) { - uninitialized_relocate(&dest, &val); + relocate_at(&val, &dest); } diff --git a/test/test_counting_sort.cpp b/test/test_counting_sort.cpp index 454a1dd9..956925c2 100644 --- a/test/test_counting_sort.cpp +++ b/test/test_counting_sort.cpp @@ -92,13 +92,6 @@ TEST(TestCountingSort, TestCountingSortInplaceNonContiguous) { ASSERT_TRUE(std::is_sorted(std::begin(s), std::end(s))); } -namespace parlay { -// Specialize std::unique_ptr to be considered trivially relocatable -template -struct is_trivially_relocatable> : public std::true_type { -}; -} - TEST(TestCountingSort, TestCountingSortInplaceUniquePtr) { auto s = parlay::tabulate(100000, [](long long i) { return std::make_unique((51 * i + 61) % num_buckets); diff --git a/test/test_integer_sort.cpp b/test/test_integer_sort.cpp index ee1c327b..c6eced87 100644 --- a/test/test_integer_sort.cpp +++ b/test/test_integer_sort.cpp @@ -9,11 +9,8 @@ #include "sorting_utils.h" -namespace parlay { - // Specialize std::unique_ptr to be considered trivially destructive movable - template - struct is_trivially_relocatable> : public std::true_type { }; -} +static_assert(parlay::is_trivially_relocatable_v>); + TEST(TestIntegerSort, TestIntegerSortEmptyInput) { auto s = parlay::sequence(0); @@ -42,21 +39,23 @@ TEST(TestIntegerSort, 
TestIntegerSortInplaceUniquePtr) { } } -// HeapInt is both copyable and trivially destructive movable -struct HeapInt { +// HeapInt is both copyable and trivially relocatable +struct PARLAY_TRIVIALLY_RELOCATABLE HeapInt { int* x; HeapInt(int _x) : x(new int(_x)) { } ~HeapInt() { if (x != nullptr) delete x; } HeapInt(const HeapInt& other) : x(new int(*(other.x))) { } - HeapInt(HeapInt&& other) : x(other.x) { + HeapInt(HeapInt&& other) noexcept : x(other.x) { other.x = nullptr; } HeapInt& operator=(const HeapInt& other) { - if (x != nullptr) delete x; - x = new int(*(other.x)); + if (this != &other) { + if (x != nullptr) delete x; + x = new int(*(other.x)); + } return *this; } - HeapInt& operator=(HeapInt&& other) { + HeapInt& operator=(HeapInt&& other) noexcept { if (x != nullptr) delete x; x = other.x; other.x = nullptr; @@ -68,11 +67,15 @@ struct HeapInt { } }; +#if defined(PARLAY_MUST_SPECIALIZE_IS_TRIVIALLY_RELOCATABLE) namespace parlay { - // Specialize std::unique_ptr to be considered trivially relocatable - template<> - struct is_trivially_relocatable : public std::true_type { }; + +template<> +PARLAY_ASSUME_TRIVIALLY_RELOCATABLE(HeapInt); + } +#endif + TEST(TestIntegerSort, TestIntegerSortCopyAndDestructiveMove) { auto s = parlay::tabulate(100000, [](int i) { diff --git a/test/test_relocate.cpp b/test/test_relocate.cpp index e3add3a4..d0f0942c 100644 --- a/test/test_relocate.cpp +++ b/test/test_relocate.cpp @@ -4,16 +4,13 @@ #include #include +#include #include #include #include -// Launder is not available in some older compilers -#ifdef __cpp_lib_launder -#define LAUNDER(x) std::launder((x)) -#else -#define LAUNDER(x) (x) -#endif +#include + // A type that is not trivially relocatable because // it keeps a pointer to an object inside itself @@ -23,21 +20,23 @@ struct NotTriviallyRelocatable { explicit NotTriviallyRelocatable(int _x) : x(_x), px(&x) { } NotTriviallyRelocatable(const NotTriviallyRelocatable& other) : x(other.x), px(&x) { } 
NotTriviallyRelocatable(NotTriviallyRelocatable&& other) noexcept : x(other.x), px(&x) { } + int get() const { assert(px == &x); return *px; } }; // A type that is trivially relocatable because it is -// trivially movable and trivally destructible +// trivially movable and trivially destructible struct TriviallyRelocatable { int x; explicit TriviallyRelocatable(int _x) : x(_x) { } TriviallyRelocatable(const TriviallyRelocatable&) = default; TriviallyRelocatable(TriviallyRelocatable&&) = default; ~TriviallyRelocatable() = default; + int get() const { return x; } }; // A type that we annotate as trivially relocatable, // even though it is not deducible as such by the compiler -struct MyTriviallyRelocatable { +struct PARLAY_TRIVIALLY_RELOCATABLE MyTriviallyRelocatable { int* x; explicit MyTriviallyRelocatable(int _x) : x(new int(_x)) { } MyTriviallyRelocatable(const MyTriviallyRelocatable& other) : x(nullptr) { @@ -45,7 +44,7 @@ struct MyTriviallyRelocatable { x = new int(*(other.x)); } } - MyTriviallyRelocatable(MyTriviallyRelocatable&& other) : x(other.x) { other.x = nullptr; } + MyTriviallyRelocatable(MyTriviallyRelocatable&& other) noexcept : x(other.x) { other.x = nullptr; } ~MyTriviallyRelocatable() { if (x != nullptr) { *x = -1; @@ -53,15 +52,18 @@ struct MyTriviallyRelocatable { x = nullptr; } } + int get() const { return *x; } }; +#if defined(PARLAY_MUST_SPECIALIZE_IS_TRIVIALLY_RELOCATABLE) namespace parlay { // Mark the type MyTriviallyRelocatable as trivially relocatable template<> -struct is_trivially_relocatable : public std::true_type { }; +PARLAY_ASSUME_TRIVIALLY_RELOCATABLE(MyTriviallyRelocatable); } +#endif static_assert(!parlay::is_trivially_relocatable_v); @@ -69,286 +71,149 @@ static_assert(parlay::is_trivially_relocatable_v); static_assert(parlay::is_trivially_relocatable_v); -TEST(TestRelocate, TestNotTriviallyRelocatable) { - std::aligned_storage::type a, b; - NotTriviallyRelocatable* from = LAUNDER(reinterpret_cast(&a)); - 
NotTriviallyRelocatable* to = LAUNDER(reinterpret_cast(&b)); - // -- Both from and to point to uninitialized memory - - new (from) NotTriviallyRelocatable(42); - ASSERT_EQ(from->x, 42); - ASSERT_EQ(from->px, &(from->x)); - // -- Now from points to a valid object, and to points to uninitialized memory +TEST(TestRelocateAt, TestNotTriviallyRelocatable) { + parlay::uninitialized a, b; + NotTriviallyRelocatable* source = &a.value; + NotTriviallyRelocatable* dest = &b.value; + // -- Both source and dest point to uninitialized memory - parlay::uninitialized_relocate(to, from); - ASSERT_EQ(to->x, 42); - ASSERT_EQ(to->px, &(to->x)); - // -- Now to points to a valid object, and from points to uninitialized memory - - to->~NotTriviallyRelocatable(); - // -- Both from and to point to uninitialized memory -} + ::new (source) NotTriviallyRelocatable(42); + ASSERT_EQ(source->x, 42); + ASSERT_EQ(source->px, &(source->x)); + // -- Now source points to a valid object, and dest points to uninitialized memory + parlay::relocate_at(source, dest); + ASSERT_EQ(dest->x, 42); + ASSERT_EQ(dest->px, &(dest->x)); + // -- Now dest points to a valid object, and source points to uninitialized memory -TEST(TestRelocate, TestTriviallyRelocatable) { - std::aligned_storage::type a, b; - TriviallyRelocatable* from = LAUNDER(reinterpret_cast(&a)); - TriviallyRelocatable* to = LAUNDER(reinterpret_cast(&b)); - // -- Both from and to point to uninitialized memory - - new (from) TriviallyRelocatable(42); - ASSERT_EQ(from->x, 42); - // -- Now from points to a valid object, and to points to uninitialized memory - - parlay::uninitialized_relocate(to, from); - ASSERT_EQ(to->x, 42); - // -- Now to points to a valid object, and from points to uninitialized memory - - to->~TriviallyRelocatable(); - // -- Both from and to point to uninitialized memory + std::destroy_at(dest); + // -- Both source and dest point to uninitialized memory } -TEST(TestRelocate, TestCustomTriviallyRelocatable) { - 
std::aligned_storage::type a, b; - MyTriviallyRelocatable* from = LAUNDER(reinterpret_cast(&a)); - MyTriviallyRelocatable* to = LAUNDER(reinterpret_cast(&b)); - // -- Both from and to point to uninitialized memory - - new (from) MyTriviallyRelocatable(42); - ASSERT_EQ(*(from->x), 42); - // -- Now from points to a valid object, and to points to uninitialized memory +TEST(TestRelocateAt, TestTriviallyRelocatable) { + parlay::uninitialized a, b; + TriviallyRelocatable* source = &a.value; + TriviallyRelocatable* dest = &b.value; + // -- Both source and dest point to uninitialized memory - parlay::uninitialized_relocate(to, from); - ASSERT_EQ(*(to->x), 42); - // -- Now to points to a valid object, and from points to uninitialized memory - - to->~MyTriviallyRelocatable(); - // -- Both from and to point to uninitialized memory -} + ::new (source) TriviallyRelocatable(42); + ASSERT_EQ(source->x, 42); + // -- Now source points to a valid object, and dest points to uninitialized memory + parlay::relocate_at(source, dest); + ASSERT_EQ(dest->x, 42); + // -- Now dest points to a valid object, and source points to uninitialized memory -TEST(TestRelocate, TestNotTriviallyRelocatableArray) { - constexpr int N = 100000; - std::vector::type> a(N), b(N); - NotTriviallyRelocatable* from = LAUNDER(reinterpret_cast(a.data())); - NotTriviallyRelocatable* to = LAUNDER(reinterpret_cast(b.data())); - // -- Both from and to point to uninitialized memory - - for (int i = 0; i < N; i++) { - new (&from[i]) NotTriviallyRelocatable(i); - } - for (int i = 0; i < N; i++) { - ASSERT_EQ(from[i].x, i); - ASSERT_EQ(from[i].px, &(from[i].x)); - } - // -- Now from points to an array of valid objects, and to points to uninitialized memory - - parlay::uninitialized_relocate_n(to, from, N); - for (int i = 0; i < N; i++) { - ASSERT_EQ(to[i].x, i); - ASSERT_EQ(to[i].px, &(to[i].x)); - } - // -- Now to points to an array of valid objects, and from points to uninitialized memory - - for (int i = 0; i < N; i++) 
{ - to[i].~NotTriviallyRelocatable(); - } - // -- Both from and to point to uninitialized memory -} - -TEST(TestRelocate, TestTriviallyRelocatableArray) { - constexpr int N = 100000; - std::vector::type> a(N), b(N); - TriviallyRelocatable* from = LAUNDER(reinterpret_cast(a.data())); - TriviallyRelocatable* to = LAUNDER(reinterpret_cast(b.data())); - // -- Both from and to point to uninitialized memory - - for (int i = 0; i < N; i++) { - new (&from[i]) TriviallyRelocatable(i); - } - for (int i = 0; i < N; i++) { - ASSERT_EQ(from[i].x, i); - } - // -- Now from points to an array of valid objects, and to points to uninitialized memory - - parlay::uninitialized_relocate_n(to, from, N); - for (int i = 0; i < N; i++) { - ASSERT_EQ(to[i].x, i); - } - // -- Now to points to an array of valid objects, and from points to uninitialized memory - - for (int i = 0; i < N; i++) { - to[i].~TriviallyRelocatable(); - } - // -- Both from and to point to uninitialized memory -} - -TEST(TestRelocate, TestCustomTriviallyRelocatableArray) { - constexpr int N = 100000; - std::vector::type> a(N), b(N); - MyTriviallyRelocatable* from = LAUNDER(reinterpret_cast(a.data())); - MyTriviallyRelocatable* to = LAUNDER(reinterpret_cast(b.data())); - // -- Both from and to point to uninitialized memory - - for (int i = 0; i < N; i++) { - new (&from[i]) MyTriviallyRelocatable(i); - } - for (int i = 0; i < N; i++) { - ASSERT_EQ(*(from[i].x), i); - } - // -- Now from points to an array of valid objects, and to points to uninitialized memory - - parlay::uninitialized_relocate_n(to, from, N); - for (size_t i = 0; i < N; i++) { - ASSERT_EQ(*(to[i].x), i); - } - // -- Now to points to an array of valid objects, and from points to uninitialized memory - - for (int i = 0; i < N; i++) { - to[i].~MyTriviallyRelocatable(); - } - // -- Both from and to point to uninitialized memory + std::destroy_at(dest); + // -- Both source and dest point to uninitialized memory } -TEST(TestRelocate, 
TestRelocatableNonContiguousArray) { - constexpr int N = 100000; - std::deque::type> a(N), b(N); - auto from = std::begin(a); - auto to = std::begin(b); - // -- Both from and to point to uninitialized memory - - static_assert(!parlay::is_contiguous_iterator_v); - static_assert(!parlay::is_contiguous_iterator_v); - - auto get_from = [&](auto i) { - return LAUNDER(reinterpret_cast(&from[i])); - }; - auto get_to = [&](auto i) { - return LAUNDER(reinterpret_cast(&to[i])); - }; +TEST(TestRelocateAt, TestCustomTriviallyRelocatable) { + parlay::uninitialized a, b; + MyTriviallyRelocatable* source = &a.value; + MyTriviallyRelocatable* dest = &b.value; + // -- Both source and dest point to uninitialized memory - for (int i = 0; i < N; i++) { - new (get_from(i)) MyTriviallyRelocatable(i); - } - for (int i = 0; i < N; i++) { - ASSERT_EQ(*(get_from(i)->x), i); - } - // -- Now from points to an array of valid objects, and to points to uninitialized memory - - parlay::uninitialized_relocate_n(to, from, N); - for (int i = 0; i < N; i++) { - ASSERT_EQ(*(get_to(i)->x), i); - } - // -- Now to points to an array of valid objects, and from points to uninitialized memory + ::new (source) MyTriviallyRelocatable(42); + ASSERT_EQ(*(source->x), 42); + // -- Now source points to a valid object, and dest points to uninitialized memory - for (int i = 0; i < N; i++) { - get_to(i)->~MyTriviallyRelocatable(); - } - // -- Both from and to point to uninitialized memory + parlay::relocate_at(source, dest); + ASSERT_EQ(*(dest->x), 42); + // -- Now dest points to a valid object, and source points to uninitialized memory + + std::destroy_at(dest); + // -- Both source and dest point to uninitialized memory } -TEST(TestRelocate, TestRelocatableNonRandomAccessArray) { - constexpr int N = 1000; - std::list::type> a(N), b(N); - auto from = std::begin(a); - auto to = std::begin(b); - // -- Both from and to point to uninitialized memory - - static_assert(!parlay::is_random_access_iterator_v); - 
static_assert(!parlay::is_random_access_iterator_v); - - auto get_from = [&](auto i) { - auto it = from; - std::advance(it, i); - return LAUNDER(reinterpret_cast(&(*it))); - }; - - auto get_to = [&](auto i) { - auto it = to; - std::advance(it, i); - return LAUNDER(reinterpret_cast(&(*it))); - }; - - for (int i = 0; i < N; i++) { - new (get_from(i)) MyTriviallyRelocatable(i); - } - for (int i = 0; i < N; i++) { - ASSERT_EQ(*(get_from(i)->x), i); - } - // -- Now from points to an array of valid objects, and to points to uninitialized memory - parlay::uninitialized_relocate_n(to, from, N); - for (int i = 0; i < N; i++) { - ASSERT_EQ(*(get_to(i)->x), i); - } - // -- Now to points to an array of valid objects, and from points to uninitialized memory +TEST(TestRelocate, TestRelocate) { + alignas(std::unique_ptr) char storage[sizeof(std::unique_ptr)]; + auto* up = ::new (&storage) std::unique_ptr{std::make_unique(42)}; - for (int i = 0; i < N; i++) { - get_to(i)->~MyTriviallyRelocatable(); - } - // -- Both from and to point to uninitialized memory + auto x = parlay::relocate(up); + ASSERT_EQ(*x, 42); } -// An uninitialized memory container that holds an object of type T, -// but does not construct it or destroy it. It will move it, though. 
-template -struct NonRelocatableStorage { - char storage[sizeof(T)]; - NonRelocatableStorage() { } - NonRelocatableStorage(NonRelocatableStorage&& other) { - new (get_storage()) T(std::move(*other.get_storage())); - } - T* get_storage() { - return LAUNDER(reinterpret_cast(storage)); - } - ~NonRelocatableStorage() { } // Non-trivial destructor prevents it from being trivially relocatable -}; +template +class TestRangeRelocate : public testing::Test { }; +template +struct RangeRelocateTestParams { + using container = Container; + using value_type = T; + static constexpr inline bool use_iterator = UseIterator; +}; -TEST(TestRelocate, TestNonRelocatableNonRandomAccessArray) { - constexpr int N = 1000; - std::list> a(N), b(N); - auto from = std::begin(a); - auto to = std::begin(b); +// Test on the cartesian product of {container types} x {TriviallyRelocatable, NotTriviallyRelocatable, MyTriviallyRelocatable} +// x {use_iterator = true, false} +// +using TestTypes = ::testing::Types< + RangeRelocateTestParams>, TriviallyRelocatable, true>, + RangeRelocateTestParams>, NotTriviallyRelocatable, true>, + RangeRelocateTestParams>, MyTriviallyRelocatable, true>, + RangeRelocateTestParams>, TriviallyRelocatable, false>, + RangeRelocateTestParams>, NotTriviallyRelocatable, false>, + RangeRelocateTestParams>, MyTriviallyRelocatable, false>, + + RangeRelocateTestParams>, TriviallyRelocatable, true>, + RangeRelocateTestParams>, NotTriviallyRelocatable, true>, + RangeRelocateTestParams>, MyTriviallyRelocatable, true>, + RangeRelocateTestParams>, TriviallyRelocatable, false>, + RangeRelocateTestParams>, NotTriviallyRelocatable, false>, + RangeRelocateTestParams>, MyTriviallyRelocatable, false>, + + RangeRelocateTestParams>, TriviallyRelocatable, true>, + RangeRelocateTestParams>, NotTriviallyRelocatable, true>, + RangeRelocateTestParams>, MyTriviallyRelocatable, true>, + RangeRelocateTestParams>, TriviallyRelocatable, false>, + RangeRelocateTestParams>, NotTriviallyRelocatable, false>, + RangeRelocateTestParams>, MyTriviallyRelocatable, false> +>; + +TYPED_TEST_SUITE(TestRangeRelocate, TestTypes); + 
+TYPED_TEST(TestRangeRelocate, TestTriviallyRelocatable) { + constexpr int N = 100000; + typename TypeParam::container source(N), dest(N); - static_assert(!parlay::is_random_access_iterator_v); - static_assert(!parlay::is_random_access_iterator_v); - static_assert(!parlay::is_trivially_relocatable_v>); + using T = typename TypeParam::value_type; + static_assert(std::is_same_v>); - // -- Both from and to point to uninitialized memory + // Initialize elements of source + auto s_it = source.begin(); + for (int i = 0; i < N; i++, ++s_it) { + ::new (&(s_it->value)) T(i); + ASSERT_EQ(s_it->value.get(), i); + } - auto get_from = [&](auto i) { - auto it = from; - std::advance(it, i); - return it->get_storage(); - }; + // -- Now source points to a range of valid objects, and dest points to a range of uninitialized objects - auto get_to = [&](auto i) { - auto it = to; - std::advance(it, i); - return it->get_storage(); - }; + auto source_begin = parlay::internal::uninitialized_iterator_adaptor{source.begin()}; + auto source_end = parlay::internal::uninitialized_iterator_adaptor{source.end()}; + auto dest_begin = parlay::internal::uninitialized_iterator_adaptor{dest.begin()}; + auto dest_end = parlay::internal::uninitialized_iterator_adaptor{dest.end()}; - for (int i = 0; i < N; i++) { - new (get_from(i)) NotTriviallyRelocatable(i); + if constexpr (TypeParam::use_iterator) { + auto result = parlay::uninitialized_relocate(source_begin, source_end, dest_begin); + ASSERT_EQ(result, dest_end); } - for (int i = 0; i < N; i++) { - ASSERT_EQ((get_from(i)->x), i); - ASSERT_EQ(get_from(i)->px, &(get_from(i)->x)); + else { + auto [s_result, d_result] = parlay::uninitialized_relocate_n(source_begin, N, dest_begin); + ASSERT_EQ(s_result, source_end); + ASSERT_EQ(d_result, dest_end); } - // -- Now from points to an array of valid objects, and to points to uninitialized memory - parlay::uninitialized_relocate_n(to, from, N); - for (int i = 0; i < N; i++) { - ASSERT_EQ((get_to(i)->x), i); - 
ASSERT_EQ(get_to(i)->px, &(get_to(i)->x)); - } - // -- Now to points to an array of valid objects, and from points to uninitialized memory + // -- Now dest points to a range of valid objects, and source points to a range of uninitialized objects - for (int i = 0; i < N; i++) { - get_to(i)->~NotTriviallyRelocatable(); + auto d_it = dest.begin(); + for (int i = 0; i < N; i++, ++d_it) { + ASSERT_EQ(d_it->value.get(), i); + std::destroy_at(&(d_it->value)); } - // -- Both from and to point to uninitialized memory -} \ No newline at end of file + + // -- Both source and dest point to uninitialized memory +} diff --git a/test/test_sample_sort.cpp b/test/test_sample_sort.cpp index 7d9d94c0..1f514159 100644 --- a/test/test_sample_sort.cpp +++ b/test/test_sample_sort.cpp @@ -101,12 +101,6 @@ TEST(TestSampleSort, TestSortInplaceUncopyable) { ASSERT_TRUE(std::is_sorted(std::begin(s), std::end(s))); } -namespace parlay { - // Specialize std::unique_ptr to be considered trivially relocatable - template - struct is_trivially_relocatable> : public std::true_type { }; -} - TEST(TestSampleSort, TestSortInplaceUniquePtr) { auto s = parlay::tabulate(100000, [](long long int i) { return std::make_unique((50021 * i + 61) % (1 << 20)); diff --git a/test/test_sequence.cpp b/test/test_sequence.cpp index c22698f6..a0bc0fda 100644 --- a/test/test_sequence.cpp +++ b/test/test_sequence.cpp @@ -395,7 +395,7 @@ TEST(TestSequence, TestMoveAppendToEmptyAfterReserve) { TEST(TestSequence, TestAppendMoveNonTrivial) { auto s1 = parlay::sequence>{}; - auto s2 = parlay::sequence>{}; + auto s2 = std::vector>{}; s1.emplace_back(std::make_unique(5)); s2.emplace_back(std::make_unique(6)); @@ -403,7 +403,7 @@ TEST(TestSequence, TestAppendMoveNonTrivial) { ASSERT_FALSE(s1.empty()); ASSERT_FALSE(s2.empty()); - s1.append(std::move(s2)); + s1.append(std::make_move_iterator(s2.begin()), std::make_move_iterator(s2.end())); ASSERT_EQ(s1.size(), 2); ASSERT_EQ(s2[0], nullptr); @@ -413,6 +413,19 @@ TEST(TestSequence, 
TestAppendMoveNonTrivial) { ASSERT_EQ(*s1[1], 6); } +TEST(TestSequence, TestAppendSequenceRvalue) { + auto s1 = parlay::sequence>::from_function(1000, + [](int i) { return std::make_unique(i); }); + auto s2 = parlay::sequence>::from_function(1000, + [](int i) { return std::make_unique(i+1000); }); + s1.append(std::move(s2)); + ASSERT_TRUE(s2.empty()); // move from should leave s2 empty + ASSERT_EQ(s1.size(), 2000); + for (int i = 0; i < 2000; i++) { + ASSERT_EQ(*s1[i], i); + } +} + TEST(TestSequence, TestInsert) { auto s = parlay::sequence{1,2,4,5}; auto s2 = parlay::sequence{1,2,3,4,5}; @@ -903,13 +916,17 @@ TEST(TestSequence, TestLessThan) { #if defined(PARLAY_EXCEPTIONS_ENABLED) TEST(TestSequence, TestAtThrow) { - auto s = parlay::sequence{1,2,3,4,5,6,7,8,9}; - EXPECT_THROW({ s.at(9); }, std::out_of_range); + EXPECT_THROW({ + auto s = parlay::sequence({1,2,3,4,5,6,7,8,9}); + s.at(9); + }, std::out_of_range); } TEST(TestSequence, TestAtThrowConst) { - const auto s = parlay::sequence{1,2,3,4,5,6,7,8,9}; - EXPECT_THROW({ s.at(9); }, std::out_of_range); + EXPECT_THROW({ + const auto s = parlay::sequence({1,2,3,4,5,6,7,8,9}); + s.at(9); + }, std::out_of_range); } #else