Skip to content

Commit b366e93

Browse files
committed
Refactor to simplify logic of for_each_n_segment.h
1 parent 8db6dd0 commit b366e93

File tree

5 files changed: 19 additions (+19) and 61 deletions (-61)

libcxx/docs/ReleaseNotes/21.rst

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -64,29 +64,22 @@ Improvements and New Features
6464

6565
- The ``num_put::do_put`` integral overloads have been optimized, resulting in a performance improvement of up to 2.4x.
6666

67-
<<<<<<< HEAD
6867
- The ``std::stable_sort`` algorithm uses radix sort for floating-point types now, which can improve the performance
6968
up to 10x, depending on the type of the sorted elements and the initial state of the sorted array.
7069

7170
- The segmented iterator optimization for ``std::for_each`` has been backported to C++11. Previously it was only available
7271
in C++23 and later.
73-
=======
74-
- The ``std::ranges::for_each`` and ``std::ranges::for_each_n`` algorithms have been optimized for segmented iterators,
75-
<<<<<<< HEAD
76-
resulting in performance improvements of up to 21.3x for ``std::deque::iterator`` segmented inputs and 24.9x for
77-
``join_view`` of ``vector<vector<T>>``.
78-
>>>>>>> 50ac206d4a13 (Apply optimization for join_view segmented iterators)
79-
=======
80-
resulting in performance improvements of up to 21.3x for ``std::deque::iterator`` and 24.9x for ``join_view`` of
81-
``vector<vector<char>>``.
82-
>>>>>>> 590136ba0d9f (Fix review comments)
8372

8473
- The ``std::for_each_n`` algorithm has been optimized for segmented iterators, resulting in a performance improvement of
8574
up to 17.7x for ``std::deque<short>`` iterators, and up to 13.9x for ``std::join_view<vector<vector<short>>>`` iterators.
8675

8776
- The ``bitset::to_string`` function has been optimized, resulting in a performance improvement of up to 8.3x for bitsets
8877
with uniformly distributed zeros and ones, and up to 13.5x and 16.1x for sparse and dense bitsets, respectively.
8978

79+
- The ``std::ranges::for_each`` and ``std::ranges::for_each_n`` algorithms have been optimized for segmented iterators,
80+
resulting in performance improvements of up to 21.3x for ``std::deque::iterator`` and 24.9x for ``join_view`` of
81+
``vector<vector<char>>``.
82+
9083
Deprecations and Removals
9184
-------------------------
9285

libcxx/include/__algorithm/for_each.h

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,15 @@
1616
#include <__iterator/segmented_iterator.h>
1717
#include <__type_traits/enable_if.h>
1818
#include <__type_traits/invoke.h>
19+
#include <__utility/move.h>
1920

2021
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
2122
# pragma GCC system_header
2223
#endif
2324

25+
_LIBCPP_PUSH_MACROS
26+
#include <__undef_macros>
27+
2428
_LIBCPP_BEGIN_NAMESPACE_STD
2529

2630
template <class _InputIterator, class _Sent, class _Func, class _Proj>
@@ -36,12 +40,13 @@ template <class _SegmentedIterator,
3640
class _Func,
3741
class _Proj,
3842
__enable_if_t<__is_segmented_iterator<_SegmentedIterator>::value, int> = 0>
39-
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
40-
__for_each(_SegmentedIterator __first, _SegmentedIterator __last, _Function& __func, _Proj& __proj) {
43+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _SegmentedIterator
44+
__for_each(_SegmentedIterator __first, _SegmentedIterator __last, _Func& __func, _Proj& __proj) {
4145
using __local_iterator_t = typename __segmented_iterator_traits<_SegmentedIterator>::__local_iterator;
4246
std::__for_each_segment(__first, __last, [&](__local_iterator_t __lfirst, __local_iterator_t __llast) {
4347
std::__for_each(__lfirst, __llast, __func, __proj);
4448
});
49+
return __last;
4550
}
4651
#endif // !_LIBCPP_CXX03_LANG
4752

@@ -55,4 +60,6 @@ for_each(_InputIterator __first, _InputIterator __last, _Func __f) {
5560

5661
_LIBCPP_END_NAMESPACE_STD
5762

63+
_LIBCPP_POP_MACROS
64+
5865
#endif // _LIBCPP___ALGORITHM_FOR_EACH_H

libcxx/include/__algorithm/for_each_n.h

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,11 @@
1515
#include <__config>
1616
#include <__functional/identity.h>
1717
#include <__iterator/iterator_traits.h>
18-
#include <__iterator/next.h>
1918
#include <__iterator/segmented_iterator.h>
2019
#include <__type_traits/disjunction.h>
2120
#include <__type_traits/enable_if.h>
22-
#include <__type_traits/negation.h>
2321
#include <__type_traits/invoke.h>
22+
#include <__type_traits/negation.h>
2423
#include <__utility/convert_to_integral.h>
2524
#include <__utility/move.h>
2625

@@ -59,11 +58,11 @@ template <class _RandIter,
5958
class _Proj,
6059
__enable_if_t<__has_random_access_iterator_category<_RandIter>::value, int> = 0>
6160
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandIter
62-
__for_each_n(_RandIter __first, _Size __orig_n, _Func& __f) {
61+
__for_each_n(_RandIter __first, _Size __orig_n, _Func& __f, _Proj& __proj) {
6362
typename std::iterator_traits<_RandIter>::difference_type __n = __orig_n;
6463
auto __last = __first + __n;
6564
std::__for_each(__first, __last, __f, __proj);
66-
return std::move(__last);
65+
return __last;
6766
}
6867

6968
#ifndef _LIBCPP_CXX03_LANG

libcxx/include/__algorithm/for_each_n_segment.h

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,7 @@
1010
#define _LIBCPP___ALGORITHM_FOR_EACH_N_SEGMENT_H
1111

1212
#include <__config>
13-
<<<<<<< HEAD
1413
#include <__iterator/iterator_traits.h>
15-
=======
16-
#include <__iterator/distance.h>
17-
#include <__iterator/iterator_traits.h>
18-
#include <__iterator/next.h>
19-
>>>>>>> 4a86118918e8 (Fix review comments)
2014
#include <__iterator/segmented_iterator.h>
2115

2216
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)

libcxx/test/benchmarks/algorithms/nonmodifying/for_each_n.bench.cpp

Lines changed: 3 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -51,44 +51,9 @@ int main(int argc, char** argv) {
5151
bm.operator()<std::vector<int>>("std::for_each_n(vector<int>)", std_for_each_n);
5252
bm.operator()<std::deque<int>>("std::for_each_n(deque<int>)", std_for_each_n);
5353
bm.operator()<std::list<int>>("std::for_each_n(list<int>)", std_for_each_n);
54-
}
55-
56-
// std::for_each_n for join_view
57-
{
58-
auto bm = []<class Container>(std::string name, auto for_each_n) {
59-
using C1 = typename Container::value_type;
60-
using ElemType = typename C1::value_type;
61-
benchmark::RegisterBenchmark(
62-
name,
63-
[for_each_n](auto& st) {
64-
std::size_t const size = st.range(0);
65-
std::size_t const seg_size = 256;
66-
std::size_t const segments = (size + seg_size - 1) / seg_size;
67-
Container c(segments);
68-
for (std::size_t i = 0, n = size; i < segments; ++i, n -= seg_size) {
69-
c[i].resize(std::min(seg_size, n), ElemType(1));
70-
}
71-
72-
auto view = c | std::views::join;
73-
auto first = view.begin();
74-
75-
for ([[maybe_unused]] auto _ : st) {
76-
benchmark::DoNotOptimize(c);
77-
auto result = for_each_n(first, size, [](ElemType& x) { x = std::clamp<ElemType>(x, 10, 100); });
78-
benchmark::DoNotOptimize(result);
79-
}
80-
})
81-
->Arg(8)
82-
->Arg(32)
83-
->Arg(50) // non power-of-two
84-
->Arg(1024)
85-
->Arg(4096)
86-
->Arg(8192)
87-
->Arg(1 << 14)
88-
->Arg(1 << 16)
89-
->Arg(1 << 18);
90-
};
91-
bm.operator()<std::vector<std::vector<int>>>("std::for_each_n(join_view(vector<vector<int>>))", std_for_each_n);
54+
bm.operator()<std::vector<int>>("rng::for_each_n(vector<int>)", std::ranges::for_each_n);
55+
bm.operator()<std::deque<int>>("rng::for_each_n(deque<int>)", std::ranges::for_each_n);
56+
bm.operator()<std::list<int>>("rng::for_each_n(list<int>)", std::ranges::for_each_n);
9257
}
9358

9459
// {std,ranges}::for_each_n for join_view

0 commit comments

Comments
 (0)