Skip to content

Commit 9b95e81

Browse files
committed
Optimize std::for_each_n for segmented iterators
1 parent 336b290 commit 9b95e81

File tree

8 files changed

+332
-72
lines changed

8 files changed

+332
-72
lines changed

libcxx/docs/ReleaseNotes/21.rst

+3
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,9 @@ Improvements and New Features
6363

6464
- The ``num_put::do_put`` integral overloads have been optimized, resulting in a performance improvement of up to 2.4x.
6565

66+
- The ``std::for_each_n`` algorithm has been optimized for segmented iterators, resulting in a performance improvement of
67+
up to 17.7x for ``std::deque<short>`` iterators, and up to 13.9x for ``std::join_view<vector<vector<short>>>`` iterators.
68+
6669
Deprecations and Removals
6770
-------------------------
6871

libcxx/include/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ set(files
2525
__algorithm/find_segment_if.h
2626
__algorithm/for_each.h
2727
__algorithm/for_each_n.h
28+
__algorithm/for_each_n_segment.h
2829
__algorithm/for_each_segment.h
2930
__algorithm/generate.h
3031
__algorithm/generate_n.h

libcxx/include/__algorithm/for_each.h

+20-25
Original file line numberDiff line numberDiff line change
@@ -9,48 +9,43 @@
99

1010
#ifndef _LIBCPP___ALGORITHM_FOR_EACH_H
1111
#define _LIBCPP___ALGORITHM_FOR_EACH_H
12-
1312
#include <__algorithm/for_each_segment.h>
1413
#include <__config>
1514
#include <__iterator/segmented_iterator.h>
16-
#include <__ranges/movable_box.h>
17-
#include <__utility/in_place.h>
18-
#include <__utility/move.h>
15+
#include <__type_traits/enable_if.h>
1916

2017
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
2118
# pragma GCC system_header
2219
#endif
2320

24-
_LIBCPP_PUSH_MACROS
25-
#include <__undef_macros>
26-
2721
_LIBCPP_BEGIN_NAMESPACE_STD
2822

29-
template <class _InputIterator, class _Function>
30-
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Function
31-
for_each(_InputIterator __first, _InputIterator __last, _Function __f) {
23+
template <class _InputIterator, class _Sent, class _Func>
24+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __for_each(_InputIterator __first, _Sent __last, _Func& __f) {
3225
for (; __first != __last; ++__first)
3326
__f(*__first);
34-
return __f;
3527
}
3628

37-
// __movable_box is available in C++20, but is actually a copyable-box, so optimization is only correct in C++23
38-
#if _LIBCPP_STD_VER >= 23
39-
template <class _SegmentedIterator, class _Function>
40-
requires __is_segmented_iterator<_SegmentedIterator>::value
41-
_LIBCPP_HIDE_FROM_ABI constexpr _Function
42-
for_each(_SegmentedIterator __first, _SegmentedIterator __last, _Function __func) {
43-
ranges::__movable_box<_Function> __wrapped_func(in_place, std::move(__func));
44-
std::__for_each_segment(__first, __last, [&](auto __lfirst, auto __llast) {
45-
__wrapped_func =
46-
ranges::__movable_box<_Function>(in_place, std::for_each(__lfirst, __llast, std::move(*__wrapped_func)));
29+
#ifndef _LIBCPP_CXX03_LANG
30+
template <class _SegmentedIterator,
31+
class _Function,
32+
__enable_if_t<__is_segmented_iterator<_SegmentedIterator>::value, int> = 0>
33+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
34+
__for_each(_SegmentedIterator __first, _SegmentedIterator __last, _Function& __func) {
35+
using __local_iterator_t = typename __segmented_iterator_traits<_SegmentedIterator>::__local_iterator;
36+
std::__for_each_segment(__first, __last, [&](__local_iterator_t __lfirst, __local_iterator_t __llast) {
37+
std::__for_each(__lfirst, __llast, __func);
4738
});
48-
return std::move(*__wrapped_func);
4939
}
50-
#endif // _LIBCPP_STD_VER >= 23
40+
#endif // !_LIBCPP_CXX03_LANG
5141

52-
_LIBCPP_END_NAMESPACE_STD
42+
template <class _InputIterator, class _Function>
43+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Function
44+
for_each(_InputIterator __first, _InputIterator __last, _Function __f) {
45+
std::__for_each(__first, __last, __f);
46+
return __f;
47+
}
5348

54-
_LIBCPP_POP_MACROS
49+
_LIBCPP_END_NAMESPACE_STD
5550

5651
#endif // _LIBCPP___ALGORITHM_FOR_EACH_H

libcxx/include/__algorithm/for_each_n.h

+57-7
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,12 @@
1010
#ifndef _LIBCPP___ALGORITHM_FOR_EACH_N_H
1111
#define _LIBCPP___ALGORITHM_FOR_EACH_N_H
1212

13+
#include <__algorithm/for_each.h>
14+
#include <__algorithm/for_each_n_segment.h>
1315
#include <__config>
16+
#include <__iterator/iterator_traits.h>
17+
#include <__iterator/segmented_iterator.h>
18+
#include <__type_traits/enable_if.h>
1419
#include <__utility/convert_to_integral.h>
1520

1621
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -19,22 +24,67 @@
1924

2025
_LIBCPP_BEGIN_NAMESPACE_STD
2126

22-
#if _LIBCPP_STD_VER >= 17
23-
24-
template <class _InputIterator, class _Size, class _Function>
25-
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator
26-
for_each_n(_InputIterator __first, _Size __orig_n, _Function __f) {
27+
template <class _InputIterator,
28+
class _Size,
29+
class _Func,
30+
__enable_if_t<!__has_random_access_iterator_category<_InputIterator>::value &&
31+
(!__is_segmented_iterator<_InputIterator>::value
32+
// || !__has_random_access_iterator_category<
33+
// typename __segmented_iterator_traits<_InputIterator>::__local_iterator>::value
34+
), // TODO: __segmented_iterator_traits<_InputIterator> results in template instantiation
35+
// during SFINAE, which is a hard error to be fixed. Once fixed, we should uncomment.
36+
int> = 0>
37+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator
38+
__for_each_n(_InputIterator __first, _Size __orig_n, _Func& __f) {
2739
typedef decltype(std::__convert_to_integral(__orig_n)) _IntegralSize;
2840
_IntegralSize __n = __orig_n;
2941
while (__n > 0) {
3042
__f(*__first);
3143
++__first;
3244
--__n;
3345
}
34-
return __first;
46+
return std::move(__first);
3547
}
3648

37-
#endif
49+
template <class _RandIter,
50+
class _Size,
51+
class _Func,
52+
__enable_if_t<__has_random_access_iterator_category<_RandIter>::value, int> = 0>
53+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandIter
54+
__for_each_n(_RandIter __first, _Size __orig_n, _Func& __f) {
55+
typename std::iterator_traits<_RandIter>::difference_type __n = __orig_n;
56+
auto __last = __first + __n;
57+
std::__for_each(__first, __last, __f);
58+
return std::move(__last);
59+
}
60+
61+
#ifndef _LIBCPP_CXX03_LANG
62+
template <class _SegmentedIterator,
63+
class _Size,
64+
class _Func,
65+
__enable_if_t<!__has_random_access_iterator_category<_SegmentedIterator>::value &&
66+
__is_segmented_iterator<_SegmentedIterator>::value &&
67+
__has_random_access_iterator_category<
68+
typename __segmented_iterator_traits<_SegmentedIterator>::__local_iterator>::value,
69+
int> = 0>
70+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _SegmentedIterator
71+
__for_each_n(_SegmentedIterator __first, _Size __orig_n, _Func& __f) {
72+
using __local_iterator_t = typename __segmented_iterator_traits<_SegmentedIterator>::__local_iterator;
73+
return std::__for_each_n_segment(__first, __orig_n, [&](__local_iterator_t __lfirst, __local_iterator_t __llast) {
74+
std::__for_each(__lfirst, __llast, __f);
75+
});
76+
}
77+
#endif // !_LIBCPP_CXX03_LANG
78+
79+
#if _LIBCPP_STD_VER >= 17
80+
81+
template <class _InputIterator, class _Size, class _Function>
82+
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator
83+
for_each_n(_InputIterator __first, _Size __orig_n, _Function __f) {
84+
return std::__for_each_n(__first, __orig_n, __f);
85+
}
86+
87+
#endif // _LIBCPP_STD_VER >= 17
3888

3989
_LIBCPP_END_NAMESPACE_STD
4090

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef _LIBCPP___ALGORITHM_FOR_EACH_N_SEGMENT_H
10+
#define _LIBCPP___ALGORITHM_FOR_EACH_N_SEGMENT_H
11+
12+
#include <__config>
13+
#include <__iterator/iterator_traits.h>
14+
#include <__iterator/segmented_iterator.h>
15+
16+
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
17+
# pragma GCC system_header
18+
#endif
19+
20+
_LIBCPP_BEGIN_NAMESPACE_STD
21+
22+
// __for_each_n_segment optimizes linear iteration over segmented iterators. It processes a segmented
23+
// input range [__first, __first + __n) by applying the functor __func to each element within the segment.
24+
// The return value of __func is ignored, and the function returns an iterator pointing to one past the
25+
// last processed element in the input range.
26+
27+
template <class _SegmentedIterator, class _Size, class _Functor>
28+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _SegmentedIterator
29+
__for_each_n_segment(_SegmentedIterator __first, _Size __orig_n, _Functor __func) {
30+
static_assert(__is_segmented_iterator<_SegmentedIterator>::value &&
31+
__has_random_access_iterator_category<
32+
typename __segmented_iterator_traits<_SegmentedIterator>::__local_iterator>::value,
33+
"__for_each_n_segment only works with segmented iterators with random-access local iterators");
34+
if (__orig_n <= 0)
35+
return __first;
36+
37+
using _Traits = __segmented_iterator_traits<_SegmentedIterator>;
38+
using __local_iter_t = typename _Traits::__local_iterator;
39+
using __difference_t = typename std::iterator_traits<__local_iter_t>::difference_type;
40+
__difference_t __n = __orig_n;
41+
auto __seg = _Traits::__segment(__first);
42+
auto __local_first = _Traits::__local(__first);
43+
__local_iter_t __local_last;
44+
45+
while (__n > 0) {
46+
__local_last = _Traits::__end(__seg);
47+
auto __seg_size = __local_last - __local_first;
48+
if (__n <= __seg_size) {
49+
__local_last = __local_first + __n;
50+
__func(__local_first, __local_last);
51+
break;
52+
}
53+
__func(__local_first, __local_last);
54+
__n -= __seg_size;
55+
__local_first = _Traits::__begin(++__seg);
56+
}
57+
58+
return _Traits::__compose(__seg, __local_last);
59+
}
60+
61+
_LIBCPP_END_NAMESPACE_STD
62+
63+
#endif // _LIBCPP___ALGORITHM_FOR_EACH_N_SEGMENT_H

libcxx/include/module.modulemap

+1
Original file line numberDiff line numberDiff line change
@@ -436,6 +436,7 @@ module std [system] {
436436
module find_segment_if { header "__algorithm/find_segment_if.h" }
437437
module find { header "__algorithm/find.h" }
438438
module for_each_n { header "__algorithm/for_each_n.h" }
439+
module for_each_n_segment { header "__algorithm/for_each_n_segment.h" }
439440
module for_each_segment { header "__algorithm/for_each_segment.h" }
440441
module for_each { header "__algorithm/for_each.h" }
441442
module generate_n { header "__algorithm/generate_n.h" }
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// UNSUPPORTED: c++03, c++11, c++14, c++17
10+
11+
#include <algorithm>
12+
#include <cstddef>
13+
#include <deque>
14+
#include <list>
15+
#include <ranges>
16+
#include <string>
17+
#include <vector>
18+
19+
#include <benchmark/benchmark.h>
20+
21+
int main(int argc, char** argv) {
22+
auto std_for_each_n = [](auto first, auto n, auto f) { return std::for_each_n(first, n, f); };
23+
24+
// std::for_each_n
25+
{
26+
auto bm = []<class Container>(std::string name, auto for_each_n) {
27+
using ElemType = typename Container::value_type;
28+
benchmark::RegisterBenchmark(
29+
name,
30+
[for_each_n](auto& st) {
31+
std::size_t const n = st.range(0);
32+
Container c(n, 1);
33+
auto first = c.begin();
34+
35+
for ([[maybe_unused]] auto _ : st) {
36+
benchmark::DoNotOptimize(c);
37+
auto result = for_each_n(first, n, [](ElemType& x) { x = std::clamp<ElemType>(x, 10, 100); });
38+
benchmark::DoNotOptimize(result);
39+
}
40+
})
41+
->Arg(8)
42+
->Arg(32)
43+
->Arg(50) // non power-of-two
44+
->Arg(1024)
45+
->Arg(4096)
46+
->Arg(8192)
47+
->Arg(1 << 14)
48+
->Arg(1 << 16)
49+
->Arg(1 << 18);
50+
};
51+
bm.operator()<std::vector<int>>("std::for_each_n(vector<int>)", std_for_each_n);
52+
bm.operator()<std::deque<int>>("std::for_each_n(deque<int>)", std_for_each_n);
53+
bm.operator()<std::list<int>>("std::for_each_n(list<int>)", std_for_each_n);
54+
}
55+
56+
// std::for_each_n for join_view
57+
{
58+
auto bm = []<class Container>(std::string name, auto for_each_n) {
59+
using C1 = typename Container::value_type;
60+
using ElemType = typename C1::value_type;
61+
benchmark::RegisterBenchmark(
62+
name,
63+
[for_each_n](auto& st) {
64+
std::size_t const size = st.range(0);
65+
std::size_t const seg_size = 256;
66+
std::size_t const segments = (size + seg_size - 1) / seg_size;
67+
Container c(segments);
68+
for (std::size_t i = 0, n = size; i < segments; ++i, n -= seg_size) {
69+
c[i].resize(std::min(seg_size, n), ElemType(1));
70+
}
71+
72+
auto view = c | std::views::join;
73+
auto first = view.begin();
74+
75+
for ([[maybe_unused]] auto _ : st) {
76+
benchmark::DoNotOptimize(c);
77+
auto result = for_each_n(first, size, [](ElemType& x) { x = std::clamp<ElemType>(x, 10, 100); });
78+
benchmark::DoNotOptimize(result);
79+
}
80+
})
81+
->Arg(8)
82+
->Arg(32)
83+
->Arg(50) // non power-of-two
84+
->Arg(1024)
85+
->Arg(4096)
86+
->Arg(8192)
87+
->Arg(1 << 14)
88+
->Arg(1 << 16)
89+
->Arg(1 << 18);
90+
};
91+
bm.operator()<std::vector<std::vector<int>>>("std::for_each_n(join_view(vector<vector<int>>))", std_for_each_n);
92+
}
93+
94+
benchmark::Initialize(&argc, argv);
95+
benchmark::RunSpecifiedBenchmarks();
96+
benchmark::Shutdown();
97+
return 0;
98+
}

0 commit comments

Comments
 (0)