Skip to content

Commit 09c266b

Browse files
authored
[libc++] Optimize std::for_each_n for segmented iterators (#135468)
This patch optimizes `std::for_each_n` for segmented iterators whose local iterators are random-access, yielding significant speedups, summarized in the tables below. This addresses a subtask of #102817.
1 parent 5a3776a commit 09c266b

File tree

8 files changed

+333
-47
lines changed

8 files changed

+333
-47
lines changed

libcxx/docs/ReleaseNotes/21.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,9 @@ Improvements and New Features
7070
- The segmented iterator optimization for ``std::for_each`` has been backported to C++11. Previously it was only available
7171
in C++23 and later.
7272

73+
- The ``std::for_each_n`` algorithm has been optimized for segmented iterators, resulting in a performance improvement of
74+
up to 17.7x for ``std::deque<short>`` iterators, and up to 13.9x for ``std::join_view<vector<vector<short>>>`` iterators.
75+
7376
Deprecations and Removals
7477
-------------------------
7578

libcxx/include/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ set(files
2525
__algorithm/find_segment_if.h
2626
__algorithm/for_each.h
2727
__algorithm/for_each_n.h
28+
__algorithm/for_each_n_segment.h
2829
__algorithm/for_each_segment.h
2930
__algorithm/generate.h
3031
__algorithm/generate_n.h

libcxx/include/__algorithm/for_each_n.h

Lines changed: 62 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,32 +10,87 @@
1010
#ifndef _LIBCPP___ALGORITHM_FOR_EACH_N_H
1111
#define _LIBCPP___ALGORITHM_FOR_EACH_N_H
1212

13+
#include <__algorithm/for_each.h>
14+
#include <__algorithm/for_each_n_segment.h>
1315
#include <__config>
16+
#include <__iterator/iterator_traits.h>
17+
#include <__iterator/segmented_iterator.h>
18+
#include <__type_traits/disjunction.h>
19+
#include <__type_traits/enable_if.h>
20+
#include <__type_traits/negation.h>
1421
#include <__utility/convert_to_integral.h>
22+
#include <__utility/move.h>
1523

1624
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
1725
# pragma GCC system_header
1826
#endif
1927

20-
_LIBCPP_BEGIN_NAMESPACE_STD
28+
_LIBCPP_PUSH_MACROS
29+
#include <__undef_macros>
2130

22-
#if _LIBCPP_STD_VER >= 17
31+
_LIBCPP_BEGIN_NAMESPACE_STD
2332

24-
template <class _InputIterator, class _Size, class _Function>
25-
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator
26-
for_each_n(_InputIterator __first, _Size __orig_n, _Function __f) {
33+
template <class _InputIterator,
34+
class _Size,
35+
class _Func,
36+
__enable_if_t<!__has_random_access_iterator_category<_InputIterator>::value &&
37+
_Or< _Not<__is_segmented_iterator<_InputIterator> >,
38+
_Not<__has_random_access_local_iterator<_InputIterator> > >::value,
39+
int> = 0>
40+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator
41+
__for_each_n(_InputIterator __first, _Size __orig_n, _Func& __f) {
2742
typedef decltype(std::__convert_to_integral(__orig_n)) _IntegralSize;
2843
_IntegralSize __n = __orig_n;
2944
while (__n > 0) {
3045
__f(*__first);
3146
++__first;
3247
--__n;
3348
}
34-
return __first;
49+
return std::move(__first);
3550
}
3651

37-
#endif
52+
template <class _RandIter,
53+
class _Size,
54+
class _Func,
55+
__enable_if_t<__has_random_access_iterator_category<_RandIter>::value, int> = 0>
56+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandIter
57+
__for_each_n(_RandIter __first, _Size __orig_n, _Func& __f) {
58+
typename std::iterator_traits<_RandIter>::difference_type __n = __orig_n;
59+
auto __last = __first + __n;
60+
std::__for_each(__first, __last, __f);
61+
return std::move(__last);
62+
}
63+
64+
#ifndef _LIBCPP_CXX03_LANG
65+
template <class _SegmentedIterator,
66+
class _Size,
67+
class _Func,
68+
__enable_if_t<!__has_random_access_iterator_category<_SegmentedIterator>::value &&
69+
__is_segmented_iterator<_SegmentedIterator>::value &&
70+
__has_random_access_iterator_category<
71+
typename __segmented_iterator_traits<_SegmentedIterator>::__local_iterator>::value,
72+
int> = 0>
73+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _SegmentedIterator
74+
__for_each_n(_SegmentedIterator __first, _Size __orig_n, _Func& __f) {
75+
using __local_iterator_t = typename __segmented_iterator_traits<_SegmentedIterator>::__local_iterator;
76+
return std::__for_each_n_segment(__first, __orig_n, [&](__local_iterator_t __lfirst, __local_iterator_t __llast) {
77+
std::__for_each(__lfirst, __llast, __f);
78+
});
79+
}
80+
#endif // !_LIBCPP_CXX03_LANG
81+
82+
#if _LIBCPP_STD_VER >= 17
83+
84+
template <class _InputIterator, class _Size, class _Function>
85+
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator
86+
for_each_n(_InputIterator __first, _Size __orig_n, _Function __f) {
87+
return std::__for_each_n(__first, __orig_n, __f);
88+
}
89+
90+
#endif // _LIBCPP_STD_VER >= 17
3891

3992
_LIBCPP_END_NAMESPACE_STD
4093

94+
_LIBCPP_POP_MACROS
95+
4196
#endif // _LIBCPP___ALGORITHM_FOR_EACH_N_H
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef _LIBCPP___ALGORITHM_FOR_EACH_N_SEGMENT_H
10+
#define _LIBCPP___ALGORITHM_FOR_EACH_N_SEGMENT_H
11+
12+
#include <__config>
13+
#include <__iterator/iterator_traits.h>
14+
#include <__iterator/segmented_iterator.h>
15+
16+
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
17+
# pragma GCC system_header
18+
#endif
19+
20+
_LIBCPP_BEGIN_NAMESPACE_STD
21+
22+
// __for_each_n_segment optimizes linear iteration over segmented iterators. It processes a segmented
23+
// input range [__first, __first + __n) by calling __func(__local_first, __local_last) once per segment sub-range.
24+
// The return value of __func is ignored, and the function returns an iterator pointing to one past the
25+
// last processed element in the input range.
26+
27+
template <class _SegmentedIterator, class _Size, class _Functor>
28+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _SegmentedIterator
29+
__for_each_n_segment(_SegmentedIterator __first, _Size __orig_n, _Functor __func) {
30+
static_assert(__is_segmented_iterator<_SegmentedIterator>::value &&
31+
__has_random_access_iterator_category<
32+
typename __segmented_iterator_traits<_SegmentedIterator>::__local_iterator>::value,
33+
"__for_each_n_segment only works with segmented iterators with random-access local iterators");
34+
if (__orig_n <= 0)
35+
return __first;
36+
37+
using _Traits = __segmented_iterator_traits<_SegmentedIterator>;
38+
using __local_iter_t = typename _Traits::__local_iterator;
39+
using __difference_t = typename std::iterator_traits<__local_iter_t>::difference_type;
40+
__difference_t __n = __orig_n;
41+
auto __seg = _Traits::__segment(__first);
42+
auto __local_first = _Traits::__local(__first);
43+
__local_iter_t __local_last;
44+
45+
while (__n > 0) {
46+
__local_last = _Traits::__end(__seg);
47+
auto __seg_size = __local_last - __local_first;
48+
if (__n <= __seg_size) {
49+
__local_last = __local_first + __n;
50+
__func(__local_first, __local_last);
51+
break;
52+
}
53+
__func(__local_first, __local_last);
54+
__n -= __seg_size;
55+
__local_first = _Traits::__begin(++__seg);
56+
}
57+
58+
return _Traits::__compose(__seg, __local_last);
59+
}
60+
61+
_LIBCPP_END_NAMESPACE_STD
62+
63+
#endif // _LIBCPP___ALGORITHM_FOR_EACH_N_SEGMENT_H

libcxx/include/__iterator/segmented_iterator.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242

4343
#include <__config>
4444
#include <__cstddef/size_t.h>
45+
#include <__iterator/iterator_traits.h>
4546
#include <__type_traits/integral_constant.h>
4647

4748
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -74,6 +75,11 @@ struct __has_specialization<_Tp, sizeof(_Tp) * 0> : true_type {};
7475
template <class _Iterator>
7576
using __is_segmented_iterator _LIBCPP_NODEBUG = __has_specialization<__segmented_iterator_traits<_Iterator> >;
7677

78+
template <class _SegmentedIterator>
79+
struct __has_random_access_local_iterator
80+
: __has_random_access_iterator_category<
81+
typename __segmented_iterator_traits< _SegmentedIterator >::__local_iterator > {};
82+
7783
_LIBCPP_END_NAMESPACE_STD
7884

7985
#endif // _LIBCPP___SEGMENTED_ITERATOR_H

libcxx/include/module.modulemap.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -437,6 +437,7 @@ module std [system] {
437437
module find_segment_if { header "__algorithm/find_segment_if.h" }
438438
module find { header "__algorithm/find.h" }
439439
module for_each_n { header "__algorithm/for_each_n.h" }
440+
module for_each_n_segment { header "__algorithm/for_each_n_segment.h" }
440441
module for_each_segment { header "__algorithm/for_each_segment.h" }
441442
module for_each { header "__algorithm/for_each.h" }
442443
module generate_n { header "__algorithm/generate_n.h" }
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// UNSUPPORTED: c++03, c++11, c++14, c++17
10+
11+
#include <algorithm>
12+
#include <cstddef>
13+
#include <deque>
14+
#include <list>
15+
#include <ranges>
16+
#include <string>
17+
#include <vector>
18+
19+
#include <benchmark/benchmark.h>
20+
21+
int main(int argc, char** argv) {
22+
auto std_for_each_n = [](auto first, auto n, auto f) { return std::for_each_n(first, n, f); };
23+
24+
// std::for_each_n
25+
{
26+
auto bm = []<class Container>(std::string name, auto for_each_n) {
27+
using ElemType = typename Container::value_type;
28+
benchmark::RegisterBenchmark(
29+
name,
30+
[for_each_n](auto& st) {
31+
std::size_t const n = st.range(0);
32+
Container c(n, 1);
33+
auto first = c.begin();
34+
35+
for ([[maybe_unused]] auto _ : st) {
36+
benchmark::DoNotOptimize(c);
37+
auto result = for_each_n(first, n, [](ElemType& x) { x = std::clamp<ElemType>(x, 10, 100); });
38+
benchmark::DoNotOptimize(result);
39+
}
40+
})
41+
->Arg(8)
42+
->Arg(32)
43+
->Arg(50) // non power-of-two
44+
->Arg(1024)
45+
->Arg(4096)
46+
->Arg(8192)
47+
->Arg(1 << 14)
48+
->Arg(1 << 16)
49+
->Arg(1 << 18);
50+
};
51+
bm.operator()<std::vector<int>>("std::for_each_n(vector<int>)", std_for_each_n);
52+
bm.operator()<std::deque<int>>("std::for_each_n(deque<int>)", std_for_each_n);
53+
bm.operator()<std::list<int>>("std::for_each_n(list<int>)", std_for_each_n);
54+
}
55+
56+
// std::for_each_n for join_view
57+
{
58+
auto bm = []<class Container>(std::string name, auto for_each_n) {
59+
using C1 = typename Container::value_type;
60+
using ElemType = typename C1::value_type;
61+
benchmark::RegisterBenchmark(
62+
name,
63+
[for_each_n](auto& st) {
64+
std::size_t const size = st.range(0);
65+
std::size_t const seg_size = 256;
66+
std::size_t const segments = (size + seg_size - 1) / seg_size;
67+
Container c(segments);
68+
for (std::size_t i = 0, n = size; i < segments; ++i, n -= seg_size) {
69+
c[i].resize(std::min(seg_size, n), ElemType(1));
70+
}
71+
72+
auto view = c | std::views::join;
73+
auto first = view.begin();
74+
75+
for ([[maybe_unused]] auto _ : st) {
76+
benchmark::DoNotOptimize(c);
77+
auto result = for_each_n(first, size, [](ElemType& x) { x = std::clamp<ElemType>(x, 10, 100); });
78+
benchmark::DoNotOptimize(result);
79+
}
80+
})
81+
->Arg(8)
82+
->Arg(32)
83+
->Arg(50) // non power-of-two
84+
->Arg(1024)
85+
->Arg(4096)
86+
->Arg(8192)
87+
->Arg(1 << 14)
88+
->Arg(1 << 16)
89+
->Arg(1 << 18);
90+
};
91+
bm.operator()<std::vector<std::vector<int>>>("std::for_each_n(join_view(vector<vector<int>>))", std_for_each_n);
92+
}
93+
94+
benchmark::Initialize(&argc, argv);
95+
benchmark::RunSpecifiedBenchmarks();
96+
benchmark::Shutdown();
97+
return 0;
98+
}

0 commit comments

Comments
 (0)