Skip to content

[libc++] Optimize ranges::minmax #87335

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions libcxx/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ set(BENCHMARK_TESTS
algorithms/make_heap.bench.cpp
algorithms/make_heap_then_sort_heap.bench.cpp
algorithms/min.bench.cpp
algorithms/minmax.bench.cpp
algorithms/min_max_element.bench.cpp
algorithms/mismatch.bench.cpp
algorithms/pop_heap.bench.cpp
Expand Down
68 changes: 68 additions & 0 deletions libcxx/benchmarks/algorithms/minmax.bench.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#include <algorithm>
#include <cassert>
#include <vector>

#include <benchmark/benchmark.h>

// Attaches the list of input sizes to a benchmark registration. Each size is passed through `Arg`, in ascending
// order: every value from 1 to 32, followed by a handful of larger sizes.
void run_sizes(auto benchmark) {
  // Dense coverage of the small sizes.
  for (int size = 1; size <= 32; ++size)
    benchmark->Arg(size);

  // Sparse coverage of the larger sizes.
  constexpr int larger_sizes[] = {64, 512, 1024, 4000, 4096, 5500, 64000, 65536, 70000};
  for (int size : larger_sizes)
    benchmark->Arg(size);
}

// Benchmarks `std::ranges::minmax` over a `std::vector<T>` whose length is taken from the benchmark argument
// (`state.range()`). Every element holds the value 3.
template <class T>
static void BM_std_minmax(benchmark::State& state) {
  const auto element_count = state.range();
  std::vector<T> input(element_count, 3);

  for (auto _ : state) {
    // Pass the container and the result through DoNotOptimize on every iteration so the call under test is kept.
    benchmark::DoNotOptimize(input);
    benchmark::DoNotOptimize(std::ranges::minmax(input));
  }
}
// Register BM_std_minmax for several integer element types (char, short, int, long long and their unsigned
// counterparts). Each registration gets its argument list from run_sizes via Apply.
BENCHMARK(BM_std_minmax<char>)->Apply(run_sizes);
BENCHMARK(BM_std_minmax<short>)->Apply(run_sizes);
BENCHMARK(BM_std_minmax<int>)->Apply(run_sizes);
BENCHMARK(BM_std_minmax<long long>)->Apply(run_sizes);
BENCHMARK(BM_std_minmax<unsigned char>)->Apply(run_sizes);
BENCHMARK(BM_std_minmax<unsigned short>)->Apply(run_sizes);
BENCHMARK(BM_std_minmax<unsigned int>)->Apply(run_sizes);
BENCHMARK(BM_std_minmax<unsigned long long>)->Apply(run_sizes);

// Program entry point, provided by the benchmark library's macro.
BENCHMARK_MAIN();
2 changes: 2 additions & 0 deletions libcxx/docs/ReleaseNotes/19.rst
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ Improvements and New Features
resulting in a performance increase of up to 1400x.
- The ``std::mismatch`` algorithm has been optimized for integral types, which can lead to performance improvements of
up to 40x.
- The ``std::ranges::minmax`` algorithm has been optimized for integral types, resulting in a performance increase of
up to 100x.

- The ``_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM`` macro has been added to make the declarations in ``<strstream>`` available.

Expand Down
3 changes: 3 additions & 0 deletions libcxx/include/__algorithm/comp.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ struct __less<void, void> {
}
};

// Flags `__less<>` applied to two operands of the same type `_Tp` as desugaring to the canonical operation
// identified by `__less_tag`.
template <class _Tp>
inline const bool __desugars_to_v<__less_tag, __less<>, _Tp, _Tp> = true;

_LIBCPP_END_NAMESPACE_STD

#endif // _LIBCPP___ALGORITHM_COMP_H
17 changes: 16 additions & 1 deletion libcxx/include/__algorithm/ranges_minmax.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@
#include <__iterator/projected.h>
#include <__ranges/access.h>
#include <__ranges/concepts.h>
#include <__type_traits/desugars_to.h>
#include <__type_traits/is_reference.h>
#include <__type_traits/is_trivially_copyable.h>
#include <__type_traits/remove_cvref.h>
#include <__utility/forward.h>
#include <__utility/move.h>
Expand Down Expand Up @@ -83,7 +85,20 @@ struct __fn {

_LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(__first != __last, "range has to contain at least one element");

if constexpr (forward_range<_Range>) {
// This optimization is not in minmax_element because clang doesn't see through the pointers and as a result doesn't
// vectorize the code.
if constexpr (contiguous_range<_Range> && is_integral_v<_ValueT> &&
__is_cheap_to_copy<_ValueT> & __is_identity<_Proj>::value &&
__desugars_to_v<__less_tag, _Comp, _ValueT, _ValueT>) {
minmax_result<_ValueT> __result = {__r[0], __r[0]};
for (auto __e : __r) {
if (__e < __result.min)
__result.min = __e;
if (__result.max < __e)
__result.max = __e;
}
return __result;
} else if constexpr (forward_range<_Range>) {
// Special-case the one element case. Avoid repeatedly initializing objects from the result of an iterator
// dereference when doing so might not be idempotent. The `if constexpr` avoids the extra branch in cases where
// it's not needed.
Expand Down
6 changes: 6 additions & 0 deletions libcxx/include/__functional/operations.h
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,9 @@ struct _LIBCPP_TEMPLATE_VIS less : __binary_function<_Tp, _Tp, bool> {
};
_LIBCPP_CTAD_SUPPORTED_FOR_TYPE(less);

// Flags `less<_Tp>` applied to two `_Tp` operands as desugaring to the canonical operation identified by
// `__less_tag`.
template <class _Tp>
inline const bool __desugars_to_v<__less_tag, less<_Tp>, _Tp, _Tp> = true;

#if _LIBCPP_STD_VER >= 14
template <>
struct _LIBCPP_TEMPLATE_VIS less<void> {
Expand All @@ -370,6 +373,9 @@ struct _LIBCPP_TEMPLATE_VIS less<void> {
}
typedef void is_transparent;
};

// Flags the transparent `less<>` as desugaring to the canonical operation identified by `__less_tag`, but only when
// both operands have the same type `_Tp`.
template <class _Tp>
inline const bool __desugars_to_v<__less_tag, less<>, _Tp, _Tp> = true;
#endif

#if _LIBCPP_STD_VER >= 14
Expand Down
3 changes: 3 additions & 0 deletions libcxx/include/__functional/ranges_operations.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,9 @@ struct greater_equal {
template <class _Tp, class _Up>
inline const bool __desugars_to_v<__equal_tag, ranges::equal_to, _Tp, _Up> = true;

// Flags `ranges::less` as desugaring to the canonical operation identified by `__less_tag` for any pair of operand
// types `_Tp` and `_Up`.
template <class _Tp, class _Up>
inline const bool __desugars_to_v<__less_tag, ranges::less, _Tp, _Up> = true;

#endif // _LIBCPP_STD_VER >= 20

_LIBCPP_END_NAMESPACE_STD
Expand Down
1 change: 1 addition & 0 deletions libcxx/include/__type_traits/desugars_to.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD
// Tags to represent the canonical operations
// Each tag is an empty type used only as the first template argument of `__desugars_to_v` to name the operation
// being asked about.
struct __equal_tag {};
struct __plus_tag {};
struct __less_tag {};

// This class template is used to determine whether an operation "desugars"
// (or boils down) to a given canonical operation.
Expand Down