-
Notifications
You must be signed in to change notification settings - Fork 13.3k
[libc++] Replace __libcpp_{ctz, clz} with __builtin_{ctzg, clzg} #133920
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
8fd4fb2
to
0e45298
Compare
0e45298
to
a61ad9e
Compare
cf38f04
to
60bbd97
Compare
The macOS 13 CI is currently blocking this PR:
Based on the CI logs, the environment is using AppleClang 15 (with Xcode 15.2), which does not support __builtin_{ctzg, clzg}.
|
@llvm/pr-subscribers-libcxx Author: Peng Liu (winner245) Changes
Closes #131179. Full diff: https://github.com/llvm/llvm-project/pull/133920.diff 7 Files Affected:
diff --git a/libcxx/include/__algorithm/sort.h b/libcxx/include/__algorithm/sort.h
index 4332b62544b40..77f347d37660c 100644
--- a/libcxx/include/__algorithm/sort.h
+++ b/libcxx/include/__algorithm/sort.h
@@ -357,10 +357,10 @@ inline _LIBCPP_HIDE_FROM_ABI void __swap_bitmap_pos(
// Swap one pair on each iteration as long as both bitsets have at least one
// element for swapping.
while (__left_bitset != 0 && __right_bitset != 0) {
- difference_type __tz_left = __libcpp_ctz(__left_bitset);
- __left_bitset = __libcpp_blsr(__left_bitset);
- difference_type __tz_right = __libcpp_ctz(__right_bitset);
- __right_bitset = __libcpp_blsr(__right_bitset);
+ difference_type __tz_left = std::__countr_zero(__left_bitset);
+ __left_bitset = std::__libcpp_blsr(__left_bitset);
+ difference_type __tz_right = std::__countr_zero(__right_bitset);
+ __right_bitset = std::__libcpp_blsr(__right_bitset);
_Ops::iter_swap(__first + __tz_left, __last - __tz_right);
}
}
@@ -456,7 +456,7 @@ inline _LIBCPP_HIDE_FROM_ABI void __swap_bitmap_pos_within(
// Swap within the left side. Need to find set positions in the reverse
// order.
while (__left_bitset != 0) {
- difference_type __tz_left = __detail::__block_size - 1 - __libcpp_clz(__left_bitset);
+ difference_type __tz_left = __detail::__block_size - 1 - std::__countl_zero(__left_bitset);
__left_bitset &= (static_cast<uint64_t>(1) << __tz_left) - 1;
_RandomAccessIterator __it = __first + __tz_left;
if (__it != __lm1) {
@@ -469,7 +469,7 @@ inline _LIBCPP_HIDE_FROM_ABI void __swap_bitmap_pos_within(
// Swap within the right side. Need to find set positions in the reverse
// order.
while (__right_bitset != 0) {
- difference_type __tz_right = __detail::__block_size - 1 - __libcpp_clz(__right_bitset);
+ difference_type __tz_right = __detail::__block_size - 1 - std::__countl_zero(__right_bitset);
__right_bitset &= (static_cast<uint64_t>(1) << __tz_right) - 1;
_RandomAccessIterator __it = __lm1 - __tz_right;
if (__it != __first) {
@@ -831,11 +831,11 @@ inline _LIBCPP_HIDE_FROM_ABI _Number __log2i(_Number __n) {
if (__n == 0)
return 0;
if (sizeof(__n) <= sizeof(unsigned))
- return sizeof(unsigned) * CHAR_BIT - 1 - __libcpp_clz(static_cast<unsigned>(__n));
+ return sizeof(unsigned) * CHAR_BIT - 1 - std::__countl_zero(static_cast<unsigned>(__n));
if (sizeof(__n) <= sizeof(unsigned long))
- return sizeof(unsigned long) * CHAR_BIT - 1 - __libcpp_clz(static_cast<unsigned long>(__n));
+ return sizeof(unsigned long) * CHAR_BIT - 1 - std::__countl_zero(static_cast<unsigned long>(__n));
if (sizeof(__n) <= sizeof(unsigned long long))
- return sizeof(unsigned long long) * CHAR_BIT - 1 - __libcpp_clz(static_cast<unsigned long long>(__n));
+ return sizeof(unsigned long long) * CHAR_BIT - 1 - std::__countl_zero(static_cast<unsigned long long>(__n));
_Number __log2 = 0;
while (__n > 1) {
diff --git a/libcxx/include/__bit/countl.h b/libcxx/include/__bit/countl.h
index d4df1d049b294..9c37749f92577 100644
--- a/libcxx/include/__bit/countl.h
+++ b/libcxx/include/__bit/countl.h
@@ -6,9 +6,6 @@
//
//===----------------------------------------------------------------------===//
-// TODO: __builtin_clzg is available since Clang 19 and GCC 14. When support for older versions is dropped, we can
-// refactor this code to exclusively use __builtin_clzg.
-
#ifndef _LIBCPP___BIT_COUNTL_H
#define _LIBCPP___BIT_COUNTL_H
@@ -27,69 +24,10 @@ _LIBCPP_PUSH_MACROS
_LIBCPP_BEGIN_NAMESPACE_STD
-[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned __x) _NOEXCEPT {
- return __builtin_clz(__x);
-}
-
-[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned long __x) _NOEXCEPT {
- return __builtin_clzl(__x);
-}
-
-[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned long long __x) _NOEXCEPT {
- return __builtin_clzll(__x);
-}
-
-#if _LIBCPP_HAS_INT128
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(__uint128_t __x) _NOEXCEPT {
-# if __has_builtin(__builtin_clzg)
- return __builtin_clzg(__x);
-# else
- // The function is written in this form due to C++ constexpr limitations.
- // The algorithm:
- // - Test whether any bit in the high 64-bits is set
- // - No bits set:
- // - The high 64-bits contain 64 leading zeros,
- // - Add the result of the low 64-bits.
- // - Any bits set:
- // - The number of leading zeros of the input is the number of leading
- // zeros in the high 64-bits.
- return ((__x >> 64) == 0) ? (64 + __builtin_clzll(static_cast<unsigned long long>(__x)))
- : __builtin_clzll(static_cast<unsigned long long>(__x >> 64));
-# endif
-}
-#endif // _LIBCPP_HAS_INT128
-
template <class _Tp>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countl_zero(_Tp __t) _NOEXCEPT {
static_assert(__libcpp_is_unsigned_integer<_Tp>::value, "__countl_zero requires an unsigned integer type");
-#if __has_builtin(__builtin_clzg)
return __builtin_clzg(__t, numeric_limits<_Tp>::digits);
-#else // __has_builtin(__builtin_clzg)
- if (__t == 0)
- return numeric_limits<_Tp>::digits;
-
- if (sizeof(_Tp) <= sizeof(unsigned int))
- return std::__libcpp_clz(static_cast<unsigned int>(__t)) -
- (numeric_limits<unsigned int>::digits - numeric_limits<_Tp>::digits);
- else if (sizeof(_Tp) <= sizeof(unsigned long))
- return std::__libcpp_clz(static_cast<unsigned long>(__t)) -
- (numeric_limits<unsigned long>::digits - numeric_limits<_Tp>::digits);
- else if (sizeof(_Tp) <= sizeof(unsigned long long))
- return std::__libcpp_clz(static_cast<unsigned long long>(__t)) -
- (numeric_limits<unsigned long long>::digits - numeric_limits<_Tp>::digits);
- else {
- int __ret = 0;
- int __iter = 0;
- const unsigned int __ulldigits = numeric_limits<unsigned long long>::digits;
- while (true) {
- __t = std::__rotl(__t, __ulldigits);
- if ((__iter = std::__countl_zero(static_cast<unsigned long long>(__t))) != __ulldigits)
- break;
- __ret += __iter;
- }
- return __ret + __iter;
- }
-#endif // __has_builtin(__builtin_clzg)
}
#if _LIBCPP_STD_VER >= 20
diff --git a/libcxx/include/__bit/countr.h b/libcxx/include/__bit/countr.h
index 46c43921fc60d..f87175971bed3 100644
--- a/libcxx/include/__bit/countr.h
+++ b/libcxx/include/__bit/countr.h
@@ -6,14 +6,10 @@
//
//===----------------------------------------------------------------------===//
-// TODO: __builtin_ctzg is available since Clang 19 and GCC 14. When support for older versions is dropped, we can
-// refactor this code to exclusively use __builtin_ctzg.
-
#ifndef _LIBCPP___BIT_COUNTR_H
#define _LIBCPP___BIT_COUNTR_H
#include <__assert>
-#include <__bit/rotate.h>
#include <__concepts/arithmetic.h>
#include <__config>
#include <__type_traits/is_unsigned.h>
@@ -28,55 +24,10 @@ _LIBCPP_PUSH_MACROS
_LIBCPP_BEGIN_NAMESPACE_STD
-[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned __x) _NOEXCEPT {
- return __builtin_ctz(__x);
-}
-
-[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned long __x) _NOEXCEPT {
- return __builtin_ctzl(__x);
-}
-
-[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned long long __x) _NOEXCEPT {
- return __builtin_ctzll(__x);
-}
-
-// A constexpr implementation for C++11 and later (using clang extensions for constexpr support)
-// Precondition: __t != 0 (the caller __countr_zero handles __t == 0 as a special case)
-template <class _Tp>
-[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __countr_zero_impl(_Tp __t) _NOEXCEPT {
- _LIBCPP_ASSERT_INTERNAL(__t != 0, "__countr_zero_impl called with zero value");
- static_assert(is_unsigned<_Tp>::value, "__countr_zero_impl only works with unsigned types");
- if _LIBCPP_CONSTEXPR (sizeof(_Tp) <= sizeof(unsigned int)) {
- return std::__libcpp_ctz(static_cast<unsigned int>(__t));
- } else if _LIBCPP_CONSTEXPR (sizeof(_Tp) <= sizeof(unsigned long)) {
- return std::__libcpp_ctz(static_cast<unsigned long>(__t));
- } else if _LIBCPP_CONSTEXPR (sizeof(_Tp) <= sizeof(unsigned long long)) {
- return std::__libcpp_ctz(static_cast<unsigned long long>(__t));
- } else {
-#if _LIBCPP_STD_VER == 11
- unsigned long long __ull = static_cast<unsigned long long>(__t);
- const unsigned int __ulldigits = numeric_limits<unsigned long long>::digits;
- return __ull == 0ull ? __ulldigits + std::__countr_zero_impl<_Tp>(__t >> __ulldigits) : std::__libcpp_ctz(__ull);
-#else
- int __ret = 0;
- const unsigned int __ulldigits = numeric_limits<unsigned long long>::digits;
- while (static_cast<unsigned long long>(__t) == 0uLL) {
- __ret += __ulldigits;
- __t >>= __ulldigits;
- }
- return __ret + std::__libcpp_ctz(static_cast<unsigned long long>(__t));
-#endif
- }
-}
-
template <class _Tp>
[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __countr_zero(_Tp __t) _NOEXCEPT {
static_assert(is_unsigned<_Tp>::value, "__countr_zero only works with unsigned types");
-#if __has_builtin(__builtin_ctzg) // TODO (LLVM 21): This can be dropped once we only support Clang >= 19.
return __builtin_ctzg(__t, numeric_limits<_Tp>::digits);
-#else
- return __t != 0 ? std::__countr_zero_impl(__t) : numeric_limits<_Tp>::digits;
-#endif
}
#if _LIBCPP_STD_VER >= 20
diff --git a/libcxx/include/__bit_reference b/libcxx/include/__bit_reference
index 552e0f5e4d799..a3e6defd405f8 100644
--- a/libcxx/include/__bit_reference
+++ b/libcxx/include/__bit_reference
@@ -165,7 +165,7 @@ public:
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void flip() _NOEXCEPT { *__seg_ ^= __mask_; }
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator<_Cp, false> operator&() const _NOEXCEPT {
- return __bit_iterator<_Cp, false>(__seg_, static_cast<unsigned>(std::__libcpp_ctz(__mask_)));
+ return __bit_iterator<_Cp, false>(__seg_, static_cast<unsigned>(std::__countr_zero(__mask_)));
}
private:
@@ -234,7 +234,7 @@ public:
}
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator<_Cp, true> operator&() const _NOEXCEPT {
- return __bit_iterator<_Cp, true>(__seg_, static_cast<unsigned>(std::__libcpp_ctz(__mask_)));
+ return __bit_iterator<_Cp, true>(__seg_, static_cast<unsigned>(std::__countr_zero(__mask_)));
}
private:
diff --git a/libcxx/include/__charconv/to_chars_integral.h b/libcxx/include/__charconv/to_chars_integral.h
index 710299df9b4da..9959d61336833 100644
--- a/libcxx/include/__charconv/to_chars_integral.h
+++ b/libcxx/include/__charconv/to_chars_integral.h
@@ -118,9 +118,8 @@ struct _LIBCPP_HIDDEN __integral<2> {
template <typename _Tp>
_LIBCPP_HIDE_FROM_ABI static constexpr int __width(_Tp __value) noexcept {
// If value == 0 still need one digit. If the value != this has no
- // effect since the code scans for the most significant bit set. (Note
- // that __libcpp_clz doesn't work for 0.)
- return numeric_limits<_Tp>::digits - std::__libcpp_clz(__value | 1);
+ // effect since the code scans for the most significant bit set.
+ return numeric_limits<_Tp>::digits - std::__countl_zero(__value | 1);
}
template <typename _Tp>
@@ -154,9 +153,8 @@ struct _LIBCPP_HIDDEN __integral<8> {
template <typename _Tp>
_LIBCPP_HIDE_FROM_ABI static constexpr int __width(_Tp __value) noexcept {
// If value == 0 still need one digit. If the value != this has no
- // effect since the code scans for the most significat bit set. (Note
- // that __libcpp_clz doesn't work for 0.)
- return ((numeric_limits<_Tp>::digits - std::__libcpp_clz(__value | 1)) + 2) / 3;
+ // effect since the code scans for the most significat bit set.
+ return ((numeric_limits<_Tp>::digits - std::__countl_zero(__value | 1)) + 2) / 3;
}
template <typename _Tp>
@@ -190,9 +188,8 @@ struct _LIBCPP_HIDDEN __integral<16> {
template <typename _Tp>
_LIBCPP_HIDE_FROM_ABI static constexpr int __width(_Tp __value) noexcept {
// If value == 0 still need one digit. If the value != this has no
- // effect since the code scans for the most significat bit set. (Note
- // that __libcpp_clz doesn't work for 0.)
- return (numeric_limits<_Tp>::digits - std::__libcpp_clz(__value | 1) + 3) / 4;
+ // effect since the code scans for the most significat bit set.
+ return (numeric_limits<_Tp>::digits - std::__countl_zero(__value | 1) + 3) / 4;
}
template <typename _Tp>
diff --git a/libcxx/include/__charconv/traits.h b/libcxx/include/__charconv/traits.h
index dd1fa2354436a..46a3a0f3d2e39 100644
--- a/libcxx/include/__charconv/traits.h
+++ b/libcxx/include/__charconv/traits.h
@@ -45,12 +45,12 @@ struct _LIBCPP_HIDDEN __traits_base<_Tp, __enable_if_t<sizeof(_Tp) <= sizeof(uin
///
/// The algorithm is based on
/// http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
- /// Instead of using IntegerLogBase2 it uses __libcpp_clz. Since that
- /// function requires its input to have at least one bit set the value of
- /// zero is set to one. This means the first element of the lookup table is
- /// zero.
+ /// Instead of using IntegerLogBase2 it uses __countl_zero. Previously, it
+ /// used __libcpp_clz. Since that function requires its input to have at
+ /// least one bit set the value of zero is set to one. This means the first
+ /// element of the lookup table is zero.
static _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI int __width(_Tp __v) {
- auto __t = (32 - std::__libcpp_clz(static_cast<type>(__v | 1))) * 1233 >> 12;
+ auto __t = (32 - std::__countl_zero(static_cast<type>(__v | 1))) * 1233 >> 12;
return __t - (__v < __itoa::__pow10_32[__t]) + 1;
}
@@ -71,12 +71,12 @@ struct _LIBCPP_HIDDEN __traits_base<_Tp, __enable_if_t<sizeof(_Tp) == sizeof(uin
///
/// The algorithm is based on
/// http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
- /// Instead of using IntegerLogBase2 it uses __libcpp_clz. Since that
- /// function requires its input to have at least one bit set the value of
- /// zero is set to one. This means the first element of the lookup table is
- /// zero.
+ /// Instead of using IntegerLogBase2 it uses __countl_zero. Previously, it
+ /// used __libcpp_clz. Since that function requires its input to have at
+ /// least one bit set the value of zero is set to one. This means the first
+ /// element of the lookup table is zero.
static _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI int __width(_Tp __v) {
- auto __t = (64 - std::__libcpp_clz(static_cast<type>(__v | 1))) * 1233 >> 12;
+ auto __t = (64 - std::__countl_zero(static_cast<type>(__v | 1))) * 1233 >> 12;
return __t - (__v < __itoa::__pow10_64[__t]) + 1;
}
@@ -98,15 +98,15 @@ struct _LIBCPP_HIDDEN __traits_base<_Tp, __enable_if_t<sizeof(_Tp) == sizeof(__u
///
/// The algorithm is based on
/// http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
- /// Instead of using IntegerLogBase2 it uses __libcpp_clz. Since that
- /// function requires its input to have at least one bit set the value of
- /// zero is set to one. This means the first element of the lookup table is
- /// zero.
+ /// Instead of using IntegerLogBase2 it uses __countl_zero. Previously, it
+ /// used __libcpp_clz. Since that function requires its input to have at
+ /// least one bit set the value of zero is set to one. This means the first
+ /// element of the lookup table is zero.
static _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI int __width(_Tp __v) {
_LIBCPP_ASSERT_INTERNAL(
__v > numeric_limits<uint64_t>::max(), "The optimizations for this algorithm fail when this isn't true.");
// There's always a bit set in the upper 64-bits.
- auto __t = (128 - std::__libcpp_clz(static_cast<uint64_t>(__v >> 64))) * 1233 >> 12;
+ auto __t = (128 - std::__countl_zero(static_cast<uint64_t>(__v >> 64))) * 1233 >> 12;
_LIBCPP_ASSERT_INTERNAL(__t >= __itoa::__pow10_128_offset, "Index out of bounds");
// __t is adjusted since the lookup table misses the lower entries.
return __t - (__v < __itoa::__pow10_128[__t - __itoa::__pow10_128_offset]) + 1;
diff --git a/libcxx/include/__hash_table b/libcxx/include/__hash_table
index d7b312f8774fc..0860c53f84636 100644
--- a/libcxx/include/__hash_table
+++ b/libcxx/include/__hash_table
@@ -147,7 +147,7 @@ inline _LIBCPP_HIDE_FROM_ABI size_t __constrain_hash(size_t __h, size_t __bc) {
}
inline _LIBCPP_HIDE_FROM_ABI size_t __next_hash_pow2(size_t __n) {
- return __n < 2 ? __n : (size_t(1) << (numeric_limits<size_t>::digits - __libcpp_clz(__n - 1)));
+ return __n < 2 ? __n : (size_t(1) << (numeric_limits<size_t>::digits - std::__countl_zero(__n - 1)));
}
template <class _Tp, class _Hash, class _Equal, class _Alloc>
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the cleanup! LGTM with nit addressed.
60bbd97
to
f4c514a
Compare
__libcpp_{ctz, clz} were previously used as fallbacks for __builtin_{ctzg, clzg} to ensure compatibility with older compilers (Clang 18 and earlier), as __builtin_{ctzg, clzg} became available in Clang 19. Now that support for Clang 18 has been officially dropped in #130142, we can safely replace all instances of __libcpp_{ctz, clz} with __count{l,r}_zero (which internally call __builtin_{ctzg, clzg}) and eliminate the fallback logic.

Closes #131179.