Replace __libcpp_{ctz,clz} with __builtin_{ctzg,clzg}

winner245 · winner245 · commit cf38f04a5c35 · 2025-04-01T12:40:05.000-04:00
diff --git a/libcxx/include/__algorithm/sort.h b/libcxx/include/__algorithm/sort.h
@@ -357,10 +357,10 @@ inline _LIBCPP_HIDE_FROM_ABI void __swap_bitmap_pos(
   // Swap one pair on each iteration as long as both bitsets have at least one
   // element for swapping.
   while (__left_bitset != 0 && __right_bitset != 0) {
-    difference_type __tz_left  = __libcpp_ctz(__left_bitset);
-    __left_bitset              = __libcpp_blsr(__left_bitset);
-    difference_type __tz_right = __libcpp_ctz(__right_bitset);
-    __right_bitset             = __libcpp_blsr(__right_bitset);
+    difference_type __tz_left  = std::__countr_zero(__left_bitset);
+    __left_bitset              = std::__libcpp_blsr(__left_bitset);
+    difference_type __tz_right = std::__countr_zero(__right_bitset);
+    __right_bitset             = std::__libcpp_blsr(__right_bitset);
     _Ops::iter_swap(__first + __tz_left, __last - __tz_right);
   }
 }
@@ -456,7 +456,7 @@ inline _LIBCPP_HIDE_FROM_ABI void __swap_bitmap_pos_within(
     // Swap within the left side.  Need to find set positions in the reverse
     // order.
     while (__left_bitset != 0) {
-      difference_type __tz_left = __detail::__block_size - 1 - __libcpp_clz(__left_bitset);
+      difference_type __tz_left = __detail::__block_size - 1 - std::__countl_zero(__left_bitset);
       __left_bitset &= (static_cast<uint64_t>(1) << __tz_left) - 1;
       _RandomAccessIterator __it = __first + __tz_left;
       if (__it != __lm1) {
@@ -469,7 +469,7 @@ inline _LIBCPP_HIDE_FROM_ABI void __swap_bitmap_pos_within(
     // Swap within the right side.  Need to find set positions in the reverse
     // order.
     while (__right_bitset != 0) {
-      difference_type __tz_right = __detail::__block_size - 1 - __libcpp_clz(__right_bitset);
+      difference_type __tz_right = __detail::__block_size - 1 - std::__countl_zero(__right_bitset);
       __right_bitset &= (static_cast<uint64_t>(1) << __tz_right) - 1;
       _RandomAccessIterator __it = __lm1 - __tz_right;
       if (__it != __first) {
@@ -831,11 +831,11 @@ inline _LIBCPP_HIDE_FROM_ABI _Number __log2i(_Number __n) {
   if (__n == 0)
     return 0;
   if (sizeof(__n) <= sizeof(unsigned))
-    return sizeof(unsigned) * CHAR_BIT - 1 - __libcpp_clz(static_cast<unsigned>(__n));
+    return sizeof(unsigned) * CHAR_BIT - 1 - std::__countl_zero(static_cast<unsigned>(__n));
   if (sizeof(__n) <= sizeof(unsigned long))
-    return sizeof(unsigned long) * CHAR_BIT - 1 - __libcpp_clz(static_cast<unsigned long>(__n));
+    return sizeof(unsigned long) * CHAR_BIT - 1 - std::__countl_zero(static_cast<unsigned long>(__n));
   if (sizeof(__n) <= sizeof(unsigned long long))
-    return sizeof(unsigned long long) * CHAR_BIT - 1 - __libcpp_clz(static_cast<unsigned long long>(__n));
+    return sizeof(unsigned long long) * CHAR_BIT - 1 - std::__countl_zero(static_cast<unsigned long long>(__n));
 
   _Number __log2 = 0;
   while (__n > 1) {
diff --git a/libcxx/include/__bit/countl.h b/libcxx/include/__bit/countl.h
@@ -6,9 +6,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-// TODO: __builtin_clzg is available since Clang 19 and GCC 14. When support for older versions is dropped, we can
-//  refactor this code to exclusively use __builtin_clzg.
-
 #ifndef _LIBCPP___BIT_COUNTL_H
 #define _LIBCPP___BIT_COUNTL_H
 
@@ -27,69 +24,10 @@ _LIBCPP_PUSH_MACROS
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned __x) _NOEXCEPT {
-  return __builtin_clz(__x);
-}
-
-[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned long __x) _NOEXCEPT {
-  return __builtin_clzl(__x);
-}
-
-[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned long long __x) _NOEXCEPT {
-  return __builtin_clzll(__x);
-}
-
-#if _LIBCPP_HAS_INT128
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(__uint128_t __x) _NOEXCEPT {
-#  if __has_builtin(__builtin_clzg)
-  return __builtin_clzg(__x);
-#  else
-  // The function is written in this form due to C++ constexpr limitations.
-  // The algorithm:
-  // - Test whether any bit in the high 64-bits is set
-  // - No bits set:
-  //   - The high 64-bits contain 64 leading zeros,
-  //   - Add the result of the low 64-bits.
-  // - Any bits set:
-  //   - The number of leading zeros of the input is the number of leading
-  //     zeros in the high 64-bits.
-  return ((__x >> 64) == 0) ? (64 + __builtin_clzll(static_cast<unsigned long long>(__x)))
-                            : __builtin_clzll(static_cast<unsigned long long>(__x >> 64));
-#  endif
-}
-#endif // _LIBCPP_HAS_INT128
-
 template <class _Tp>
 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countl_zero(_Tp __t) _NOEXCEPT {
   static_assert(__libcpp_is_unsigned_integer<_Tp>::value, "__countl_zero requires an unsigned integer type");
-#if __has_builtin(__builtin_clzg)
   return __builtin_clzg(__t, numeric_limits<_Tp>::digits);
-#else  // __has_builtin(__builtin_clzg)
-  if (__t == 0)
-    return numeric_limits<_Tp>::digits;
-
-  if (sizeof(_Tp) <= sizeof(unsigned int))
-    return std::__libcpp_clz(static_cast<unsigned int>(__t)) -
-           (numeric_limits<unsigned int>::digits - numeric_limits<_Tp>::digits);
-  else if (sizeof(_Tp) <= sizeof(unsigned long))
-    return std::__libcpp_clz(static_cast<unsigned long>(__t)) -
-           (numeric_limits<unsigned long>::digits - numeric_limits<_Tp>::digits);
-  else if (sizeof(_Tp) <= sizeof(unsigned long long))
-    return std::__libcpp_clz(static_cast<unsigned long long>(__t)) -
-           (numeric_limits<unsigned long long>::digits - numeric_limits<_Tp>::digits);
-  else {
-    int __ret                      = 0;
-    int __iter                     = 0;
-    const unsigned int __ulldigits = numeric_limits<unsigned long long>::digits;
-    while (true) {
-      __t = std::__rotl(__t, __ulldigits);
-      if ((__iter = std::__countl_zero(static_cast<unsigned long long>(__t))) != __ulldigits)
-        break;
-      __ret += __iter;
-    }
-    return __ret + __iter;
-  }
-#endif // __has_builtin(__builtin_clzg)
 }
 
 #if _LIBCPP_STD_VER >= 20
diff --git a/libcxx/include/__bit/countr.h b/libcxx/include/__bit/countr.h
@@ -6,14 +6,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-// TODO: __builtin_ctzg is available since Clang 19 and GCC 14. When support for older versions is dropped, we can
-//  refactor this code to exclusively use __builtin_ctzg.
-
 #ifndef _LIBCPP___BIT_COUNTR_H
 #define _LIBCPP___BIT_COUNTR_H
 
 #include <__assert>
-#include <__bit/rotate.h>
 #include <__concepts/arithmetic.h>
 #include <__config>
 #include <__type_traits/is_unsigned.h>
@@ -28,55 +24,10 @@ _LIBCPP_PUSH_MACROS
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned __x) _NOEXCEPT {
-  return __builtin_ctz(__x);
-}
-
-[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned long __x) _NOEXCEPT {
-  return __builtin_ctzl(__x);
-}
-
-[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned long long __x) _NOEXCEPT {
-  return __builtin_ctzll(__x);
-}
-
-// A constexpr implementation for C++11 and later (using clang extensions for constexpr support)
-// Precondition: __t != 0 (the caller __countr_zero handles __t == 0 as a special case)
-template <class _Tp>
-[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __countr_zero_impl(_Tp __t) _NOEXCEPT {
-  _LIBCPP_ASSERT_INTERNAL(__t != 0, "__countr_zero_impl called with zero value");
-  static_assert(is_unsigned<_Tp>::value, "__countr_zero_impl only works with unsigned types");
-  if _LIBCPP_CONSTEXPR (sizeof(_Tp) <= sizeof(unsigned int)) {
-    return std::__libcpp_ctz(static_cast<unsigned int>(__t));
-  } else if _LIBCPP_CONSTEXPR (sizeof(_Tp) <= sizeof(unsigned long)) {
-    return std::__libcpp_ctz(static_cast<unsigned long>(__t));
-  } else if _LIBCPP_CONSTEXPR (sizeof(_Tp) <= sizeof(unsigned long long)) {
-    return std::__libcpp_ctz(static_cast<unsigned long long>(__t));
-  } else {
-#if _LIBCPP_STD_VER == 11
-    unsigned long long __ull       = static_cast<unsigned long long>(__t);
-    const unsigned int __ulldigits = numeric_limits<unsigned long long>::digits;
-    return __ull == 0ull ? __ulldigits + std::__countr_zero_impl<_Tp>(__t >> __ulldigits) : std::__libcpp_ctz(__ull);
-#else
-    int __ret                      = 0;
-    const unsigned int __ulldigits = numeric_limits<unsigned long long>::digits;
-    while (static_cast<unsigned long long>(__t) == 0uLL) {
-      __ret += __ulldigits;
-      __t >>= __ulldigits;
-    }
-    return __ret + std::__libcpp_ctz(static_cast<unsigned long long>(__t));
-#endif
-  }
-}
-
 template <class _Tp>
 [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __countr_zero(_Tp __t) _NOEXCEPT {
   static_assert(is_unsigned<_Tp>::value, "__countr_zero only works with unsigned types");
-#if __has_builtin(__builtin_ctzg) // TODO (LLVM 21): This can be dropped once we only support Clang >= 19.
   return __builtin_ctzg(__t, numeric_limits<_Tp>::digits);
-#else
-  return __t != 0 ? std::__countr_zero_impl(__t) : numeric_limits<_Tp>::digits;
-#endif
 }
 
 #if _LIBCPP_STD_VER >= 20
diff --git a/libcxx/include/__bit_reference b/libcxx/include/__bit_reference
@@ -165,7 +165,7 @@ public:
 
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void flip() _NOEXCEPT { *__seg_ ^= __mask_; }
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator<_Cp, false> operator&() const _NOEXCEPT {
-    return __bit_iterator<_Cp, false>(__seg_, static_cast<unsigned>(std::__libcpp_ctz(__mask_)));
+    return __bit_iterator<_Cp, false>(__seg_, static_cast<unsigned>(std::__countr_zero(__mask_)));
   }
 
 private:
@@ -234,7 +234,7 @@ public:
   }
 
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator<_Cp, true> operator&() const _NOEXCEPT {
-    return __bit_iterator<_Cp, true>(__seg_, static_cast<unsigned>(std::__libcpp_ctz(__mask_)));
+    return __bit_iterator<_Cp, true>(__seg_, static_cast<unsigned>(std::__countr_zero(__mask_)));
   }
 
 private:
diff --git a/libcxx/include/__charconv/to_chars_integral.h b/libcxx/include/__charconv/to_chars_integral.h
@@ -118,9 +118,8 @@ struct _LIBCPP_HIDDEN __integral<2> {
   template <typename _Tp>
   _LIBCPP_HIDE_FROM_ABI static constexpr int __width(_Tp __value) noexcept {
     // If value == 0 still need one digit. If the value != this has no
-    // effect since the code scans for the most significant bit set. (Note
-    // that __libcpp_clz doesn't work for 0.)
-    return numeric_limits<_Tp>::digits - std::__libcpp_clz(__value | 1);
+    // effect since the code scans for the most significant bit set.
+    return numeric_limits<_Tp>::digits - std::__countl_zero(__value | 1);
   }
 
   template <typename _Tp>
@@ -154,9 +153,8 @@ struct _LIBCPP_HIDDEN __integral<8> {
   template <typename _Tp>
   _LIBCPP_HIDE_FROM_ABI static constexpr int __width(_Tp __value) noexcept {
     // If value == 0 still need one digit. If the value != this has no
-    // effect since the code scans for the most significat bit set. (Note
-    // that __libcpp_clz doesn't work for 0.)
-    return ((numeric_limits<_Tp>::digits - std::__libcpp_clz(__value | 1)) + 2) / 3;
+    // effect since the code scans for the most significant bit set.
+    return ((numeric_limits<_Tp>::digits - std::__countl_zero(__value | 1)) + 2) / 3;
   }
 
   template <typename _Tp>
@@ -190,9 +188,8 @@ struct _LIBCPP_HIDDEN __integral<16> {
   template <typename _Tp>
   _LIBCPP_HIDE_FROM_ABI static constexpr int __width(_Tp __value) noexcept {
     // If value == 0 still need one digit. If the value != this has no
-    // effect since the code scans for the most significat bit set. (Note
-    // that __libcpp_clz doesn't work for 0.)
-    return (numeric_limits<_Tp>::digits - std::__libcpp_clz(__value | 1) + 3) / 4;
+    // effect since the code scans for the most significant bit set.
+    return (numeric_limits<_Tp>::digits - std::__countl_zero(__value | 1) + 3) / 4;
   }
 
   template <typename _Tp>
diff --git a/libcxx/include/__charconv/traits.h b/libcxx/include/__charconv/traits.h
@@ -45,12 +45,12 @@ struct _LIBCPP_HIDDEN __traits_base<_Tp, __enable_if_t<sizeof(_Tp) <= sizeof(uin
   ///
   /// The algorithm is based on
   /// http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
-  /// Instead of using IntegerLogBase2 it uses __libcpp_clz. Since that
-  /// function requires its input to have at least one bit set the value of
-  /// zero is set to one. This means the first element of the lookup table is
-  /// zero.
+  /// Instead of using IntegerLogBase2 it uses __countl_zero. The input is
+  /// or-ed with one, which dates from __libcpp_clz requiring at least one
+  /// set bit. As a result zero is treated as one, which means the first
+  /// element of the lookup table is zero.
   static _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI int __width(_Tp __v) {
-    auto __t = (32 - std::__libcpp_clz(static_cast<type>(__v | 1))) * 1233 >> 12;
+    auto __t = (32 - std::__countl_zero(static_cast<type>(__v | 1))) * 1233 >> 12;
     return __t - (__v < __itoa::__pow10_32[__t]) + 1;
   }
 
@@ -71,12 +71,12 @@ struct _LIBCPP_HIDDEN __traits_base<_Tp, __enable_if_t<sizeof(_Tp) == sizeof(uin
   ///
   /// The algorithm is based on
   /// http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
-  /// Instead of using IntegerLogBase2 it uses __libcpp_clz. Since that
-  /// function requires its input to have at least one bit set the value of
-  /// zero is set to one. This means the first element of the lookup table is
-  /// zero.
+  /// Instead of using IntegerLogBase2 it uses __countl_zero. The input is
+  /// or-ed with one, which dates from __libcpp_clz requiring at least one
+  /// set bit. As a result zero is treated as one, which means the first
+  /// element of the lookup table is zero.
   static _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI int __width(_Tp __v) {
-    auto __t = (64 - std::__libcpp_clz(static_cast<type>(__v | 1))) * 1233 >> 12;
+    auto __t = (64 - std::__countl_zero(static_cast<type>(__v | 1))) * 1233 >> 12;
     return __t - (__v < __itoa::__pow10_64[__t]) + 1;
   }
 
@@ -98,15 +98,15 @@ struct _LIBCPP_HIDDEN __traits_base<_Tp, __enable_if_t<sizeof(_Tp) == sizeof(__u
   ///
   /// The algorithm is based on
   /// http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
-  /// Instead of using IntegerLogBase2 it uses __libcpp_clz. Since that
-  /// function requires its input to have at least one bit set the value of
-  /// zero is set to one. This means the first element of the lookup table is
-  /// zero.
+  /// Instead of using IntegerLogBase2 it uses __countl_zero. Previously, it
+  /// used __libcpp_clz. Since that function requires its input to have at
+  /// least one bit set the value of zero is set to one. This means the first
+  /// element of the lookup table is zero.
   static _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI int __width(_Tp __v) {
     _LIBCPP_ASSERT_INTERNAL(
         __v > numeric_limits<uint64_t>::max(), "The optimizations for this algorithm fail when this isn't true.");
     // There's always a bit set in the upper 64-bits.
-    auto __t = (128 - std::__libcpp_clz(static_cast<uint64_t>(__v >> 64))) * 1233 >> 12;
+    auto __t = (128 - std::__countl_zero(static_cast<uint64_t>(__v >> 64))) * 1233 >> 12;
     _LIBCPP_ASSERT_INTERNAL(__t >= __itoa::__pow10_128_offset, "Index out of bounds");
     // __t is adjusted since the lookup table misses the lower entries.
     return __t - (__v < __itoa::__pow10_128[__t - __itoa::__pow10_128_offset]) + 1;
diff --git a/libcxx/include/__hash_table b/libcxx/include/__hash_table
@@ -147,7 +147,7 @@ inline _LIBCPP_HIDE_FROM_ABI size_t __constrain_hash(size_t __h, size_t __bc) {
 }
 
 inline _LIBCPP_HIDE_FROM_ABI size_t __next_hash_pow2(size_t __n) {
-  return __n < 2 ? __n : (size_t(1) << (numeric_limits<size_t>::digits - __libcpp_clz(__n - 1)));
+  return __n < 2 ? __n : (size_t(1) << (numeric_limits<size_t>::digits - std::__countl_zero(__n - 1)));
 }
 
 template <class _Tp, class _Hash, class _Equal, class _Alloc>

Original file line number	Diff line number	Diff line change
`@@ -165,7 +165,7 @@ public:`
`165`	`165`
`166`	`166`	`_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void flip() _NOEXCEPT { *__seg_ ^= __mask_; }`
`167`	`167`	`_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator<_Cp, false> operator&() const _NOEXCEPT {`
`168`		`- return __bit_iterator<_Cp, false>(__seg_, static_cast<unsigned>(std::__libcpp_ctz(__mask_)));`
	`168`	`+ return __bit_iterator<_Cp, false>(__seg_, static_cast<unsigned>(std::__countr_zero(__mask_)));`
`169`	`169`	`}`
`170`	`170`
`171`	`171`	`private:`
`@@ -234,7 +234,7 @@ public:`
`234`	`234`	`}`
`235`	`235`
`236`	`236`	`_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator<_Cp, true> operator&() const _NOEXCEPT {`
`237`		`- return __bit_iterator<_Cp, true>(__seg_, static_cast<unsigned>(std::__libcpp_ctz(__mask_)));`
	`237`	`+ return __bit_iterator<_Cp, true>(__seg_, static_cast<unsigned>(std::__countr_zero(__mask_)));`
`238`	`238`	`}`
`239`	`239`
`240`	`240`	`private:`
Original file line number	Diff line number	Diff line change
`@@ -147,7 +147,7 @@ inline _LIBCPP_HIDE_FROM_ABI size_t __constrain_hash(size_t __h, size_t __bc) {`
`147`	`147`	`}`
`148`	`148`
`149`	`149`	`inline _LIBCPP_HIDE_FROM_ABI size_t __next_hash_pow2(size_t __n) {`
`150`		`- return __n < 2 ? __n : (size_t(1) << (numeric_limits<size_t>::digits - __libcpp_clz(__n - 1)));`
	`150`	`+ return __n < 2 ? __n : (size_t(1) << (numeric_limits<size_t>::digits - std::__countl_zero(__n - 1)));`
`151`	`151`	`}`
`152`	`152`
`153`	`153`	`template <class _Tp, class _Hash, class _Equal, class _Alloc>`