Revert "[libc++] Replace __libcpp_{ctz, clz} with __builtin_{ctzg, clzg} (llvm#133920)"

AnthonyLatsis · AnthonyLatsis · commit cd7e7f3d321f · 2025-06-27T09:13:58.000+01:00
This reverts commit 9e3982d.
diff --git a/libcxx/include/__algorithm/sort.h b/libcxx/include/__algorithm/sort.h
@@ -359,10 +359,10 @@ inline _LIBCPP_HIDE_FROM_ABI void __swap_bitmap_pos(
   // Swap one pair on each iteration as long as both bitsets have at least one
   // element for swapping.
   while (__left_bitset != 0 && __right_bitset != 0) {
-    difference_type __tz_left  = std::__countr_zero(__left_bitset);
-    __left_bitset              = std::__libcpp_blsr(__left_bitset);
-    difference_type __tz_right = std::__countr_zero(__right_bitset);
-    __right_bitset             = std::__libcpp_blsr(__right_bitset);
+    difference_type __tz_left  = __libcpp_ctz(__left_bitset);
+    __left_bitset              = __libcpp_blsr(__left_bitset);
+    difference_type __tz_right = __libcpp_ctz(__right_bitset);
+    __right_bitset             = __libcpp_blsr(__right_bitset);
     _Ops::iter_swap(__first + __tz_left, __last - __tz_right);
   }
 }
@@ -458,7 +458,7 @@ inline _LIBCPP_HIDE_FROM_ABI void __swap_bitmap_pos_within(
     // Swap within the left side.  Need to find set positions in the reverse
     // order.
     while (__left_bitset != 0) {
-      difference_type __tz_left = __detail::__block_size - 1 - std::__countl_zero(__left_bitset);
+      difference_type __tz_left = __detail::__block_size - 1 - __libcpp_clz(__left_bitset);
       __left_bitset &= (static_cast<uint64_t>(1) << __tz_left) - 1;
       _RandomAccessIterator __it = __first + __tz_left;
       if (__it != __lm1) {
@@ -471,7 +471,7 @@ inline _LIBCPP_HIDE_FROM_ABI void __swap_bitmap_pos_within(
     // Swap within the right side.  Need to find set positions in the reverse
     // order.
     while (__right_bitset != 0) {
-      difference_type __tz_right = __detail::__block_size - 1 - std::__countl_zero(__right_bitset);
+      difference_type __tz_right = __detail::__block_size - 1 - __libcpp_clz(__right_bitset);
       __right_bitset &= (static_cast<uint64_t>(1) << __tz_right) - 1;
       _RandomAccessIterator __it = __lm1 - __tz_right;
       if (__it != __first) {
diff --git a/libcxx/include/__bit/countl.h b/libcxx/include/__bit/countl.h
@@ -6,6 +6,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+// TODO: __builtin_clzg is available since Clang 19 and GCC 14. When support for older versions is dropped, we can
+//  refactor this code to exclusively use __builtin_clzg.
+
 #ifndef _LIBCPP___BIT_COUNTL_H
 #define _LIBCPP___BIT_COUNTL_H
 
@@ -24,10 +27,69 @@ _LIBCPP_PUSH_MACROS
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned __x) _NOEXCEPT {
+  return __builtin_clz(__x);
+}
+
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned long __x) _NOEXCEPT {
+  return __builtin_clzl(__x);
+}
+
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned long long __x) _NOEXCEPT {
+  return __builtin_clzll(__x);
+}
+
+#if _LIBCPP_HAS_INT128
+inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(__uint128_t __x) _NOEXCEPT {
+#  if __has_builtin(__builtin_clzg)
+  return __builtin_clzg(__x);
+#  else
+  // The function is written in this form due to C++ constexpr limitations.
+  // The algorithm:
+  // - Test whether any bit in the high 64-bits is set
+  // - No bits set:
+  //   - The high 64-bits contain 64 leading zeros,
+  //   - Add the result of the low 64-bits.
+  // - Any bits set:
+  //   - The number of leading zeros of the input is the number of leading
+  //     zeros in the high 64-bits.
+  return ((__x >> 64) == 0) ? (64 + __builtin_clzll(static_cast<unsigned long long>(__x)))
+                            : __builtin_clzll(static_cast<unsigned long long>(__x >> 64));
+#  endif
+}
+#endif // _LIBCPP_HAS_INT128
+
 template <class _Tp>
 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countl_zero(_Tp __t) _NOEXCEPT {
   static_assert(__libcpp_is_unsigned_integer<_Tp>::value, "__countl_zero requires an unsigned integer type");
+#if __has_builtin(__builtin_clzg)
   return __builtin_clzg(__t, numeric_limits<_Tp>::digits);
+#else  // __has_builtin(__builtin_clzg)
+  if (__t == 0)
+    return numeric_limits<_Tp>::digits;
+
+  if (sizeof(_Tp) <= sizeof(unsigned int))
+    return std::__libcpp_clz(static_cast<unsigned int>(__t)) -
+           (numeric_limits<unsigned int>::digits - numeric_limits<_Tp>::digits);
+  else if (sizeof(_Tp) <= sizeof(unsigned long))
+    return std::__libcpp_clz(static_cast<unsigned long>(__t)) -
+           (numeric_limits<unsigned long>::digits - numeric_limits<_Tp>::digits);
+  else if (sizeof(_Tp) <= sizeof(unsigned long long))
+    return std::__libcpp_clz(static_cast<unsigned long long>(__t)) -
+           (numeric_limits<unsigned long long>::digits - numeric_limits<_Tp>::digits);
+  else {
+    int __ret                      = 0;
+    int __iter                     = 0;
+    const unsigned int __ulldigits = numeric_limits<unsigned long long>::digits;
+    while (true) {
+      __t = std::__rotl(__t, __ulldigits);
+      if ((__iter = std::__countl_zero(static_cast<unsigned long long>(__t))) != __ulldigits)
+        break;
+      __ret += __iter;
+    }
+    return __ret + __iter;
+  }
+#endif // __has_builtin(__builtin_clzg)
 }
 
 #if _LIBCPP_STD_VER >= 20
diff --git a/libcxx/include/__bit/countr.h b/libcxx/include/__bit/countr.h
@@ -6,10 +6,14 @@
 //
 //===----------------------------------------------------------------------===//
 
+// TODO: __builtin_ctzg is available since Clang 19 and GCC 14. When support for older versions is dropped, we can
+//  refactor this code to exclusively use __builtin_ctzg.
+
 #ifndef _LIBCPP___BIT_COUNTR_H
 #define _LIBCPP___BIT_COUNTR_H
 
 #include <__assert>
+#include <__bit/rotate.h>
 #include <__concepts/arithmetic.h>
 #include <__config>
 #include <__type_traits/is_unsigned.h>
@@ -24,10 +28,55 @@ _LIBCPP_PUSH_MACROS
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned __x) _NOEXCEPT {
+  return __builtin_ctz(__x);
+}
+
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned long __x) _NOEXCEPT {
+  return __builtin_ctzl(__x);
+}
+
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned long long __x) _NOEXCEPT {
+  return __builtin_ctzll(__x);
+}
+
+// A constexpr implementation for C++11 and later (using clang extensions for constexpr support)
+// Precondition: __t != 0 (the caller __countr_zero handles __t == 0 as a special case)
+template <class _Tp>
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __countr_zero_impl(_Tp __t) _NOEXCEPT {
+  _LIBCPP_ASSERT_INTERNAL(__t != 0, "__countr_zero_impl called with zero value");
+  static_assert(is_unsigned<_Tp>::value, "__countr_zero_impl only works with unsigned types");
+  if _LIBCPP_CONSTEXPR (sizeof(_Tp) <= sizeof(unsigned int)) {
+    return std::__libcpp_ctz(static_cast<unsigned int>(__t));
+  } else if _LIBCPP_CONSTEXPR (sizeof(_Tp) <= sizeof(unsigned long)) {
+    return std::__libcpp_ctz(static_cast<unsigned long>(__t));
+  } else if _LIBCPP_CONSTEXPR (sizeof(_Tp) <= sizeof(unsigned long long)) {
+    return std::__libcpp_ctz(static_cast<unsigned long long>(__t));
+  } else {
+#if _LIBCPP_STD_VER == 11
+    unsigned long long __ull       = static_cast<unsigned long long>(__t);
+    const unsigned int __ulldigits = numeric_limits<unsigned long long>::digits;
+    return __ull == 0ull ? __ulldigits + std::__countr_zero_impl<_Tp>(__t >> __ulldigits) : std::__libcpp_ctz(__ull);
+#else
+    int __ret                      = 0;
+    const unsigned int __ulldigits = numeric_limits<unsigned long long>::digits;
+    while (static_cast<unsigned long long>(__t) == 0uLL) {
+      __ret += __ulldigits;
+      __t >>= __ulldigits;
+    }
+    return __ret + std::__libcpp_ctz(static_cast<unsigned long long>(__t));
+#endif
+  }
+}
+
 template <class _Tp>
 [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __countr_zero(_Tp __t) _NOEXCEPT {
   static_assert(is_unsigned<_Tp>::value, "__countr_zero only works with unsigned types");
+#if __has_builtin(__builtin_ctzg) // TODO (LLVM 21): This can be dropped once we only support Clang >= 19.
   return __builtin_ctzg(__t, numeric_limits<_Tp>::digits);
+#else
+  return __t != 0 ? std::__countr_zero_impl(__t) : numeric_limits<_Tp>::digits;
+#endif
 }
 
 #if _LIBCPP_STD_VER >= 20
diff --git a/libcxx/include/__bit_reference b/libcxx/include/__bit_reference
@@ -165,7 +165,7 @@ public:
 
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void flip() _NOEXCEPT { *__seg_ ^= __mask_; }
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator<_Cp, false> operator&() const _NOEXCEPT {
-    return __bit_iterator<_Cp, false>(__seg_, static_cast<unsigned>(std::__countr_zero(__mask_)));
+    return __bit_iterator<_Cp, false>(__seg_, static_cast<unsigned>(std::__libcpp_ctz(__mask_)));
   }
 
 private:
@@ -234,7 +234,7 @@ public:
   }
 
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator<_Cp, true> operator&() const _NOEXCEPT {
-    return __bit_iterator<_Cp, true>(__seg_, static_cast<unsigned>(std::__countr_zero(__mask_)));
+    return __bit_iterator<_Cp, true>(__seg_, static_cast<unsigned>(std::__libcpp_ctz(__mask_)));
   }
 
 private:
diff --git a/libcxx/include/__charconv/to_chars_integral.h b/libcxx/include/__charconv/to_chars_integral.h
@@ -114,8 +114,9 @@ struct _LIBCPP_HIDDEN __integral<2> {
   template <typename _Tp>
   _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR int __width(_Tp __value) _NOEXCEPT {
     // If value == 0 still need one digit. If the value != this has no
-    // effect since the code scans for the most significant bit set.
-    return numeric_limits<_Tp>::digits - std::__countl_zero(__value | 1);
+    // effect since the code scans for the most significant bit set. (Note
+    // that __libcpp_clz doesn't work for 0.)
+    return numeric_limits<_Tp>::digits - std::__libcpp_clz(__value | 1);
   }
 
   template <typename _Tp>
@@ -149,8 +150,9 @@ struct _LIBCPP_HIDDEN __integral<8> {
   template <typename _Tp>
   _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR int __width(_Tp __value) _NOEXCEPT {
     // If value == 0 still need one digit. If the value != this has no
-    // effect since the code scans for the most significat bit set.
-    return ((numeric_limits<_Tp>::digits - std::__countl_zero(__value | 1)) + 2) / 3;
+    // effect since the code scans for the most significat bit set. (Note
+    // that __libcpp_clz doesn't work for 0.)
+    return ((numeric_limits<_Tp>::digits - std::__libcpp_clz(__value | 1)) + 2) / 3;
   }
 
   template <typename _Tp>
@@ -184,8 +186,9 @@ struct _LIBCPP_HIDDEN __integral<16> {
   template <typename _Tp>
   _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR int __width(_Tp __value) _NOEXCEPT {
     // If value == 0 still need one digit. If the value != this has no
-    // effect since the code scans for the most significat bit set.
-    return (numeric_limits<_Tp>::digits - std::__countl_zero(__value | 1) + 3) / 4;
+    // effect since the code scans for the most significat bit set. (Note
+    // that __libcpp_clz doesn't work for 0.)
+    return (numeric_limits<_Tp>::digits - std::__libcpp_clz(__value | 1) + 3) / 4;
   }
 
   template <typename _Tp>
diff --git a/libcxx/include/__charconv/traits.h b/libcxx/include/__charconv/traits.h
@@ -43,9 +43,12 @@ struct _LIBCPP_HIDDEN __traits_base<_Tp, __enable_if_t<sizeof(_Tp) <= sizeof(uin
   ///
   /// The algorithm is based on
   /// http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
-  /// Instead of using IntegerLogBase2 it uses __countl_zero.
+  /// Instead of using IntegerLogBase2 it uses __libcpp_clz. Since that
+  /// function requires its input to have at least one bit set the value of
+  /// zero is set to one. This means the first element of the lookup table is
+  /// zero.
   static _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI int __width(_Tp __v) {
-    auto __t = (32 - std::__countl_zero(static_cast<type>(__v | 1))) * 1233 >> 12;
+    auto __t = (32 - std::__libcpp_clz(static_cast<type>(__v | 1))) * 1233 >> 12;
     return __t - (__v < __itoa::__pow10_32[__t]) + 1;
   }
 
@@ -66,9 +69,12 @@ struct _LIBCPP_HIDDEN __traits_base<_Tp, __enable_if_t<sizeof(_Tp) == sizeof(uin
   ///
   /// The algorithm is based on
   /// http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
-  /// Instead of using IntegerLogBase2 it uses __countl_zero.
+  /// Instead of using IntegerLogBase2 it uses __libcpp_clz. Since that
+  /// function requires its input to have at least one bit set the value of
+  /// zero is set to one. This means the first element of the lookup table is
+  /// zero.
   static _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI int __width(_Tp __v) {
-    auto __t = (64 - std::__countl_zero(static_cast<type>(__v | 1))) * 1233 >> 12;
+    auto __t = (64 - std::__libcpp_clz(static_cast<type>(__v | 1))) * 1233 >> 12;
     return __t - (__v < __itoa::__pow10_64[__t]) + 1;
   }
 
@@ -90,12 +96,15 @@ struct _LIBCPP_HIDDEN __traits_base<_Tp, __enable_if_t<sizeof(_Tp) == sizeof(__u
   ///
   /// The algorithm is based on
   /// http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
-  /// Instead of using IntegerLogBase2 it uses __countl_zero.
+  /// Instead of using IntegerLogBase2 it uses __libcpp_clz. Since that
+  /// function requires its input to have at least one bit set the value of
+  /// zero is set to one. This means the first element of the lookup table is
+  /// zero.
   static _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI int __width(_Tp __v) {
     _LIBCPP_ASSERT_INTERNAL(
         __v > numeric_limits<uint64_t>::max(), "The optimizations for this algorithm fail when this isn't true.");
     // There's always a bit set in the upper 64-bits.
-    auto __t = (128 - std::__countl_zero(static_cast<uint64_t>(__v >> 64))) * 1233 >> 12;
+    auto __t = (128 - std::__libcpp_clz(static_cast<uint64_t>(__v >> 64))) * 1233 >> 12;
     _LIBCPP_ASSERT_INTERNAL(__t >= __itoa::__pow10_128_offset, "Index out of bounds");
     // __t is adjusted since the lookup table misses the lower entries.
     return __t - (__v < __itoa::__pow10_128[__t - __itoa::__pow10_128_offset]) + 1;
diff --git a/libcxx/include/__hash_table b/libcxx/include/__hash_table
@@ -147,7 +147,7 @@ inline _LIBCPP_HIDE_FROM_ABI size_t __constrain_hash(size_t __h, size_t __bc) {
 }
 
 inline _LIBCPP_HIDE_FROM_ABI size_t __next_hash_pow2(size_t __n) {
-  return __n < 2 ? __n : (size_t(1) << (numeric_limits<size_t>::digits - std::__countl_zero(__n - 1)));
+  return __n < 2 ? __n : (size_t(1) << (numeric_limits<size_t>::digits - __libcpp_clz(__n - 1)));
 }
 
 template <class _Tp, class _Hash, class _Equal, class _Alloc>

Original file line number	Diff line number	Diff line change
`@@ -165,7 +165,7 @@ public:`
`165`	`165`
`166`	`166`	`_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void flip() _NOEXCEPT { *__seg_ ^= __mask_; }`
`167`	`167`	`_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator<_Cp, false> operator&() const _NOEXCEPT {`
`168`		`- return __bit_iterator<_Cp, false>(__seg_, static_cast<unsigned>(std::__countr_zero(__mask_)));`
	`168`	`+ return __bit_iterator<_Cp, false>(__seg_, static_cast<unsigned>(std::__libcpp_ctz(__mask_)));`
`169`	`169`	`}`
`170`	`170`
`171`	`171`	`private:`
`@@ -234,7 +234,7 @@ public:`
`234`	`234`	`}`
`235`	`235`
`236`	`236`	`_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator<_Cp, true> operator&() const _NOEXCEPT {`
`237`		`- return __bit_iterator<_Cp, true>(__seg_, static_cast<unsigned>(std::__countr_zero(__mask_)));`
	`237`	`+ return __bit_iterator<_Cp, true>(__seg_, static_cast<unsigned>(std::__libcpp_ctz(__mask_)));`
`238`	`238`	`}`
`239`	`239`
`240`	`240`	`private:`
Original file line number	Diff line number	Diff line change
`@@ -147,7 +147,7 @@ inline _LIBCPP_HIDE_FROM_ABI size_t __constrain_hash(size_t __h, size_t __bc) {`
`147`	`147`	`}`
`148`	`148`
`149`	`149`	`inline _LIBCPP_HIDE_FROM_ABI size_t __next_hash_pow2(size_t __n) {`
`150`		`- return __n < 2 ? __n : (size_t(1) << (numeric_limits<size_t>::digits - std::__countl_zero(__n - 1)));`
	`150`	`+ return __n < 2 ? __n : (size_t(1) << (numeric_limits<size_t>::digits - __libcpp_clz(__n - 1)));`
`151`	`151`	`}`
`152`	`152`
`153`	`153`	`template <class _Tp, class _Hash, class _Equal, class _Alloc>`