-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[ADT] Simplify countr_zero and countl_zero with constexpr if (NFC) #141517
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[ADT] Simplify countr_zero and countl_zero with constexpr if (NFC) #141517
Conversation
The high-level idea in countr_zero and countl_zero is to use the intrinsic if available and fall back otherwise, but the actual implementation is unnecessarily complicated. Specifically, without this patch, we are going through a long chain of macros to check the availability of _BitScanForward64: #if defined(__GNUC__) || defined(_MSC_VER) #if !defined(_MSC_VER) || defined(_M_X64) #if __has_builtin(__builtin_ctzll) || defined(__GNUC__) ... #elif defined(_MSC_VER) _BitScanForward64(...); #endif #endif #endif This basically says we can use _BitScanForward64 if: defined(_MSC_VER) && defined(_M_X64) This patch simplifies all this with "constexpr if" and consolidates the implementation into the body of countr_zero and countl_zero.
@llvm/pr-subscribers-llvm-adt Author: Kazu Hirata (kazutakahirata) ChangesThe high-level idea in countr_zero and countl_zero is to use the Specifically, without this patch, we are going through a long chain of #if defined(GNUC) || defined(_MSC_VER) This basically says we can use _BitScanForward64 if: defined(_MSC_VER) && defined(_M_X64) This patch simplifies all this with "constexpr if" and consolidates Full diff: https://github.com/llvm/llvm-project/pull/141517.diff 1 Files Affected:
diff --git a/llvm/include/llvm/ADT/bit.h b/llvm/include/llvm/ADT/bit.h
index 8544efb5c45d4..b952825e574bc 100644
--- a/llvm/include/llvm/ADT/bit.h
+++ b/llvm/include/llvm/ADT/bit.h
@@ -148,36 +148,20 @@ template <typename T, typename = std::enable_if_t<std::is_unsigned_v<T>>>
return (Value != 0) && ((Value & (Value - 1)) == 0);
}
-namespace detail {
-template <typename T, std::size_t SizeOfT> struct TrailingZerosCounter {
- static unsigned count(T Val) {
- if (!Val)
- return std::numeric_limits<T>::digits;
- if (Val & 0x1)
- return 0;
-
- // Bisection method.
- unsigned ZeroBits = 0;
- T Shift = std::numeric_limits<T>::digits >> 1;
- T Mask = std::numeric_limits<T>::max() >> Shift;
- while (Shift) {
- if ((Val & Mask) == 0) {
- Val >>= Shift;
- ZeroBits |= Shift;
- }
- Shift >>= 1;
- Mask >>= Shift;
- }
- return ZeroBits;
- }
-};
-
-#if defined(__GNUC__) || defined(_MSC_VER)
-template <typename T> struct TrailingZerosCounter<T, 4> {
- static unsigned count(T Val) {
- if (Val == 0)
- return 32;
+/// Count number of 0's from the least significant bit to the most
+/// stopping at the first 1.
+///
+/// Only unsigned integral types are allowed.
+///
+/// Returns std::numeric_limits<T>::digits on an input of 0.
+template <typename T> [[nodiscard]] int countr_zero(T Val) {
+ static_assert(std::is_unsigned_v<T>,
+ "Only unsigned integral types are allowed.");
+ if (!Val)
+ return std::numeric_limits<T>::digits;
+ // Use the intrinsic if available.
+ if constexpr (sizeof(T) == 4) {
#if __has_builtin(__builtin_ctz) || defined(__GNUC__)
return __builtin_ctz(Val);
#elif defined(_MSC_VER)
@@ -185,65 +169,45 @@ template <typename T> struct TrailingZerosCounter<T, 4> {
_BitScanForward(&Index, Val);
return Index;
#endif
- }
-};
-
-#if !defined(_MSC_VER) || defined(_M_X64)
-template <typename T> struct TrailingZerosCounter<T, 8> {
- static unsigned count(T Val) {
- if (Val == 0)
- return 64;
-
+ } else if constexpr (sizeof(T) == 8) {
#if __has_builtin(__builtin_ctzll) || defined(__GNUC__)
return __builtin_ctzll(Val);
-#elif defined(_MSC_VER)
+#elif defined(_MSC_VER) && defined(_M_X64)
unsigned long Index;
_BitScanForward64(&Index, Val);
return Index;
#endif
}
-};
-#endif
-#endif
-} // namespace detail
-/// Count number of 0's from the least significant bit to the most
+ // Fall back to the bisection method.
+ unsigned ZeroBits = 0;
+ T Shift = std::numeric_limits<T>::digits >> 1;
+ T Mask = std::numeric_limits<T>::max() >> Shift;
+ while (Shift) {
+ if ((Val & Mask) == 0) {
+ Val >>= Shift;
+ ZeroBits |= Shift;
+ }
+ Shift >>= 1;
+ Mask >>= Shift;
+ }
+ return ZeroBits;
+}
+
+/// Count number of 0's from the most significant bit to the least
/// stopping at the first 1.
///
/// Only unsigned integral types are allowed.
///
/// Returns std::numeric_limits<T>::digits on an input of 0.
-template <typename T> [[nodiscard]] int countr_zero(T Val) {
+template <typename T> [[nodiscard]] int countl_zero(T Val) {
static_assert(std::is_unsigned_v<T>,
"Only unsigned integral types are allowed.");
- return llvm::detail::TrailingZerosCounter<T, sizeof(T)>::count(Val);
-}
-
-namespace detail {
-template <typename T, std::size_t SizeOfT> struct LeadingZerosCounter {
- static unsigned count(T Val) {
- if (!Val)
- return std::numeric_limits<T>::digits;
-
- // Bisection method.
- unsigned ZeroBits = 0;
- for (T Shift = std::numeric_limits<T>::digits >> 1; Shift; Shift >>= 1) {
- T Tmp = Val >> Shift;
- if (Tmp)
- Val = Tmp;
- else
- ZeroBits |= Shift;
- }
- return ZeroBits;
- }
-};
-
-#if defined(__GNUC__) || defined(_MSC_VER)
-template <typename T> struct LeadingZerosCounter<T, 4> {
- static unsigned count(T Val) {
- if (Val == 0)
- return 32;
+ if (!Val)
+ return std::numeric_limits<T>::digits;
+ // Use the intrinsic if available.
+ if constexpr (sizeof(T) == 4) {
#if __has_builtin(__builtin_clz) || defined(__GNUC__)
return __builtin_clz(Val);
#elif defined(_MSC_VER)
@@ -251,38 +215,26 @@ template <typename T> struct LeadingZerosCounter<T, 4> {
_BitScanReverse(&Index, Val);
return Index ^ 31;
#endif
- }
-};
-
-#if !defined(_MSC_VER) || defined(_M_X64)
-template <typename T> struct LeadingZerosCounter<T, 8> {
- static unsigned count(T Val) {
- if (Val == 0)
- return 64;
-
+ } else if constexpr (sizeof(T) == 8) {
#if __has_builtin(__builtin_clzll) || defined(__GNUC__)
return __builtin_clzll(Val);
-#elif defined(_MSC_VER)
+#elif defined(_MSC_VER) && defined(_M_X64)
unsigned long Index;
_BitScanReverse64(&Index, Val);
return Index ^ 63;
#endif
}
-};
-#endif
-#endif
-} // namespace detail
-/// Count number of 0's from the most significant bit to the least
-/// stopping at the first 1.
-///
-/// Only unsigned integral types are allowed.
-///
-/// Returns std::numeric_limits<T>::digits on an input of 0.
-template <typename T> [[nodiscard]] int countl_zero(T Val) {
- static_assert(std::is_unsigned_v<T>,
- "Only unsigned integral types are allowed.");
- return llvm::detail::LeadingZerosCounter<T, sizeof(T)>::count(Val);
+ // Fall back to the bisection method.
+ unsigned ZeroBits = 0;
+ for (T Shift = std::numeric_limits<T>::digits >> 1; Shift; Shift >>= 1) {
+ T Tmp = Val >> Shift;
+ if (Tmp)
+ Val = Tmp;
+ else
+ ZeroBits |= Shift;
+ }
+ return ZeroBits;
}
/// Count the number of ones from the most significant bit to the first
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/87/builds/1632 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/52/builds/8779 Here is the relevant piece of the build log for the reference
|
The high-level idea in countr_zero and countl_zero is to use the
intrinsic if available and fall back otherwise, but the actual
implementation is unnecessarily complicated.
Specifically, without this patch, we are going through a long chain of
macros to check the availability of _BitScanForward64:
#if defined(GNUC) || defined(_MSC_VER)
#if !defined(_MSC_VER) || defined(_M_X64)
#if __has_builtin(__builtin_ctzll) || defined(GNUC)
...
#elif defined(_MSC_VER)
_BitScanForward64(...);
#endif
#endif
#endif
This basically says we can use _BitScanForward64 if:
defined(_MSC_VER) && defined(_M_X64)
This patch simplifies all this with "constexpr if" and consolidates
the implementation into the body of countr_zero and countl_zero.