Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bpo-29782: Use __builtin_clzl for bits_in_digit if available #594

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Include/Python.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
#include "pymath.h"
#include "pytime.h"
#include "pymem.h"
#include "pyintrinsics.h"

#include "object.h"
#include "objimpl.h"
Expand Down
29 changes: 29 additions & 0 deletions Include/pyintrinsics.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#ifndef Py_PYINTRINSICS_H
#define Py_PYINTRINSICS_H

/* Return the smallest integer k such that d < 2**k, or 0 if d == 0.
* For d > 0 this is equivalent to floor(lg(d))+1, and also to:
* bitwidth_of_type - count_leading_zero_bits(d)
*/

#if defined(__GNUC__) && (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4)) || (__GNUC__ >= 4))
#define HAVE_BIT_LENGTH
/* Bit length of d: number of bits needed to represent d, 0 when d == 0.
 * Implemented with the GCC-style count-leading-zeros builtin. */
static inline unsigned int _Py_bit_length(unsigned long d) {
    /* __builtin_clzl() has an undefined result for 0, so answer it here. */
    if (d == 0) {
        return 0;
    }
    /* Width of unsigned long in bits minus the number of leading zeros. */
    return 8 * sizeof(unsigned long) - __builtin_clzl(d);
}
#elif defined(_MSC_VER)
#define HAVE_BIT_LENGTH
#pragma intrinsic(_BitScanReverse)
#include <intrin.h>
/* Bit length of d: number of bits needed to represent d, 0 when d == 0.
 * Implemented with the MSVC _BitScanReverse intrinsic. */
static inline unsigned int _Py_bit_length(unsigned long d) {
    unsigned long msb_index;

    /* _BitScanReverse returns 0 when no bit of d is set. */
    if (!_BitScanReverse(&msb_index, d)) {
        return 0;
    }
    /* msb_index is the zero-based position of the highest set bit. */
    return msb_index + 1;
}
#else
extern unsigned int _Py_bit_length(unsigned long);
#endif

#endif /* Py_PYINTRINSICS_H */
2 changes: 2 additions & 0 deletions Makefile.pre.in
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,7 @@ PYTHON_OBJS= \
Python/pyfpe.o \
Python/pyhash.o \
Python/pylifecycle.o \
Python/pyintrinsics.o \
Python/pymath.o \
Python/pystate.o \
Python/pythonrun.o \
Expand Down Expand Up @@ -959,6 +960,7 @@ PYTHON_HEADERS= \
$(srcdir)/Include/pyfpe.h \
$(srcdir)/Include/pyhash.h \
$(srcdir)/Include/pylifecycle.h \
$(srcdir)/Include/pyintrinsics.h \
$(srcdir)/Include/pymath.h \
$(srcdir)/Include/pygetopt.h \
$(srcdir)/Include/pymacro.h \
Expand Down
38 changes: 8 additions & 30 deletions Modules/mathmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -1315,28 +1315,6 @@ math_fsum(PyObject *module, PyObject *seq)
#undef NUM_PARTIALS


/* Return the smallest integer k such that n < 2**k, or 0 if n == 0.
* Equivalent to floor(lg(x))+1. Also equivalent to: bitwidth_of_type -
* count_leading_zero_bits(x)
*/

/* XXX: This routine does more or less the same thing as
* bits_in_digit() in Objects/longobject.c. Someday it would be nice to
* consolidate them. On BSD, there's a library function called fls()
* that we could use, and GCC provides __builtin_clz().
*/

/* Count how many right shifts it takes to reduce n to zero, i.e. the
 * number of bits needed to represent n (0 for n == 0). */
static unsigned long
bit_length(unsigned long n)
{
    unsigned long nbits;

    /* Drop one low bit per pass until nothing is left. */
    for (nbits = 0; n != 0; n >>= 1) {
        nbits++;
    }
    return nbits;
}

static unsigned long
count_set_bits(unsigned long n)
{
Expand Down Expand Up @@ -1415,7 +1393,7 @@ count_set_bits(unsigned long n)

/* factorial_partial_product: Compute product(range(start, stop, 2)) using
* divide and conquer. Assumes start and stop are odd and stop > start.
* max_bits must be >= bit_length(stop - 2). */
* max_bits must be >= _Py_bit_length(stop - 2). */

static PyObject *
factorial_partial_product(unsigned long start, unsigned long stop,
Expand All @@ -1430,14 +1408,14 @@ factorial_partial_product(unsigned long start, unsigned long stop,
* the answer.
*
* Storing some integer z requires floor(lg(z))+1 bits, which is
* conveniently the value returned by bit_length(z). The
* conveniently the value returned by _Py_bit_length(z). The
* product x*y will require at most
* bit_length(x) + bit_length(y) bits to store, based
* _Py_bit_length(x) + _Py_bit_length(y) bits to store, based
* on the idea that lg product = lg x + lg y.
*
* We know that stop - 2 is the largest number to be multiplied. From
* there, we have: bit_length(answer) <= num_operands *
* bit_length(stop - 2)
* there, we have: _Py_bit_length(answer) <= num_operands *
* _Py_bit_length(stop - 2)
*/

num_operands = (stop - start) / 2;
Expand All @@ -1454,7 +1432,7 @@ factorial_partial_product(unsigned long start, unsigned long stop,
/* find midpoint of range(start, stop), rounded up to next odd number. */
midpoint = (start + num_operands) | 1;
left = factorial_partial_product(start, midpoint,
bit_length(midpoint - 2));
_Py_bit_length(midpoint - 2));
if (left == NULL)
goto error;
right = factorial_partial_product(midpoint, stop, max_bits);
Expand Down Expand Up @@ -1484,7 +1462,7 @@ factorial_odd_part(unsigned long n)
Py_INCREF(outer);

upper = 3;
for (i = bit_length(n) - 2; i >= 0; i--) {
for (i = _Py_bit_length(n) - 2; i >= 0; i--) {
v = n >> i;
if (v <= 2)
continue;
Expand All @@ -1494,7 +1472,7 @@ factorial_odd_part(unsigned long n)
/* Here inner is the product of all odd integers j in the range (0,
n/2**(i+1)]. The factorial_partial_product call below gives the
product of all odd integers j in the range (n/2**(i+1), n/2**i]. */
partial = factorial_partial_product(lower, upper, bit_length(upper-2));
partial = factorial_partial_product(lower, upper, _Py_bit_length(upper-2));
/* inner *= partial */
if (partial == NULL)
goto error;
Expand Down
38 changes: 9 additions & 29 deletions Objects/longobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -717,26 +717,6 @@ _PyLong_Sign(PyObject *vv)
return Py_SIZE(v) == 0 ? 0 : (Py_SIZE(v) < 0 ? -1 : 1);
}

/* bits_in_digit(d): for nonzero d, the unique k with 2**(k-1) <= d < 2**k;
   for d == 0 the result is 0. */

/* Bit lengths of the values 0..31, indexed by value. */
static const unsigned char BitLengthTable[32] = {
    0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
};

static int
bits_in_digit(digit d)
{
    int nbits;

    /* Strip six bits at a time until the remainder indexes the table. */
    for (nbits = 0; d >= 32; d >>= 6) {
        nbits += 6;
    }
    return nbits + (int)BitLengthTable[d];
}

size_t
_PyLong_NumBits(PyObject *vv)
{
Expand All @@ -754,7 +734,7 @@ _PyLong_NumBits(PyObject *vv)
if ((size_t)(ndigits - 1) > SIZE_MAX / (size_t)PyLong_SHIFT)
goto Overflow;
result = (size_t)(ndigits - 1) * (size_t)PyLong_SHIFT;
msd_bits = bits_in_digit(msd);
msd_bits = _Py_bit_length(msd);
if (SIZE_MAX - msd_bits < result)
goto Overflow;
result += msd_bits;
Expand Down Expand Up @@ -1820,7 +1800,7 @@ long_format_binary(PyObject *aa, int base, int alternate,
return -1;
}
size_a_in_bits = (size_a - 1) * PyLong_SHIFT +
bits_in_digit(a->ob_digit[size_a - 1]);
_Py_bit_length(a->ob_digit[size_a - 1]);
/* Allow 1 character for a '-' sign. */
sz = negative + (size_a_in_bits + (bits - 1)) / bits;
}
Expand Down Expand Up @@ -2638,7 +2618,7 @@ x_divrem(PyLongObject *v1, PyLongObject *w1, PyLongObject **prem)

/* normalize: shift w1 left so that its top digit is >= PyLong_BASE/2.
shift v1 left by the same amount. Results go into w and v. */
d = PyLong_SHIFT - bits_in_digit(w1->ob_digit[size_w-1]);
d = PyLong_SHIFT - _Py_bit_length(w1->ob_digit[size_w-1]);
carry = v_lshift(w->ob_digit, w1->ob_digit, size_w, d);
assert(carry == 0);
carry = v_lshift(v->ob_digit, v1->ob_digit, size_v, d);
Expand Down Expand Up @@ -2759,7 +2739,7 @@ _PyLong_Frexp(PyLongObject *a, Py_ssize_t *e)
*e = 0;
return 0.0;
}
a_bits = bits_in_digit(a->ob_digit[a_size-1]);
a_bits = _Py_bit_length(a->ob_digit[a_size-1]);
/* The following is an overflow-free version of the check
"if ((a_size - 1) * PyLong_SHIFT + a_bits > PY_SSIZE_T_MAX) ..." */
if (a_size >= (PY_SSIZE_T_MAX - 1) / PyLong_SHIFT + 1 &&
Expand Down Expand Up @@ -3892,8 +3872,8 @@ long_true_divide(PyObject *v, PyObject *w)
/* Extreme underflow */
goto underflow_or_zero;
/* Next line is now safe from overflowing a Py_ssize_t */
diff = diff * PyLong_SHIFT + bits_in_digit(a->ob_digit[a_size - 1]) -
bits_in_digit(b->ob_digit[b_size - 1]);
diff = diff * PyLong_SHIFT + _Py_bit_length(a->ob_digit[a_size - 1]) -
_Py_bit_length(b->ob_digit[b_size - 1]);
/* Now diff = a_bits - b_bits. */
if (diff > DBL_MAX_EXP)
goto overflow;
Expand Down Expand Up @@ -3969,7 +3949,7 @@ long_true_divide(PyObject *v, PyObject *w)
}
x_size = Py_ABS(Py_SIZE(x));
assert(x_size > 0); /* result of division is never zero */
x_bits = (x_size-1)*PyLong_SHIFT+bits_in_digit(x->ob_digit[x_size-1]);
x_bits = (x_size-1)*PyLong_SHIFT+_Py_bit_length(x->ob_digit[x_size-1]);

/* The number of extra bits that have to be rounded away. */
extra_bits = Py_MAX(x_bits, DBL_MIN_EXP - shift) - DBL_MANT_DIG;
Expand Down Expand Up @@ -4611,7 +4591,7 @@ _PyLong_GCD(PyObject *aarg, PyObject *barg)
alloc_b = Py_SIZE(b);
/* reduce until a fits into 2 digits */
while ((size_a = Py_SIZE(a)) > 2) {
nbits = bits_in_digit(a->ob_digit[size_a-1]);
nbits = _Py_bit_length(a->ob_digit[size_a-1]);
/* extract top 2*PyLong_SHIFT bits of a into x, along with
corresponding bits of b into y */
size_b = Py_SIZE(b);
Expand Down Expand Up @@ -5132,7 +5112,7 @@ int_bit_length_impl(PyObject *self)
return PyLong_FromLong(0);

msd = ((PyLongObject *)self)->ob_digit[ndigits-1];
msd_bits = bits_in_digit(msd);
msd_bits = _Py_bit_length(msd);

if (ndigits <= PY_SSIZE_T_MAX/PyLong_SHIFT)
return PyLong_FromSsize_t((ndigits-1)*PyLong_SHIFT + msd_bits);
Expand Down
18 changes: 18 additions & 0 deletions Python/pyintrinsics.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#include "Python.h"

#ifndef HAVE_BIT_LENGTH
/* Bit lengths of the values 0..31, indexed by value. */
static const unsigned char BitLengthTable[32] = {
    0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
};

/* Portable bit length of d (0 for d == 0), used when no compiler
 * intrinsic was selected (HAVE_BIT_LENGTH is not defined). */
unsigned int _Py_bit_length(unsigned long d) {
    unsigned int nbits;

    /* Strip six bits at a time until the remainder indexes the table. */
    for (nbits = 0; d >= 32; d >>= 6) {
        nbits += 6;
    }
    return nbits + (unsigned int)BitLengthTable[d];
}
#endif /* HAVE_BIT_LENGTH */