Skip to content

bpo-31031: Unify duplicate bits_in_digit and bit_length #2866

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 16, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions Include/pymath.h
Original file line number Diff line number Diff line change
Expand Up @@ -227,4 +227,12 @@ PyAPI_FUNC(void) _Py_set_387controlword(unsigned short);
* behavior. */
#define _Py_InIntegralTypeRange(type, v) (_Py_IntegralTypeMin(type) <= v && v <= _Py_IntegralTypeMax(type))

/* Return the smallest integer k such that n < 2**k, or 0 if n == 0.
* Equivalent to floor(log2(x))+1. Also equivalent to: bitwidth_of_type -
* count_leading_zero_bits(x)
*/
#ifndef Py_LIMITED_API
PyAPI_FUNC(unsigned int) _Py_bit_length(unsigned long d);
#endif

#endif /* Py_PYMATH_H */
28 changes: 3 additions & 25 deletions Modules/mathmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -1443,28 +1443,6 @@ math_fsum(PyObject *module, PyObject *seq)
#undef NUM_PARTIALS


/* Return the smallest integer k such that n < 2**k, or 0 if n == 0.
* Equivalent to floor(lg(x))+1. Also equivalent to: bitwidth_of_type -
* count_leading_zero_bits(x)
*/

/* XXX: This routine does more or less the same thing as
* bits_in_digit() in Objects/longobject.c. Someday it would be nice to
* consolidate them. On BSD, there's a library function called fls()
* that we could use, and GCC provides __builtin_clz().
*/

static unsigned long
bit_length(unsigned long n)
{
unsigned long len = 0;
while (n != 0) {
++len;
n >>= 1;
}
return len;
}

static unsigned long
count_set_bits(unsigned long n)
{
Expand Down Expand Up @@ -1879,7 +1857,7 @@ factorial_partial_product(unsigned long start, unsigned long stop,
/* find midpoint of range(start, stop), rounded up to next odd number. */
midpoint = (start + num_operands) | 1;
left = factorial_partial_product(start, midpoint,
bit_length(midpoint - 2));
_Py_bit_length(midpoint - 2));
if (left == NULL)
goto error;
right = factorial_partial_product(midpoint, stop, max_bits);
Expand Down Expand Up @@ -1909,7 +1887,7 @@ factorial_odd_part(unsigned long n)
Py_INCREF(outer);

upper = 3;
for (i = bit_length(n) - 2; i >= 0; i--) {
for (i = _Py_bit_length(n) - 2; i >= 0; i--) {
v = n >> i;
if (v <= 2)
continue;
Expand All @@ -1919,7 +1897,7 @@ factorial_odd_part(unsigned long n)
/* Here inner is the product of all odd integers j in the range (0,
n/2**(i+1)]. The factorial_partial_product call below gives the
product of all odd integers j in the range (n/2**(i+1), n/2**i]. */
partial = factorial_partial_product(lower, upper, bit_length(upper-2));
partial = factorial_partial_product(lower, upper, _Py_bit_length(upper-2));
/* inner *= partial */
if (partial == NULL)
goto error;
Expand Down
38 changes: 9 additions & 29 deletions Objects/longobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -787,26 +787,6 @@ _PyLong_Sign(PyObject *vv)
return Py_SIZE(v) == 0 ? 0 : (Py_SIZE(v) < 0 ? -1 : 1);
}

/* bits_in_digit(d) returns the unique integer k such that 2**(k-1) <= d <
2**k if d is nonzero, else 0. */

static const unsigned char BitLengthTable[32] = {
0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
};

static int
bits_in_digit(digit d)
{
int d_bits = 0;
while (d >= 32) {
d_bits += 6;
d >>= 6;
}
d_bits += (int)BitLengthTable[d];
return d_bits;
}

size_t
_PyLong_NumBits(PyObject *vv)
{
Expand All @@ -824,7 +804,7 @@ _PyLong_NumBits(PyObject *vv)
if ((size_t)(ndigits - 1) > SIZE_MAX / (size_t)PyLong_SHIFT)
goto Overflow;
result = (size_t)(ndigits - 1) * (size_t)PyLong_SHIFT;
msd_bits = bits_in_digit(msd);
msd_bits = _Py_bit_length(msd);
if (SIZE_MAX - msd_bits < result)
goto Overflow;
result += msd_bits;
Expand Down Expand Up @@ -1985,7 +1965,7 @@ long_format_binary(PyObject *aa, int base, int alternate,
return -1;
}
size_a_in_bits = (size_a - 1) * PyLong_SHIFT +
bits_in_digit(a->ob_digit[size_a - 1]);
_Py_bit_length(a->ob_digit[size_a - 1]);
/* Allow 1 character for a '-' sign. */
sz = negative + (size_a_in_bits + (bits - 1)) / bits;
}
Expand Down Expand Up @@ -2805,7 +2785,7 @@ x_divrem(PyLongObject *v1, PyLongObject *w1, PyLongObject **prem)

/* normalize: shift w1 left so that its top digit is >= PyLong_BASE/2.
shift v1 left by the same amount. Results go into w and v. */
d = PyLong_SHIFT - bits_in_digit(w1->ob_digit[size_w-1]);
d = PyLong_SHIFT - _Py_bit_length(w1->ob_digit[size_w-1]);
carry = v_lshift(w->ob_digit, w1->ob_digit, size_w, d);
assert(carry == 0);
carry = v_lshift(v->ob_digit, v1->ob_digit, size_v, d);
Expand Down Expand Up @@ -2926,7 +2906,7 @@ _PyLong_Frexp(PyLongObject *a, Py_ssize_t *e)
*e = 0;
return 0.0;
}
a_bits = bits_in_digit(a->ob_digit[a_size-1]);
a_bits = _Py_bit_length(a->ob_digit[a_size-1]);
/* The following is an overflow-free version of the check
"if ((a_size - 1) * PyLong_SHIFT + a_bits > PY_SSIZE_T_MAX) ..." */
if (a_size >= (PY_SSIZE_T_MAX - 1) / PyLong_SHIFT + 1 &&
Expand Down Expand Up @@ -4020,8 +4000,8 @@ long_true_divide(PyObject *v, PyObject *w)
/* Extreme underflow */
goto underflow_or_zero;
/* Next line is now safe from overflowing a Py_ssize_t */
diff = diff * PyLong_SHIFT + bits_in_digit(a->ob_digit[a_size - 1]) -
bits_in_digit(b->ob_digit[b_size - 1]);
diff = diff * PyLong_SHIFT + _Py_bit_length(a->ob_digit[a_size - 1]) -
_Py_bit_length(b->ob_digit[b_size - 1]);
/* Now diff = a_bits - b_bits. */
if (diff > DBL_MAX_EXP)
goto overflow;
Expand Down Expand Up @@ -4097,7 +4077,7 @@ long_true_divide(PyObject *v, PyObject *w)
}
x_size = Py_ABS(Py_SIZE(x));
assert(x_size > 0); /* result of division is never zero */
x_bits = (x_size-1)*PyLong_SHIFT+bits_in_digit(x->ob_digit[x_size-1]);
x_bits = (x_size-1)*PyLong_SHIFT+_Py_bit_length(x->ob_digit[x_size-1]);

/* The number of extra bits that have to be rounded away. */
extra_bits = Py_MAX(x_bits, DBL_MIN_EXP - shift) - DBL_MANT_DIG;
Expand Down Expand Up @@ -4805,7 +4785,7 @@ _PyLong_GCD(PyObject *aarg, PyObject *barg)
alloc_b = Py_SIZE(b);
/* reduce until a fits into 2 digits */
while ((size_a = Py_SIZE(a)) > 2) {
nbits = bits_in_digit(a->ob_digit[size_a-1]);
nbits = _Py_bit_length(a->ob_digit[size_a-1]);
/* extract top 2*PyLong_SHIFT bits of a into x, along with
corresponding bits of b into y */
size_b = Py_SIZE(b);
Expand Down Expand Up @@ -5322,7 +5302,7 @@ int_bit_length_impl(PyObject *self)
return PyLong_FromLong(0);

msd = ((PyLongObject *)self)->ob_digit[ndigits-1];
msd_bits = bits_in_digit(msd);
msd_bits = _Py_bit_length(msd);

if (ndigits <= PY_SSIZE_T_MAX/PyLong_SHIFT)
return PyLong_FromSsize_t((ndigits-1)*PyLong_SHIFT + msd_bits);
Expand Down
15 changes: 15 additions & 0 deletions Python/pymath.c
Original file line number Diff line number Diff line change
Expand Up @@ -79,3 +79,18 @@ round(double x)
return copysign(y, x);
}
#endif /* HAVE_ROUND */

static const unsigned int BitLengthTable[32] = {
0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
};

unsigned int _Py_bit_length(unsigned long d) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it make sense to convert this function to an static inline function only exposed in the internal C API?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For sure, the declaration in pymath.h should be protected by an #ifndef Py_LIMITED_API. @vstinner: I'm not sure I'm understanding you correctly, though. Making this static would make it unavailable to the other files that need it, wouldn't it?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, I see: you're suggesting moving the entire function definition to pymath.h, and making it static inline. Is that right?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right. Sorry, my comment was unclear. I converted some macros to static inline functions in header files.

I don't know if it would be worth it in term of performance.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could be worth a try; I'd be surprised if it makes a measurable difference, but we won't know without testing.

But if not, we do need the Py_LIMITED_API guard: we don't want to expose _Py_bit_length to 3rd party libraries.

unsigned int d_bits = 0;
while (d >= 32) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can't we unroll manually the loop using SIZEOF_LONG? For example, 4 bytes long need 0 loop. 8 bytes long only require a single if(). There is no platform supported by Python with long larger than 8 bytes (64 bits).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A 4-byte long would need up to 6 iterations (and an 8-byte long up to 11): we're only removing 6 bits per iteration.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This can probably be optimized to work without branching (using the __clz intrinsic or equivalents). But I'd prefer to keep this patch as a pure refactoring, simply chosing the better of the existing implementations, for now.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh wait, I didn't notice that it's "only" 6 bits per iteration. I read 32 bits by mistake. You can ignore my comment.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using clz was discussed in: https://bugs.python.org/issue29782

But it was rejected: https://bugs.python.org/issue29782#msg290973

Maybe the best we can do is to add a short comment to explain why we use a "naive" implementation (not really naive, it uses a precomputed table) linking to bpo-29782.

Would you mind to add such short comment?

By the way, int.bit_length() has a complexity of compleO(1) in practice, no? (I consider that this function has a complexity of O(1), especially when you consider Python int objects made of many "Python digits".)

d_bits += 6;
d >>= 6;
}
d_bits += BitLengthTable[d];
return d_bits;
}