Skip to content

Commit f10dafc

Browse files
bpo-46407: Optimizing some modulo operations (GH-30653)
Added new internal functions that compute the remainder without also computing the quotient. Skipping the quotient makes the inner loops leaner, which gives modestly but reliably faster execution for callers that only need the remainder. Code by Jeremiah Vivian (Pascual).
1 parent e7a6285 commit f10dafc

File tree

3 files changed

+108
-9
lines changed

3 files changed

+108
-9
lines changed

Misc/ACKS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1860,6 +1860,7 @@ Kurt Vile
18601860
Norman Vine
18611861
Pauli Virtanen
18621862
Frank Visser
1863+
Jeremiah Vivian (Pascual)
18631864
Johannes Vogel
18641865
Michael Vogt
18651866
Radu Voicilas
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Optimize some modulo operations in ``Objects/longobject.c``. Patch by Jeremiah Vivian.

Objects/longobject.c

Lines changed: 106 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1670,6 +1670,35 @@ divrem1(PyLongObject *a, digit n, digit *prem)
16701670
return long_normalize(z);
16711671
}
16721672

1673+
/* Remainder of long pin, w/ size digits, by non-zero digit n,
1674+
returning the remainder. pin points at the LSD. */
1675+
1676+
static digit
1677+
inplace_rem1(digit *pin, Py_ssize_t size, digit n)
1678+
{
1679+
twodigits rem = 0;
1680+
1681+
assert(n > 0 && n <= PyLong_MASK);
1682+
while (--size >= 0)
1683+
rem = ((rem << PyLong_SHIFT) | pin[size]) % n;
1684+
return (digit)rem;
1685+
}
1686+
1687+
/* Get the remainder of an integer divided by a digit, returning
1688+
the remainder as the result of the function. The sign of a is
1689+
ignored; n should not be zero. */
1690+
1691+
static PyLongObject *
1692+
rem1(PyLongObject *a, digit n)
1693+
{
1694+
const Py_ssize_t size = Py_ABS(Py_SIZE(a));
1695+
1696+
assert(n > 0 && n <= PyLong_MASK);
1697+
return (PyLongObject *)PyLong_FromLong(
1698+
(long)inplace_rem1(a->ob_digit, size, n)
1699+
);
1700+
}
1701+
16731702
/* Convert an integer to a base 10 string. Returns a new non-shared
16741703
string. (Return value is non-shared so that callers can modify the
16751704
returned value if necessary.) */
@@ -2689,6 +2718,47 @@ long_divrem(PyLongObject *a, PyLongObject *b,
26892718
return 0;
26902719
}
26912720

2721+
/* Int remainder, top-level routine */
2722+
2723+
static int
2724+
long_rem(PyLongObject *a, PyLongObject *b, PyLongObject **prem)
2725+
{
2726+
Py_ssize_t size_a = Py_ABS(Py_SIZE(a)), size_b = Py_ABS(Py_SIZE(b));
2727+
2728+
if (size_b == 0) {
2729+
PyErr_SetString(PyExc_ZeroDivisionError,
2730+
"integer modulo by zero");
2731+
return -1;
2732+
}
2733+
if (size_a < size_b ||
2734+
(size_a == size_b &&
2735+
a->ob_digit[size_a-1] < b->ob_digit[size_b-1])) {
2736+
/* |a| < |b|. */
2737+
*prem = (PyLongObject *)long_long((PyObject *)a);
2738+
return -(*prem == NULL);
2739+
}
2740+
if (size_b == 1) {
2741+
*prem = rem1(a, b->ob_digit[0]);
2742+
if (*prem == NULL)
2743+
return -1;
2744+
}
2745+
else {
2746+
/* Slow path using divrem. */
2747+
x_divrem(a, b, prem);
2748+
if (*prem == NULL)
2749+
return -1;
2750+
}
2751+
/* Set the sign. */
2752+
if (Py_SIZE(a) < 0 && Py_SIZE(*prem) != 0) {
2753+
_PyLong_Negate(prem);
2754+
if (*prem == NULL) {
2755+
Py_CLEAR(*prem);
2756+
return -1;
2757+
}
2758+
}
2759+
return 0;
2760+
}
2761+
26922762
/* Unsigned int division with remainder -- the algorithm. The arguments v1
26932763
and w1 should satisfy 2 <= Py_ABS(Py_SIZE(w1)) <= Py_ABS(Py_SIZE(v1)). */
26942764

@@ -3814,6 +3884,37 @@ l_divmod(PyLongObject *v, PyLongObject *w,
38143884
return 0;
38153885
}
38163886

3887+
/* Compute
3888+
* *pmod = v % w
3889+
* pmod cannot be NULL. The caller owns a reference to pmod.
3890+
*/
3891+
static int
3892+
l_mod(PyLongObject *v, PyLongObject *w, PyLongObject **pmod)
3893+
{
3894+
PyLongObject *mod;
3895+
3896+
assert(pmod);
3897+
if (Py_ABS(Py_SIZE(v)) == 1 && Py_ABS(Py_SIZE(w)) == 1) {
3898+
/* Fast path for single-digit longs */
3899+
*pmod = (PyLongObject *)fast_mod(v, w);
3900+
return -(*pmod == NULL);
3901+
}
3902+
if (long_rem(v, w, &mod) < 0)
3903+
return -1;
3904+
if ((Py_SIZE(mod) < 0 && Py_SIZE(w) > 0) ||
3905+
(Py_SIZE(mod) > 0 && Py_SIZE(w) < 0)) {
3906+
PyLongObject *temp;
3907+
temp = (PyLongObject *) long_add(mod, w);
3908+
Py_DECREF(mod);
3909+
mod = temp;
3910+
if (mod == NULL)
3911+
return -1;
3912+
}
3913+
*pmod = mod;
3914+
3915+
return 0;
3916+
}
3917+
38173918
static PyObject *
38183919
long_div(PyObject *a, PyObject *b)
38193920
{
@@ -4100,11 +4201,7 @@ long_mod(PyObject *a, PyObject *b)
41004201

41014202
CHECK_BINOP(a, b);
41024203

4103-
if (Py_ABS(Py_SIZE(a)) == 1 && Py_ABS(Py_SIZE(b)) == 1) {
4104-
return fast_mod((PyLongObject*)a, (PyLongObject*)b);
4105-
}
4106-
4107-
if (l_divmod((PyLongObject*)a, (PyLongObject*)b, NULL, &mod) < 0)
4204+
if (l_mod((PyLongObject*)a, (PyLongObject*)b, &mod) < 0)
41084205
mod = NULL;
41094206
return (PyObject *)mod;
41104207
}
@@ -4333,10 +4430,10 @@ long_pow(PyObject *v, PyObject *w, PyObject *x)
43334430
while the "large exponent" case multiplies directly by base 31
43344431
times. It can be unboundedly faster to multiply by
43354432
base % modulus instead.
4336-
We could _always_ do this reduction, but l_divmod() isn't cheap,
4433+
We could _always_ do this reduction, but l_mod() isn't cheap,
43374434
so we only do it when it buys something. */
43384435
if (Py_SIZE(a) < 0 || Py_SIZE(a) > Py_SIZE(c)) {
4339-
if (l_divmod(a, c, NULL, &temp) < 0)
4436+
if (l_mod(a, c, &temp) < 0)
43404437
goto Error;
43414438
Py_DECREF(a);
43424439
a = temp;
@@ -4357,7 +4454,7 @@ long_pow(PyObject *v, PyObject *w, PyObject *x)
43574454
#define REDUCE(X) \
43584455
do { \
43594456
if (c != NULL) { \
4360-
if (l_divmod(X, c, NULL, &temp) < 0) \
4457+
if (l_mod(X, c, &temp) < 0) \
43614458
goto Error; \
43624459
Py_XDECREF(X); \
43634460
X = temp; \
@@ -5022,7 +5119,7 @@ _PyLong_GCD(PyObject *aarg, PyObject *barg)
50225119

50235120
if (k == 0) {
50245121
/* no progress; do a Euclidean step */
5025-
if (l_divmod(a, b, NULL, &r) < 0)
5122+
if (l_mod(a, b, &r) < 0)
50265123
goto error;
50275124
Py_DECREF(a);
50285125
a = b;

0 commit comments

Comments
 (0)