Skip to content

Commit 7203ca8

Browse files
ext/bcmath: Made the same changes to _bc_do_add as _bc_do_sub (#14196)
The code for _bc_do_add and _bc_do_sub was written slightly differently even though the two functions perform similar processing (and add was slower than sub), so I rewrote _bc_do_add to follow the same structure as _bc_do_sub. Also, _bc_do_add has been changed to use SIMD to perform faster calculations when possible.
1 parent 525cbe0 commit 7203ca8

File tree

1 file changed

+93
-45
lines changed

1 file changed

+93
-45
lines changed

ext/bcmath/libbcmath/src/doaddsub.c

Lines changed: 93 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -41,75 +41,123 @@
4141
bc_num _bc_do_add(bc_num n1, bc_num n2, size_t scale_min)
4242
{
4343
bc_num sum;
44-
size_t sum_scale, sum_digits;
44+
size_t sum_len = MAX(n1->n_len, n2->n_len) + 1;
45+
size_t sum_scale = MAX(n1->n_scale, n2->n_scale);
46+
size_t min_len = MIN (n1->n_len, n2->n_len);
47+
size_t min_scale = MIN(n1->n_scale, n2->n_scale);
48+
size_t min_bytes = min_len + min_scale;
4549
char *n1ptr, *n2ptr, *sumptr;
46-
size_t n1bytes, n2bytes;
47-
bool carry;
50+
bool carry = 0;
51+
size_t count;
4852

4953
/* Prepare sum. */
50-
sum_scale = MAX (n1->n_scale, n2->n_scale);
51-
sum_digits = MAX (n1->n_len, n2->n_len) + 1;
52-
sum = bc_new_num (sum_digits, MAX(sum_scale, scale_min));
54+
sum = bc_new_num (sum_len, MAX(sum_scale, scale_min));
5355

5456
/* Start with the fraction part. Initialize the pointers. */
55-
n1bytes = n1->n_scale;
56-
n2bytes = n2->n_scale;
57-
n1ptr = (char *) (n1->n_value + n1->n_len + n1bytes - 1);
58-
n2ptr = (char *) (n2->n_value + n2->n_len + n2bytes - 1);
59-
sumptr = (char *) (sum->n_value + sum_scale + sum_digits - 1);
57+
n1ptr = (char *) (n1->n_value + n1->n_len + n1->n_scale - 1);
58+
n2ptr = (char *) (n2->n_value + n2->n_len + n2->n_scale - 1);
59+
sumptr = (char *) (sum->n_value + sum_scale + sum_len - 1);
6060

6161
/* Add the fraction part. First copy the longer fraction.*/
62-
if (n1bytes != n2bytes) {
63-
if (n1bytes > n2bytes) {
64-
while (n1bytes > n2bytes) {
65-
*sumptr-- = *n1ptr--;
66-
n1bytes--;
67-
}
68-
} else {
69-
while (n2bytes > n1bytes) {
70-
*sumptr-- = *n2ptr--;
71-
n2bytes--;
72-
}
62+
if (n1->n_scale != min_scale) {
63+
/* n1 has the longer scale */
64+
for (count = n1->n_scale - min_scale; count > 0; count--) {
65+
*sumptr-- = *n1ptr--;
66+
}
67+
} else {
68+
/* n2 has the longer scale */
69+
for (count = n2->n_scale - min_scale; count > 0; count--) {
70+
*sumptr-- = *n2ptr--;
7371
}
7472
}
7573

7674
/* Now add the remaining fraction part and equal size integer parts. */
77-
n1bytes += n1->n_len;
78-
n2bytes += n2->n_len;
79-
carry = 0;
80-
while ((n1bytes > 0) && (n2bytes > 0)) {
75+
count = 0;
76+
/* Uses SIMD to perform calculations at high speed. */
77+
if (min_bytes >= sizeof(BC_UINT_T)) {
78+
sumptr++;
79+
n1ptr++;
80+
n2ptr++;
81+
while (count + sizeof(BC_UINT_T) <= min_bytes) {
82+
sumptr -= sizeof(BC_UINT_T);
83+
n1ptr -= sizeof(BC_UINT_T);
84+
n2ptr -= sizeof(BC_UINT_T);
85+
86+
BC_UINT_T n1bytes;
87+
BC_UINT_T n2bytes;
88+
memcpy(&n1bytes, n1ptr, sizeof(n1bytes));
89+
memcpy(&n2bytes, n2ptr, sizeof(n2bytes));
90+
91+
#if BC_LITTLE_ENDIAN
92+
/* Little endian requires changing the order of bytes. */
93+
n1bytes = BC_BSWAP(n1bytes);
94+
n2bytes = BC_BSWAP(n2bytes);
95+
#endif
96+
97+
/*
98+
* In order to add 1 to the "next digit" when a carry occurs, adjust it so that it
99+
* overflows when the digit sum reaches 10.
100+
* e.g.
101+
* 00001001(9) + 00000001(1) = 00001010(10) to
102+
* 11111111 + 00000001 = 00000000(0) and carry 1
103+
*/
104+
n1bytes += SWAR_REPEAT(0xF6) + n2bytes + carry;
105+
/* If the most significant bit is 0, a carry has occurred. */
106+
carry = !(n1bytes & ((BC_UINT_T) 1 << (8 * sizeof(BC_UINT_T) - 1)));
107+
108+
/*
109+
* The calculation result is a mixture of bytes that have been carried and bytes that have not.
110+
* The most significant bit of each byte is 0 if it is carried forward, and 1 if it is not.
111+
* Using this, subtract the 0xF6 added for adjustment from the byte that has not been carried
112+
* over to return it to the correct value as a decimal number.
113+
*/
114+
BC_UINT_T sum_mask = ((n1bytes & SWAR_REPEAT(0x80)) >> 7) * 0xF6;
115+
n1bytes -= sum_mask;
116+
117+
#if BC_LITTLE_ENDIAN
118+
/* Little endian requires changing the order of bytes back. */
119+
n1bytes = BC_BSWAP(n1bytes);
120+
#endif
121+
122+
memcpy(sumptr, &n1bytes, sizeof(n1bytes));
123+
124+
count += sizeof(BC_UINT_T);
125+
}
126+
sumptr--;
127+
n1ptr--;
128+
n2ptr--;
129+
}
130+
131+
for (; count < min_bytes; count++) {
81132
*sumptr = *n1ptr-- + *n2ptr-- + carry;
82-
if (*sumptr > (BASE - 1)) {
83-
carry = 1;
133+
if (*sumptr >= BASE) {
84134
*sumptr -= BASE;
135+
carry = 1;
85136
} else {
86137
carry = 0;
87138
}
88139
sumptr--;
89-
n1bytes--;
90-
n2bytes--;
91140
}
92141

93142
/* Now add the carry to the longer integer part. */
94-
if (n1bytes == 0) {
95-
n1bytes = n2bytes;
96-
n1ptr = n2ptr;
97-
}
98-
while (n1bytes-- > 0) {
99-
*sumptr = *n1ptr-- + carry;
100-
if (*sumptr > (BASE - 1)) {
101-
carry = true;
102-
*sumptr -= BASE;
103-
} else {
104-
carry = false;
143+
if (n1->n_len != n2->n_len) {
144+
if (n2->n_len > n1->n_len) {
145+
n1ptr = n2ptr;
146+
}
147+
for (count = sum_len - min_len; count > 1; count--) {
148+
*sumptr = *n1ptr-- + carry;
149+
if (*sumptr >= BASE) {
150+
*sumptr -= BASE;
151+
carry = 1;
152+
} else {
153+
carry = 0;
154+
}
155+
sumptr--;
105156
}
106-
sumptr--;
107157
}
108158

109159
/* Set final carry. */
110-
if (carry) {
111-
*sumptr += 1;
112-
}
160+
*sumptr += carry;
113161

114162
/* Adjust sum and return. */
115163
_bc_rm_leading_zeros(sum);

0 commit comments

Comments
 (0)