|
41 | 41 | bc_num _bc_do_add(bc_num n1, bc_num n2, size_t scale_min)
|
42 | 42 | {
|
43 | 43 | bc_num sum;
|
44 |
| - size_t sum_scale, sum_digits; |
| 44 | + size_t sum_len = MAX(n1->n_len, n2->n_len) + 1; |
| 45 | + size_t sum_scale = MAX(n1->n_scale, n2->n_scale); |
| 46 | + size_t min_len = MIN (n1->n_len, n2->n_len); |
| 47 | + size_t min_scale = MIN(n1->n_scale, n2->n_scale); |
| 48 | + size_t min_bytes = min_len + min_scale; |
45 | 49 | char *n1ptr, *n2ptr, *sumptr;
|
46 |
| - size_t n1bytes, n2bytes; |
47 |
| - bool carry; |
| 50 | + bool carry = 0; |
| 51 | + size_t count; |
48 | 52 |
|
49 | 53 | /* Prepare sum. */
|
50 |
| - sum_scale = MAX (n1->n_scale, n2->n_scale); |
51 |
| - sum_digits = MAX (n1->n_len, n2->n_len) + 1; |
52 |
| - sum = bc_new_num (sum_digits, MAX(sum_scale, scale_min)); |
| 54 | + sum = bc_new_num (sum_len, MAX(sum_scale, scale_min)); |
53 | 55 |
|
54 | 56 | /* Start with the fraction part. Initialize the pointers. */
|
55 |
| - n1bytes = n1->n_scale; |
56 |
| - n2bytes = n2->n_scale; |
57 |
| - n1ptr = (char *) (n1->n_value + n1->n_len + n1bytes - 1); |
58 |
| - n2ptr = (char *) (n2->n_value + n2->n_len + n2bytes - 1); |
59 |
| - sumptr = (char *) (sum->n_value + sum_scale + sum_digits - 1); |
| 57 | + n1ptr = (char *) (n1->n_value + n1->n_len + n1->n_scale - 1); |
| 58 | + n2ptr = (char *) (n2->n_value + n2->n_len + n2->n_scale - 1); |
| 59 | + sumptr = (char *) (sum->n_value + sum_scale + sum_len - 1); |
60 | 60 |
|
61 | 61 | /* Add the fraction part. First copy the longer fraction.*/
|
62 |
| - if (n1bytes != n2bytes) { |
63 |
| - if (n1bytes > n2bytes) { |
64 |
| - while (n1bytes > n2bytes) { |
65 |
| - *sumptr-- = *n1ptr--; |
66 |
| - n1bytes--; |
67 |
| - } |
68 |
| - } else { |
69 |
| - while (n2bytes > n1bytes) { |
70 |
| - *sumptr-- = *n2ptr--; |
71 |
| - n2bytes--; |
72 |
| - } |
| 62 | + if (n1->n_scale != min_scale) { |
| 63 | + /* n1 has the longer scale */ |
| 64 | + for (count = n1->n_scale - min_scale; count > 0; count--) { |
| 65 | + *sumptr-- = *n1ptr--; |
| 66 | + } |
| 67 | + } else { |
| 68 | + /* n2 has the longer scale */ |
| 69 | + for (count = n2->n_scale - min_scale; count > 0; count--) { |
| 70 | + *sumptr-- = *n2ptr--; |
73 | 71 | }
|
74 | 72 | }
|
75 | 73 |
|
76 | 74 | /* Now add the remaining fraction part and equal size integer parts. */
|
77 |
| - n1bytes += n1->n_len; |
78 |
| - n2bytes += n2->n_len; |
79 |
| - carry = 0; |
80 |
| - while ((n1bytes > 0) && (n2bytes > 0)) { |
| 75 | + count = 0; |
| 76 | + /* Uses SIMD to perform calculations at high speed. */ |
| 77 | + if (min_bytes >= sizeof(BC_UINT_T)) { |
| 78 | + sumptr++; |
| 79 | + n1ptr++; |
| 80 | + n2ptr++; |
| 81 | + while (count + sizeof(BC_UINT_T) <= min_bytes) { |
| 82 | + sumptr -= sizeof(BC_UINT_T); |
| 83 | + n1ptr -= sizeof(BC_UINT_T); |
| 84 | + n2ptr -= sizeof(BC_UINT_T); |
| 85 | + |
| 86 | + BC_UINT_T n1bytes; |
| 87 | + BC_UINT_T n2bytes; |
| 88 | + memcpy(&n1bytes, n1ptr, sizeof(n1bytes)); |
| 89 | + memcpy(&n2bytes, n2ptr, sizeof(n2bytes)); |
| 90 | + |
| 91 | +#if BC_LITTLE_ENDIAN |
| 92 | + /* Little endian requires changing the order of bytes. */ |
| 93 | + n1bytes = BC_BSWAP(n1bytes); |
| 94 | + n2bytes = BC_BSWAP(n2bytes); |
| 95 | +#endif |
| 96 | + |
| 97 | + /* |
| 98 | + * In order to add 1 to the "next digit" when a carry occurs, adjust it so that it |
| 99 | + * overflows when add 10. |
| 100 | + * e.g. |
| 101 | + * 00001001(9) + 00000001(1) = 00001010(10) to |
| 102 | + * 11111111 + 00000001 = 00000000(0) and carry 1 |
| 103 | + */ |
| 104 | + n1bytes += SWAR_REPEAT(0xF6) + n2bytes + carry; |
| 105 | + /* If the most significant bit is 0, a carry has occurred. */ |
| 106 | + carry = !(n1bytes & ((BC_UINT_T) 1 << (8 * sizeof(BC_UINT_T) - 1))); |
| 107 | + |
| 108 | + /* |
| 109 | + * The calculation result is a mixture of bytes that have been carried and bytes that have not. |
| 110 | + * The most significant bit of each byte is 0 if it is carried forward, and 1 if it is not. |
| 111 | + * Using this, subtract the 0xF6 added for adjustment from the byte that has not been carried |
| 112 | + * over to return it to the correct value as a decimal number. |
| 113 | + */ |
| 114 | + BC_UINT_T sum_mask = ((n1bytes & SWAR_REPEAT(0x80)) >> 7) * 0xF6; |
| 115 | + n1bytes -= sum_mask; |
| 116 | + |
| 117 | +#if BC_LITTLE_ENDIAN |
| 118 | + /* Little endian requires changing the order of bytes back. */ |
| 119 | + n1bytes = BC_BSWAP(n1bytes); |
| 120 | +#endif |
| 121 | + |
| 122 | + memcpy(sumptr, &n1bytes, sizeof(n1bytes)); |
| 123 | + |
| 124 | + count += sizeof(BC_UINT_T); |
| 125 | + } |
| 126 | + sumptr--; |
| 127 | + n1ptr--; |
| 128 | + n2ptr--; |
| 129 | + } |
| 130 | + |
| 131 | + for (; count < min_bytes; count++) { |
81 | 132 | *sumptr = *n1ptr-- + *n2ptr-- + carry;
|
82 |
| - if (*sumptr > (BASE - 1)) { |
83 |
| - carry = 1; |
| 133 | + if (*sumptr >= BASE) { |
84 | 134 | *sumptr -= BASE;
|
| 135 | + carry = 1; |
85 | 136 | } else {
|
86 | 137 | carry = 0;
|
87 | 138 | }
|
88 | 139 | sumptr--;
|
89 |
| - n1bytes--; |
90 |
| - n2bytes--; |
91 | 140 | }
|
92 | 141 |
|
93 | 142 | /* Now add carry the longer integer part. */
|
94 |
| - if (n1bytes == 0) { |
95 |
| - n1bytes = n2bytes; |
96 |
| - n1ptr = n2ptr; |
97 |
| - } |
98 |
| - while (n1bytes-- > 0) { |
99 |
| - *sumptr = *n1ptr-- + carry; |
100 |
| - if (*sumptr > (BASE - 1)) { |
101 |
| - carry = true; |
102 |
| - *sumptr -= BASE; |
103 |
| - } else { |
104 |
| - carry = false; |
| 143 | + if (n1->n_len != n2->n_len) { |
| 144 | + if (n2->n_len > n1->n_len) { |
| 145 | + n1ptr = n2ptr; |
| 146 | + } |
| 147 | + for (count = sum_len - min_len; count > 1; count--) { |
| 148 | + *sumptr = *n1ptr-- + carry; |
| 149 | + if (*sumptr >= BASE) { |
| 150 | + *sumptr -= BASE; |
| 151 | + carry = 1; |
| 152 | + } else { |
| 153 | + carry = 0; |
| 154 | + } |
| 155 | + sumptr--; |
105 | 156 | }
|
106 |
| - sumptr--; |
107 | 157 | }
|
108 | 158 |
|
109 | 159 | /* Set final carry. */
|
110 |
| - if (carry) { |
111 |
| - *sumptr += 1; |
112 |
| - } |
| 160 | + *sumptr += carry; |
113 | 161 |
|
114 | 162 | /* Adjust sum and return. */
|
115 | 163 | _bc_rm_leading_zeros(sum);
|
|
0 commit comments