Skip to content

Commit 0fba040

Browse files
committed
added separate aligned and unaligned ufunc support
1 parent fe6cabc commit 0fba040

File tree

1 file changed

+126
-29
lines changed

1 file changed

+126
-29
lines changed

quaddtype/numpy_quaddtype/src/umath.cpp

Lines changed: 126 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -104,9 +104,9 @@ quad_unary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtype
104104

105105
template <unary_op_quad_def sleef_op, unary_op_longdouble_def longdouble_op>
106106
int
107-
quad_generic_unary_op_strided_loop(PyArrayMethod_Context *context, char *const data[],
108-
npy_intp const dimensions[], npy_intp const strides[],
109-
NpyAuxData *auxdata)
107+
quad_generic_unary_op_strided_loop_unaligned(PyArrayMethod_Context *context, char *const data[],
108+
npy_intp const dimensions[], npy_intp const strides[],
109+
NpyAuxData *auxdata)
110110
{
111111
npy_intp N = dimensions[0];
112112
char *in_ptr = data[0];
@@ -135,6 +135,34 @@ quad_generic_unary_op_strided_loop(PyArrayMethod_Context *context, char *const d
135135
return 0;
136136
}
137137

138+
template <unary_op_quad_def sleef_op, unary_op_longdouble_def longdouble_op>
139+
int
140+
quad_generic_unary_op_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[],
141+
npy_intp const dimensions[], npy_intp const strides[],
142+
NpyAuxData *auxdata)
143+
{
144+
npy_intp N = dimensions[0];
145+
char *in_ptr = data[0];
146+
char *out_ptr = data[1];
147+
npy_intp in_stride = strides[0];
148+
npy_intp out_stride = strides[1];
149+
150+
QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0];
151+
QuadBackendType backend = descr->backend;
152+
153+
while (N--) {
154+
if (backend == BACKEND_SLEEF) {
155+
sleef_op((Sleef_quad *)in_ptr, (Sleef_quad *)out_ptr);
156+
}
157+
else {
158+
longdouble_op((long double *)in_ptr, (long double *)out_ptr);
159+
}
160+
in_ptr += in_stride;
161+
out_ptr += out_stride;
162+
}
163+
return 0;
164+
}
165+
138166
template <unary_op_quad_def sleef_op, unary_op_longdouble_def longdouble_op>
139167
int
140168
create_quad_unary_ufunc(PyObject *numpy, const char *ufunc_name)
@@ -149,15 +177,17 @@ create_quad_unary_ufunc(PyObject *numpy, const char *ufunc_name)
149177
PyType_Slot slots[] = {
150178
{NPY_METH_resolve_descriptors, (void *)&quad_unary_op_resolve_descriptors},
151179
{NPY_METH_strided_loop,
152-
(void *)&quad_generic_unary_op_strided_loop<sleef_op, longdouble_op>},
180+
(void *)&quad_generic_unary_op_strided_loop_aligned<sleef_op, longdouble_op>},
181+
{NPY_METH_unaligned_strided_loop,
182+
(void *)&quad_generic_unary_op_strided_loop_unaligned<sleef_op, longdouble_op>},
153183
{0, NULL}};
154184

155185
PyArrayMethod_Spec Spec = {
156186
.name = "quad_unary_op",
157187
.nin = 1,
158188
.nout = 1,
159189
.casting = NPY_NO_CASTING,
160-
.flags = (NPY_ARRAYMETHOD_FLAGS)0,
190+
.flags = NPY_METH_SUPPORTS_UNALIGNED,
161191
.dtypes = dtypes,
162192
.slots = slots,
163193
};
@@ -245,7 +275,6 @@ quad_binary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtyp
245275
PyArray_Descr *const given_descrs[],
246276
PyArray_Descr *loop_descrs[], npy_intp *NPY_UNUSED(view_offset))
247277
{
248-
249278
QuadPrecDTypeObject *descr_in1 = (QuadPrecDTypeObject *)given_descrs[0];
250279
QuadPrecDTypeObject *descr_in2 = (QuadPrecDTypeObject *)given_descrs[1];
251280
QuadBackendType target_backend;
@@ -255,7 +284,8 @@ quad_binary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtyp
255284
if (descr_in1->backend != descr_in2->backend) {
256285
target_backend = BACKEND_LONGDOUBLE;
257286
casting = NPY_SAFE_CASTING;
258-
} else {
287+
}
288+
else {
259289
target_backend = descr_in1->backend;
260290
}
261291

@@ -266,7 +296,8 @@ quad_binary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtyp
266296
if (!loop_descrs[i]) {
267297
return (NPY_CASTING)-1;
268298
}
269-
} else {
299+
}
300+
else {
270301
Py_INCREF(given_descrs[i]);
271302
loop_descrs[i] = given_descrs[i];
272303
}
@@ -278,14 +309,16 @@ quad_binary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtyp
278309
if (!loop_descrs[2]) {
279310
return (NPY_CASTING)-1;
280311
}
281-
} else {
312+
}
313+
else {
282314
QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)given_descrs[2];
283315
if (descr_out->backend != target_backend) {
284316
loop_descrs[2] = (PyArray_Descr *)new_quaddtype_instance(target_backend);
285317
if (!loop_descrs[2]) {
286318
return (NPY_CASTING)-1;
287319
}
288-
} else {
320+
}
321+
else {
289322
Py_INCREF(given_descrs[2]);
290323
loop_descrs[2] = given_descrs[2];
291324
}
@@ -295,9 +328,9 @@ quad_binary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtyp
295328

296329
template <binary_op_quad_def sleef_op, binary_op_longdouble_def longdouble_op>
297330
int
298-
quad_generic_binop_strided_loop(PyArrayMethod_Context *context, char *const data[],
299-
npy_intp const dimensions[], npy_intp const strides[],
300-
NpyAuxData *auxdata)
331+
quad_generic_binop_strided_loop_unaligned(PyArrayMethod_Context *context, char *const data[],
332+
npy_intp const dimensions[], npy_intp const strides[],
333+
NpyAuxData *auxdata)
301334
{
302335
npy_intp N = dimensions[0];
303336
char *in1_ptr = data[0], *in2_ptr = data[1];
@@ -329,6 +362,37 @@ quad_generic_binop_strided_loop(PyArrayMethod_Context *context, char *const data
329362
return 0;
330363
}
331364

365+
template <binary_op_quad_def sleef_op, binary_op_longdouble_def longdouble_op>
366+
int
367+
quad_generic_binop_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[],
368+
npy_intp const dimensions[], npy_intp const strides[],
369+
NpyAuxData *auxdata)
370+
{
371+
npy_intp N = dimensions[0];
372+
char *in1_ptr = data[0], *in2_ptr = data[1];
373+
char *out_ptr = data[2];
374+
npy_intp in1_stride = strides[0];
375+
npy_intp in2_stride = strides[1];
376+
npy_intp out_stride = strides[2];
377+
378+
QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0];
379+
QuadBackendType backend = descr->backend;
380+
381+
while (N--) {
382+
if (backend == BACKEND_SLEEF) {
383+
sleef_op((Sleef_quad *)out_ptr, (Sleef_quad *)in1_ptr, (Sleef_quad *)in2_ptr);
384+
}
385+
else {
386+
longdouble_op((long double *)out_ptr, (long double *)in1_ptr, (long double *)in2_ptr);
387+
}
388+
389+
in1_ptr += in1_stride;
390+
in2_ptr += in2_stride;
391+
out_ptr += out_stride;
392+
}
393+
return 0;
394+
}
395+
332396
static int
333397
quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[],
334398
PyArray_DTypeMeta *signature[], PyArray_DTypeMeta *new_op_dtypes[])
@@ -344,31 +408,26 @@ quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[],
344408
for (int i = 0; i < 3; i++) {
345409
Py_INCREF(op_dtypes[1]);
346410
new_op_dtypes[i] = op_dtypes[1];
347-
348411
}
349412
return 0;
350413
}
351414

352415
// Check if any input or signature is QuadPrecision
353416
for (int i = 0; i < nin; i++) {
354-
355417
if (op_dtypes[i] == &QuadPrecDType) {
356418
has_quad = true;
357-
358419
}
359420
}
360421

361422
if (has_quad) {
362423
common = &QuadPrecDType;
363-
364424
}
365425
else {
366426
for (int i = nin; i < nargs; i++) {
367427
if (signature[i] != NULL) {
368428
if (common == NULL) {
369429
Py_INCREF(signature[i]);
370430
common = signature[i];
371-
372431
}
373432
else if (common != signature[i]) {
374433
Py_CLEAR(common); // Not homogeneous, unset common
@@ -388,7 +447,6 @@ quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[],
388447

389448
return -1;
390449
}
391-
392450
}
393451

394452
// Set all new_op_dtypes to the common dtype
@@ -424,15 +482,17 @@ create_quad_binary_ufunc(PyObject *numpy, const char *ufunc_name)
424482
PyType_Slot slots[] = {
425483
{NPY_METH_resolve_descriptors, (void *)&quad_binary_op_resolve_descriptors},
426484
{NPY_METH_strided_loop,
427-
(void *)&quad_generic_binop_strided_loop<sleef_op, longdouble_op>},
485+
(void *)&quad_generic_binop_strided_loop_aligned<sleef_op, longdouble_op>},
486+
{NPY_METH_unaligned_strided_loop,
487+
(void *)&quad_generic_binop_strided_loop_unaligned<sleef_op, longdouble_op>},
428488
{0, NULL}};
429489

430490
PyArrayMethod_Spec Spec = {
431491
.name = "quad_binop",
432492
.nin = 2,
433493
.nout = 1,
434494
.casting = NPY_NO_CASTING,
435-
.flags = NPY_METH_IS_REORDERABLE,
495+
.flags = (NPY_ARRAYMETHOD_FLAGS)(NPY_METH_SUPPORTS_UNALIGNED | NPY_METH_IS_REORDERABLE),
436496
.dtypes = dtypes,
437497
.slots = slots,
438498
};
@@ -500,9 +560,9 @@ init_quad_binary_ops(PyObject *numpy)
500560

501561
template <cmp_quad_def sleef_comp, cmp_londouble_def ld_comp>
502562
int
503-
quad_generic_comp_strided_loop(PyArrayMethod_Context *context, char *const data[],
504-
npy_intp const dimensions[], npy_intp const strides[],
505-
NpyAuxData *auxdata)
563+
quad_generic_comp_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[],
564+
npy_intp const dimensions[], npy_intp const strides[],
565+
NpyAuxData *auxdata)
506566
{
507567
npy_intp N = dimensions[0];
508568
char *in1_ptr = data[0], *in2_ptr = data[1];
@@ -513,7 +573,6 @@ quad_generic_comp_strided_loop(PyArrayMethod_Context *context, char *const data[
513573

514574
QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0];
515575
QuadBackendType backend = descr->backend;
516-
size_t elem_size = (backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double);
517576

518577
while (N--) {
519578
if (backend == BACKEND_SLEEF) {
@@ -532,6 +591,42 @@ quad_generic_comp_strided_loop(PyArrayMethod_Context *context, char *const data[
532591
return 0;
533592
}
534593

594+
template <cmp_quad_def sleef_comp, cmp_londouble_def ld_comp>
595+
int
596+
quad_generic_comp_strided_loop_unaligned(PyArrayMethod_Context *context, char *const data[],
597+
npy_intp const dimensions[], npy_intp const strides[],
598+
NpyAuxData *auxdata)
599+
{
600+
npy_intp N = dimensions[0];
601+
char *in1_ptr = data[0], *in2_ptr = data[1];
602+
char *out_ptr = data[2];
603+
npy_intp in1_stride = strides[0];
604+
npy_intp in2_stride = strides[1];
605+
npy_intp out_stride = strides[2];
606+
607+
QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0];
608+
QuadBackendType backend = descr->backend;
609+
size_t elem_size = (backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double);
610+
611+
quad_value in1, in2;
612+
while (N--) {
613+
memcpy(&in1, in1_ptr, elem_size);
614+
memcpy(&in2, in2_ptr, elem_size);
615+
616+
if (backend == BACKEND_SLEEF) {
617+
*((npy_bool *)out_ptr) = sleef_comp(&in1.sleef_value, &in2.sleef_value);
618+
}
619+
else {
620+
*((npy_bool *)out_ptr) = ld_comp(&in1.longdouble_value, &in2.longdouble_value);
621+
}
622+
623+
in1_ptr += in1_stride;
624+
in2_ptr += in2_stride;
625+
out_ptr += out_stride;
626+
}
627+
return 0;
628+
}
629+
535630
NPY_NO_EXPORT int
536631
comparison_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[],
537632
PyArray_DTypeMeta *signature[], PyArray_DTypeMeta *new_op_dtypes[])
@@ -559,16 +654,18 @@ create_quad_comparison_ufunc(PyObject *numpy, const char *ufunc_name)
559654

560655
PyArray_DTypeMeta *dtypes[3] = {&QuadPrecDType, &QuadPrecDType, &PyArray_BoolDType};
561656

562-
PyType_Slot slots[] = {
563-
{NPY_METH_strided_loop, (void *)&quad_generic_comp_strided_loop<sleef_comp, ld_comp>},
564-
{0, NULL}};
657+
PyType_Slot slots[] = {{NPY_METH_strided_loop,
658+
(void *)&quad_generic_comp_strided_loop_aligned<sleef_comp, ld_comp>},
659+
{NPY_METH_unaligned_strided_loop,
660+
(void *)&quad_generic_comp_strided_loop_unaligned<sleef_comp, ld_comp>},
661+
{0, NULL}};
565662

566663
PyArrayMethod_Spec Spec = {
567664
.name = "quad_comp",
568665
.nin = 2,
569666
.nout = 1,
570667
.casting = NPY_NO_CASTING,
571-
.flags = (NPY_ARRAYMETHOD_FLAGS)0,
668+
.flags = NPY_METH_SUPPORTS_UNALIGNED,
572669
.dtypes = dtypes,
573670
.slots = slots,
574671
};

0 commit comments

Comments
 (0)