@@ -104,9 +104,9 @@ quad_unary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtype
104
104
105
105
template <unary_op_quad_def sleef_op, unary_op_longdouble_def longdouble_op>
106
106
int
107
- quad_generic_unary_op_strided_loop (PyArrayMethod_Context *context, char *const data[],
108
- npy_intp const dimensions[], npy_intp const strides[],
109
- NpyAuxData *auxdata)
107
+ quad_generic_unary_op_strided_loop_unaligned (PyArrayMethod_Context *context, char *const data[],
108
+ npy_intp const dimensions[], npy_intp const strides[],
109
+ NpyAuxData *auxdata)
110
110
{
111
111
npy_intp N = dimensions[0 ];
112
112
char *in_ptr = data[0 ];
@@ -135,6 +135,34 @@ quad_generic_unary_op_strided_loop(PyArrayMethod_Context *context, char *const d
135
135
return 0 ;
136
136
}
137
137
138
+ template <unary_op_quad_def sleef_op, unary_op_longdouble_def longdouble_op>
139
+ int
140
+ quad_generic_unary_op_strided_loop_aligned (PyArrayMethod_Context *context, char *const data[],
141
+ npy_intp const dimensions[], npy_intp const strides[],
142
+ NpyAuxData *auxdata)
143
+ {
144
+ npy_intp N = dimensions[0 ];
145
+ char *in_ptr = data[0 ];
146
+ char *out_ptr = data[1 ];
147
+ npy_intp in_stride = strides[0 ];
148
+ npy_intp out_stride = strides[1 ];
149
+
150
+ QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors [0 ];
151
+ QuadBackendType backend = descr->backend ;
152
+
153
+ while (N--) {
154
+ if (backend == BACKEND_SLEEF) {
155
+ sleef_op ((Sleef_quad *)in_ptr, (Sleef_quad *)out_ptr);
156
+ }
157
+ else {
158
+ longdouble_op ((long double *)in_ptr, (long double *)out_ptr);
159
+ }
160
+ in_ptr += in_stride;
161
+ out_ptr += out_stride;
162
+ }
163
+ return 0 ;
164
+ }
165
+
138
166
template <unary_op_quad_def sleef_op, unary_op_longdouble_def longdouble_op>
139
167
int
140
168
create_quad_unary_ufunc (PyObject *numpy, const char *ufunc_name)
@@ -149,15 +177,17 @@ create_quad_unary_ufunc(PyObject *numpy, const char *ufunc_name)
149
177
PyType_Slot slots[] = {
150
178
{NPY_METH_resolve_descriptors, (void *)&quad_unary_op_resolve_descriptors},
151
179
{NPY_METH_strided_loop,
152
- (void *)&quad_generic_unary_op_strided_loop<sleef_op, longdouble_op>},
180
+ (void *)&quad_generic_unary_op_strided_loop_aligned<sleef_op, longdouble_op>},
181
+ {NPY_METH_unaligned_strided_loop,
182
+ (void *)&quad_generic_unary_op_strided_loop_unaligned<sleef_op, longdouble_op>},
153
183
{0 , NULL }};
154
184
155
185
PyArrayMethod_Spec Spec = {
156
186
.name = " quad_unary_op" ,
157
187
.nin = 1 ,
158
188
.nout = 1 ,
159
189
.casting = NPY_NO_CASTING,
160
- .flags = (NPY_ARRAYMETHOD_FLAGS) 0 ,
190
+ .flags = NPY_METH_SUPPORTS_UNALIGNED ,
161
191
.dtypes = dtypes,
162
192
.slots = slots,
163
193
};
@@ -245,7 +275,6 @@ quad_binary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtyp
245
275
PyArray_Descr *const given_descrs[],
246
276
PyArray_Descr *loop_descrs[], npy_intp *NPY_UNUSED (view_offset))
247
277
{
248
-
249
278
QuadPrecDTypeObject *descr_in1 = (QuadPrecDTypeObject *)given_descrs[0 ];
250
279
QuadPrecDTypeObject *descr_in2 = (QuadPrecDTypeObject *)given_descrs[1 ];
251
280
QuadBackendType target_backend;
@@ -255,7 +284,8 @@ quad_binary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtyp
255
284
if (descr_in1->backend != descr_in2->backend ) {
256
285
target_backend = BACKEND_LONGDOUBLE;
257
286
casting = NPY_SAFE_CASTING;
258
- } else {
287
+ }
288
+ else {
259
289
target_backend = descr_in1->backend ;
260
290
}
261
291
@@ -266,7 +296,8 @@ quad_binary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtyp
266
296
if (!loop_descrs[i]) {
267
297
return (NPY_CASTING)-1 ;
268
298
}
269
- } else {
299
+ }
300
+ else {
270
301
Py_INCREF (given_descrs[i]);
271
302
loop_descrs[i] = given_descrs[i];
272
303
}
@@ -278,14 +309,16 @@ quad_binary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtyp
278
309
if (!loop_descrs[2 ]) {
279
310
return (NPY_CASTING)-1 ;
280
311
}
281
- } else {
312
+ }
313
+ else {
282
314
QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)given_descrs[2 ];
283
315
if (descr_out->backend != target_backend) {
284
316
loop_descrs[2 ] = (PyArray_Descr *)new_quaddtype_instance (target_backend);
285
317
if (!loop_descrs[2 ]) {
286
318
return (NPY_CASTING)-1 ;
287
319
}
288
- } else {
320
+ }
321
+ else {
289
322
Py_INCREF (given_descrs[2 ]);
290
323
loop_descrs[2 ] = given_descrs[2 ];
291
324
}
@@ -295,9 +328,9 @@ quad_binary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtyp
295
328
296
329
template <binary_op_quad_def sleef_op, binary_op_longdouble_def longdouble_op>
297
330
int
298
- quad_generic_binop_strided_loop (PyArrayMethod_Context *context, char *const data[],
299
- npy_intp const dimensions[], npy_intp const strides[],
300
- NpyAuxData *auxdata)
331
+ quad_generic_binop_strided_loop_unaligned (PyArrayMethod_Context *context, char *const data[],
332
+ npy_intp const dimensions[], npy_intp const strides[],
333
+ NpyAuxData *auxdata)
301
334
{
302
335
npy_intp N = dimensions[0 ];
303
336
char *in1_ptr = data[0 ], *in2_ptr = data[1 ];
@@ -329,6 +362,37 @@ quad_generic_binop_strided_loop(PyArrayMethod_Context *context, char *const data
329
362
return 0 ;
330
363
}
331
364
365
+ template <binary_op_quad_def sleef_op, binary_op_longdouble_def longdouble_op>
366
+ int
367
+ quad_generic_binop_strided_loop_aligned (PyArrayMethod_Context *context, char *const data[],
368
+ npy_intp const dimensions[], npy_intp const strides[],
369
+ NpyAuxData *auxdata)
370
+ {
371
+ npy_intp N = dimensions[0 ];
372
+ char *in1_ptr = data[0 ], *in2_ptr = data[1 ];
373
+ char *out_ptr = data[2 ];
374
+ npy_intp in1_stride = strides[0 ];
375
+ npy_intp in2_stride = strides[1 ];
376
+ npy_intp out_stride = strides[2 ];
377
+
378
+ QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors [0 ];
379
+ QuadBackendType backend = descr->backend ;
380
+
381
+ while (N--) {
382
+ if (backend == BACKEND_SLEEF) {
383
+ sleef_op ((Sleef_quad *)out_ptr, (Sleef_quad *)in1_ptr, (Sleef_quad *)in2_ptr);
384
+ }
385
+ else {
386
+ longdouble_op ((long double *)out_ptr, (long double *)in1_ptr, (long double *)in2_ptr);
387
+ }
388
+
389
+ in1_ptr += in1_stride;
390
+ in2_ptr += in2_stride;
391
+ out_ptr += out_stride;
392
+ }
393
+ return 0 ;
394
+ }
395
+
332
396
static int
333
397
quad_ufunc_promoter (PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[],
334
398
PyArray_DTypeMeta *signature[], PyArray_DTypeMeta *new_op_dtypes[])
@@ -344,31 +408,26 @@ quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[],
344
408
for (int i = 0 ; i < 3 ; i++) {
345
409
Py_INCREF (op_dtypes[1 ]);
346
410
new_op_dtypes[i] = op_dtypes[1 ];
347
-
348
411
}
349
412
return 0 ;
350
413
}
351
414
352
415
// Check if any input or signature is QuadPrecision
353
416
for (int i = 0 ; i < nin; i++) {
354
-
355
417
if (op_dtypes[i] == &QuadPrecDType) {
356
418
has_quad = true ;
357
-
358
419
}
359
420
}
360
421
361
422
if (has_quad) {
362
423
common = &QuadPrecDType;
363
-
364
424
}
365
425
else {
366
426
for (int i = nin; i < nargs; i++) {
367
427
if (signature[i] != NULL ) {
368
428
if (common == NULL ) {
369
429
Py_INCREF (signature[i]);
370
430
common = signature[i];
371
-
372
431
}
373
432
else if (common != signature[i]) {
374
433
Py_CLEAR (common); // Not homogeneous, unset common
@@ -388,7 +447,6 @@ quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[],
388
447
389
448
return -1 ;
390
449
}
391
-
392
450
}
393
451
394
452
// Set all new_op_dtypes to the common dtype
@@ -424,15 +482,17 @@ create_quad_binary_ufunc(PyObject *numpy, const char *ufunc_name)
424
482
PyType_Slot slots[] = {
425
483
{NPY_METH_resolve_descriptors, (void *)&quad_binary_op_resolve_descriptors},
426
484
{NPY_METH_strided_loop,
427
- (void *)&quad_generic_binop_strided_loop<sleef_op, longdouble_op>},
485
+ (void *)&quad_generic_binop_strided_loop_aligned<sleef_op, longdouble_op>},
486
+ {NPY_METH_unaligned_strided_loop,
487
+ (void *)&quad_generic_binop_strided_loop_unaligned<sleef_op, longdouble_op>},
428
488
{0 , NULL }};
429
489
430
490
PyArrayMethod_Spec Spec = {
431
491
.name = " quad_binop" ,
432
492
.nin = 2 ,
433
493
.nout = 1 ,
434
494
.casting = NPY_NO_CASTING,
435
- .flags = NPY_METH_IS_REORDERABLE,
495
+ .flags = (NPY_ARRAYMETHOD_FLAGS)(NPY_METH_SUPPORTS_UNALIGNED | NPY_METH_IS_REORDERABLE) ,
436
496
.dtypes = dtypes,
437
497
.slots = slots,
438
498
};
@@ -500,9 +560,9 @@ init_quad_binary_ops(PyObject *numpy)
500
560
501
561
template <cmp_quad_def sleef_comp, cmp_londouble_def ld_comp>
502
562
int
503
- quad_generic_comp_strided_loop (PyArrayMethod_Context *context, char *const data[],
504
- npy_intp const dimensions[], npy_intp const strides[],
505
- NpyAuxData *auxdata)
563
+ quad_generic_comp_strided_loop_aligned (PyArrayMethod_Context *context, char *const data[],
564
+ npy_intp const dimensions[], npy_intp const strides[],
565
+ NpyAuxData *auxdata)
506
566
{
507
567
npy_intp N = dimensions[0 ];
508
568
char *in1_ptr = data[0 ], *in2_ptr = data[1 ];
@@ -513,7 +573,6 @@ quad_generic_comp_strided_loop(PyArrayMethod_Context *context, char *const data[
513
573
514
574
QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors [0 ];
515
575
QuadBackendType backend = descr->backend ;
516
- size_t elem_size = (backend == BACKEND_SLEEF) ? sizeof (Sleef_quad) : sizeof (long double );
517
576
518
577
while (N--) {
519
578
if (backend == BACKEND_SLEEF) {
@@ -532,6 +591,42 @@ quad_generic_comp_strided_loop(PyArrayMethod_Context *context, char *const data[
532
591
return 0 ;
533
592
}
534
593
594
+ template <cmp_quad_def sleef_comp, cmp_londouble_def ld_comp>
595
+ int
596
+ quad_generic_comp_strided_loop_unaligned (PyArrayMethod_Context *context, char *const data[],
597
+ npy_intp const dimensions[], npy_intp const strides[],
598
+ NpyAuxData *auxdata)
599
+ {
600
+ npy_intp N = dimensions[0 ];
601
+ char *in1_ptr = data[0 ], *in2_ptr = data[1 ];
602
+ char *out_ptr = data[2 ];
603
+ npy_intp in1_stride = strides[0 ];
604
+ npy_intp in2_stride = strides[1 ];
605
+ npy_intp out_stride = strides[2 ];
606
+
607
+ QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors [0 ];
608
+ QuadBackendType backend = descr->backend ;
609
+ size_t elem_size = (backend == BACKEND_SLEEF) ? sizeof (Sleef_quad) : sizeof (long double );
610
+
611
+ quad_value in1, in2;
612
+ while (N--) {
613
+ memcpy (&in1, in1_ptr, elem_size);
614
+ memcpy (&in2, in2_ptr, elem_size);
615
+
616
+ if (backend == BACKEND_SLEEF) {
617
+ *((npy_bool *)out_ptr) = sleef_comp (&in1.sleef_value , &in2.sleef_value );
618
+ }
619
+ else {
620
+ *((npy_bool *)out_ptr) = ld_comp (&in1.longdouble_value , &in2.longdouble_value );
621
+ }
622
+
623
+ in1_ptr += in1_stride;
624
+ in2_ptr += in2_stride;
625
+ out_ptr += out_stride;
626
+ }
627
+ return 0 ;
628
+ }
629
+
535
630
NPY_NO_EXPORT int
536
631
comparison_ufunc_promoter (PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[],
537
632
PyArray_DTypeMeta *signature[], PyArray_DTypeMeta *new_op_dtypes[])
@@ -559,16 +654,18 @@ create_quad_comparison_ufunc(PyObject *numpy, const char *ufunc_name)
559
654
560
655
PyArray_DTypeMeta *dtypes[3 ] = {&QuadPrecDType, &QuadPrecDType, &PyArray_BoolDType};
561
656
562
- PyType_Slot slots[] = {
563
- {NPY_METH_strided_loop, (void *)&quad_generic_comp_strided_loop<sleef_comp, ld_comp>},
564
- {0 , NULL }};
657
+ PyType_Slot slots[] = {{NPY_METH_strided_loop,
658
+ (void *)&quad_generic_comp_strided_loop_aligned<sleef_comp, ld_comp>},
659
+ {NPY_METH_unaligned_strided_loop,
660
+ (void *)&quad_generic_comp_strided_loop_unaligned<sleef_comp, ld_comp>},
661
+ {0 , NULL }};
565
662
566
663
PyArrayMethod_Spec Spec = {
567
664
.name = " quad_comp" ,
568
665
.nin = 2 ,
569
666
.nout = 1 ,
570
667
.casting = NPY_NO_CASTING,
571
- .flags = (NPY_ARRAYMETHOD_FLAGS) 0 ,
668
+ .flags = NPY_METH_SUPPORTS_UNALIGNED ,
572
669
.dtypes = dtypes,
573
670
.slots = slots,
574
671
};
0 commit comments