@@ -107,14 +107,14 @@ static inline void set128f(float *dst, float value, int len)
107
107
int stop_len = len / ALTIVEC_LEN_FLOAT ;
108
108
stop_len *= ALTIVEC_LEN_FLOAT ;
109
109
110
- if (isAligned ((uintptr_t ) (dst ), ALTIVEC_LEN_BYTES )) {
110
+ if (isAligned ((uintptr_t )(dst ), ALTIVEC_LEN_BYTES )) {
111
111
for (int i = 0 ; i < stop_len ; i += ALTIVEC_LEN_FLOAT ) {
112
112
vec_st (tmp , 0 , dst + i );
113
113
}
114
114
} else {
115
- int unaligned_float = (uintptr_t ) (dst ) % ALTIVEC_LEN_FLOAT ; // could this happen though?
116
- if (unaligned_float == 0 ) { // dst is not aligned on 16bytes boundary but is at least aligned on float
117
- int unaligned_elts = ((uintptr_t ) (dst ) % ALTIVEC_LEN_BYTES ) / sizeof (float );
115
+ int unaligned_float = (uintptr_t )(dst ) % ALTIVEC_LEN_FLOAT ; // could this happen though?
116
+ if (unaligned_float == 0 ) { // dst is not aligned on 16bytes boundary but is at least aligned on float
117
+ int unaligned_elts = ((uintptr_t )(dst ) % ALTIVEC_LEN_BYTES ) / sizeof (float );
118
118
for (int i = 0 ; i < unaligned_elts ; i ++ ) {
119
119
dst [i ] = value ;
120
120
}
@@ -141,16 +141,16 @@ static inline void mul128f(float *src1, float *src2, float *dst, int len)
141
141
int stop_len = len / ALTIVEC_LEN_FLOAT ;
142
142
stop_len *= ALTIVEC_LEN_FLOAT ;
143
143
144
- if (areAligned3 ((uintptr_t ) (src1 ), (uintptr_t ) (src2 ), (uintptr_t ) (dst ), ALTIVEC_LEN_BYTES )) {
144
+ if (areAligned3 ((uintptr_t )(src1 ), (uintptr_t )(src2 ), (uintptr_t )(dst ), ALTIVEC_LEN_BYTES )) {
145
145
for (int i = 0 ; i < stop_len ; i += ALTIVEC_LEN_FLOAT ) {
146
146
v4sf a = vec_ld (0 , src1 + i );
147
147
v4sf b = vec_ld (0 , src2 + i );
148
148
vec_st (vec_mul (a , b ), 0 , dst + i );
149
149
}
150
150
} else {
151
- int unalign_src1 = (uintptr_t ) (src1 ) % ALTIVEC_LEN_BYTES ;
152
- int unalign_src2 = (uintptr_t ) (src2 ) % ALTIVEC_LEN_BYTES ;
153
- int unalign_dst = (uintptr_t ) (dst ) % ALTIVEC_LEN_BYTES ;
151
+ int unalign_src1 = (uintptr_t )(src1 ) % ALTIVEC_LEN_BYTES ;
152
+ int unalign_src2 = (uintptr_t )(src2 ) % ALTIVEC_LEN_BYTES ;
153
+ int unalign_dst = (uintptr_t )(dst ) % ALTIVEC_LEN_BYTES ;
154
154
155
155
/*To be improved : we constantly use unaligned load or store of those data
156
156
There exist better unaligned stream load or store which could improve performance
@@ -188,16 +188,16 @@ static inline void minevery128f(float *src1, float *src2, float *dst, int len)
188
188
int stop_len = len / ALTIVEC_LEN_FLOAT ;
189
189
stop_len *= ALTIVEC_LEN_FLOAT ;
190
190
191
- if (areAligned3 ((uintptr_t ) (src1 ), (uintptr_t ) (src2 ), (uintptr_t ) (dst ), ALTIVEC_LEN_BYTES )) {
191
+ if (areAligned3 ((uintptr_t )(src1 ), (uintptr_t )(src2 ), (uintptr_t )(dst ), ALTIVEC_LEN_BYTES )) {
192
192
for (int i = 0 ; i < stop_len ; i += ALTIVEC_LEN_FLOAT ) {
193
193
v4sf a = vec_ld (0 , src1 + i );
194
194
v4sf b = vec_ld (0 , src2 + i );
195
195
vec_st (vec_min (a , b ), 0 , dst + i );
196
196
}
197
197
} else {
198
- int unalign_src1 = (uintptr_t ) (src1 ) % ALTIVEC_LEN_BYTES ;
199
- int unalign_src2 = (uintptr_t ) (src2 ) % ALTIVEC_LEN_BYTES ;
200
- int unalign_dst = (uintptr_t ) (dst ) % ALTIVEC_LEN_BYTES ;
198
+ int unalign_src1 = (uintptr_t )(src1 ) % ALTIVEC_LEN_BYTES ;
199
+ int unalign_src2 = (uintptr_t )(src2 ) % ALTIVEC_LEN_BYTES ;
200
+ int unalign_dst = (uintptr_t )(dst ) % ALTIVEC_LEN_BYTES ;
201
201
202
202
for (int i = 0 ; i < stop_len ; i += ALTIVEC_LEN_FLOAT ) {
203
203
v4sf a , b ;
@@ -236,7 +236,7 @@ static inline void cplxtoreal128f(float *src, float *dstRe, float *dstIm, int le
236
236
const v16u8 re_mask = {0 , 1 , 2 , 3 , 8 , 9 , 10 , 11 , 16 , 17 , 18 , 19 , 24 , 25 , 26 , 27 };
237
237
const v16u8 im_mask = {4 , 5 , 6 , 7 , 12 , 13 , 14 , 15 , 20 , 21 , 22 , 23 , 28 , 29 , 30 , 31 };
238
238
239
- if (areAligned3 ((uintptr_t ) (src ), (uintptr_t ) (dstRe ), (uintptr_t ) (dstIm ), ALTIVEC_LEN_BYTES )) {
239
+ if (areAligned3 ((uintptr_t )(src ), (uintptr_t )(dstRe ), (uintptr_t )(dstIm ), ALTIVEC_LEN_BYTES )) {
240
240
for (int i = 0 ; i < stop_len ; i += 2 * ALTIVEC_LEN_FLOAT ) {
241
241
v4sf vec1 = vec_ld (0 , src + i );
242
242
v4sf vec2 = vec_ld (0 , src + i + ALTIVEC_LEN_FLOAT );
@@ -247,9 +247,9 @@ static inline void cplxtoreal128f(float *src, float *dstRe, float *dstIm, int le
247
247
j += ALTIVEC_LEN_FLOAT ;
248
248
}
249
249
} else {
250
- int unalign_src = (uintptr_t ) (src ) % ALTIVEC_LEN_BYTES ;
251
- int unalign_dstRe = (uintptr_t ) (dstRe ) % ALTIVEC_LEN_BYTES ;
252
- int unalign_dstIm = (uintptr_t ) (dstIm ) % ALTIVEC_LEN_BYTES ;
250
+ int unalign_src = (uintptr_t )(src ) % ALTIVEC_LEN_BYTES ;
251
+ int unalign_dstRe = (uintptr_t )(dstRe ) % ALTIVEC_LEN_BYTES ;
252
+ int unalign_dstIm = (uintptr_t )(dstIm ) % ALTIVEC_LEN_BYTES ;
253
253
254
254
for (int i = 0 ; i < stop_len ; i += 2 * ALTIVEC_LEN_FLOAT ) {
255
255
v4sf vec1 , vec2 ;
@@ -292,14 +292,14 @@ static inline void log2_128f(float *src, float *dst, int len)
292
292
int stop_len = len / ALTIVEC_LEN_FLOAT ;
293
293
stop_len *= ALTIVEC_LEN_FLOAT ;
294
294
295
- if (areAligned2 ((uintptr_t ) (src ), (uintptr_t ) (dst ), ALTIVEC_LEN_BYTES )) {
295
+ if (areAligned2 ((uintptr_t )(src ), (uintptr_t )(dst ), ALTIVEC_LEN_BYTES )) {
296
296
for (int i = 0 ; i < stop_len ; i += ALTIVEC_LEN_FLOAT ) {
297
297
v4sf a = vec_ld (0 , src + i );
298
298
vec_st (vec_loge (a ), 0 , dst + i );
299
299
}
300
300
} else {
301
- int unalign_src = (uintptr_t ) (src ) % ALTIVEC_LEN_BYTES ;
302
- int unalign_dst = (uintptr_t ) (dst ) % ALTIVEC_LEN_BYTES ;
301
+ int unalign_src = (uintptr_t )(src ) % ALTIVEC_LEN_BYTES ;
302
+ int unalign_dst = (uintptr_t )(dst ) % ALTIVEC_LEN_BYTES ;
303
303
304
304
for (int i = 0 ; i < stop_len ; i += ALTIVEC_LEN_FLOAT ) {
305
305
v4sf a ;
@@ -330,14 +330,14 @@ static inline void ln_128f(float *src, float *dst, int len)
330
330
331
331
const v4sf ln2_vec = {LN2 , LN2 , LN2 , LN2 };
332
332
333
- if (areAligned2 ((uintptr_t ) (src ), (uintptr_t ) (dst ), ALTIVEC_LEN_BYTES )) {
333
+ if (areAligned2 ((uintptr_t )(src ), (uintptr_t )(dst ), ALTIVEC_LEN_BYTES )) {
334
334
for (int i = 0 ; i < stop_len ; i += ALTIVEC_LEN_FLOAT ) {
335
335
v4sf a = vec_ld (0 , src + i );
336
336
vec_st (vec_mul (vec_loge (a ), ln2_vec ), 0 , dst + i );
337
337
}
338
338
} else {
339
- int unalign_src = (uintptr_t ) (src ) % ALTIVEC_LEN_BYTES ;
340
- int unalign_dst = (uintptr_t ) (dst ) % ALTIVEC_LEN_BYTES ;
339
+ int unalign_src = (uintptr_t )(src ) % ALTIVEC_LEN_BYTES ;
340
+ int unalign_dst = (uintptr_t )(dst ) % ALTIVEC_LEN_BYTES ;
341
341
342
342
for (int i = 0 ; i < stop_len ; i += ALTIVEC_LEN_FLOAT ) {
343
343
v4sf a ;
@@ -368,14 +368,14 @@ static inline void log10_128f(float *src, float *dst, int len)
368
368
369
369
const v4sf ln2_ln10_vec = {LN2_DIV_LN10 , LN2_DIV_LN10 , LN2_DIV_LN10 , LN2_DIV_LN10 };
370
370
371
- if (areAligned2 ((uintptr_t ) (src ), (uintptr_t ) (dst ), ALTIVEC_LEN_BYTES )) {
371
+ if (areAligned2 ((uintptr_t )(src ), (uintptr_t )(dst ), ALTIVEC_LEN_BYTES )) {
372
372
for (int i = 0 ; i < stop_len ; i += ALTIVEC_LEN_FLOAT ) {
373
373
v4sf a = vec_ld (0 , src + i );
374
374
vec_st (vec_mul (vec_loge (a ), ln2_ln10_vec ), 0 , dst + i );
375
375
}
376
376
} else {
377
- int unalign_src = (uintptr_t ) (src ) % ALTIVEC_LEN_BYTES ;
378
- int unalign_dst = (uintptr_t ) (dst ) % ALTIVEC_LEN_BYTES ;
377
+ int unalign_src = (uintptr_t )(src ) % ALTIVEC_LEN_BYTES ;
378
+ int unalign_dst = (uintptr_t )(dst ) % ALTIVEC_LEN_BYTES ;
379
379
380
380
for (int i = 0 ; i < stop_len ; i += ALTIVEC_LEN_FLOAT ) {
381
381
v4sf a ;
@@ -404,7 +404,7 @@ static inline void magnitude128f_split(float *srcRe, float *srcIm, float *dst, i
404
404
int stop_len = len / ALTIVEC_LEN_FLOAT ;
405
405
stop_len *= ALTIVEC_LEN_FLOAT ;
406
406
407
- if (areAligned3 ((uintptr_t ) (srcRe ), (uintptr_t ) (srcIm ), (uintptr_t ) (dst ), ALTIVEC_LEN_BYTES )) {
407
+ if (areAligned3 ((uintptr_t )(srcRe ), (uintptr_t )(srcIm ), (uintptr_t )(dst ), ALTIVEC_LEN_BYTES )) {
408
408
for (int i = 0 ; i < stop_len ; i += ALTIVEC_LEN_FLOAT ) {
409
409
v4sf re_tmp = vec_ld (0 , srcRe + i );
410
410
v4sf re2 = vec_mul (re_tmp , re_tmp );
@@ -413,9 +413,9 @@ static inline void magnitude128f_split(float *srcRe, float *srcIm, float *dst, i
413
413
vec_st (vec_sqrt (vec_add (re2 , im2 )), 0 , dst + i );
414
414
}
415
415
} else {
416
- int unalign_srcRe = (uintptr_t ) (srcRe ) % ALTIVEC_LEN_BYTES ;
417
- int unalign_srcIm = (uintptr_t ) (srcRe ) % ALTIVEC_LEN_BYTES ;
418
- int unalign_dst = (uintptr_t ) (dst ) % ALTIVEC_LEN_BYTES ;
416
+ int unalign_srcRe = (uintptr_t )(srcRe ) % ALTIVEC_LEN_BYTES ;
417
+ int unalign_srcIm = (uintptr_t )(srcIm ) % ALTIVEC_LEN_BYTES ; // byte misalignment of srcIm itself (was mistakenly derived from srcRe)
418
+ int unalign_dst = (uintptr_t )(dst ) % ALTIVEC_LEN_BYTES ;
419
419
420
420
for (int i = 0 ; i < stop_len ; i += ALTIVEC_LEN_FLOAT ) {
421
421
v4sf re_tmp , re2 , im_tmp , im2 , res ;
0 commit comments