Skip to content

Commit 2122f9d

Browse files
committed
minor corrections
1 parent 9665e5f commit 2122f9d

14 files changed

+614
-590
lines changed

simd_utils.h

+16-16
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,7 @@ static inline int isAligned(uintptr_t ptr, size_t alignment)
3535
#ifndef ALWAYS_ALIGNED
3636

3737
#ifndef ARM // ARM manages disalignment in hardware
38-
if (((uintptr_t) (ptr) % alignment) == 0)
38+
if (((uintptr_t)(ptr) % alignment) == 0)
3939
return 1;
4040
return 0;
4141
#else
@@ -52,8 +52,8 @@ static inline int areAligned2(uintptr_t ptr1, uintptr_t ptr2, size_t alignment)
5252
#ifndef ALWAYS_ALIGNED
5353

5454
#ifndef ARM // ARM manages disalignment in hardware
55-
if (((uintptr_t) (ptr1) % alignment) == 0)
56-
if (((uintptr_t) (ptr2) % alignment) == 0)
55+
if (((uintptr_t)(ptr1) % alignment) == 0)
56+
if (((uintptr_t)(ptr2) % alignment) == 0)
5757
return 1;
5858
return 0;
5959
#else
@@ -70,9 +70,9 @@ static inline int areAligned3(uintptr_t ptr1, uintptr_t ptr2, uintptr_t ptr3, si
7070
#ifndef ALWAYS_ALIGNED
7171

7272
#ifndef ARM // ARM manages disalignment in hardware
73-
if (((uintptr_t) (ptr1) % alignment) == 0)
74-
if (((uintptr_t) (ptr2) % alignment) == 0)
75-
if (((uintptr_t) (ptr3) % alignment) == 0)
73+
if (((uintptr_t)(ptr1) % alignment) == 0)
74+
if (((uintptr_t)(ptr2) % alignment) == 0)
75+
if (((uintptr_t)(ptr3) % alignment) == 0)
7676
return 1;
7777
return 0;
7878
#else
@@ -496,7 +496,7 @@ static inline int posix_memalign(void **pointer, size_t len, int alignement)
496496
void *p, *p0 = malloc(len + alignement);
497497
if (!p0)
498498
return (void *) NULL;
499-
p = (void *) (((size_t) p0 + alignement) & (~((size_t) (alignement - 1))));
499+
p = (void *) (((size_t) p0 + alignement) & (~((size_t)(alignement - 1))));
500500
*((void **) p - 1) = p0;
501501

502502
*pointer = p;
@@ -508,7 +508,7 @@ static inline void *aligned_malloc(size_t len, int alignement)
508508
void *p, *p0 = malloc(len + alignement);
509509
if (!p0)
510510
return (void *) NULL;
511-
p = (void *) (((size_t) p0 + alignement) & (~((size_t) (alignement - 1))));
511+
p = (void *) (((size_t) p0 + alignement) & (~((size_t)(alignement - 1))));
512512
*((void **) p - 1) = p0;
513513
return p;
514514
}
@@ -763,31 +763,31 @@ static inline void convertFloat32ToU8_C(float *src, uint8_t *dst, int len, int r
763763
#endif
764764
for (int i = 0; i < len; i++) {
765765
float tmp = floorf(src[i] * scale_fact_mult);
766-
dst[i] = (uint8_t) (tmp > 255.0f ? 255.0f : tmp);
766+
dst[i] = (uint8_t)(tmp > 255.0f ? 255.0f : tmp);
767767
}
768768
} else if (rounding_mode == RndNear) {
769769
#ifdef OMP
770770
#pragma omp simd
771771
#endif
772772
for (int i = 0; i < len; i++) {
773773
float tmp = roundf(src[i] * scale_fact_mult);
774-
dst[i] = (uint8_t) (tmp > 255.0f ? 255.0f : tmp);
774+
dst[i] = (uint8_t)(tmp > 255.0f ? 255.0f : tmp);
775775
}
776776
} else if (rounding_mode == RndFinancial) {
777777
#ifdef OMP
778778
#pragma omp simd
779779
#endif
780780
for (int i = 0; i < len; i++) {
781781
float tmp = (roundf(src[i] * scale_fact_mult * 0.5f) / 2.0f);
782-
dst[i] = (uint8_t) (tmp > 255.0f ? 255.0f : tmp);
782+
dst[i] = (uint8_t)(tmp > 255.0f ? 255.0f : tmp);
783783
}
784784
} else {
785785
#ifdef OMP
786786
#pragma omp simd
787787
#endif
788788
for (int i = 0; i < len; i++) {
789789
float tmp = src[i] * scale_fact_mult;
790-
dst[i] = (uint8_t) (tmp > 255.0f ? 255.0f : tmp);
790+
dst[i] = (uint8_t)(tmp > 255.0f ? 255.0f : tmp);
791791
}
792792
}
793793
}
@@ -804,7 +804,7 @@ static inline void convertFloat32ToI16_C(float *src, int16_t *dst, int len, int
804804
#endif
805805
for (int i = 0; i < len; i++) {
806806
float tmp = (roundf(src[i] * scale_fact_mult * 0.5f) / 2.0f);
807-
dst[i] = (int16_t) (tmp > 32767.0f ? 32767.0f : tmp); // round to nearest even with round(x/2)*2
807+
dst[i] = (int16_t)(tmp > 32767.0f ? 32767.0f : tmp); // round to nearest even with round(x/2)*2
808808
}
809809
} else {
810810
if (rounding_mode == RndZero) {
@@ -819,7 +819,7 @@ static inline void convertFloat32ToI16_C(float *src, int16_t *dst, int len, int
819819
#endif
820820
for (int i = 0; i < len; i++) {
821821
float tmp = nearbyintf(src[i] * scale_fact_mult);
822-
dst[i] = (int16_t) (tmp > 32767.0f ? 32767.0f : tmp);
822+
dst[i] = (int16_t)(tmp > 32767.0f ? 32767.0f : tmp);
823823
}
824824
}
825825
}
@@ -835,7 +835,7 @@ static inline void convertFloat32ToU16_C(float *src, uint16_t *dst, int len, int
835835
#endif
836836
for (int i = 0; i < len; i++) {
837837
float tmp = (roundf(src[i] * scale_fact_mult * 0.5f) / 2.0f);
838-
dst[i] = (uint16_t) (tmp > 65535.0f ? 65535.0f : tmp); // round to nearest even with round(x/2)*2
838+
dst[i] = (uint16_t)(tmp > 65535.0f ? 65535.0f : tmp); // round to nearest even with round(x/2)*2
839839
}
840840
} else {
841841
if (rounding_mode == RndZero) {
@@ -850,7 +850,7 @@ static inline void convertFloat32ToU16_C(float *src, uint16_t *dst, int len, int
850850
#endif
851851
for (int i = 0; i < len; i++) {
852852
float tmp = nearbyintf(src[i] * scale_fact_mult);
853-
dst[i] = (uint16_t) (tmp > 65535.0f ? 65535.0f : tmp);
853+
dst[i] = (uint16_t)(tmp > 65535.0f ? 65535.0f : tmp);
854854
}
855855
}
856856
}

simd_utils_altivec_float.h

+29-29
Original file line number | Diff line number | Diff line change
@@ -107,14 +107,14 @@ static inline void set128f(float *dst, float value, int len)
107107
int stop_len = len / ALTIVEC_LEN_FLOAT;
108108
stop_len *= ALTIVEC_LEN_FLOAT;
109109

110-
if (isAligned((uintptr_t) (dst), ALTIVEC_LEN_BYTES)) {
110+
if (isAligned((uintptr_t)(dst), ALTIVEC_LEN_BYTES)) {
111111
for (int i = 0; i < stop_len; i += ALTIVEC_LEN_FLOAT) {
112112
vec_st(tmp, 0, dst + i);
113113
}
114114
} else {
115-
int unaligned_float = (uintptr_t) (dst) % ALTIVEC_LEN_FLOAT; // could this happen though?
116-
if (unaligned_float == 0) { // dst is not aligned on 16bytes boundary but is at least aligned on float
117-
int unaligned_elts = ((uintptr_t) (dst) % ALTIVEC_LEN_BYTES) / sizeof(float);
115+
int unaligned_float = (uintptr_t)(dst) % ALTIVEC_LEN_FLOAT; // could this happen though?
116+
if (unaligned_float == 0) { // dst is not aligned on 16bytes boundary but is at least aligned on float
117+
int unaligned_elts = ((uintptr_t)(dst) % ALTIVEC_LEN_BYTES) / sizeof(float);
118118
for (int i = 0; i < unaligned_elts; i++) {
119119
dst[i] = value;
120120
}
@@ -141,16 +141,16 @@ static inline void mul128f(float *src1, float *src2, float *dst, int len)
141141
int stop_len = len / ALTIVEC_LEN_FLOAT;
142142
stop_len *= ALTIVEC_LEN_FLOAT;
143143

144-
if (areAligned3((uintptr_t) (src1), (uintptr_t) (src2), (uintptr_t) (dst), ALTIVEC_LEN_BYTES)) {
144+
if (areAligned3((uintptr_t)(src1), (uintptr_t)(src2), (uintptr_t)(dst), ALTIVEC_LEN_BYTES)) {
145145
for (int i = 0; i < stop_len; i += ALTIVEC_LEN_FLOAT) {
146146
v4sf a = vec_ld(0, src1 + i);
147147
v4sf b = vec_ld(0, src2 + i);
148148
vec_st(vec_mul(a, b), 0, dst + i);
149149
}
150150
} else {
151-
int unalign_src1 = (uintptr_t) (src1) % ALTIVEC_LEN_BYTES;
152-
int unalign_src2 = (uintptr_t) (src2) % ALTIVEC_LEN_BYTES;
153-
int unalign_dst = (uintptr_t) (dst) % ALTIVEC_LEN_BYTES;
151+
int unalign_src1 = (uintptr_t)(src1) % ALTIVEC_LEN_BYTES;
152+
int unalign_src2 = (uintptr_t)(src2) % ALTIVEC_LEN_BYTES;
153+
int unalign_dst = (uintptr_t)(dst) % ALTIVEC_LEN_BYTES;
154154

155155
/*To be improved : we constantly use unaligned load or store of those data
156156
There exist better unaligned stream load or store which could improve performance
@@ -188,16 +188,16 @@ static inline void minevery128f(float *src1, float *src2, float *dst, int len)
188188
int stop_len = len / ALTIVEC_LEN_FLOAT;
189189
stop_len *= ALTIVEC_LEN_FLOAT;
190190

191-
if (areAligned3((uintptr_t) (src1), (uintptr_t) (src2), (uintptr_t) (dst), ALTIVEC_LEN_BYTES)) {
191+
if (areAligned3((uintptr_t)(src1), (uintptr_t)(src2), (uintptr_t)(dst), ALTIVEC_LEN_BYTES)) {
192192
for (int i = 0; i < stop_len; i += ALTIVEC_LEN_FLOAT) {
193193
v4sf a = vec_ld(0, src1 + i);
194194
v4sf b = vec_ld(0, src2 + i);
195195
vec_st(vec_min(a, b), 0, dst + i);
196196
}
197197
} else {
198-
int unalign_src1 = (uintptr_t) (src1) % ALTIVEC_LEN_BYTES;
199-
int unalign_src2 = (uintptr_t) (src2) % ALTIVEC_LEN_BYTES;
200-
int unalign_dst = (uintptr_t) (dst) % ALTIVEC_LEN_BYTES;
198+
int unalign_src1 = (uintptr_t)(src1) % ALTIVEC_LEN_BYTES;
199+
int unalign_src2 = (uintptr_t)(src2) % ALTIVEC_LEN_BYTES;
200+
int unalign_dst = (uintptr_t)(dst) % ALTIVEC_LEN_BYTES;
201201

202202
for (int i = 0; i < stop_len; i += ALTIVEC_LEN_FLOAT) {
203203
v4sf a, b;
@@ -236,7 +236,7 @@ static inline void cplxtoreal128f(float *src, float *dstRe, float *dstIm, int le
236236
const v16u8 re_mask = {0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27};
237237
const v16u8 im_mask = {4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31};
238238

239-
if (areAligned3((uintptr_t) (src), (uintptr_t) (dstRe), (uintptr_t) (dstIm), ALTIVEC_LEN_BYTES)) {
239+
if (areAligned3((uintptr_t)(src), (uintptr_t)(dstRe), (uintptr_t)(dstIm), ALTIVEC_LEN_BYTES)) {
240240
for (int i = 0; i < stop_len; i += 2 * ALTIVEC_LEN_FLOAT) {
241241
v4sf vec1 = vec_ld(0, src + i);
242242
v4sf vec2 = vec_ld(0, src + i + ALTIVEC_LEN_FLOAT);
@@ -247,9 +247,9 @@ static inline void cplxtoreal128f(float *src, float *dstRe, float *dstIm, int le
247247
j += ALTIVEC_LEN_FLOAT;
248248
}
249249
} else {
250-
int unalign_src = (uintptr_t) (src) % ALTIVEC_LEN_BYTES;
251-
int unalign_dstRe = (uintptr_t) (dstRe) % ALTIVEC_LEN_BYTES;
252-
int unalign_dstIm = (uintptr_t) (dstIm) % ALTIVEC_LEN_BYTES;
250+
int unalign_src = (uintptr_t)(src) % ALTIVEC_LEN_BYTES;
251+
int unalign_dstRe = (uintptr_t)(dstRe) % ALTIVEC_LEN_BYTES;
252+
int unalign_dstIm = (uintptr_t)(dstIm) % ALTIVEC_LEN_BYTES;
253253

254254
for (int i = 0; i < stop_len; i += 2 * ALTIVEC_LEN_FLOAT) {
255255
v4sf vec1, vec2;
@@ -292,14 +292,14 @@ static inline void log2_128f(float *src, float *dst, int len)
292292
int stop_len = len / ALTIVEC_LEN_FLOAT;
293293
stop_len *= ALTIVEC_LEN_FLOAT;
294294

295-
if (areAligned2((uintptr_t) (src), (uintptr_t) (dst), ALTIVEC_LEN_BYTES)) {
295+
if (areAligned2((uintptr_t)(src), (uintptr_t)(dst), ALTIVEC_LEN_BYTES)) {
296296
for (int i = 0; i < stop_len; i += ALTIVEC_LEN_FLOAT) {
297297
v4sf a = vec_ld(0, src + i);
298298
vec_st(vec_loge(a), 0, dst + i);
299299
}
300300
} else {
301-
int unalign_src = (uintptr_t) (src) % ALTIVEC_LEN_BYTES;
302-
int unalign_dst = (uintptr_t) (dst) % ALTIVEC_LEN_BYTES;
301+
int unalign_src = (uintptr_t)(src) % ALTIVEC_LEN_BYTES;
302+
int unalign_dst = (uintptr_t)(dst) % ALTIVEC_LEN_BYTES;
303303

304304
for (int i = 0; i < stop_len; i += ALTIVEC_LEN_FLOAT) {
305305
v4sf a;
@@ -330,14 +330,14 @@ static inline void ln_128f(float *src, float *dst, int len)
330330

331331
const v4sf ln2_vec = {LN2, LN2, LN2, LN2};
332332

333-
if (areAligned2((uintptr_t) (src), (uintptr_t) (dst), ALTIVEC_LEN_BYTES)) {
333+
if (areAligned2((uintptr_t)(src), (uintptr_t)(dst), ALTIVEC_LEN_BYTES)) {
334334
for (int i = 0; i < stop_len; i += ALTIVEC_LEN_FLOAT) {
335335
v4sf a = vec_ld(0, src + i);
336336
vec_st(vec_mul(vec_loge(a), ln2_vec), 0, dst + i);
337337
}
338338
} else {
339-
int unalign_src = (uintptr_t) (src) % ALTIVEC_LEN_BYTES;
340-
int unalign_dst = (uintptr_t) (dst) % ALTIVEC_LEN_BYTES;
339+
int unalign_src = (uintptr_t)(src) % ALTIVEC_LEN_BYTES;
340+
int unalign_dst = (uintptr_t)(dst) % ALTIVEC_LEN_BYTES;
341341

342342
for (int i = 0; i < stop_len; i += ALTIVEC_LEN_FLOAT) {
343343
v4sf a;
@@ -368,14 +368,14 @@ static inline void log10_128f(float *src, float *dst, int len)
368368

369369
const v4sf ln2_ln10_vec = {LN2_DIV_LN10, LN2_DIV_LN10, LN2_DIV_LN10, LN2_DIV_LN10};
370370

371-
if (areAligned2((uintptr_t) (src), (uintptr_t) (dst), ALTIVEC_LEN_BYTES)) {
371+
if (areAligned2((uintptr_t)(src), (uintptr_t)(dst), ALTIVEC_LEN_BYTES)) {
372372
for (int i = 0; i < stop_len; i += ALTIVEC_LEN_FLOAT) {
373373
v4sf a = vec_ld(0, src + i);
374374
vec_st(vec_mul(vec_loge(a), ln2_ln10_vec), 0, dst + i);
375375
}
376376
} else {
377-
int unalign_src = (uintptr_t) (src) % ALTIVEC_LEN_BYTES;
378-
int unalign_dst = (uintptr_t) (dst) % ALTIVEC_LEN_BYTES;
377+
int unalign_src = (uintptr_t)(src) % ALTIVEC_LEN_BYTES;
378+
int unalign_dst = (uintptr_t)(dst) % ALTIVEC_LEN_BYTES;
379379

380380
for (int i = 0; i < stop_len; i += ALTIVEC_LEN_FLOAT) {
381381
v4sf a;
@@ -404,7 +404,7 @@ static inline void magnitude128f_split(float *srcRe, float *srcIm, float *dst, i
404404
int stop_len = len / ALTIVEC_LEN_FLOAT;
405405
stop_len *= ALTIVEC_LEN_FLOAT;
406406

407-
if (areAligned3((uintptr_t) (srcRe), (uintptr_t) (srcIm), (uintptr_t) (dst), ALTIVEC_LEN_BYTES)) {
407+
if (areAligned3((uintptr_t)(srcRe), (uintptr_t)(srcIm), (uintptr_t)(dst), ALTIVEC_LEN_BYTES)) {
408408
for (int i = 0; i < stop_len; i += ALTIVEC_LEN_FLOAT) {
409409
v4sf re_tmp = vec_ld(0, srcRe + i);
410410
v4sf re2 = vec_mul(re_tmp, re_tmp);
@@ -413,9 +413,9 @@ static inline void magnitude128f_split(float *srcRe, float *srcIm, float *dst, i
413413
vec_st(vec_sqrt(vec_add(re2, im2)), 0, dst + i);
414414
}
415415
} else {
416-
int unalign_srcRe = (uintptr_t) (srcRe) % ALTIVEC_LEN_BYTES;
417-
int unalign_srcIm = (uintptr_t) (srcRe) % ALTIVEC_LEN_BYTES;
418-
int unalign_dst = (uintptr_t) (dst) % ALTIVEC_LEN_BYTES;
416+
int unalign_srcRe = (uintptr_t)(srcRe) % ALTIVEC_LEN_BYTES;
417+
int unalign_srcIm = (uintptr_t)(srcRe) % ALTIVEC_LEN_BYTES;
418+
int unalign_dst = (uintptr_t)(dst) % ALTIVEC_LEN_BYTES;
419419

420420
for (int i = 0; i < stop_len; i += ALTIVEC_LEN_FLOAT) {
421421
v4sf re_tmp, re2, im_tmp, im2, res;

0 commit comments

Comments
 (0)