@@ -25,19 +25,28 @@ namespace kernel
2525 // Utils
2626
static const int THREADS = 256;
#define PI_VAL 3.1415926535897932384626433832795028841971693993751058209749445923078164

// Conversion to floats adapted from Random123.
// UINTMAX + 1.0f is evaluated in single precision, where it rounds to 2^32,
// so FLT_FACTOR is exactly 2^-32 and HALF_FLT_FACTOR is exactly 2^-33.
#define UINTMAX 0xffffffff
#define FLT_FACTOR ((1.0f)/(UINTMAX + (1.0f)))
#define HALF_FLT_FACTOR ((0.5f)*FLT_FACTOR)

// 64-bit counterparts: UINTLMAX + 1.0 rounds to 2^64 in double precision,
// so DBL_FACTOR is exactly 2^-64 and HALF_DBL_FACTOR is exactly 2^-65.
#define UINTLMAX 0xffffffffffffffff
#define DBL_FACTOR ((1.0)/(UINTLMAX + (1.0)))
#define HALF_DBL_FACTOR ((0.5)*DBL_FACTOR)
38+
// Generates rationals in (0, 1].
// Computes num * FLT_FACTOR + HALF_FLT_FACTOR in single precision; the
// half-step offset keeps the result strictly positive even for num == 0,
// while the largest inputs round up to exactly 1.0f.
__device__ static float getFloat(const uint &num)
{
    // Explicit conversion: the integer is turned into a float before scaling.
    return (static_cast<float>(num)*FLT_FACTOR + HALF_FLT_FACTOR);
}
3644
// Generates rationals in (0, 1].
// Combines two 32-bit words into one 64-bit integer (num1 supplies the high
// half, num2 the low half) and maps it to num * DBL_FACTOR + HALF_DBL_FACTOR
// in double precision.
__device__ static double getDouble(const uint &num1, const uint &num2)
{
    const uintl num = (static_cast<uintl>(num1) << 32) | static_cast<uintl>(num2);
    return (static_cast<double>(num)*DBL_FACTOR + HALF_DBL_FACTOR);
}
4251
4352 template <typename T>
@@ -150,33 +159,33 @@ namespace kernel
// Stores four floats derived from r1..r4 into out, starting at index and
// spaced blockDim.x elements apart. No bounds checking is performed.
// Each stored value is 1 - getFloat(r), which mirrors getFloat's (0, 1]
// output so that 0 becomes reachable.
// NOTE(review): for the smallest getFloat results the subtraction rounds to
// exactly 1.0f, so the stored range appears to be [0, 1] rather than [0, 1);
// confirm whether callers rely on a strictly half-open interval.
__device__ static void writeOut128Bytes(float *out, const uint &index,
        const uint &r1, const uint &r2, const uint &r3, const uint &r4)
{
    out[index]                = 1.0f - getFloat(r1);
    out[index +   blockDim.x] = 1.0f - getFloat(r2);
    out[index + 2*blockDim.x] = 1.0f - getFloat(r3);
    out[index + 3*blockDim.x] = 1.0f - getFloat(r4);
}
158167
// Stores two complex floats into out at index and index + blockDim.x.
// r1/r2 feed the .x/.y members of the first element, r3/r4 those of the
// second. No bounds checking is performed.
// Each component is 1 - getFloat(r), mirroring getFloat's (0, 1] output.
// NOTE(review): the subtraction can round to exactly 1.0f for the smallest
// getFloat results; confirm whether a strictly half-open [0, 1) is required.
__device__ static void writeOut128Bytes(cfloat *out, const uint &index,
        const uint &r1, const uint &r2, const uint &r3, const uint &r4)
{
    out[index].x              = 1.0f - getFloat(r1);
    out[index].y              = 1.0f - getFloat(r2);
    out[index + blockDim.x].x = 1.0f - getFloat(r3);
    out[index + blockDim.x].y = 1.0f - getFloat(r4);
}
167176
// Stores two doubles into out at index and index + blockDim.x.
// Each double consumes a pair of 32-bit words: (r1, r2) and (r3, r4).
// No bounds checking is performed.
// Each value is 1 - getDouble(...), mirroring getDouble's (0, 1] output.
// NOTE(review): the subtraction can round to exactly 1.0 for the smallest
// getDouble results; confirm whether a strictly half-open [0, 1) is required.
__device__ static void writeOut128Bytes(double *out, const uint &index,
        const uint &r1, const uint &r2, const uint &r3, const uint &r4)
{
    out[index]              = 1.0 - getDouble(r1, r2);
    out[index + blockDim.x] = 1.0 - getDouble(r3, r4);
}
174183
// Stores one complex double into out[index].
// (r1, r2) produce the .x member and (r3, r4) the .y member.
// No bounds checking is performed.
// Each component is 1 - getDouble(...), mirroring getDouble's (0, 1] output.
// NOTE(review): the subtraction can round to exactly 1.0 for the smallest
// getDouble results; confirm whether a strictly half-open [0, 1) is required.
__device__ static void writeOut128Bytes(cdouble *out, const uint &index,
        const uint &r1, const uint &r2, const uint &r3, const uint &r4)
{
    out[index].x = 1.0 - getDouble(r1, r2);
    out[index].y = 1.0 - getDouble(r3, r4);
}
181190
182191 // Normalized writes without boundary checking
@@ -305,38 +314,38 @@ namespace kernel
// Bounds-checked variant of the float write: stores up to four values at
// index, index + blockDim.x, index + 2*blockDim.x and index + 3*blockDim.x,
// skipping every slot whose position is not below elements.
// Each stored value is 1 - getFloat(r), mirroring getFloat's (0, 1] output.
// NOTE(review): the subtraction can round to exactly 1.0f for the smallest
// getFloat results; confirm whether a strictly half-open [0, 1) is required.
__device__ static void partialWriteOut128Bytes(float *out, const uint &index,
        const uint &r1, const uint &r2, const uint &r3, const uint &r4, const uint &elements)
{
    if (index < elements) {
        out[index] = 1.0f - getFloat(r1);
    }
    if (index + blockDim.x < elements) {
        out[index + blockDim.x] = 1.0f - getFloat(r2);
    }
    if (index + 2*blockDim.x < elements) {
        out[index + 2*blockDim.x] = 1.0f - getFloat(r3);
    }
    if (index + 3*blockDim.x < elements) {
        out[index + 3*blockDim.x] = 1.0f - getFloat(r4);
    }
}
313322
// Bounds-checked variant of the complex-float write: stores up to two
// elements, at index and index + blockDim.x, skipping any whose position is
// not below elements. r1/r2 fill .x/.y of the first element and r3/r4 those
// of the second.
// Each component is 1 - getFloat(r), mirroring getFloat's (0, 1] output.
// NOTE(review): the subtraction can round to exactly 1.0f for the smallest
// getFloat results; confirm whether a strictly half-open [0, 1) is required.
__device__ static void partialWriteOut128Bytes(cfloat *out, const uint &index,
        const uint &r1, const uint &r2, const uint &r3, const uint &r4, const uint &elements)
{
    if (index < elements) {
        out[index].x = 1.0f - getFloat(r1);
        out[index].y = 1.0f - getFloat(r2);
    }
    if (index + blockDim.x < elements) {
        out[index + blockDim.x].x = 1.0f - getFloat(r3);
        out[index + blockDim.x].y = 1.0f - getFloat(r4);
    }
}
326335
// Bounds-checked variant of the double write: stores up to two values, at
// index and index + blockDim.x, skipping any whose position is not below
// elements. (r1, r2) feed the first value and (r3, r4) the second.
// Each value is 1 - getDouble(...), mirroring getDouble's (0, 1] output.
// NOTE(review): the subtraction can round to exactly 1.0 for the smallest
// getDouble results; confirm whether a strictly half-open [0, 1) is required.
__device__ static void partialWriteOut128Bytes(double *out, const uint &index,
        const uint &r1, const uint &r2, const uint &r3, const uint &r4, const uint &elements)
{
    if (index < elements) {
        out[index] = 1.0 - getDouble(r1, r2);
    }
    if (index + blockDim.x < elements) {
        out[index + blockDim.x] = 1.0 - getDouble(r3, r4);
    }
}
333342
// Bounds-checked variant of the complex-double write: stores one element at
// out[index] only when index is below elements. (r1, r2) produce the .x
// member and (r3, r4) the .y member.
// Each component is 1 - getDouble(...), mirroring getDouble's (0, 1] output.
// NOTE(review): the subtraction can round to exactly 1.0 for the smallest
// getDouble results; confirm whether a strictly half-open [0, 1) is required.
__device__ static void partialWriteOut128Bytes(cdouble *out, const uint &index,
        const uint &r1, const uint &r2, const uint &r3, const uint &r4, const uint &elements)
{
    if (index < elements) {
        out[index].x = 1.0 - getDouble(r1, r2);
        out[index].y = 1.0 - getDouble(r3, r4);
    }
}
342351
0 commit comments