Skip to content

Commit e59eb93

Browse files
Kumar Aatishumar456
authored andcommitted
Fixed randu and randn ranges for floating types
Randu now generates random values in range [0, 1) for floating types. Randn avoids Infs by making sure the Box-Muller transform is not given 0 as an input.
1 parent eec4e70 commit e59eb93

File tree

3 files changed

+79
-50
lines changed

3 files changed

+79
-50
lines changed

src/backend/cpu/kernel/random_engine.hpp

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,17 @@ namespace cpu
2020
namespace kernel
2121
{
2222
//Utils
23-
static const float UINTMAXFLOAT = 4294967296.0f;
24-
static const float UINTLMAXDOUBLE = (4294967296.0*4294967296.0);
2523
static const double PI_VAL = 3.1415926535897932384626433832795028841971693993751058209749445923078164;
2624

25+
//Conversion to floats adapted from Random123
26+
#define UINTMAX 0xffffffff
27+
#define FLT_FACTOR ((1.0f)/(UINTMAX + (1.0f)))
28+
#define HALF_FLT_FACTOR ((0.5f)*FLT_FACTOR)
29+
30+
#define UINTLMAX 0xffffffffffffffff
31+
#define DBL_FACTOR ((1.0)/(UINTLMAX + (1.0)))
32+
#define HALF_DBL_FACTOR ((0.5)*DBL_FACTOR)
33+
2734
template <typename T>
2835
T transform(uint *val, int index)
2936
{
@@ -76,15 +83,17 @@ namespace kernel
7683
return transform<uintl>(val, index);
7784
}
7885

86+
//Generates rationals in [0, 1)
7987
template <> float transform<float>(uint *val, int index)
8088
{
81-
return (float)val[index]/UINTMAXFLOAT;
89+
return 1.f - (val[index]*FLT_FACTOR + HALF_FLT_FACTOR);
8290
}
8391

92+
//Generates rationals in [0, 1)
8493
template <> double transform<double>(uint *val, int index)
8594
{
8695
uintl v = transform<uintl>(val, index);
87-
return (double)v/UINTLMAXDOUBLE;
96+
return 1.0 - (v*DBL_FACTOR + HALF_DBL_FACTOR);
8897
}
8998

9099
template <typename T>
@@ -131,8 +140,8 @@ namespace kernel
131140
/*
132141
* The log of a real value x where 0 < x < 1 is negative.
133142
*/
134-
T r = sqrt((T)(-2.0) * log(r1));
135-
T theta = 2 * (T)PI_VAL * (r2);
143+
T r = sqrt((T)(-2.0) * log((T)(1.0) - r1));
144+
T theta = 2 * (T)PI_VAL * ((T)(1.0) - r2);
136145
*out1 = r*sin(theta);
137146
*out2 = r*cos(theta);
138147
}

src/backend/cuda/kernel/random_engine.hpp

Lines changed: 37 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -25,19 +25,28 @@ namespace kernel
2525
//Utils
2626

2727
static const int THREADS = 256;
28-
#define UINTMAXFLOAT 4294967296.0f
29-
#define UINTLMAXDOUBLE (4294967296.0*4294967296.0)
3028
#define PI_VAL 3.1415926535897932384626433832795028841971693993751058209749445923078164
3129

30+
//Conversion to floats adapted from Random123
31+
#define UINTMAX 0xffffffff
32+
#define FLT_FACTOR ((1.0f)/(UINTMAX + (1.0f)))
33+
#define HALF_FLT_FACTOR ((0.5f)*FLT_FACTOR)
34+
35+
#define UINTLMAX 0xffffffffffffffff
36+
#define DBL_FACTOR ((1.0)/(UINTLMAX + (1.0)))
37+
#define HALF_DBL_FACTOR ((0.5)*DBL_FACTOR)
38+
39+
//Generates rationals in (0, 1]
3240
__device__ static float getFloat(const uint &num)
3341
{
34-
return float(num)/UINTMAXFLOAT;
42+
return (num*FLT_FACTOR + HALF_FLT_FACTOR);
3543
}
3644

45+
//Generates rationals in (0, 1]
3746
__device__ static double getDouble(const uint &num1, const uint &num2)
3847
{
3948
uintl num = (((uintl)num1)<<32) | ((uintl)num2);
40-
return double(num)/UINTLMAXDOUBLE;
49+
return (num*DBL_FACTOR + HALF_DBL_FACTOR);
4150
}
4251

4352
template <typename T>
@@ -150,33 +159,33 @@ namespace kernel
150159
__device__ static void writeOut128Bytes(float *out, const uint &index,
151160
const uint &r1, const uint &r2, const uint &r3, const uint &r4)
152161
{
153-
out[index] = getFloat(r1);
154-
out[index + blockDim.x] = getFloat(r2);
155-
out[index + 2*blockDim.x] = getFloat(r3);
156-
out[index + 3*blockDim.x] = getFloat(r4);
162+
out[index] = 1.f - getFloat(r1);
163+
out[index + blockDim.x] = 1.f - getFloat(r2);
164+
out[index + 2*blockDim.x] = 1.f - getFloat(r3);
165+
out[index + 3*blockDim.x] = 1.f - getFloat(r4);
157166
}
158167

159168
__device__ static void writeOut128Bytes(cfloat *out, const uint &index,
160169
const uint &r1, const uint &r2, const uint &r3, const uint &r4)
161170
{
162-
out[index].x = getFloat(r1);
163-
out[index].y = getFloat(r2);
164-
out[index + blockDim.x].x = getFloat(r3);
165-
out[index + blockDim.x].y = getFloat(r4);
171+
out[index].x = 1.f - getFloat(r1);
172+
out[index].y = 1.f - getFloat(r2);
173+
out[index + blockDim.x].x = 1.f - getFloat(r3);
174+
out[index + blockDim.x].y = 1.f - getFloat(r4);
166175
}
167176

168177
__device__ static void writeOut128Bytes(double *out, const uint &index,
169178
const uint &r1, const uint &r2, const uint &r3, const uint &r4)
170179
{
171-
out[index] = getDouble(r1, r2);
172-
out[index + blockDim.x] = getDouble(r3, r4);
180+
out[index] = 1.0 - getDouble(r1, r2);
181+
out[index + blockDim.x] = 1.0 - getDouble(r3, r4);
173182
}
174183

175184
__device__ static void writeOut128Bytes(cdouble *out, const uint &index,
176185
const uint &r1, const uint &r2, const uint &r3, const uint &r4)
177186
{
178-
out[index].x = getDouble(r1, r2);
179-
out[index].y = getDouble(r3, r4);
187+
out[index].x = 1.0 - getDouble(r1, r2);
188+
out[index].y = 1.0 - getDouble(r3, r4);
180189
}
181190

182191
//Normalized writes without boundary checking
@@ -305,38 +314,38 @@ namespace kernel
305314
__device__ static void partialWriteOut128Bytes(float *out, const uint &index,
306315
const uint &r1, const uint &r2, const uint &r3, const uint &r4, const uint &elements)
307316
{
308-
if (index < elements) {out[index] = getFloat(r1);}
309-
if (index + blockDim.x < elements) {out[index + blockDim.x] = getFloat(r2);}
310-
if (index + 2*blockDim.x < elements) {out[index + 2*blockDim.x] = getFloat(r3);}
311-
if (index + 3*blockDim.x < elements) {out[index + 3*blockDim.x] = getFloat(r4);}
317+
if (index < elements) {out[index] = 1.f - getFloat(r1);}
318+
if (index + blockDim.x < elements) {out[index + blockDim.x] = 1.f - getFloat(r2);}
319+
if (index + 2*blockDim.x < elements) {out[index + 2*blockDim.x] = 1.f - getFloat(r3);}
320+
if (index + 3*blockDim.x < elements) {out[index + 3*blockDim.x] = 1.f - getFloat(r4);}
312321
}
313322

314323
__device__ static void partialWriteOut128Bytes(cfloat *out, const uint &index,
315324
const uint &r1, const uint &r2, const uint &r3, const uint &r4, const uint &elements)
316325
{
317326
if (index < elements) {
318-
out[index].x = getFloat(r1);
319-
out[index].y = getFloat(r2);
327+
out[index].x = 1.f - getFloat(r1);
328+
out[index].y = 1.f - getFloat(r2);
320329
}
321330
if (index + blockDim.x < elements) {
322-
out[index + blockDim.x].x = getFloat(r3);
323-
out[index + blockDim.x].y = getFloat(r4);
331+
out[index + blockDim.x].x = 1.f - getFloat(r3);
332+
out[index + blockDim.x].y = 1.f - getFloat(r4);
324333
}
325334
}
326335

327336
__device__ static void partialWriteOut128Bytes(double *out, const uint &index,
328337
const uint &r1, const uint &r2, const uint &r3, const uint &r4, const uint &elements)
329338
{
330-
if (index < elements) {out[index] = getDouble(r1, r2);}
331-
if (index + blockDim.x < elements) {out[index + blockDim.x] = getDouble(r3, r4);}
339+
if (index < elements) {out[index] = 1.0 - getDouble(r1, r2);}
340+
if (index + blockDim.x < elements) {out[index + blockDim.x] = 1.0 - getDouble(r3, r4);}
332341
}
333342

334343
__device__ static void partialWriteOut128Bytes(cdouble *out, const uint &index,
335344
const uint &r1, const uint &r2, const uint &r3, const uint &r4, const uint &elements)
336345
{
337346
if (index < elements) {
338-
out[index].x = getDouble(r1, r2);
339-
out[index].y = getDouble(r3, r4);
347+
out[index].x = 1.0 - getDouble(r1, r2);
348+
out[index].y = 1.0 - getDouble(r3, r4);
340349
}
341350
}
342351

src/backend/opencl/kernel/random_engine_write.cl

Lines changed: 27 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,17 @@
77
* http://arrayfire.com/licenses/BSD-3-Clause
88
********************************************************/
99

10-
#define UINTMAXFLOAT 4294967296.0f
11-
#define UINTLMAXDOUBLE (4294967296.0*4294967296.0)
1210
#define PI_VAL 3.1415926535897932384626433832795028841971693993751058209749445923078164
1311

12+
//Conversion to floats adapted from Random123
13+
#define UINTMAX 0xffffffff
14+
#define FLT_FACTOR ((1.0f)/(UINTMAX + (1.0f)))
15+
#define HALF_FLT_FACTOR ((0.5f)*FLT_FACTOR)
16+
17+
//Generates rationals in (0, 1]
1418
float getFloat(const uint * const num)
1519
{
16-
return ((float)(*num))/UINTMAXFLOAT;
20+
return ((*num)*FLT_FACTOR + HALF_FLT_FACTOR);
1721
}
1822

1923
//Writes without boundary checking
@@ -129,10 +133,10 @@ void writeOut128Bytes_ulong(__global ulong *out, const uint * const index,
129133
void writeOut128Bytes_float(__global float *out, const uint * const index,
130134
const uint * const r1, const uint * const r2, const uint * const r3, const uint * const r4)
131135
{
132-
out[*index] = getFloat(r1);
133-
out[*index + THREADS] = getFloat(r2);
134-
out[*index + 2*THREADS] = getFloat(r3);
135-
out[*index + 3*THREADS] = getFloat(r4);
136+
out[*index] = 1.f - getFloat(r1);
137+
out[*index + THREADS] = 1.f - getFloat(r2);
138+
out[*index + 2*THREADS] = 1.f - getFloat(r3);
139+
out[*index + 3*THREADS] = 1.f - getFloat(r4);
136140
}
137141

138142

@@ -252,10 +256,10 @@ void partialWriteOut128Bytes_ulong(__global ulong *out, const uint * const index
252256
void partialWriteOut128Bytes_float(__global float *out, const uint * const index,
253257
const uint * const r1, const uint * const r2, const uint * const r3, const uint * const r4, const uint * const elements)
254258
{
255-
if (*index < *elements) {out[*index] = getFloat(r1);}
256-
if (*index + THREADS < *elements) {out[*index + THREADS] = getFloat(r2);}
257-
if (*index + 2*THREADS < *elements) {out[*index + 2*THREADS] = getFloat(r3);}
258-
if (*index + 3*THREADS < *elements) {out[*index + 3*THREADS] = getFloat(r4);}
259+
if (*index < *elements) {out[*index] = 1.f - getFloat(r1);}
260+
if (*index + THREADS < *elements) {out[*index + THREADS] = 1.f - getFloat(r2);}
261+
if (*index + 2*THREADS < *elements) {out[*index + 2*THREADS] = 1.f - getFloat(r3);}
262+
if (*index + 3*THREADS < *elements) {out[*index + 3*THREADS] = 1.f - getFloat(r4);}
259263
}
260264

261265
#if RAND_DIST == 1
@@ -302,24 +306,31 @@ void partialBoxMullerWriteOut128Bytes_float(__global float *out, const uint * co
302306
#endif
303307

304308
#ifdef USE_DOUBLE
309+
310+
//Conversion to floats adapted from Random123
311+
#define UINTLMAX 0xffffffffffffffff
312+
#define DBL_FACTOR ((1.0)/(UINTLMAX + (1.0)))
313+
#define HALF_DBL_FACTOR ((0.5)*DBL_FACTOR)
314+
315+
//Generates rationals in (0, 1]
305316
double getDouble(const uint * const num1, const uint * const num2)
306317
{
307318
ulong num = (((ulong)*num1)<<32) | ((ulong)*num2);
308-
return ((double)num)/UINTLMAXDOUBLE;
319+
return (num*DBL_FACTOR + HALF_DBL_FACTOR);
309320
}
310321

311322
void writeOut128Bytes_double(__global double *out, const uint * const index,
312323
const uint * const r1, const uint * const r2, const uint * const r3, const uint * const r4)
313324
{
314-
out[*index] = getDouble(r1, r2);
315-
out[*index + THREADS] = getDouble(r3, r4);
325+
out[*index] = 1.0 - getDouble(r1, r2);
326+
out[*index + THREADS] = 1.0 - getDouble(r3, r4);
316327
}
317328

318329
void partialWriteOut128Bytes_double(__global double *out, const uint * const index,
319330
const uint * const r1, const uint * const r2, const uint * const r3, const uint * const r4, const uint * const elements)
320331
{
321-
if (*index < *elements) {out[*index] = getDouble(r1, r2);}
322-
if (*index + THREADS < *elements) {out[*index + THREADS] = getDouble(r3, r4);}
332+
if (*index < *elements) {out[*index] = 1.0 - getDouble(r1, r2);}
333+
if (*index + THREADS < *elements) {out[*index + THREADS] = 1.0 - getDouble(r3, r4);}
323334
}
324335

325336
#if RAND_DIST == 1

0 commit comments

Comments
 (0)