#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#include "cuda.h"
#include "../common/book.h"
#include "../common/cpu_bitmap.h"
4+
// Scales a pseudo-random value from rand() into the range [0, x].
// Pass a floating-point x: with an integer argument the multiplication and
// division would be done in integer arithmetic.
// No whitespace may appear between the macro name and '(' — otherwise the
// preprocessor treats it as an object-like macro. The argument is
// parenthesized so expressions such as rnd(a + b) scale as a whole.
#define rnd(x) ((x) * rand() / RAND_MAX)

// Number of spheres in the scene.
#define SPHERES 20

// Large magnitude used as "no hit" depth (as -INF) by the intersection test.
#define INF 2e10f

// Image width and height in pixels. Fallback only: kept if an included
// header already defines DIM.
#ifndef DIM
#define DIM 1024
#endif
9+
// A colored sphere described by its color, radius and center.
struct Sphere {
    float r, g, b;   // color components
    float radius;    // sphere radius
    float x, y, z;   // center position

    /*
     * Tests whether the line through (ox, oy) parallel to the z axis meets
     * this sphere.
     *
     * ox, oy - x and y coordinates of the line
     * n      - out: on a hit receives dz / |radius|, where dz is the height
     *          of the hit point above the sphere's center plane; this is a
     *          value in [0, 1]. Not written on a miss.
     *
     * Returns z + dz (the z coordinate of the hit point on the +z side of
     * the sphere) on a hit, or -INF on a miss.
     *
     * The method does not modify the sphere, so it is const, and it is also
     * compiled for the host so it can be checked against a CPU reference.
     */
    __host__ __device__ float hit(float ox, float oy, float *n) const {
        float dx = ox - x;
        float dy = oy - y;
        float r2 = radius * radius;
        // Strict '<' also rejects a zero radius, so the division below
        // never sees a zero denominator.
        if (dx * dx + dy * dy < r2) {
            float dz = sqrtf(r2 - dx * dx - dy * dy);
            *n = dz / sqrtf(r2);   // sqrtf(r2) is |radius|
            return dz + z;
        }
        return -INF;
    }
};
25+
/*
 * Shades one pixel of a DIM x DIM image per thread.
 *
 * Launch layout: a 2D grid of 2D blocks; the thread with global index (x, y)
 * writes pixel (x, y). Threads whose index falls outside the image return
 * without writing, so the grid may overshoot the image.
 *
 * s   - device-accessible array of SPHERES spheres
 * ptr - device-accessible output buffer of DIM * DIM pixels, 4 bytes per
 *       pixel: the r, g, b results scaled by 255, then a constant 255
 *
 * For every pixel the sphere with the largest hit depth wins; its color is
 * scaled by the factor returned through hit()'s out-parameter. Pixels that
 * hit nothing stay black.
 */
__global__ void kernel(Sphere *s, unsigned char *ptr)
{
    int x = threadIdx.x + blockIdx.x * blockDim.x;
    int y = threadIdx.y + blockIdx.y * blockDim.y;
    if (x >= DIM || y >= DIM)
        return;

    // Rows of the output image are DIM pixels wide.
    int offset = x + y * DIM;

    // Shift so that the image center is the origin of the scene.
    float ox = (x - DIM / 2);
    float oy = (y - DIM / 2);

    float r = 0, g = 0, b = 0;
    float maxz = -INF;
    for (int i = 0; i < SPHERES; i++)
    {
        float n;
        float t = s[i].hit(ox, oy, &n);
        // A miss returns -INF, which never exceeds maxz, so n is only read
        // after hit() has written it.
        if (t > maxz) {
            float fscale = n;
            r = s[i].r * fscale;
            g = s[i].g * fscale;
            b = s[i].b * fscale;
            maxz = t;
        }
    }

    ptr[offset * 4 + 0] = (int)(r * 255);
    ptr[offset * 4 + 1] = (int)(g * 255);
    ptr[offset * 4 + 2] = (int)(b * 255);
    ptr[offset * 4 + 3] = 255;
}
54+
// Prints a diagnostic and terminates the program when a CUDA runtime call
// did not return cudaSuccess.
static void checkCuda(cudaError_t err, const char *what)
{
    if (err != cudaSuccess) {
        fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(err));
        exit(EXIT_FAILURE);
    }
}

/*
 * Builds a random scene of SPHERES spheres, renders it on the GPU into a
 * DIM x DIM bitmap, prints the elapsed GPU time and displays the result.
 */
int main(void) {
    DataBlock data;
    cudaEvent_t start, stop;
    checkCuda(cudaEventCreate(&start), "cudaEventCreate(start)");
    checkCuda(cudaEventCreate(&stop), "cudaEventCreate(stop)");
    checkCuda(cudaEventRecord(start, 0), "cudaEventRecord(start)");

    CPUBitmap bitmap(DIM, DIM, &data);
    unsigned char *dev_bitmap;
    Sphere *s;

    // Device buffers: the output image and the sphere array.
    checkCuda(cudaMalloc((void **)&dev_bitmap, bitmap.image_size()),
              "cudaMalloc(dev_bitmap)");
    checkCuda(cudaMalloc((void **)&s, sizeof(Sphere) * SPHERES),
              "cudaMalloc(s)");

    // Build the scene on the host first, then upload it in one copy.
    Sphere *temp_s = (Sphere *)malloc(sizeof(Sphere) * SPHERES);
    if (temp_s == NULL) {
        fprintf(stderr, "out of host memory\n");
        exit(EXIT_FAILURE);
    }
    for (int i = 0; i < SPHERES; i++)
    {
        temp_s[i].r = rnd(1.0f);
        temp_s[i].g = rnd(1.0f);
        temp_s[i].b = rnd(1.0f);
        temp_s[i].x = rnd(1000.0f) - 500;
        temp_s[i].y = rnd(1000.0f) - 500;
        temp_s[i].z = rnd(1000.0f) - 500;
        temp_s[i].radius = rnd(100.0f) + 20;
    }

    // s is ordinary device memory obtained from cudaMalloc, not a device
    // symbol, so it is filled with cudaMemcpy rather than cudaMemcpyToSymbol.
    checkCuda(cudaMemcpy(s, temp_s, sizeof(Sphere) * SPHERES,
                         cudaMemcpyHostToDevice),
              "cudaMemcpy(spheres to device)");
    free(temp_s);

    // One thread per pixel in 16 x 16 blocks. DIM / 16 truncates, so DIM is
    // expected to be a multiple of 16 — TODO confirm.
    dim3 grids(DIM / 16, DIM / 16);
    dim3 threads(16, 16);
    kernel<<<grids, threads>>>(s, dev_bitmap);
    // A launch returns no status itself; query it explicitly.
    checkCuda(cudaGetLastError(), "kernel launch");

    // Blocking copy: also waits for the kernel to finish.
    checkCuda(cudaMemcpy(bitmap.get_ptr(), dev_bitmap, bitmap.image_size(),
                         cudaMemcpyDeviceToHost),
              "cudaMemcpy(bitmap to host)");

    // Complete the timing that was started above and release the events.
    checkCuda(cudaEventRecord(stop, 0), "cudaEventRecord(stop)");
    checkCuda(cudaEventSynchronize(stop), "cudaEventSynchronize(stop)");
    float elapsedTime = 0.0f;
    checkCuda(cudaEventElapsedTime(&elapsedTime, start, stop),
              "cudaEventElapsedTime");
    printf("Time to generate:  %3.1f ms\n", elapsedTime);
    checkCuda(cudaEventDestroy(start), "cudaEventDestroy(start)");
    checkCuda(cudaEventDestroy(stop), "cudaEventDestroy(stop)");

    // The image is already on the host, so the device buffers can go now.
    // NOTE(review): display_and_exit() presumably does not return (per its
    // name), so cleanup placed after it would never run — confirm.
    checkCuda(cudaFree(dev_bitmap), "cudaFree(dev_bitmap)");
    checkCuda(cudaFree(s), "cudaFree(s)");

    bitmap.display_and_exit();
    return 0;
}
0 commit comments