Skip to content

Commit 3f366d2

Browse files
authored
example-c
1 parent 329091f commit 3f366d2

File tree

2 files changed

+150
-0
lines changed

2 files changed

+150
-0
lines changed

Codes/produtointerno.c

Lines changed: 57 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,57 @@
1+
#include <cstdio>

#include "../common/book.h"
2+
3+
// Minimum of two values. Both arguments and the whole expansion are
// parenthesized so the macro stays correct when it is given compound
// expressions (e.g. imin(x | y, z)); note each argument is still evaluated
// more than once, so do not pass expressions with side effects.
#define imin(a,b) ((a) < (b) ? (a) : (b))

// Number of elements in each input vector of the dot product.
const int N = 33 * 1024;
// Threads per block; also the size of the kernel's shared-memory cache.
const int threadsPerBlock = 256;
// Ceil-divide N by the block size, capped at 32 blocks.
const int blocksPerGrid = imin(32, (N+threadsPerBlock-1) / threadsPerBlock);
8+
9+
// Computes per-block partial sums of the dot product of a and b.
//
// a, b : device arrays of N floats each.
// c    : device array with one float per block; block k stores its partial
//        sum in c[k]. The caller adds the partial sums to get the result.
//
// Launch layout: 1D grid of 1D blocks. blockDim.x must not exceed
// threadsPerBlock (the size of the shared cache) and must be a power of two,
// otherwise the halving reduction below skips elements.
__global__ void dot(float *a, float *b, float *c)
{
    __shared__ float cache[threadsPerBlock];

    int tid = threadIdx.x + blockIdx.x * blockDim.x;
    int cacheIndex = threadIdx.x;

    // Each thread accumulates the products of the elements it owns, stepping
    // by the total number of threads in the grid.
    float temp = 0;
    while (tid < N) {
        temp += a[tid] * b[tid];
        tid += blockDim.x * gridDim.x;
    }

    cache[cacheIndex] = temp;

    // Every thread must have published its value before any thread reads a
    // neighbour's slot in the reduction.
    __syncthreads();

    // Tree reduction: on each pass the lower half adds in the upper half.
    int i = blockDim.x / 2;
    while (i != 0) {
        if (cacheIndex < i)
            cache[cacheIndex] += cache[cacheIndex + i];
        // Kept outside the branch so every thread in the block reaches it.
        __syncthreads();
        i /= 2;
    }

    // One thread publishes the block's partial sum.
    if (cacheIndex == 0)
        c[blockIdx.x] = cache[0];
}
30+
31+
// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is anything other than cudaSuccess.
static bool cudaFailed(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
        return false;
    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
    return true;
}

// Computes the dot product of two N-element vectors (a[i] = i, b[i] = 2*i):
// the dot kernel produces one partial sum per block and the host adds them.
// Returns 0 on success and 1 if any CUDA call fails.
int main(void)
{
    float *a = new float[N];
    float *b = new float[N];
    float *partial_c = new float[blocksPerGrid];
    float *dev_a = NULL, *dev_b = NULL, *dev_partial_c = NULL;

    for (int i = 0; i < N; i++) {
        a[i] = i;
        b[i] = i * 2;
    }

    // Short-circuiting stops at the first failure; the device pointers that
    // were never allocated stay NULL, which cudaFree accepts as a no-op.
    bool ok =
        !cudaFailed(cudaMalloc((void**)&dev_a, N*sizeof(float)), "cudaMalloc(dev_a)") &&
        !cudaFailed(cudaMalloc((void**)&dev_b, N*sizeof(float)), "cudaMalloc(dev_b)") &&
        !cudaFailed(cudaMalloc((void**)&dev_partial_c, blocksPerGrid*sizeof(float)),
                    "cudaMalloc(dev_partial_c)") &&
        !cudaFailed(cudaMemcpy(dev_a, a, N*sizeof(float), cudaMemcpyHostToDevice),
                    "cudaMemcpy(dev_a)") &&
        !cudaFailed(cudaMemcpy(dev_b, b, N*sizeof(float), cudaMemcpyHostToDevice),
                    "cudaMemcpy(dev_b)");

    if (ok) {
        dot<<<blocksPerGrid, threadsPerBlock>>>(dev_a, dev_b, dev_partial_c);
        // A launch does not return a status; launch errors are read back with
        // cudaGetLastError, execution errors surface at the blocking copy.
        ok = !cudaFailed(cudaGetLastError(), "dot kernel launch") &&
             !cudaFailed(cudaMemcpy(partial_c, dev_partial_c, blocksPerGrid*sizeof(float),
                                    cudaMemcpyDeviceToHost),
                         "cudaMemcpy(partial_c)");
    }

    if (ok) {
        float c = 0;
        for (int i = 0; i < blocksPerGrid; i++)
            c += partial_c[i];

        // Closed form of sum_{i=0}^{N-1} i * 2i = 2 * (N-1) * N * (2N-1) / 6,
        // printed next to the GPU value as a reference.
        const double m = N - 1;
        const double expected = 2.0 * m * (m + 1) * (2 * m + 1) / 6.0;
        printf("Dot product = %.6g (expected %.6g)\n", c, expected);
    }

    cudaFree(dev_a);
    cudaFree(dev_b);
    cudaFree(dev_partial_c);
    delete[] a;
    delete[] b;
    delete[] partial_c;

    return ok ? 0 : 1;
}

Codes/raytracingexample.c

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
#include "cuda.h"
2+
#include "../common/book.h"
3+
#include "../common/cpu_bitmap.h"
4+
5+
// Scales x by a pseudo-random factor rand() / RAND_MAX. The argument is
// parenthesized so compound expressions such as rnd(a + b) scale the whole
// expression rather than only its last term.
#define rnd(x) ((x) * rand() / RAND_MAX)

// Number of spheres in the scene.
#define SPHERES 20

// Magnitude used as "no hit" depth (Sphere::hit returns -INF on a miss).
#define INF 2e10f

// Image width and height in pixels. kernel() and main() use DIM, but nothing
// in this file defines it, so a default is provided unless an included
// header already did. main() launches DIM/16 blocks of 16 threads per axis,
// so the value must be a multiple of 16.
#ifndef DIM
#define DIM 1024
#endif
9+
10+
// A coloured sphere. Field order defines the memory layout that is copied
// from host to device, so it must not be rearranged.
struct Sphere {
    float r, g, b;      // colour channels
    float radius;
    float x, y, z;      // centre

    // Tests a ray running along the z axis through (ox, oy) against this
    // sphere.
    //
    // On a hit, stores dz / radius in *n (dz being the z offset of the hit
    // point from the centre) and returns the z coordinate of the hit point.
    // On a miss, returns -INF and leaves *n untouched.
    __device__ float hit(float ox, float oy, float *n) {
        const float offX = ox - x;
        const float offY = oy - y;

        // Outside (or exactly on) the sphere's silhouette: no hit.
        if (!(offX*offX + offY*offY < radius*radius))
            return -INF;

        const float depth = sqrtf(radius*radius - offX*offX - offY*offY);
        *n = depth / sqrtf(radius*radius);
        return depth + z;
    }
};
25+
26+
// Renders the sphere scene: one thread per pixel.
//
// s   : device array of SPHERES spheres. The launch in main() passes
//       (s, dev_bitmap), so the spheres are taken as the first parameter.
// ptr : device output image, 4 bytes per pixel (written as r, g, b, 255).
//
// Launch layout: 2D grid of 2D blocks; the row stride of the image is
// blockDim.x * gridDim.x pixels, so the grid must cover the image exactly.
__global__ void kernel(Sphere *s, unsigned char *ptr)
{
    // Pixel coordinates of this thread and its linear offset in the image.
    int x = threadIdx.x + blockIdx.x * blockDim.x;
    int y = threadIdx.y + blockIdx.y * blockDim.y;
    int offset = x + y * blockDim.x * gridDim.x;

    // Shift the origin to the centre of the image.
    float ox = (x - DIM/2);
    float oy = (y - DIM/2);

    // Keep the colour of the sphere whose hit point has the largest z.
    float r = 0, g = 0, b = 0;
    float maxz = -INF;
    for (int i = 0; i < SPHERES; i++) {
        float n;
        float t = s[i].hit(ox, oy, &n);
        if (t > maxz) {
            float fscale = n;
            r = s[i].r * fscale;
            g = s[i].g * fscale;
            b = s[i].b * fscale;
            maxz = t;
        }
    }

    ptr[offset*4 + 0] = (int)(r*255);
    ptr[offset*4 + 1] = (int)(g*255);
    ptr[offset*4 + 2] = (int)(b*255);
    ptr[offset*4 + 3] = 255;
}
54+
55+
// Builds a scene of SPHERES random spheres, renders it on the GPU into a
// DIM x DIM bitmap and displays the result.
// Returns 1 if an allocation, copy or the kernel launch fails.
int main(void) {
    // NOTE(review): DataBlock is not declared anywhere in this file;
    // presumably it comes from one of the included headers - confirm.
    DataBlock data;
    cudaEvent_t start, stop;
    cudaEventCreate(&start);
    cudaEventCreate(&stop);
    cudaEventRecord(start, 0);

    CPUBitmap bitmap(DIM, DIM, &data);
    unsigned char *dev_bitmap = NULL;
    Sphere *s = NULL;

    bool ok =
        cudaMalloc((void**)&dev_bitmap, bitmap.image_size()) == cudaSuccess &&
        cudaMalloc((void**)&s, sizeof(Sphere) * SPHERES) == cudaSuccess;

    Sphere *temp_s = (Sphere*)malloc(sizeof(Sphere) * SPHERES);
    ok = ok && temp_s != NULL;

    if (ok) {
        for (int i = 0; i < SPHERES; i++) {
            temp_s[i].r = rnd(1.0f);
            temp_s[i].g = rnd(1.0f);
            temp_s[i].b = rnd(1.0f);
            temp_s[i].x = rnd(1000.0f) - 500;
            temp_s[i].y = rnd(1000.0f) - 500;
            temp_s[i].z = rnd(1000.0f) - 500;
            temp_s[i].radius = rnd(100.0f) + 20;
        }

        // s is an ordinary device buffer obtained from cudaMalloc, not a
        // __constant__/__device__ symbol, so it is filled with cudaMemcpy
        // rather than cudaMemcpyToSymbol.
        ok = cudaMemcpy(s, temp_s, sizeof(Sphere) * SPHERES,
                        cudaMemcpyHostToDevice) == cudaSuccess;
    }
    free(temp_s);

    if (ok) {
        dim3 grids(DIM/16, DIM/16);
        dim3 threads(16, 16);
        kernel<<<grids, threads>>>(s, dev_bitmap);

        // A launch returns no status; launch errors are read back with
        // cudaGetLastError, execution errors surface at the blocking copy.
        ok = cudaGetLastError() == cudaSuccess &&
             cudaMemcpy(bitmap.get_ptr(), dev_bitmap, bitmap.image_size(),
                        cudaMemcpyDeviceToHost) == cudaSuccess;
    }

    // Close the timed region opened by the start event and release both
    // events. The elapsed time can be read with cudaEventElapsedTime before
    // the events are destroyed if it is needed.
    cudaEventRecord(stop, 0);
    cudaEventSynchronize(stop);
    cudaEventDestroy(start);
    cudaEventDestroy(stop);

    // The image is already on the host, so the device buffers are released
    // before display_and_exit(), which - judging by its name - presumably
    // does not return (TODO confirm). cudaFree accepts a NULL pointer.
    cudaFree(dev_bitmap);
    cudaFree(s);

    if (!ok)
        return 1;

    bitmap.display_and_exit();
    return 0;
}

0 commit comments

Comments
 (0)