-
Notifications
You must be signed in to change notification settings - Fork 71
Expand file tree
/
Copy pathtiny_bvh_minimal_gpu.cpp
More file actions
101 lines (88 loc) · 3.27 KB
/
tiny_bvh_minimal_gpu.cpp
File metadata and controls
101 lines (88 loc) · 3.27 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
// Minimal GPU example for TinyBVH.
#define TINYBVH_IMPLEMENTATION
#include "tiny_bvh.h"
using namespace tinybvh;
// This application uses tinyocl - And this file will include the implementation.
#define TINY_OCL_IMPLEMENTATION
#include "tiny_ocl.h"
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <memory>
// Number of triangles in the generated test scene.
#define TRIANGLE_COUNT 8192
// CPU-side scene data: three consecutive bvhvec4 vertices per triangle, so
// triangle i occupies entries i * 3 + 0 .. i * 3 + 2.
bvhvec4 triangles[TRIANGLE_COUNT * 3]; // each vertex must be 16 bytes!
// RNG convenience: scales the next rand() sample by RAND_MAX, yielding a
// float in the range 0..1 (both ends inclusive).
float uniform_rand()
{
	const float sample = static_cast<float>( rand() );
	return sample / static_cast<float>( RAND_MAX );
}
// Application entry point
int main()
{
// Create a scene consisting of some random small triangles.
for (int i = 0; i < TRIANGLE_COUNT; i++)
{
// create a random triangle
bvhvec4& v0 = triangles[i * 3 + 0];
bvhvec4& v1 = triangles[i * 3 + 1];
bvhvec4& v2 = triangles[i * 3 + 2];
// triangle position, x/y/z = 0..1
float x = uniform_rand();
float y = uniform_rand();
float z = uniform_rand();
// set first vertex
v0.x = x + 0.1f * uniform_rand();
v0.y = y + 0.1f * uniform_rand();
v0.z = z + 0.1f * uniform_rand();
// set second vertex
v1.x = x + 0.1f * uniform_rand();
v1.y = y + 0.1f * uniform_rand();
v1.z = z + 0.1f * uniform_rand();
// set third vertex
v2.x = x + 0.1f * uniform_rand();
v2.y = y + 0.1f * uniform_rand();
v2.z = z + 0.1f * uniform_rand();
}
// Build a BVH over the scene. We use the BVH_GPU layout.
tinybvh::BVH_GPU gpubvh;
gpubvh.Build( triangles, TRIANGLE_COUNT );
// Load and compile the OpenCL kernel.
tinyocl::Kernel ailalaine_kernel( "traverse.cl", "batch_ailalaine" );
// Create and populate the OpenCL buffers.
// 1. Triangle data: For each triangle, 3 times a vec4 vertex.
tinyocl::Buffer* triData = new tinyocl::Buffer( TRIANGLE_COUNT * 3 * sizeof( bvhvec4 ), triangles );
// 2. BVH node data: Taken from gpubvh.bvhNode; count is gpubvh.usedNodes.
// If the tree is rebuilt per frame, use gpubvh.allocatedNodes instead.
tinyocl::Buffer* gpuNodes = new tinyocl::Buffer( gpubvh.usedNodes * sizeof( BVH_GPU::BVHNode ), gpubvh.bvhNode );
// 3. Triangle index data, used in BVH leafs. This is taken from the base BVH.
tinyocl::Buffer* idxData = new tinyocl::Buffer( gpubvh.idxCount * sizeof( uint32_t ), gpubvh.bvh.primIdx );
// 4. Ray buffer. We will always trace batches of rays, for efficiency.
// For GPU code, a ray is 64 bytes. On the CPU it has extra data, so copy carefully.
tinyocl::Buffer* rayData = new tinyocl::Buffer( 1024 * 64 );
unsigned char* hostData = (unsigned char*)rayData->GetHostPtr();
for (int i = 0; i < 1024; i++)
{
bvhvec3 O( 0.5f, 0.5f, -1 );
bvhvec3 D( 0.1f, uniform_rand() - 0.5f, 2 );
Ray ray( O, D );
memcpy( hostData + 64 * i, &ray, 64 /* just the first 64 bytes! */ );
}
// 5. Sync all data to the GPU. Repeat if anything changes.
triData->CopyToDevice();
gpuNodes->CopyToDevice();
idxData->CopyToDevice();
rayData->CopyToDevice();
// Invoke the kernel.
ailalaine_kernel.SetArguments( gpuNodes, idxData, triData, rayData );
ailalaine_kernel.Run( 1024 /* a thread per ray, make it a multiple of 64. */ );
// Obtain traversal result.
rayData->CopyFromDevice();
for (int i = 0; i < 1024; i++)
{
Ray ray;
memcpy( &ray, hostData + 64 * i, 64 );
printf( "ray %i, nearest intersection: %f\n", i, ray.hit.t );
}
// All done.
delete triData;
delete gpuNodes;
delete idxData;
delete rayData;
return 0;
}