Skip to content

Commit 5aef4fb

Browse files
committed
major refactor
- make it MT-friendly (at the cost of memory) - split code into separate files for push/sort/draw - wrap SSE calls in macros
1 parent d02f211 commit 5aef4fb

10 files changed

+1094
-997
lines changed

CMakeLists.txt

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@ bgfx.cmake
99
occlusion.cpp
1010
rasterizer.h
1111
rasterizer.cpp
12+
rasterizer_box.cpp
13+
rasterizer_sort.cpp
14+
rasterizer_draw.cpp
1215
rasterizer_math.h
1316
rasterizer_tile.h
1417
rasterizer_tile.cpp
@@ -22,7 +25,7 @@ enkiTS/src/TaskScheduler.cpp
2225
if(MSVC)
2326
add_compile_options(/W4)
2427
else()
25-
add_compile_options(-Wall -Wextra -Wshadow -Wno-missing-field-initializers)
28+
add_compile_options(-ffast-math -Wall -Wextra -Wshadow -Wno-missing-field-initializers)
2629
endif()
2730

2831
include(bgfx.cmake)

occlusion.cpp

Lines changed: 34 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -251,11 +251,11 @@ class ExampleOcclusionCulling : public entry::AppI
251251

252252
debug_coverage = bgfx::createTexture2D(Rasterizer::g_total_width, Rasterizer::g_total_height, false, 1, bgfx::TextureFormat::R8);
253253

254-
Rasterizer::Init();
255-
256254
m_Scheduler.Initialize(std::thread::hardware_concurrency()-1);
257255
int workersCount = (int)m_Scheduler.GetNumTaskThreads();
258256
printf("Scheduler started, %d workers\n", workersCount);
257+
258+
m_Rasterizer.Init(workersCount);
259259
}
260260

261261
int shutdown() override
@@ -287,7 +287,7 @@ class ExampleOcclusionCulling : public entry::AppI
287287
if (m_Visibility[i] == 0)
288288
continue;
289289

290-
encoder->setTransform(m_Transforms[i].data());
290+
encoder->setTransform((float*)&m_Objects[i].transform);
291291
encoder->setVertexBuffer(0, m_vbh);
292292
encoder->setIndexBuffer(m_ibh);
293293

@@ -325,20 +325,7 @@ class ExampleOcclusionCulling : public entry::AppI
325325

326326
void ExecuteRange(enki::TaskSetPartition range, uint32_t thread_index) override
327327
{
328-
vec4_t box_min = {-1.f, -1.f, -1.f, 1.f};
329-
vec4_t box_max = {+1.f, +1.f, +1.f, 1.f};
330-
for (uint32_t index = range.start, wrapped_index = 0; index < range.end; ++index)
331-
{
332-
bool occludee = index >= m_parent->m_Visibility.size();
333-
wrapped_index = occludee ? index - (uint32_t)m_parent->m_Visibility.size() : index;
334-
if (occludee)
335-
m_parent->m_Visibility[wrapped_index] = 0;
336-
m_parent->m_Rasterizer.push_object(MatrixSet(m_parent->m_Transforms[wrapped_index].data()),
337-
box_min, box_max,
338-
s_cubeIndices, sizeof(s_cubeIndices) / sizeof(s_cubeIndices[0]),
339-
s_cubeVerticesSIMD, sizeof(s_cubeVerticesSIMD) / sizeof(s_cubeVerticesSIMD[0]),
340-
occludee ? &m_parent->m_Visibility[wrapped_index] : nullptr);
341-
}
328+
m_parent->m_Rasterizer.push_objects(m_parent->m_Objects.data() + range.start, range.end - range.start, thread_index);
342329
}
343330

344331
ExampleOcclusionCulling* m_parent = nullptr;
@@ -350,19 +337,17 @@ class ExampleOcclusionCulling : public entry::AppI
350337
: m_parent(parent)
351338
{
352339
this->m_SetSize = Rasterizer::g_width*Rasterizer::g_height;
353-
m_sort.resize(this->m_SetSize);
354340
}
355341

356342
void ExecuteRange(enki::TaskSetPartition range, uint32_t thread_index) override
357343
{
358344
for (uint32_t index = range.start; index < range.end; ++index)
359345
{
360-
m_parent->m_Rasterizer.sort_triangles(index, m_sort[index]);
346+
m_parent->m_Rasterizer.sort_triangles(index, thread_index);
361347
}
362348
}
363349

364350
ExampleOcclusionCulling* m_parent = nullptr;
365-
stl::vector<stl::vector<uint64_t>> m_sort;
366351
};
367352

368353
struct DrawTask : enki::ITaskSet
@@ -373,7 +358,7 @@ class ExampleOcclusionCulling : public entry::AppI
373358
this->m_SetSize = Rasterizer::g_width*Rasterizer::g_height;
374359
}
375360

376-
void ExecuteRange(enki::TaskSetPartition range, uint32_t thread_index) override
361+
void ExecuteRange(enki::TaskSetPartition range, uint32_t) override
377362
{
378363
for (uint32_t index = range.start; index < range.end; ++index)
379364
{
@@ -467,9 +452,9 @@ class ExampleOcclusionCulling : public entry::AppI
467452
{
468453
for (auto & t : tiles)
469454
{
470-
if (ImGui::TreeNode(std::to_string(t.m_x + t.m_y*Rasterizer::g_width).c_str(), "Tile %d (%d/%d) %s", t.m_x, (uint32_t)t.m_triangle_count, t.m_triangles_drawn_total, t.m_mask == ~0u ? "full" : ""))
455+
if (ImGui::TreeNode(std::to_string(t.m_x + t.m_y*Rasterizer::g_width).c_str(), "Tile %d (%d/%d) %s", t.m_x, /*(uint32_t)t.m_triangle_count*/0, t.m_triangles_drawn_total, t.m_mask == ~0u ? "full" : ""))
471456
{
472-
ImGui::Text("total sorted triangles %d", (uint32_t)t.m_triangles.size());
457+
ImGui::Text("total sorted triangles %d", /*(uint32_t)t.m_triangles.size()*/0);
473458
ImGui::Text("total drawn triangles %d", t.m_triangles_drawn_total);
474459
ImGui::Text("total drawn occluder triangles %d", t.m_triangles_drawn_occluder_total);
475460
ImGui::Text("total drawn occludee triangles %d", t.m_triangles_drawn_occludee_total);
@@ -528,8 +513,9 @@ class ExampleOcclusionCulling : public entry::AppI
528513
pos[1] = -step*m_dim / 2.0f;
529514
pos[2] = -15.0;
530515

531-
m_Transforms.resize(m_dim*m_dim*m_dim);
532-
m_Visibility.resize(m_dim*m_dim*m_dim);
516+
uint32_t max_drawcalls = m_dim*m_dim*m_dim;
517+
m_Objects.resize(max_drawcalls*2);
518+
m_Visibility.resize(max_drawcalls);
533519
for (uint32_t zz = 0; zz < uint32_t(m_dim); ++zz)
534520
{
535521
for (uint32_t yy = 0; yy < uint32_t(m_dim); ++yy)
@@ -550,8 +536,24 @@ class ExampleOcclusionCulling : public entry::AppI
550536
mtx[13] = pos[1] + float(yy)*step;
551537
mtx[14] = pos[2] + float(zz)*step;
552538

553-
m_Transforms[xx + yy*m_dim + zz*m_dim*m_dim].resize(16);
554-
memcpy(m_Transforms[xx + yy*m_dim + zz*m_dim*m_dim].data(), mtx, sizeof(mtx));
539+
uint32_t idx = xx + yy*m_dim + zz*m_dim*m_dim;
540+
m_Objects[idx].transform = MatrixSet(mtx);
541+
m_Objects[idx].indices = s_cubeIndices;
542+
m_Objects[idx].index_count = sizeof(s_cubeIndices) / sizeof(s_cubeIndices[0]);
543+
m_Objects[idx].vertices = s_cubeVerticesSIMD;
544+
m_Objects[idx].vertex_count = sizeof(s_cubeVerticesSIMD) / sizeof(s_cubeVerticesSIMD[0]);
545+
m_Objects[idx].visibility = &m_Visibility[xx + yy*m_dim + zz*m_dim*m_dim];
546+
m_Objects[idx].bound_min = {-1.f, -1.f, -1.f, 1.f};
547+
m_Objects[idx].bound_max = {1.f, 1.f, 1.f, 1.f};
548+
549+
m_Objects[idx+max_drawcalls].transform = MatrixSet(mtx);
550+
m_Objects[idx+max_drawcalls].indices = s_cubeIndices;
551+
m_Objects[idx+max_drawcalls].index_count = sizeof(s_cubeIndices) / sizeof(s_cubeIndices[0]);
552+
m_Objects[idx+max_drawcalls].vertices = s_cubeVerticesSIMD;
553+
m_Objects[idx+max_drawcalls].vertex_count = sizeof(s_cubeVerticesSIMD) / sizeof(s_cubeVerticesSIMD[0]);
554+
m_Objects[idx+max_drawcalls].visibility = nullptr;
555+
m_Objects[idx+max_drawcalls].bound_min = {-1.f, -1.f, -1.f, 1.f};
556+
m_Objects[idx+max_drawcalls].bound_max = {1.f, 1.f, 1.f, 1.f};
555557
}
556558
}
557559
}
@@ -561,43 +563,18 @@ class ExampleOcclusionCulling : public entry::AppI
561563
m_Rasterizer.begin(view_mat * proj_mat * MatrixScaling(0.5f, -0.5f, 1.0f) * MatrixTranslation(Vector4( .5f, 0.5f, 0.0f, 1.0f )) * MatrixScaling( (float)Rasterizer::g_total_width, (float)Rasterizer::g_total_height, 1.0f));
562564
if (m_Occlusion)
563565
{
564-
vec4_t box_min = {-1.f, -1.f, -1.f, 1.f};
565-
vec4_t box_max = {+1.f, +1.f, +1.f, 1.f};
566-
567566
m_Rasterizer.setMT(m_MT);
568567

569568
int64_t occlusion_start = bx::getHPCounter();
570-
uint32_t total_count = uint32_t(m_dim)*uint32_t(m_dim)*uint32_t(m_dim);
571569
if (m_MT)
572570
{
573-
m_PushTasks.setCount(total_count*2);
571+
m_PushTasks.setCount(m_Objects.size());
574572
m_Scheduler.AddTaskSetToPipe(&m_PushTasks);
575573
m_Scheduler.WaitforAll();
576574
}
577575
else
578576
{
579-
for (uint32_t i = 0; i < total_count; ++i )
580-
{
581-
m_Visibility[i] = 0;
582-
if (m_UseBox)
583-
{
584-
m_Rasterizer.push_box(MatrixSet(m_Transforms[i].data()), &m_Visibility[i]);
585-
m_Rasterizer.push_box(MatrixSet(m_Transforms[i].data()), nullptr);
586-
}
587-
else
588-
{
589-
m_Rasterizer.push_object(MatrixSet(m_Transforms[i].data()),
590-
box_min, box_max,
591-
s_cubeIndices, sizeof(s_cubeIndices) / sizeof(s_cubeIndices[0]),
592-
s_cubeVerticesSIMD, sizeof(s_cubeVerticesSIMD) / sizeof(s_cubeVerticesSIMD[0]),
593-
&m_Visibility[i]);
594-
m_Rasterizer.push_object(MatrixSet(m_Transforms[i].data()),
595-
box_min, box_max,
596-
s_cubeIndices, sizeof(s_cubeIndices) / sizeof(s_cubeIndices[0]),
597-
s_cubeVerticesSIMD, sizeof(s_cubeVerticesSIMD) / sizeof(s_cubeVerticesSIMD[0]),
598-
nullptr);
599-
}
600-
}
577+
m_Rasterizer.push_objects(m_Objects.data(), m_Objects.size());
601578
}
602579
int64_t occlusion_mid = bx::getHPCounter();
603580
occlusion_push_time = occlusion_mid - occlusion_start;
@@ -635,7 +612,7 @@ class ExampleOcclusionCulling : public entry::AppI
635612
for ( int y = 0; y < Tile::g_tile_height; ++y )
636613
for ( int x = 0; x < Tile::g_tile_width; ++x )
637614
{
638-
__m128i buf = m_Rasterizer.get_framebuffer(j + i*Rasterizer::g_width)[y];
615+
vec4i_t buf = m_Rasterizer.get_framebuffer(j + i*Rasterizer::g_width)[y];
639616
unsigned int mask = ( (unsigned int*)( &buf ) )[ x >> 5 ];
640617
int bit = mask & ( 1 << ( x & 31 ) );
641618
data[j*Tile::g_tile_width + 127 - x + (y + i*Tile::g_tile_height)*Rasterizer::g_total_width] = bit ? 255 : 0;
@@ -674,8 +651,8 @@ class ExampleOcclusionCulling : public entry::AppI
674651

675652
float m_Spacing = 60.f;
676653

677-
stl::vector<stl::vector<float>> m_Transforms;
678-
stl::vector<int> m_Visibility;
654+
stl::vector<Rasterizer::Object> m_Objects;
655+
stl::vector<uint32_t> m_Visibility;
679656

680657
bool m_Wireframe = false;
681658
bool m_Occlusion = false;

0 commit comments

Comments
 (0)