A simple C implementation of 2D Gaussian rendering, obtained using image-gs / gsplat, which does not require CUDA.
- SDL
- OpenGL
- Python (for embedding shaders into the code)
- gsplat_cs.c - rendering using compute shaders, an exact port of the CUDA version
- gsplat_sprites.c - rendering using regular sprites
- gsplat_test.py - a very slow port to Python (requires torch)
- gsplat_cpt_to_tsv.py - converter from gsplat checkpoint to tsv
git clone https://github.com/peko/gs-renderer
cd gs-renderer
make
./gsplat_cs data/data_01.tsvYou will need PyTorch installed to read and convert the checkpoint.
python gsplat_cpt_to_tsv.py --cpt {path_to_your/checkpoint.pt} --out data/my_gaussians.tsv The checkpoint does not contain information about the original image size, so you can manually add it to the first line of the resulting tsv file in the format {width}\t{height}\t{features}. See examples in the data/*.tsv folder. Default size is 512x512.
image-gs/gsplat/cudacsrc/forward.cu
__global__ void nd_rasterize_forward_no_tiles(
const dim3 img_size,
const unsigned channels,
const unsigned num_points,
const float2* __restrict__ xys,
const float3* __restrict__ conics,
const float* __restrict__ colors,
float* __restrict__ out_img,
int* __restrict__ pixel_topk
) {
auto block = cg::this_thread_block();
unsigned i =
block.group_index().y * block.group_dim().y + block.thread_index().y;
unsigned j =
block.group_index().x * block.group_dim().x + block.thread_index().x;
float px = (float)j;
float py = (float)i;
int32_t pix_id = i * img_size.x + j;
bool inside = (i < img_size.y && j < img_size.x);
bool done = !inside;
// **** max 12 channels for speed ***
float pix_out[MAX_CHANNELS] = {0.f};
// top k Gaussian ids
int32_t topk[TOP_K];
float topk_vals[TOP_K] = {0.f};
for (int k = 0; k < TOP_K; ++k)
topk[k] = -1;
for (int g = 0; g < num_points; ++g) {
const float3 conic = conics[g];
const float2 xy = xys[g];
const float2 delta = {xy.x - px, xy.y - py};
const float sigma = 0.5f * (conic.x * delta.x * delta.x +
conic.z * delta.y * delta.y) +
conic.y * delta.x * delta.y;
if (sigma < 0.f || isnan(sigma) || isinf(sigma)) {
continue;
}
const float alpha = __expf(-sigma);
int32_t g_id = g;
// find the minimum value in topk
int32_t min_topk_id = -1;
float min_topk_val = 1e30f;
for (int32_t k = 0; k < TOP_K; ++k) {
if (topk[k] < 0) {
min_topk_id = k;
min_topk_val = -1.0f;
break;
} else if (topk_vals[k] < min_topk_val) {
min_topk_id = k;
min_topk_val = topk_vals[k];
}
}
if (alpha > min_topk_val) {
topk[min_topk_id] = g_id;
topk_vals[min_topk_id] = alpha;
}
}
for (int c = 0; c < channels; ++c) {
float sum_val = 0.f;
for (int k = 0; k < TOP_K; ++k) {
if (topk[k] < 0) continue;
sum_val += topk_vals[k];
}
for (int k = 0; k < TOP_K; ++k) {
int32_t g = topk[k];
if (g < 0) continue;
// normalize by sum of topk values
float vis = topk_vals[k] / (sum_val + EPS_no_tiles);
pix_out[c] += colors[g * channels + c] * vis;
}
}
if (inside) {
for (int c = 0; c < channels; ++c) {
out_img[pix_id * channels + c] = pix_out[c];
}
for (int k = 0; k < TOP_K; ++k) {
pixel_topk[pix_id * TOP_K + k] = topk[k];
}
}
}