@@ -124,16 +124,16 @@ __KERNEL__ void FilterTemporal(
124124 h_buffer.moments [index] = Vec4 (moments.x , moments.y , variance, history);
125125}
126126
127- __KERNEL__ void EstimateVariance (GBuffer g_buffer, HistoryBuffer h_buffer, Point2i res)
127+ __KERNEL__ void EstimateVariance (GBuffer g_buffer, HistoryBuffer h_buffer, HistoryBuffer out_h_buffer, Point2i res)
128128{
129129 int x = threadIdx .x + blockIdx .x * blockDim .x ;
130130 int y = threadIdx .y + blockIdx .y * blockDim .y ;
131131 if (x >= res.x || y >= res.y ) return ;
132-
133132 const int32 index = y * res.x + x;
134133
135- int32 history = int32 ( h_buffer.moments [index]. w ) ;
134+ out_h_buffer. moments [index] = h_buffer.moments [index];
136135
136+ int32 history = int32 (h_buffer.moments [index].w );
137137 if (history > 3 )
138138 {
139139 // Just go with temporally estimated variance
@@ -207,7 +207,42 @@ __KERNEL__ void EstimateVariance(GBuffer g_buffer, HistoryBuffer h_buffer, Point
207207 // Spatially estimated variance
208208 Float variance = fmax (0 .0f , sum_moments.y - sum_moments.x * sum_moments.x );
209209
210- h_buffer.moments [index].z = variance;
210+ out_h_buffer.moments [index].z = variance;
211+ }
212+
// Smooths the per-pixel variance with a 3x3 Gaussian kernel (Eq. 5).
//
// For every pixel inside `res`, reads the z component of h_buffer.moments over
// the pixel's 3x3 neighborhood and stores the weighted average in the z
// component of out_h_buffer.moments at the same pixel. Only .z of the output
// entry is written by this kernel.
//
// Launch layout: one thread per pixel on a 2D grid (x/y derived from
// threadIdx/blockIdx/blockDim); threads that fall outside `res` return early.
//
// NOTE(review): neighbors are read from h_buffer while other threads write
// out_h_buffer, so the two are presumably distinct storage - confirm at the
// call site.
__KERNEL__ void FilterVariance(HistoryBuffer h_buffer, HistoryBuffer out_h_buffer, Point2i res)
{
    int x = threadIdx.x + blockIdx.x * blockDim.x;
    int y = threadIdx.y + blockIdx.y * blockDim.y;
    if (x >= res.x || y >= res.y) return;
    const int32 index = y * res.x + x;

    // Eq. 5 Filter variance using 3x3 gaussian kernel
    // Row-major 3x3 weights; the nine entries sum to exactly 1.
    constexpr Float gaussian3x3[] = {
        1 / 16.0f, 1 / 8.0f, 1 / 16.0f,
        1 / 8.0f,  1 / 4.0f, 1 / 8.0f,
        1 / 16.0f, 1 / 8.0f, 1 / 16.0f,
    };

    Float variance = 0;
    Float weight_sum = 0;

    const int32 r = 1;
    for (int32 j = -r; j <= r; ++j)
    {
        for (int32 i = -r; i <= r; ++i)
        {
            Point2i q(x + i, y + j);
            if (q.x < 0 || q.x >= res.x || q.y < 0 || q.y >= res.y)
            {
                // Tap lies outside the image: skip it. Its weight is also left
                // out of weight_sum so the result can be renormalized below.
                continue;
            }

            int32 index_q = q.x + q.y * res.x;
            int32 kernel_index = (j + 1) * 3 + (i + 1);

            const Float w = gaussian3x3[kernel_index];
            variance += w * h_buffer.moments[index_q].z;
            weight_sum += w;
        }
    }

    // Renormalize by the weights actually used. Without this, border pixels
    // (where some taps are skipped) would receive an attenuated variance.
    // The center tap is always in bounds, so weight_sum >= 1/4 and the
    // division is safe; for interior pixels weight_sum is exactly 1.
    out_h_buffer.moments[index].z = variance / weight_sum;
}
212247
213248} // namespace cuwfrt
0 commit comments