51
51
// Upper sentinel for cost values: 32767 is the largest value of a 16-bit signed integer.
// calcDisp seeds its running `short best_cost` with MAX_VAL - 1.
#define MAX_VAL 32767
52
52
53
53
void calcDisp (__local short * costFunc , __global short * disp , int uniquenessRatio /*, int textureTreshold, short textsum*/ ,
54
- int mindisp , int ndisp , int w , __local short * dispbuf , int d )
54
+ int mindisp , int ndisp , int w , __local short * dispbuf , int d , int x , int y , int cols , int rows , int wsz2 )
55
55
{
56
56
short FILTERED = (mindisp - 1 )<<4 ;
57
57
short best_disp = FILTERED , best_cost = MAX_VAL - 1 ;
@@ -73,6 +73,7 @@ void calcDisp(__local short * costFunc, __global short * disp, int uniquenessRat
73
73
}
74
74
best_disp = ndisp - dispbuf [0 ] - 1 ;
75
75
best_cost = costFunc [(ndisp - best_disp - 1 )* w ];
76
+ barrier (CLK_LOCAL_MEM_FENCE );
76
77
77
78
int thresh = best_cost + (best_cost * uniquenessRatio /100 );
78
79
dispbuf [d ] = ( (cost [d * w ] <= thresh ) && (d < (ndisp - best_disp - 2 ) || d > (ndisp - best_disp ) ) ) ? FILTERED : best_disp ;
@@ -90,7 +91,7 @@ void calcDisp(__local short * costFunc, __global short * disp, int uniquenessRat
90
91
91
92
// best_disp = (textsum < textureTreshold) ? FILTERED : best_disp;
92
93
93
- if ( dispbuf [0 ] != FILTERED )
94
+ if ( dispbuf [0 ] != FILTERED && x < cols - wsz2 - mindisp && y < rows - wsz2 )
94
95
{
95
96
cost = & costFunc [0 ] + (ndisp - best_disp - 1 )* w ;
96
97
int y3 = ((ndisp - best_disp - 1 ) > 0 ) ? cost [- w ] : cost [w ],
@@ -179,68 +180,71 @@ __kernel void stereoBM_opt(__global const uchar * leftptr, __global const uchar
179
180
cost = costFunc + costIdx ;
180
181
181
182
short tempcost = 0 ;
182
- for ( int i = 0 ; i < wsz ; i ++ )
183
+ if ( x < cols - wsz2 - mindisp && y < rows - wsz2 )
183
184
{
184
- int idx = mad24 (y - wsz2 + i * nthread , cols , x - wsz2 + i * (1 - nthread ));
185
- left = leftptr + idx ;
186
- right = rightptr + (idx - d );
187
- short costdiff = 0 ;
188
-
189
- for (int j = 0 ; j < wsz ; j ++ )
190
- {
191
- costdiff += abs ( left [0 ] - right [0 ] );
192
- left += 1 * nthread + cols * (1 - nthread );
193
- right += 1 * nthread + cols * (1 - nthread );// maybe use ? operator
194
- }
195
- if (nthread == 1 )
185
+ for (int i = 0 ; i < wsz ; i ++ )
196
186
{
197
- tempcost += costdiff ;
187
+ int idx = mad24 (y - wsz2 + i * nthread , cols , x - wsz2 + i * (1 - nthread ));
188
+ left = leftptr + idx ;
189
+ right = rightptr + (idx - d );
190
+ short costdiff = 0 ;
191
+
192
+ for (int j = 0 ; j < wsz ; j ++ )
193
+ {
194
+ costdiff += abs ( left [0 ] - right [0 ] );
195
+ left += 1 * nthread + cols * (1 - nthread );
196
+ right += 1 * nthread + cols * (1 - nthread );// maybe use ? operator
197
+ }
198
+ if (nthread == 1 )
199
+ {
200
+ tempcost += costdiff ;
201
+ }
202
+ costbuf [head ] = costdiff ;
203
+ head ++ ;
198
204
}
199
- costbuf [head ] = costdiff ;
200
- head ++ ;
201
205
}
202
206
barrier (CLK_LOCAL_MEM_FENCE );
203
207
cost [0 ] = tempcost ;
204
208
205
- if (x < cols - wsz2 - mindisp && y < rows - wsz2 && nthread == 1 )
206
- {
207
- int dispIdx = mad24 (gy , disp_step , disp_offset + gx * (int )sizeof (short ));
208
- disp = (__global short * )(dispptr + dispIdx );
209
- calcDisp (& costFunc [sizeY - 1 + lx - ly ], disp , uniquenessRatio , /*textureTreshold, textsum,*/
210
- mindisp , ndisp , 2 * sizeY , & dispbuf [nthread * tsize /2 ], d );
211
- }
209
+ int dispIdx = mad24 (gy , disp_step , disp_offset + gx * (int )sizeof (short ));
210
+ disp = (__global short * )(dispptr + dispIdx );
211
+ calcDisp (& costFunc [sizeY - 1 + lx - ly ], disp , uniquenessRatio , /*textureTreshold, textsum,*/
212
+ mindisp , ndisp , 2 * sizeY , & dispbuf [nthread * tsize /2 ], d , x , y , cols , rows , wsz2 );
212
213
barrier (CLK_LOCAL_MEM_FENCE );
213
214
214
215
lx = 1 - nthread ;
215
216
ly = nthread ;
216
217
217
- while (lx < sizeX && ly < sizeY )
218
+ while (lx < sizeX || ly < sizeY )
218
219
{
219
- x = gx + shiftX + lx ;
220
- y = gy + shiftY + ly ;
220
+ x = ( lx < sizeX ) ? gx + shiftX + lx : cols ;
221
+ y = ( ly < sizeY ) ? gy + shiftY + ly : rows ;
221
222
222
223
costIdx = calcLocalIdx (lx , ly , d , sizeY );
223
224
cost = costFunc + costIdx ;
224
- cost [0 ] = ( ly * (1 - nthread ) + lx * nthread == 0 ) ?
225
- calcCostBorder (leftptr , rightptr , x , y , nthread , wsz2 , costbuf , & head , cols , d ,
226
- costFunc [calcLocalIdx (lx - 1 * (1 - nthread ), ly - 1 * nthread , d , sizeY )]) :
227
- calcCostInside (leftptr , rightptr , x , y , wsz2 , cols , d ,
228
- costFunc [calcLocalIdx (lx - 1 , ly - 1 , d , sizeY )],
229
- costFunc [calcLocalIdx (lx , ly - 1 , d , sizeY )],
230
- costFunc [calcLocalIdx (lx - 1 , ly , d , sizeY )]);
231
- barrier (CLK_LOCAL_MEM_FENCE );
232
-
233
- if (x < cols - mindisp - wsz2 && y < rows - wsz2 )
225
+ if (x < cols - wsz2 - mindisp && y < rows - wsz2 )
234
226
{
235
- int dispIdx = mad24 (gy + ly , disp_step , disp_offset + (gx + lx )* (int )sizeof (short ));
236
- disp = (__global short * )(dispptr + dispIdx );
237
- calcDisp (& costFunc [sizeY - 1 - ly + lx ], disp , uniquenessRatio , //textureTreshold, textsum,
238
- mindisp , ndisp , 2 * sizeY , & dispbuf [nthread * tsize /2 ], d );
227
+ cost [0 ] = ( ly * (1 - nthread ) + lx * nthread == 0 ) ?
228
+ calcCostBorder (leftptr , rightptr , x , y , nthread , wsz2 , costbuf , & head , cols , d ,
229
+ costFunc [calcLocalIdx (lx - 1 * (1 - nthread ), ly - 1 * nthread , d , sizeY )]) :
230
+ calcCostInside (leftptr , rightptr , x , y , wsz2 , cols , d ,
231
+ costFunc [calcLocalIdx (lx - 1 , ly - 1 , d , sizeY )],
232
+ costFunc [calcLocalIdx (lx , ly - 1 , d , sizeY )],
233
+ costFunc [calcLocalIdx (lx - 1 , ly , d , sizeY )]);
239
234
}
240
235
barrier (CLK_LOCAL_MEM_FENCE );
241
236
237
+ int dispIdx = mad24 (gy + ly , disp_step , disp_offset + (gx + lx )* (int )sizeof (short ));
238
+ disp = (__global short * )(dispptr + dispIdx );
239
+ calcDisp (& costFunc [sizeY - 1 - ly + lx ], disp , uniquenessRatio , //textureTreshold, textsum,
240
+ mindisp , ndisp , 2 * sizeY , & dispbuf [nthread * tsize /2 ], d , x , y , cols , rows , wsz2 );
241
+ barrier (CLK_LOCAL_MEM_FENCE );
242
+
242
243
calcNewCoordinates (& lx , & ly , nthread );
243
244
}
245
+
246
+
247
+
244
248
}
245
249
246
250
#endif
0 commit comments