Skip to content

Commit 2eb6dda

Browse files
committed
Fix yolo models download and usage
1 parent 8688c3a commit 2eb6dda

File tree

7 files changed

+578
-367
lines changed

7 files changed

+578
-367
lines changed

autoit-addon/addon.cpp

Lines changed: 112 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ void AKAZE_homograpy_check(
8181
}
8282
}
8383

84-
#define UNSUPPORTED_YOLO_VERSION "Unsupported yolo version. Supported versions are v3, v5, v8."
84+
#define UNSUPPORTED_YOLO_VERSION "Unsupported yolo version. Supported versions are v3, v4, v5, v6, v7, v8."
8585

8686
void yolo_postprocess(
8787
const int spatial_width,
@@ -104,7 +104,7 @@ void yolo_postprocess(
104104
for (auto out : outs)
105105
{
106106
int offset;
107-
float scale_x, scale_y;
107+
float box_scale_w, box_scale_h;
108108

109109
if (out.dims != 2 && out.dims != 3) {
110110
CV_Error(cv::Error::StsAssert, UNSUPPORTED_YOLO_VERSION " out.dims != 2 && out.dims != 3");
@@ -120,17 +120,17 @@ void yolo_postprocess(
120120
}
121121

122122
// relative coordinates
123-
scale_x = (float)img_width * scale;
124-
scale_y = (float)img_height * scale;
123+
box_scale_w = (float)img_width * scale;
124+
box_scale_h = (float)img_height * scale;
125125
}
126126
else {
127127
if (out.size[0] != 1) {
128128
CV_Error(cv::Error::StsAssert, UNSUPPORTED_YOLO_VERSION " out.size[0] != 1");
129129
}
130130

131131
out = out.reshape(1, out.size[1]);
132-
scale_x = (float)img_width / spatial_width * scale;
133-
scale_y = (float)img_height / spatial_height * scale;
132+
box_scale_w = (float)img_width / spatial_width * scale;
133+
box_scale_h = (float)img_height / spatial_height * scale;
134134

135135
// yolov5 has an output of shape (batchSize, 25200, 85) (Num classes + box[x,y,w,h] + confidence[c])
136136
// yolov8 has an output of shape (batchSize, 84, 8400) (Num classes + box[x,y,w,h])
@@ -169,10 +169,10 @@ void yolo_postprocess(
169169

170170
if (maxScore >= score_threshold)
171171
{
172-
double centerX = (double)data[0] * scale_x;
173-
double centerY = (double)data[1] * scale_y;
174-
double width = (double)data[2] * scale_x;
175-
double height = (double)data[3] * scale_y;
172+
double centerX = (double)data[0] * box_scale_w;
173+
double centerY = (double)data[1] * box_scale_h;
174+
double width = (double)data[2] * box_scale_w;
175+
double height = (double)data[3] * box_scale_h;
176176
double left = centerX - width / 2;
177177
double top = centerY - height / 2;
178178

@@ -184,6 +184,58 @@ void yolo_postprocess(
184184
}
185185
}
186186

187+
namespace {
188+
void yolo_object_detection_postprocess(
189+
const float box_scale_w,
190+
const float box_scale_h,
191+
const float confidence_threshold,
192+
const cv::Mat& out,
193+
cv::Mat& classes_scores,
194+
std::vector<int>& class_ids,
195+
std::vector<float>& confidences,
196+
std::vector<cv::Rect2d>& bboxes,
197+
const int offset,
198+
const int background_label_id
199+
)
200+
{
201+
classes_scores.cols = out.cols - offset;
202+
203+
// Scan through all the bounding boxes output from the network and keep only the
204+
// ones with high confidence scores. Assign the box's class label as the class
205+
// with the highest score for the box.
206+
207+
float* detection = (float*) out.data;
208+
209+
for (int i = 0; i < out.rows; ++i, detection += out.cols)
210+
{
211+
if (background_label_id < 0 && offset == 5 && detection[4] < confidence_threshold) {
212+
continue;
213+
}
214+
215+
classes_scores.data = reinterpret_cast<uchar*>(detection + offset);
216+
217+
// Get the value and location of the maximum score
218+
double confidence;
219+
Point maxClassLoc;
220+
minMaxLoc(classes_scores, 0, &confidence, 0, &maxClassLoc);
221+
if (confidence <= confidence_threshold) {
222+
continue;
223+
}
224+
225+
double centerX = (double)detection[0] * box_scale_w;
226+
double centerY = (double)detection[1] * box_scale_h;
227+
double width = (double)detection[2] * box_scale_w;
228+
double height = (double)detection[3] * box_scale_h;
229+
double left = centerX - width / 2;
230+
double top = centerY - height / 2;
231+
232+
class_ids.push_back(maxClassLoc.x);
233+
confidences.push_back((float)confidence);
234+
bboxes.push_back(Rect2d(left, top, width, height));
235+
}
236+
}
237+
}
238+
187239
void object_detection_postprocess(
188240
const cv::dnn::Net& net,
189241
const int inpWidth,
@@ -204,7 +256,7 @@ void object_detection_postprocess(
204256
auto outLayerType = lastLayer->type;
205257

206258
Mat classes_scores(1, 0, CV_32FC1);
207-
float scale_x, scale_y;
259+
float box_scale_w, box_scale_h;
208260

209261
if (outLayerType == "DetectionOutput")
210262
{
@@ -228,19 +280,19 @@ void object_detection_postprocess(
228280

229281
if (data[i + 5] - data[i + 3] < 1) {
230282
// relative coordinates
231-
scale_x = inpWidth * imgScale;
232-
scale_y = inpHeight * imgScale;
283+
box_scale_w = inpWidth * imgScale;
284+
box_scale_h = inpHeight * imgScale;
233285
}
234286
else {
235287
// absolute coordinate
236-
scale_x = imgScale;
237-
scale_y = imgScale;
288+
box_scale_w = imgScale;
289+
box_scale_h = imgScale;
238290
}
239291

240-
double left = (double)data[i + 3] * scale_x;
241-
double top = (double)data[i + 4] * scale_y;
242-
double width = (double)data[i + 5] * scale_x - left + 1;
243-
double height = (double)data[i + 6] * scale_y - top + 1;
292+
double left = (double)data[i + 3] * box_scale_w;
293+
double top = (double)data[i + 4] * box_scale_h;
294+
double width = (double)data[i + 5] * box_scale_w - left + 1;
295+
double height = (double)data[i + 6] * box_scale_h - top + 1;
244296

245297
int class_id = (int)(data[i + 1]);
246298
if (background_label_id >= 0 && background_label_id <= class_id) {
@@ -254,125 +306,77 @@ void object_detection_postprocess(
254306
}
255307
else if (outLayerType == "Region")
256308
{
257-
// yolo v4
309+
// yolo v3, v4
258310

259311
// relative coordinates
260-
scale_x = inpWidth * imgScale;
261-
scale_y = inpHeight * imgScale;
312+
box_scale_w = inpWidth * imgScale;
313+
box_scale_h = inpHeight * imgScale;
314+
int offset = 5;
262315

263316
// Network produces output blob with a shape NxC where N is a number of
264317
// detected objects and C is a number of classes + 5 where the first 4
265318
// numbers are [center_x, center_y, width, height] and the 5th is the box confidence
266319
for (auto out : outs)
267320
{
268-
classes_scores.cols = out.cols - 5;
269-
float* data = (float*)out.data;
270-
271-
for (int j = 0; j < out.rows; ++j, data += out.cols)
272-
{
273-
classes_scores.data = reinterpret_cast<uchar*>(data + 5);
274-
275-
// Get the value and location of the maximum score
276-
double confidence;
277-
Point maxClassLoc;
278-
minMaxLoc(classes_scores, 0, &confidence, 0, &maxClassLoc);
279-
if (confidence <= confidence_threshold) {
280-
continue;
281-
}
282-
283-
double centerX = (double)data[0] * scale_x;
284-
double centerY = (double)data[1] * scale_y;
285-
double width = (double)data[2] * scale_x;
286-
double height = (double)data[3] * scale_y;
287-
double left = centerX - width / 2;
288-
double top = centerY - height / 2;
289-
290-
class_ids.push_back(maxClassLoc.x);
291-
confidences.push_back((float)confidence);
292-
bboxes.push_back(Rect2d(left, top, width, height));
293-
}
321+
yolo_object_detection_postprocess(
322+
box_scale_w,
323+
box_scale_h,
324+
confidence_threshold,
325+
out,
326+
classes_scores,
327+
class_ids,
328+
confidences,
329+
bboxes,
330+
offset,
331+
background_label_id
332+
);
294333
}
295334
}
296335
else if (outLayerType == "Identity") {
297336
for (auto out : outs)
298337
{
299338
int offset;
300-
float scale_x, scale_y;
301-
302-
if (out.dims != 2 && out.dims != 3) {
303-
CV_Error(cv::Error::StsAssert, UNSUPPORTED_YOLO_VERSION " out.dims != 2 && out.dims != 3");
304-
}
339+
float box_scale_w, box_scale_h;
305340

306-
if (out.dims == 2) {
307-
// yolo v3
308-
offset = 5;
309-
310-
// relative coordinates
311-
scale_x = inpWidth * imgScale;
312-
scale_y = inpHeight * imgScale;
313-
}
314-
else {
341+
if (out.dims == 3) {
315342
if (out.size[0] != 1) {
316343
CV_Error(cv::Error::StsAssert, UNSUPPORTED_YOLO_VERSION " out.size[0] != 1");
317344
}
318345

319346
out = out.reshape(1, out.size[1]);
320347

321348
// absolute coordinate
322-
scale_x = imgScale;
323-
scale_y = imgScale;
349+
box_scale_w = imgScale;
350+
box_scale_h = imgScale;
324351

325-
// yolov5 has an output of shape (batchSize, 25200, 85) (Num classes + box[x,y,w,h] + confidence[c])
326-
// yolov8 has an output of shape (batchSize, 84, 8400) (Num classes + box[x,y,w,h])
327-
if (out.rows == num_classes + 4) {
328-
// yolo v8
352+
if (out.cols == num_classes + 5) {
353+
// yolo v5, v6, v7 has an output of shape (batchSize, 25200, 85) (Num classes + box[x,y,w,h] + confidence[c])
354+
offset = 5;
355+
}
356+
else if (out.rows == num_classes + 4) {
357+
// yolo v8 has an output of shape (batchSize, 84, 8400) (Num classes + box[x,y,w,h])
329358
offset = 4;
330359
cv::transpose(out, out);
331360
}
332-
else if (out.cols == num_classes + 5) {
333-
// yolo v5
334-
offset = 5;
335-
}
336361
else {
337-
CV_Error(cv::Error::StsAssert, UNSUPPORTED_YOLO_VERSION);
362+
CV_Error(cv::Error::StsAssert, UNSUPPORTED_YOLO_VERSION " out.rows != num_classes + 4 && out.cols != num_classes + 5");
338363
}
364+
} else {
365+
CV_Error(cv::Error::StsAssert, UNSUPPORTED_YOLO_VERSION " out.dims != 3");
339366
}
340367

341-
classes_scores.cols = out.cols - offset;
342-
343-
// Scan through all the bounding boxes output from the network and keep only the
344-
// ones with high confidence scores. Assign the box's class label as the class
345-
// with the highest score for the box.
346-
347-
float* data = (float*)out.data;
348-
349-
for (int i = 0; i < out.rows; ++i, data += out.cols)
350-
{
351-
if (offset == 5 && data[4] < confidence_threshold) {
352-
continue;
353-
}
354-
355-
classes_scores.data = reinterpret_cast<uchar*>(data + offset);
356-
357-
// Get the value and location of the maximum score
358-
double confidence;
359-
Point maxClassLoc;
360-
minMaxLoc(classes_scores, 0, &confidence, 0, &maxClassLoc);
361-
362-
if (confidence >= confidence_threshold)
363-
{
364-
double centerX = (double)data[0] * scale_x;
365-
double centerY = (double)data[1] * scale_y;
366-
double width = (double)data[2] * scale_x;
367-
double height = (double)data[3] * scale_y;
368-
double left = centerX - width / 2;
369-
double top = centerY - height / 2;
370-
371-
class_ids.push_back(maxClassLoc.x);
372-
confidences.push_back((float)confidence);
373-
bboxes.push_back(Rect2d(left, top, width, height));
374-
}
375-
}
368+
yolo_object_detection_postprocess(
369+
box_scale_w,
370+
box_scale_h,
371+
confidence_threshold,
372+
out,
373+
classes_scores,
374+
class_ids,
375+
confidences,
376+
bboxes,
377+
offset,
378+
background_label_id
379+
);
376380
}
377381
}
378382
else {

samples/dnn/object_detection/download_model.ps1

Lines changed: 7 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -25,37 +25,6 @@ if ([string]::IsNullOrEmpty($Zoo)) {
2525
$Zoo = Join-Path $PSScriptRoot "models.yml"
2626
}
2727

28-
function DownloadYOLOv5() {
29-
$onnx =[System.IO.Path]::GetFullPath("$Destination/$Model.onnx")
30-
31-
if ((Test-Path -Path "$onnx") -and -not $Force) {
32-
$onnx
33-
return
34-
}
35-
36-
if (!(Test-Path -Path "$Destination/yolov5")) {
37-
git clone "https://github.com/ultralytics/yolov5" "$Destination/yolov5"
38-
& cd "$Destination/yolov5"
39-
python -m pip install -r requirements.txt
40-
}
41-
42-
python "$Destination/yolov5/export.py" --include onnx --opset 12 --weights "$Model.pt"
43-
$onnx
44-
}
45-
46-
function DownloadYOLOv8() {
47-
$onnx =[System.IO.Path]::GetFullPath("$Destination/$Model.onnx")
48-
49-
if ((Test-Path -Path "$onnx") -and -not $Force) {
50-
$onnx
51-
return
52-
}
53-
54-
pip install ultralytics
55-
yolo export model=$Model.pt imgsz=640 format=onnx opset=12
56-
$onnx
57-
}
58-
5928
$DNN_ROOT_PATH = _OpenCV_FindFile -Path "samples/dnn" -SearchPaths @(
6029
"opencv\sources"
6130
"opencv-4.9.0-*\opencv\sources"
@@ -64,15 +33,12 @@ $DNN_ROOT_PATH = _OpenCV_FindFile -Path "samples/dnn" -SearchPaths @(
6433
$SAMPLES_PATH = _OpenCV_FindFile -Path "samples"
6534
$PYTHON_VENV_PATH = Join-Path $SAMPLES_PATH ".venv"
6635

67-
foreach($exe in (where.exe "$Python")) {
68-
$PythonCmd = Get-Command "$exe"
69-
# Torch is not yet supported on windows python 3.11
70-
if ($PythonCmd.Version.Major -ne 3 -or $PythonCmd.Version.Minor -ne 11) {
36+
if (!(Test-Path -Path $PYTHON_VENV_PATH)) {
37+
foreach($exe in (where.exe "$Python")) {
38+
$PythonCmd = Get-Command "$exe"
7139
break
7240
}
73-
}
7441

75-
if (!(Test-Path -Path $PYTHON_VENV_PATH)) {
7642
Write-Host "$($PythonCmd.Source) -m venv $PYTHON_VENV_PATH"
7743
& $PythonCmd.Source -m venv "$PYTHON_VENV_PATH"
7844
attrib +h "$PYTHON_VENV_PATH"
@@ -81,7 +47,7 @@ if (!(Test-Path -Path $PYTHON_VENV_PATH)) {
8147
& "$PYTHON_VENV_PATH\Scripts\Activate.ps1"
8248

8349
python -m pip install --upgrade pip
84-
pip install opencv-python PyYAML requests
50+
python -m pip install --upgrade opencv-python PyYAML requests
8551
} else {
8652
# Activate venv
8753
& "$PYTHON_VENV_PATH\Scripts\Activate.ps1"
@@ -92,12 +58,6 @@ if (!(Test-Path -Path $Destination)) {
9258
}
9359
cd "$Destination"
9460

95-
if ($Model.StartsWith("yolov5")) {
96-
DownloadYOLOv5
97-
} elseif ($Model.StartsWith("yolov8")) {
98-
DownloadYOLOv8
99-
} else {
100-
$Env:PYTHONPATH = "$DNN_ROOT_PATH"
101-
$script = Join-Path $PSScriptRoot download_model.py
102-
python $script $Model --zoo $Zoo
103-
}
61+
$Env:PYTHONPATH = "$DNN_ROOT_PATH"
62+
$script = Join-Path $PSScriptRoot download_model.py
63+
python $script $Model --zoo $Zoo

0 commit comments

Comments
 (0)