smbape
diff --git a/‎autoit-addon/addon.cpp‎
Lines changed: 112 additions & 108 deletions b/‎autoit-addon/addon.cpp‎
Lines changed: 112 additions & 108 deletions
diff --git a/‎samples/dnn/object_detection/download_model.ps1‎
Lines changed: 7 additions & 47 deletions b/‎samples/dnn/object_detection/download_model.ps1‎
Lines changed: 7 additions & 47 deletions
@@ -81,7 +81,7 @@ void AKAZE_homograpy_check(
 	}
 }
 
-#define UNSUPPORTED_YOLO_VERSION "Unsupported yolo version. Supported versions are v3, v5, v8."
+#define UNSUPPORTED_YOLO_VERSION "Unsupported yolo version. Supported versions are v3, v4, v5, v6, v7, v8."
 
 void yolo_postprocess(
 	const int spatial_width,
@@ -104,7 +104,7 @@ void yolo_postprocess(
 	for (auto out : outs)
 	{
 		int offset;
-		float scale_x, scale_y;
+		float box_scale_w, box_scale_h;
 
 		if (out.dims != 2 && out.dims != 3) {
 			CV_Error(cv::Error::StsAssert, UNSUPPORTED_YOLO_VERSION " out.dims != 2 && out.dims != 3");
@@ -120,17 +120,17 @@ void yolo_postprocess(
 			}
 
 			// relative coordinates
-			scale_x = (float)img_width * scale;
-			scale_y = (float)img_height * scale;
+			box_scale_w = (float)img_width * scale;
+			box_scale_h = (float)img_height * scale;
 		}
 		else {
 			if (out.size[0] != 1) {
 				CV_Error(cv::Error::StsAssert, UNSUPPORTED_YOLO_VERSION " out.size[0] != 1");
 			}
 
 			out = out.reshape(1, out.size[1]);
-			scale_x = (float)img_width / spatial_width * scale;
-			scale_y = (float)img_height / spatial_height * scale;
+			box_scale_w = (float)img_width / spatial_width * scale;
+			box_scale_h = (float)img_height / spatial_height * scale;
 
 			// yolov5 has an output of shape (batchSize, 25200, 85) (Num classes + box[x,y,w,h] + confidence[c])
 			// yolov8 has an output of shape (batchSize, 84,  8400) (Num classes + box[x,y,w,h])
@@ -169,10 +169,10 @@ void yolo_postprocess(
 
 			if (maxScore >= score_threshold)
 			{
-				double centerX = (double)data[0] * scale_x;
-				double centerY = (double)data[1] * scale_y;
-				double width = (double)data[2] * scale_x;
-				double height = (double)data[3] * scale_y;
+				double centerX = (double)data[0] * box_scale_w;
+				double centerY = (double)data[1] * box_scale_h;
+				double width = (double)data[2] * box_scale_w;
+				double height = (double)data[3] * box_scale_h;
 				double left = centerX - width / 2;
 				double top = centerY - height / 2;
 
@@ -184,6 +184,58 @@ void yolo_postprocess(
 	}
 }
 
+namespace { // file-local helper shared by the branches of object_detection_postprocess
+	void yolo_object_detection_postprocess( // appends one (class id, confidence, box) entry per accepted row of out
+		const float box_scale_w, // multiplies columns 0 and 2 (center x, width)
+		const float box_scale_h, // multiplies columns 1 and 3 (center y, height)
+		const float confidence_threshold, // cutoff for the column-4 pre-check and for the best class score
+		const cv::Mat& out, // one candidate per row; walked as rows of out.cols floats
+		cv::Mat& classes_scores, // scratch header: its cols/data fields are overwritten here and not restored
+		std::vector<int>& class_ids, // output: position of the best score within the class columns
+		std::vector<float>& confidences, // output: best class score, narrowed to float
+		std::vector<cv::Rect2d>& bboxes, // output: (left, top, width, height) after scaling
+		const int offset, // number of leading columns that precede the class scores
+		const int background_label_id // the column-4 pre-check only runs when this is negative
+	)
+	{
+		classes_scores.cols = out.cols - offset; // width of the per-row class-score view
+
+		// Scan through all the bounding boxes output from the network and keep only the
+		// ones with high confidence scores. Assign the box's class label as the class
+		// with the highest score for the box.
+		// NOTE(review): the raw pointer walk below assumes out holds continuous CV_32F data - confirm at call sites.
+		float* detection = (float*) out.data;
+
+		for (int i = 0; i < out.rows; ++i, detection += out.cols) // detection advances one full row per iteration
+		{
+			if (background_label_id < 0 && offset == 5 && detection[4] < confidence_threshold) {
+				continue; // early reject on column 4 (presumably the box confidence of the 5-column layout - verify)
+			}
+			// Re-point the scratch header at this row's class scores; no data is copied.
+			classes_scores.data = reinterpret_cast<uchar*>(detection + offset);
+
+			// Get the value and location of the maximum score
+			double confidence;
+			Point maxClassLoc;
+			minMaxLoc(classes_scores, 0, &confidence, 0, &maxClassLoc);
+			if (confidence <= confidence_threshold) {
+				continue; // strict comparison: a best score equal to the threshold is rejected
+			}
+			// Scale the center/size box and convert it to a top-left anchored rectangle.
+			double centerX = (double)detection[0] * box_scale_w;
+			double centerY = (double)detection[1] * box_scale_h;
+			double width = (double)detection[2] * box_scale_w;
+			double height = (double)detection[3] * box_scale_h;
+			double left = centerX - width / 2;
+			double top = centerY - height / 2;
+
+			class_ids.push_back(maxClassLoc.x); // x is the column of the maximum inside classes_scores
+			confidences.push_back((float)confidence);
+			bboxes.push_back(Rect2d(left, top, width, height));
+		}
+	}
+}
+
 void object_detection_postprocess(
 	const cv::dnn::Net& net,
 	const int inpWidth,
@@ -204,7 +256,7 @@ void object_detection_postprocess(
 	auto outLayerType = lastLayer->type;
 
 	Mat classes_scores(1, 0, CV_32FC1);
-	float scale_x, scale_y;
+	float box_scale_w, box_scale_h;
 
 	if (outLayerType == "DetectionOutput")
 	{
@@ -228,19 +280,19 @@ void object_detection_postprocess(
 
 				if (data[i + 5] - data[i + 3] < 1) {
 					// relative coordinates
-					scale_x = inpWidth * imgScale;
-					scale_y = inpHeight * imgScale;
+					box_scale_w = inpWidth * imgScale;
+					box_scale_h = inpHeight * imgScale;
 				}
 				else {
 					// absolute coordinate
-					scale_x = imgScale;
-					scale_y = imgScale;
+					box_scale_w = imgScale;
+					box_scale_h = imgScale;
 				}
 
-				double left = (double)data[i + 3] * scale_x;
-				double top = (double)data[i + 4] * scale_y;
-				double width = (double)data[i + 5] * scale_x - left + 1;
-				double height = (double)data[i + 6] * scale_y - top + 1;
+				double left = (double)data[i + 3] * box_scale_w;
+				double top = (double)data[i + 4] * box_scale_h;
+				double width = (double)data[i + 5] * box_scale_w - left + 1;
+				double height = (double)data[i + 6] * box_scale_h - top + 1;
 
 				int class_id = (int)(data[i + 1]);
 				if (background_label_id >= 0 && background_label_id <= class_id) {
@@ -254,125 +306,77 @@ void object_detection_postprocess(
 	}
 	else if (outLayerType == "Region")
 	{
-		// yolo v4
+		// yolo v3, v4
 
 		// relative coordinates
-		scale_x = inpWidth * imgScale;
-		scale_y = inpHeight * imgScale;
+		box_scale_w = inpWidth * imgScale;
+		box_scale_h = inpHeight * imgScale;
+		int offset = 5;
 
 		// Network produces output blob with a shape NxC where N is a number of
 		// detected objects and C is a number of classes + 5: the first 4 numbers are
 		// [center_x, center_y, width, height] and the 5th precedes the class scores
 		for (auto out : outs)
 		{
-			classes_scores.cols = out.cols - 5;
-			float* data = (float*)out.data;
-
-			for (int j = 0; j < out.rows; ++j, data += out.cols)
-			{
-				classes_scores.data = reinterpret_cast<uchar*>(data + 5);
-
-				// Get the value and location of the maximum score
-				double confidence;
-				Point maxClassLoc;
-				minMaxLoc(classes_scores, 0, &confidence, 0, &maxClassLoc);
-				if (confidence <= confidence_threshold) {
-					continue;
-				}
-
-				double centerX = (double)data[0] * scale_x;
-				double centerY = (double)data[1] * scale_y;
-				double width = (double)data[2] * scale_x;
-				double height = (double)data[3] * scale_y;
-				double left = centerX - width / 2;
-				double top = centerY - height / 2;
-
-				class_ids.push_back(maxClassLoc.x);
-				confidences.push_back((float)confidence);
-				bboxes.push_back(Rect2d(left, top, width, height));
-			}
+			yolo_object_detection_postprocess(
+				box_scale_w,
+				box_scale_h,
+				confidence_threshold,
+				out,
+				classes_scores,
+				class_ids,
+				confidences,
+				bboxes,
+				offset,
+				background_label_id
+			);
 		}
 	}
 	else if (outLayerType == "Identity") {
 		for (auto out : outs)
 		{
 			int offset;
-			float scale_x, scale_y;
-
-			if (out.dims != 2 && out.dims != 3) {
-				CV_Error(cv::Error::StsAssert, UNSUPPORTED_YOLO_VERSION " out.dims != 2 && out.dims != 3");
-			}
+			float box_scale_w, box_scale_h;
 
-			if (out.dims == 2) {
-				// yolo v3
-				offset = 5;
-
-				// relative coordinates
-				scale_x = inpWidth * imgScale;
-				scale_y = inpHeight * imgScale;
-			}
-			else {
+			if (out.dims == 3) {
 				if (out.size[0] != 1) {
 					CV_Error(cv::Error::StsAssert, UNSUPPORTED_YOLO_VERSION " out.size[0] != 1");
 				}
 
 				out = out.reshape(1, out.size[1]);
 
 				// absolute coordinate
-				scale_x = imgScale;
-				scale_y = imgScale;
+				box_scale_w = imgScale;
+				box_scale_h = imgScale;
 
-				// yolov5 has an output of shape (batchSize, 25200, 85) (Num classes + box[x,y,w,h] + confidence[c])
-				// yolov8 has an output of shape (batchSize, 84,  8400) (Num classes + box[x,y,w,h])
-				if (out.rows == num_classes + 4) {
-					// yolo v8
+				if (out.cols == num_classes + 5) {
+					// yolo v5, v6, v7 has an output of shape (batchSize, 25200, 85) (Num classes + box[x,y,w,h] + confidence[c])
+					offset = 5;
+				}
+				else if (out.rows == num_classes + 4) {
+					// yolo v8 has an output of shape (batchSize, 84,  8400) (Num classes + box[x,y,w,h])
 					offset = 4;
 					cv::transpose(out, out);
 				}
-				else if (out.cols == num_classes + 5) {
-					// yolo v5
-					offset = 5;
-				}
 				else {
-					CV_Error(cv::Error::StsAssert, UNSUPPORTED_YOLO_VERSION);
+					CV_Error(cv::Error::StsAssert, UNSUPPORTED_YOLO_VERSION " out.rows != num_classes + 4 && out.cols != num_classes + 5");
 				}
+			} else {
+				CV_Error(cv::Error::StsAssert, UNSUPPORTED_YOLO_VERSION " out.dims != 3");
 			}
 
-			classes_scores.cols = out.cols - offset;
-
-			// Scan through all the bounding boxes output from the network and keep only the
-			// ones with high confidence scores. Assign the box's class label as the class
-			// with the highest score for the box.
-
-			float* data = (float*)out.data;
-
-			for (int i = 0; i < out.rows; ++i, data += out.cols)
-			{
-				if (offset == 5 && data[4] < confidence_threshold) {
-					continue;
-				}
-
-				classes_scores.data = reinterpret_cast<uchar*>(data + offset);
-
-				// Get the value and location of the maximum score
-				double confidence;
-				Point maxClassLoc;
-				minMaxLoc(classes_scores, 0, &confidence, 0, &maxClassLoc);
-
-				if (confidence >= confidence_threshold)
-				{
-					double centerX = (double)data[0] * scale_x;
-					double centerY = (double)data[1] * scale_y;
-					double width = (double)data[2] * scale_x;
-					double height = (double)data[3] * scale_y;
-					double left = centerX - width / 2;
-					double top = centerY - height / 2;
-
-					class_ids.push_back(maxClassLoc.x);
-					confidences.push_back((float)confidence);
-					bboxes.push_back(Rect2d(left, top, width, height));
-				}
-			}
+			yolo_object_detection_postprocess(
+				box_scale_w,
+				box_scale_h,
+				confidence_threshold,
+				out,
+				classes_scores,
+				class_ids,
+				confidences,
+				bboxes,
+				offset,
+				background_label_id
+			);
 		}
 	}
 	else {
 
@@ -25,37 +25,6 @@ if ([string]::IsNullOrEmpty($Zoo)) {
     $Zoo = Join-Path $PSScriptRoot "models.yml"
 }
 
-function DownloadYOLOv5() {
-    $onnx =[System.IO.Path]::GetFullPath("$Destination/$Model.onnx")
-
-    if ((Test-Path -Path "$onnx") -and -not $Force) {
-        $onnx
-        return
-    }
-
-    if (!(Test-Path -Path "$Destination/yolov5")) {
-        git clone "https://github.com/ultralytics/yolov5" "$Destination/yolov5"
-        & cd "$Destination/yolov5"
-        python -m pip install -r requirements.txt
-    }
-
-    python "$Destination/yolov5/export.py" --include onnx --opset 12 --weights "$Model.pt"
-    $onnx
-}
-
-function DownloadYOLOv8() {
-    $onnx =[System.IO.Path]::GetFullPath("$Destination/$Model.onnx")
-
-    if ((Test-Path -Path "$onnx") -and -not $Force) {
-        $onnx
-        return
-    }
-
-    pip install ultralytics
-    yolo export model=$Model.pt imgsz=640 format=onnx opset=12
-    $onnx
-}
-
 $DNN_ROOT_PATH = _OpenCV_FindFile -Path "samples/dnn" -SearchPaths @(
     "opencv\sources"
     "opencv-4.9.0-*\opencv\sources"
@@ -64,15 +33,12 @@ $DNN_ROOT_PATH = _OpenCV_FindFile -Path "samples/dnn" -SearchPaths @(
 $SAMPLES_PATH = _OpenCV_FindFile -Path "samples"
 $PYTHON_VENV_PATH = Join-Path $SAMPLES_PATH ".venv"
 
-foreach($exe in (where.exe "$Python")) {
-    $PythonCmd = Get-Command "$exe"
-    # Torch is not yet supported on windows python 3.11
-    if ($PythonCmd.Version.Major -ne 3 -or $PythonCmd.Version.Minor -ne 11) {
+if (!(Test-Path -Path $PYTHON_VENV_PATH)) {
+    foreach($exe in (where.exe "$Python")) {
+        $PythonCmd = Get-Command "$exe"
         break
     }
-}
 
-if (!(Test-Path -Path $PYTHON_VENV_PATH)) {
     Write-Host "$($PythonCmd.Source) -m venv $PYTHON_VENV_PATH"
     & $PythonCmd.Source -m venv "$PYTHON_VENV_PATH"
     attrib +h "$PYTHON_VENV_PATH"
@@ -81,7 +47,7 @@ if (!(Test-Path -Path $PYTHON_VENV_PATH)) {
     & "$PYTHON_VENV_PATH\Scripts\Activate.ps1"
 
     python -m pip install --upgrade pip
-    pip install opencv-python PyYAML requests
+    python -m pip install --upgrade opencv-python PyYAML requests
 } else {
     # Activate venv
     & "$PYTHON_VENV_PATH\Scripts\Activate.ps1"
@@ -92,12 +58,6 @@ if (!(Test-Path -Path $Destination)) {
 }
 cd "$Destination"
 
-if ($Model.StartsWith("yolov5")) {
-    DownloadYOLOv5
-} elseif ($Model.StartsWith("yolov8")) {
-    DownloadYOLOv8
-} else {
-    $Env:PYTHONPATH = "$DNN_ROOT_PATH"
-    $script = Join-Path $PSScriptRoot download_model.py
-    python $script $Model --zoo $Zoo
-}
+$Env:PYTHONPATH = "$DNN_ROOT_PATH"
+$script = Join-Path $PSScriptRoot download_model.py
+python $script $Model --zoo $Zoo