cardboardcode
diff --git a/‎README.md
Lines changed: 19 additions & 0 deletions b/‎README.md
Lines changed: 19 additions & 0 deletions
diff --git a/‎data/images/odaiba.jpg
188 KB b/‎data/images/odaiba.jpg
188 KB
diff --git a/‎data/images/odaiba_result.jpg
340 KB b/‎data/images/odaiba_result.jpg
340 KB
diff --git a/‎data/images/sample_city_scapes.png
2.12 MB b/‎data/images/sample_city_scapes.png
2.12 MB
diff --git a/‎data/images/sample_city_scapes_result.jpg
357 KB b/‎data/images/sample_city_scapes_result.jpg
357 KB
diff --git a/‎examples/CMakeLists.txt
Lines changed: 18 additions & 0 deletions b/‎examples/CMakeLists.txt
Lines changed: 18 additions & 0 deletions
diff --git a/‎examples/SemanticSegmentationPaddleSegBisenetv2.cpp
Lines changed: 37 additions & 0 deletions b/‎examples/SemanticSegmentationPaddleSegBisenetv2.cpp
Lines changed: 37 additions & 0 deletions
diff --git a/‎examples/SemanticSegmentationPaddleSegBisenetv2.hpp
Lines changed: 37 additions & 0 deletions b/‎examples/SemanticSegmentationPaddleSegBisenetv2.hpp
Lines changed: 37 additions & 0 deletions
diff --git a/‎examples/SemanticSegmentationPaddleSegBisenetv2App.cpp
Lines changed: 87 additions & 0 deletions b/‎examples/SemanticSegmentationPaddleSegBisenetv2App.cpp
Lines changed: 87 additions & 0 deletions
diff --git a/‎include/ort_utility/Constants.hpp
Lines changed: 11 additions & 0 deletions b/‎include/ort_utility/Constants.hpp
Lines changed: 11 additions & 0 deletions
diff --git a/‎src/OrtSessionHandler.cpp
Lines changed: 6 additions & 2 deletions b/‎src/OrtSessionHandler.cpp
Lines changed: 6 additions & 2 deletions
@@ -112,6 +112,7 @@ the following result can be obtained
 ![ultra light weight face result](./data/images/endgame_result.jpg)
 
 ### [YoloX: high-performance anchor-free YOLO by Megvii](https://github.com/Megvii-BaseDetection/YOLOX)
+***
 
 - Download onnx model trained on COCO dataset from [HERE](https://github.com/Megvii-BaseDetection/YOLOX/tree/main/demo/ONNXRuntime)
 ```bash
@@ -126,3 +127,21 @@ wget https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yo
 ```
 - Test results:
 ![yolox result](./data/images/matrix_result.jpg)
+
+### [Semantic Segmentation Paddle Seg](https://github.com/PaddlePaddle/PaddleSeg)
+***
+
+- Download PaddleSeg's BiSeNetV2 model, trained on the Cityscapes dataset and converted to ONNX, from [HERE](https://drive.google.com/file/d/1e-anuWG_ppDXmoy0sQ0sgrdutCTGlk95/view?usp=sharing)
+
+```bash
+./build/examples/semantic_segmentation_paddleseg_bisenetv2 ./data/bisenetv2_cityscapes.onnx ./data/images/sample_city_scapes.png
+./build/examples/semantic_segmentation_paddleseg_bisenetv2 ./data/bisenetv2_cityscapes.onnx ./data/images/odaiba.jpg
+```
+
+- Test results:
+    + test result on sample image of cityscapes dataset (this model is trained on cityscapes dataset)
+
+![paddleseg city scapes](./data/images/sample_city_scapes_result.jpg)
+
+    + test result on a new scene at Odaiba, Tokyo, Japan
+![paddleseg odaiba](./data/images/odaiba_result.jpg)
@@ -123,3 +123,21 @@ target_include_directories(yolox
   PUBLIC
     ${OpenCV_INCLUDE_DIRS}
 )
+
+# ---------------------------------------------------------
+
+# Example executable: PaddleSeg BiSeNetV2 semantic segmentation (handler class + command-line app).
+add_executable(semantic_segmentation_paddleseg_bisenetv2
+  ${CMAKE_CURRENT_LIST_DIR}/SemanticSegmentationPaddleSegBisenetv2.cpp
+  ${CMAKE_CURRENT_LIST_DIR}/SemanticSegmentationPaddleSegBisenetv2App.cpp
+)
+
+# Link against the project library target and OpenCV.
+target_link_libraries(semantic_segmentation_paddleseg_bisenetv2
+  PUBLIC
+    ${PROJECT_NAME}
+    ${OpenCV_LIBS}
+)
+
+# OpenCV headers are needed by the example sources.
+target_include_directories(semantic_segmentation_paddleseg_bisenetv2
+  PUBLIC
+    ${OpenCV_INCLUDE_DIRS}
+)
@@ -0,0 +1,37 @@
+/**
+ * @file    SemanticSegmentationPaddleSegBisenetv2.cpp
+ *
+ * @author  btran
+ *
+ */
+
+#include "SemanticSegmentationPaddleSegBisenetv2.hpp"
+
+namespace Ort
+{
+/**
+ * @brief Constructs the handler by forwarding every argument to ImageRecognitionOrtSessionHandlerBase.
+ *
+ * @param numClasses   number of classes, forwarded to the base class
+ * @param modelPath    path to the model file, forwarded to the base class
+ * @param gpuIdx       optional GPU index, forwarded to the base class
+ * @param inputShapes  optional input shapes, forwarded to the base class
+ */
+SemanticSegmentationPaddleSegBisenetv2::SemanticSegmentationPaddleSegBisenetv2(
+    const uint16_t numClasses,     //
+    const std::string& modelPath,  //
+    const std::optional<size_t>& gpuIdx, const std::optional<std::vector<std::vector<int64_t>>>& inputShapes)
+    : ImageRecognitionOrtSessionHandlerBase(numClasses, modelPath, gpuIdx, inputShapes)
+{
+}
+
+/**
+ * @brief Converts an interleaved 8-bit image into a normalized, channel-planar float buffer.
+ *
+ * Each output value is (src / 255.0 - meanVal[c]) / stdVal[c]. The source is read as
+ * height x width x channels (interleaved) and the destination is written as
+ * channels x height x width (planar).
+ *
+ * @param dst              output buffer; must hold numChannels * targetImgHeight * targetImgWidth floats
+ * @param src              input pixels; must hold targetImgHeight * targetImgWidth * numChannels bytes
+ * @param targetImgWidth   image width in pixels
+ * @param targetImgHeight  image height in pixels
+ * @param numChannels      number of interleaved channels in src
+ * @param meanVal          per-channel mean subtracted after scaling to [0, 1]
+ * @param stdVal           per-channel divisor applied after mean subtraction
+ *
+ * @throws std::out_of_range if meanVal or stdVal has fewer than numChannels entries
+ *         (only checked when there is at least one pixel to process)
+ */
+void SemanticSegmentationPaddleSegBisenetv2::preprocess(float* dst,                         //
+                                                        const unsigned char* src,           //
+                                                        int64_t targetImgWidth,             //
+                                                        int64_t targetImgHeight,            //
+                                                        int numChannels,                    //
+                                                        const std::vector<float>& meanVal,  //
+                                                        const std::vector<float>& stdVal) const
+{
+    // Nothing to convert; keeps the "no pixels, no reads" behavior for empty images.
+    if (targetImgHeight <= 0 || targetImgWidth <= 0) {
+        return;
+    }
+
+    const int64_t planeSize = targetImgHeight * targetImgWidth;
+    for (int c = 0; c < numChannels; ++c) {
+        // Bounds-checked once per channel instead of an unchecked operator[] per pixel.
+        const float mean = meanVal.at(static_cast<size_t>(c));
+        const float stdDev = stdVal.at(static_cast<size_t>(c));
+        float* dstPlane = dst + c * planeSize;
+
+        for (int64_t i = 0; i < targetImgHeight; ++i) {
+            const unsigned char* srcRow = src + i * targetImgWidth * numChannels;
+            float* dstRow = dstPlane + i * targetImgWidth;
+            for (int64_t j = 0; j < targetImgWidth; ++j) {
+                dstRow[j] = (srcRow[j * numChannels + c] / 255.0 - mean) / stdDev;
+            }
+        }
+    }
+}
+}  // namespace Ort
@@ -0,0 +1,37 @@
+/**
+ * @file    SemanticSegmentationPaddleSegBisenetv2.hpp
+ *
+ * @author  btran
+ *
+ */
+
+#pragma once
+
+#include <opencv2/opencv.hpp>
+
+#include <ort_utility/ort_utility.hpp>
+
+namespace Ort
+{
+/**
+ * @brief Session handler for the PaddleSeg BiSeNetV2 semantic segmentation model.
+ *
+ * Adds the image preprocessing step on top of ImageRecognitionOrtSessionHandlerBase.
+ */
+class SemanticSegmentationPaddleSegBisenetv2 : public ImageRecognitionOrtSessionHandlerBase
+{
+ public:
+    /// Height of the model input, in pixels.
+    static constexpr int64_t IMG_H = 1024;
+    /// Width of the model input, in pixels.
+    static constexpr int64_t IMG_W = 1024;
+    /// Number of channels of the model input.
+    static constexpr int64_t IMG_CHANNEL = 3;
+
+    /**
+     * @brief Constructs the handler; all arguments are forwarded to the base class.
+     *
+     * @param numClasses   number of classes
+     * @param modelPath    path to the model file
+     * @param gpuIdx       optional GPU index (defaults to std::nullopt)
+     * @param inputShapes  optional input shapes (defaults to std::nullopt)
+     */
+    SemanticSegmentationPaddleSegBisenetv2(
+        const uint16_t numClasses,                           //
+        const std::string& modelPath,                        //
+        const std::optional<size_t>& gpuIdx = std::nullopt,  //
+        const std::optional<std::vector<std::vector<int64_t>>>& inputShapes = std::nullopt);
+
+    /**
+     * @brief Converts an interleaved 8-bit image into a normalized, channel-planar float buffer.
+     *
+     * Each output value is (src / 255.0 - meanVal[c]) / stdVal[c].
+     *
+     * @param dst              output buffer of numChannels * targetImgHeight * targetImgWidth floats
+     * @param src              interleaved input pixels (height x width x channels)
+     * @param targetImgWidth   image width in pixels
+     * @param targetImgHeight  image height in pixels
+     * @param numChannels      number of channels in src
+     * @param meanVal          per-channel mean; needs at least numChannels entries
+     * @param stdVal           per-channel divisor; needs at least numChannels entries
+     */
+    void preprocess(float* dst,                                           //
+                    const unsigned char* src,                             //
+                    int64_t targetImgWidth,                               //
+                    int64_t targetImgHeight,                              //
+                    int numChannels,                                      //
+                    const std::vector<float>& meanVal = {0.5, 0.5, 0.5},  //
+                    const std::vector<float>& stdVal = {0.5, 0.5, 0.5}) const;
+};
+}  // namespace Ort
@@ -0,0 +1,87 @@
+/**
+ * @file    SemanticSegmentationPaddleSegBisenetv2App.cpp
+ *
+ * @author  btran
+ *
+ */
+
+#include <ort_utility/ort_utility.hpp>
+
+#include "SemanticSegmentationPaddleSegBisenetv2.hpp"
+#include "Utility.hpp"
+
+namespace
+{
+// Overlay colors, converted once at start-up from the Cityscapes color chart.
+// The anonymous namespace already gives internal linkage.
+const std::vector<cv::Scalar> COLORS = toCvScalarColors(Ort::CITY_SCAPES_COLOR_CHART);
+
+// Runs segmentation on one image and returns the color overlay; defined after main().
+cv::Mat processOneFrame(Ort::SemanticSegmentationPaddleSegBisenetv2& osh, const cv::Mat& inputImg, float* dst,
+                        float alpha = 0.4);
+}  // namespace
+
+/**
+ * @brief Entry point: segments one image and writes the overlay to "result.jpg".
+ *
+ * Usage: [apps] [path/to/onnx/semantic/segmentation] [path/to/image]
+ *
+ * @return EXIT_SUCCESS when the result image was written; EXIT_FAILURE on wrong usage,
+ *         an unreadable input image, or a failed write of the result.
+ */
+int main(int argc, char* argv[])
+{
+    using Model = Ort::SemanticSegmentationPaddleSegBisenetv2;
+
+    if (argc != 3) {
+        std::cerr << "Usage: [apps] [path/to/onnx/semantic/segmentation] [path/to/image]" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    const std::string ONNX_MODEL_PATH = argv[1];
+    const std::string IMAGE_PATH = argv[2];
+
+    cv::Mat img = cv::imread(IMAGE_PATH);
+
+    if (img.empty()) {
+        std::cerr << "Failed to read input image" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // gpuIdx is fixed to 0 here. NOTE(review): its exact meaning is defined by the base handler - confirm.
+    Model osh(Ort::CITY_SCAPES_NUM_CLASSES, ONNX_MODEL_PATH, 0,
+              std::vector<std::vector<int64_t>>{{1, Model::IMG_CHANNEL, Model::IMG_H, Model::IMG_W}});
+
+    osh.initClassNames(Ort::CITY_SCAPES_CLASSES);
+
+    // Scratch buffer for the preprocessed input tensor (channels x height x width).
+    std::vector<float> dst(Model::IMG_CHANNEL * Model::IMG_H * Model::IMG_W);
+
+    auto result = processOneFrame(osh, img, dst.data());
+
+    // cv::imwrite reports failure through its return value; do not claim success if nothing was saved.
+    if (!cv::imwrite("result.jpg", result)) {
+        std::cerr << "Failed to write result.jpg" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    return EXIT_SUCCESS;
+}
+
+namespace
+{
+/**
+ * @brief Segments one image and returns the class-color overlay blended onto it.
+ *
+ * The image is resized to the model input size, converted from BGR to RGB, preprocessed into
+ * dst and passed to the session. The first output is read as one int64 class index per pixel,
+ * mapped through COLORS, resized back to the input size and alpha-blended with the input.
+ *
+ * @param osh       session handler used for preprocessing and inference
+ * @param inputImg  input image as loaded by cv::imread
+ * @param dst       scratch buffer of IMG_CHANNEL * IMG_H * IMG_W floats for the input tensor
+ * @param alpha     weight of the color overlay; the input image is weighted (1 - alpha)
+ * @return the blended image, with the same size as inputImg
+ *
+ * @throws std::out_of_range if the session returns no output or a class index has no entry in COLORS
+ */
+cv::Mat processOneFrame(Ort::SemanticSegmentationPaddleSegBisenetv2& osh, const cv::Mat& inputImg, float* dst,
+                        float alpha)
+{
+    using Model = Ort::SemanticSegmentationPaddleSegBisenetv2;
+
+    // cv::resize allocates its destination, so the input does not need to be cloned first.
+    cv::Mat scaledImg;
+    cv::resize(inputImg, scaledImg, cv::Size(Model::IMG_W, Model::IMG_H), 0, 0, cv::INTER_CUBIC);
+    cv::cvtColor(scaledImg, scaledImg, cv::COLOR_BGR2RGB);
+    osh.preprocess(dst, scaledImg.data, Model::IMG_W, Model::IMG_H, Model::IMG_CHANNEL);
+    auto inferenceOutput = osh({dst});
+
+    // tips: when you have done all the tricks but still get the wrong output result, try checking the type of inferenceOutput
+    const int64_t* labels = reinterpret_cast<const int64_t*>(inferenceOutput.at(0).first);
+    cv::Mat segm(Model::IMG_H, Model::IMG_W, CV_8UC(3));
+    for (int i = 0; i < Model::IMG_H; ++i) {
+        cv::Vec3b* ptrSegm = segm.ptr<cv::Vec3b>(i);
+        const int64_t* labelRow = labels + i * Model::IMG_W;
+        for (int j = 0; j < Model::IMG_W; ++j) {
+            // The index comes from the model; at() fails loudly instead of reading out of bounds
+            // (a negative index wraps to a huge size_t and is rejected as well).
+            const auto& color = COLORS.at(static_cast<size_t>(labelRow[j]));
+            ptrSegm[j] = cv::Vec3b(color[0], color[1], color[2]);
+        }
+    }
+
+    // Nearest-neighbour keeps the overlay to exact class colors when scaling back to the input size.
+    cv::resize(segm, segm, inputImg.size(), 0, 0, cv::INTER_NEAREST);
+    segm = (1 - alpha) * inputImg + alpha * segm;
+    return segm;
+}
+}  // namespace
@@ -1053,4 +1053,15 @@ static const std::vector<std::string> VOC_CLASSES = {
 static constexpr int64_t VOC_NUM_CLASSES = 20;
 
 static const std::vector<std::array<int, 3>> VOC_COLOR_CHART = generateColorCharts(VOC_NUM_CLASSES);
+
+static const std::vector<std::string> CITY_SCAPES_CLASSES = {  // 19 Cityscapes class names
+    "road", "sidewalk", "building", "wall", "fence", "pole", "traffic light", "traffic sign", "vegetation", "terrain",
+    "sky",  "person",   "rider",    "car",  "truck", "bus",  "train",         "motorcycle",   "bicycle"};
+
+static constexpr int64_t CITY_SCAPES_NUM_CLASSES = 19;  // equals the entry count of the two tables around it
+
+static const std::vector<std::array<int, 3>> CITY_SCAPES_COLOR_CHART = {  // 19 color triples; presumably RGB, in class order - confirm
+    {128, 64, 128}, {244, 35, 232}, {70, 70, 70},    {102, 102, 156}, {190, 153, 153}, {153, 153, 153}, {250, 170, 30},
+    {220, 220, 0},  {107, 142, 35}, {152, 251, 152}, {70, 130, 180},  {220, 20, 60},   {255, 0, 0},     {0, 0, 142},
+    {0, 0, 70},     {0, 60, 100},   {0, 80, 100},    {0, 0, 230},     {119, 11, 32}};
 }  // namespace Ort
@@ -189,7 +189,11 @@ OrtSessionHandler::OrtSessionHandlerIml::~OrtSessionHandlerIml()
 
 void OrtSessionHandler::OrtSessionHandlerIml::initSession()
 {
+#if ENABLE_DEBUG
     m_env = Ort::Env(ORT_LOGGING_LEVEL_WARNING, "test");
+#else
+    m_env = Ort::Env(ORT_LOGGING_LEVEL_ERROR, "test");
+#endif
     Ort::SessionOptions sessionOptions;
 
     // TODO: need to take care of the following line as it is related to CPU
@@ -269,8 +273,8 @@ void OrtSessionHandler::OrtSessionHandlerIml::initModelInfo()
     }
 }
 
-std::vector<OrtSessionHandler::DataOutputType> OrtSessionHandler::OrtSessionHandlerIml::
-operator()(const std::vector<float*>& inputData)
+std::vector<OrtSessionHandler::DataOutputType>
+OrtSessionHandler::OrtSessionHandlerIml::operator()(const std::vector<float*>& inputData)
 {
     if (m_numInputs != inputData.size()) {
         throw std::runtime_error("Mismatch size of input data\n");
Original file line number	Diff line number	Diff line change
`@@ -189,7 +189,11 @@ OrtSessionHandler::OrtSessionHandlerIml::~OrtSessionHandlerIml()`
`189`	`189`
`190`	`190`	`void OrtSessionHandler::OrtSessionHandlerIml::initSession()`
`191`	`191`	`{`
	`192`	`+#if ENABLE_DEBUG`
`192`	`193`	`m_env = Ort::Env(ORT_LOGGING_LEVEL_WARNING, "test");`
	`194`	`+#else`
	`195`	`+ m_env = Ort::Env(ORT_LOGGING_LEVEL_ERROR, "test");`
	`196`	`+#endif`
`193`	`197`	`Ort::SessionOptions sessionOptions;`
`194`	`198`
`195`	`199`	`// TODO: need to take care of the following line as it is related to CPU`
`@@ -269,8 +273,8 @@ void OrtSessionHandler::OrtSessionHandlerIml::initModelInfo()`
`269`	`273`	`}`
`270`	`274`	`}`
`271`	`275`
`272`		`-std::vector<OrtSessionHandler::DataOutputType> OrtSessionHandler::OrtSessionHandlerIml::`
`273`		`-operator()(const std::vector<float*>& inputData)`
	`276`	`+std::vector<OrtSessionHandler::DataOutputType>`
	`277`	`+OrtSessionHandler::OrtSessionHandlerIml::operator()(const std::vector<float*>& inputData)`
`274`	`278`	`{`
`275`	`279`	`if (m_numInputs != inputData.size()) {`
`276`	`280`	`throw std::runtime_error("Mismatch size of input data\n");`