Skip to content

Commit b1eaae2

Browse files
committed
paddleseg bisenetv2
1 parent 6c622e4 commit b1eaae2

11 files changed

+215
-2
lines changed

README.md

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ the following result can be obtained
112112
![ultra light weight face result](./data/images/endgame_result.jpg)
113113

114114
### [YoloX: high-performance anchor-free YOLO by Megvii](https://github.com/Megvii-BaseDetection/YOLOX)
115+
***
115116

116117
- Download onnx model trained on COCO dataset from [HERE](https://github.com/Megvii-BaseDetection/YOLOX/tree/main/demo/ONNXRuntime)
117118
```bash
@@ -126,3 +127,21 @@ wget https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yo
126127
```
127128
- Test results:
128129
![yolox result](./data/images/matrix_result.jpg)
130+
131+
### [Semantic Segmentation Paddle Seg](https://github.com/PaddlePaddle/PaddleSeg)
132+
***
133+
134+
- Download PaddleSeg's BiSeNetV2 model, trained on the Cityscapes dataset and already converted to ONNX, from [HERE](https://drive.google.com/file/d/1e-anuWG_ppDXmoy0sQ0sgrdutCTGlk95/view?usp=sharing) and save it as `./data/bisenetv2_cityscapes.onnx`
135+
136+
```bash
137+
./build/examples/semantic_segmentation_paddleseg_bisenetv2 ./data/bisenetv2_cityscapes.onnx ./data/images/sample_city_scapes.png
138+
./build/examples/semantic_segmentation_paddleseg_bisenetv2 ./data/bisenetv2_cityscapes.onnx ./data/images/odaiba.jpg
139+
```
140+
141+
- Test results:
142+
+ test result on a sample image from the Cityscapes dataset (the model was trained on this dataset)
143+
144+
![paddleseg city scapes](./data/images/sample_city_scapes_result.jpg)
145+
146+
+ test result on a new scene at Odaiba, Tokyo, Japan
147+
![paddleseg odaiba](./data/images/odaiba_result.jpg)

data/images/odaiba.jpg

188 KB
Loading

data/images/odaiba_result.jpg

340 KB
Loading

data/images/sample_city_scapes.png

2.12 MB
Loading
357 KB
Loading

examples/CMakeLists.txt

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,3 +123,21 @@ target_include_directories(yolox
123123
PUBLIC
124124
${OpenCV_INCLUDE_DIRS}
125125
)
126+
127+
# ---------------------------------------------------------
128+
129+
# Example executable: PaddleSeg BiSeNetV2 semantic segmentation.
add_executable(semantic_segmentation_paddleseg_bisenetv2
  ${CMAKE_CURRENT_LIST_DIR}/SemanticSegmentationPaddleSegBisenetv2.cpp
  ${CMAKE_CURRENT_LIST_DIR}/SemanticSegmentationPaddleSegBisenetv2App.cpp
)

# Link against the project library and OpenCV.
target_link_libraries(semantic_segmentation_paddleseg_bisenetv2
  PUBLIC
    ${PROJECT_NAME}
    ${OpenCV_LIBS}
)

# OpenCV headers are needed by the example sources.
target_include_directories(semantic_segmentation_paddleseg_bisenetv2
  PUBLIC
    ${OpenCV_INCLUDE_DIRS}
)
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
/**
2+
* @file SemanticSegmentationPaddleSegBisenetv2.cpp
3+
*
4+
* @author btran
5+
*
6+
*/
7+
8+
#include "SemanticSegmentationPaddleSegBisenetv2.hpp"
9+
10+
namespace Ort
11+
{
12+
/**
 * @brief Builds the handler by forwarding every argument, unchanged, to
 *        ImageRecognitionOrtSessionHandlerBase.
 *
 * @param numClasses  number of classes handed to the base handler
 * @param modelPath   path of the ONNX model handed to the base handler
 * @param gpuIdx      optional GPU index handed to the base handler
 * @param inputShapes optional input shapes handed to the base handler
 */
SemanticSegmentationPaddleSegBisenetv2::SemanticSegmentationPaddleSegBisenetv2(
    const uint16_t numClasses,
    const std::string& modelPath,
    const std::optional<size_t>& gpuIdx,
    const std::optional<std::vector<std::vector<int64_t>>>& inputShapes)
    : ImageRecognitionOrtSessionHandlerBase(numClasses, modelPath, gpuIdx, inputShapes)
{
    // Nothing to do here: all state lives in the base class.
}
19+
20+
void SemanticSegmentationPaddleSegBisenetv2::preprocess(float* dst, //
21+
const unsigned char* src, //
22+
int64_t targetImgWidth, //
23+
int64_t targetImgHeight, //
24+
int numChannels, //
25+
const std::vector<float>& meanVal, //
26+
const std::vector<float>& stdVal) const
27+
{
28+
for (int i = 0; i < targetImgHeight; ++i) {
29+
for (int j = 0; j < targetImgWidth; ++j) {
30+
for (int c = 0; c < numChannels; ++c) {
31+
dst[c * targetImgHeight * targetImgWidth + i * targetImgWidth + j] =
32+
(src[i * targetImgWidth * numChannels + j * numChannels + c] / 255.0 - meanVal[c]) / stdVal[c];
33+
}
34+
}
35+
}
36+
}
37+
} // namespace Ort
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
/**
2+
* @file SemanticSegmentationPaddleSegBisenetv2.hpp
3+
*
4+
* @author btran
5+
*
6+
*/
7+
8+
#pragma once
9+
10+
#include <opencv2/opencv.hpp>
11+
12+
#include <ort_utility/ort_utility.hpp>
13+
14+
namespace Ort
15+
{
16+
class SemanticSegmentationPaddleSegBisenetv2 : public ImageRecognitionOrtSessionHandlerBase
17+
{
18+
public:
19+
static constexpr int64_t IMG_H = 1024;
20+
static constexpr int64_t IMG_W = 1024;
21+
static constexpr int64_t IMG_CHANNEL = 3;
22+
23+
SemanticSegmentationPaddleSegBisenetv2(
24+
const uint16_t numClasses, //
25+
const std::string& modelPath, //
26+
const std::optional<size_t>& gpuIdx = std::nullopt, //
27+
const std::optional<std::vector<std::vector<int64_t>>>& inputShapes = std::nullopt);
28+
29+
void preprocess(float* dst, //
30+
const unsigned char* src, //
31+
int64_t targetImgWidth, //
32+
int64_t targetImgHeight, //
33+
int numChannels, //
34+
const std::vector<float>& meanVal = {0.5, 0.5, 0.5}, //
35+
const std::vector<float>& stdVal = {0.5, 0.5, 0.5}) const;
36+
};
37+
}; // namespace Ort
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
/**
2+
* @file SemanticSegmentationPaddleSegBisenetv2App.cpp
3+
*
4+
* @author btran
5+
*
6+
*/
7+
8+
#include <ort_utility/ort_utility.hpp>
9+
10+
#include "SemanticSegmentationPaddleSegBisenetv2.hpp"
11+
#include "Utility.hpp"
12+
13+
namespace
14+
{
15+
cv::Mat processOneFrame(Ort::SemanticSegmentationPaddleSegBisenetv2& osh, const cv::Mat& inputImg, float* dst,
16+
float alpha = 0.4);
17+
static const std::vector<cv::Scalar> COLORS = toCvScalarColors(Ort::CITY_SCAPES_COLOR_CHART);
18+
} // namespace
19+
20+
int main(int argc, char* argv[])
21+
{
22+
if (argc != 3) {
23+
std::cerr << "Usage: [apps] [path/to/onnx/semantic/segmentation] [path/to/image]" << std::endl;
24+
return EXIT_FAILURE;
25+
}
26+
27+
const std::string ONNX_MODEL_PATH = argv[1];
28+
const std::string IMAGE_PATH = argv[2];
29+
30+
cv::Mat img = cv::imread(IMAGE_PATH);
31+
32+
if (img.empty()) {
33+
std::cerr << "Failed to read input image" << std::endl;
34+
return EXIT_FAILURE;
35+
}
36+
37+
Ort::SemanticSegmentationPaddleSegBisenetv2 osh(
38+
Ort::CITY_SCAPES_NUM_CLASSES, ONNX_MODEL_PATH, 0,
39+
std::vector<std::vector<int64_t>>{{1, Ort::SemanticSegmentationPaddleSegBisenetv2::IMG_CHANNEL,
40+
Ort::SemanticSegmentationPaddleSegBisenetv2::IMG_H,
41+
Ort::SemanticSegmentationPaddleSegBisenetv2::IMG_W}});
42+
43+
osh.initClassNames(Ort::CITY_SCAPES_CLASSES);
44+
std::vector<float> dst(Ort::SemanticSegmentationPaddleSegBisenetv2::IMG_CHANNEL *
45+
Ort::SemanticSegmentationPaddleSegBisenetv2::IMG_H *
46+
Ort::SemanticSegmentationPaddleSegBisenetv2::IMG_W);
47+
48+
auto result = processOneFrame(osh, img, dst.data());
49+
cv::imwrite("result.jpg", result);
50+
51+
return EXIT_SUCCESS;
52+
}
53+
54+
namespace
55+
{
56+
/**
 * @brief Segments one frame and blends the colorized class map over it.
 *
 * The frame is resized to IMG_W x IMG_H, converted from BGR to RGB,
 * normalized into @p dst and fed to the network. The first output is read as
 * one int64 class label per pixel, mapped through COLORS, resized back to the
 * input size and alpha-blended with the input.
 *
 * @param osh      segmentation session handler
 * @param inputImg frame to segment (converted with cv::COLOR_BGR2RGB, so BGR order is expected)
 * @param dst      scratch buffer; must hold 3 * IMG_H * IMG_W floats
 * @param alpha    weight of the overlay; the input frame gets weight (1 - alpha)
 * @return the blended image, same size as @p inputImg
 * @throws cv::Exception if the network yields a label outside the COLORS table
 */
cv::Mat processOneFrame(Ort::SemanticSegmentationPaddleSegBisenetv2& osh, const cv::Mat& inputImg, float* dst,
                        float alpha)
{
    using Model = Ort::SemanticSegmentationPaddleSegBisenetv2;

    // cv::resize allocates its destination itself, so the input does not need to be cloned first.
    cv::Mat scaledImg;
    cv::resize(inputImg, scaledImg, cv::Size(Model::IMG_W, Model::IMG_H), 0, 0, cv::INTER_CUBIC);
    cv::cvtColor(scaledImg, scaledImg, cv::COLOR_BGR2RGB);
    osh.preprocess(dst, scaledImg.data, Model::IMG_W, Model::IMG_H, 3);
    auto inferenceOutput = osh({dst});

    // tips: when you have done all the tricks but still get the wrong output result, try checking the type of
    // inferenceOutput. The output buffer is reinterpreted here as int64 class labels.
    const int64_t* labels = reinterpret_cast<const int64_t*>(inferenceOutput[0].first);
    const int64_t numColors = static_cast<int64_t>(COLORS.size());

    // NOTE(review): COLORS channels are copied as-is into a matrix that is later blended with the BGR input;
    // confirm the channel order produced by toCvScalarColors matches.
    cv::Mat segm(Model::IMG_H, Model::IMG_W, CV_8UC3);
    for (int row = 0; row < Model::IMG_H; ++row) {
        cv::Vec3b* ptrSegm = segm.ptr<cv::Vec3b>(row);
        const int64_t* rowLabels = labels + row * Model::IMG_W;
        for (int col = 0; col < Model::IMG_W; ++col) {
            const int64_t label = rowLabels[col];
            // Guard the table lookup: an unexpected label would otherwise read out of bounds.
            CV_Assert(label >= 0 && label < numColors);
            const cv::Scalar& color = COLORS[label];
            ptrSegm[col] = cv::Vec3b(cv::saturate_cast<uchar>(color[0]), cv::saturate_cast<uchar>(color[1]),
                                     cv::saturate_cast<uchar>(color[2]));
        }
    }

    // Nearest-neighbour keeps class colors crisp when scaling back to the input size.
    cv::resize(segm, segm, inputImg.size(), 0, 0, cv::INTER_NEAREST);
    segm = (1 - alpha) * inputImg + alpha * segm;
    return segm;
}
87+
} // namespace

include/ort_utility/Constants.hpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1053,4 +1053,15 @@ static const std::vector<std::string> VOC_CLASSES = {
10531053
static constexpr int64_t VOC_NUM_CLASSES = 20;
10541054

10551055
static const std::vector<std::array<int, 3>> VOC_COLOR_CHART = generateColorCharts(VOC_NUM_CLASSES);
1056+
1057+
/// Class names of the Cityscapes label set, in label-index order.
static const std::vector<std::string> CITY_SCAPES_CLASSES = {
    "road",           //
    "sidewalk",       //
    "building",       //
    "wall",           //
    "fence",          //
    "pole",           //
    "traffic light",  //
    "traffic sign",   //
    "vegetation",     //
    "terrain",        //
    "sky",            //
    "person",         //
    "rider",          //
    "car",            //
    "truck",          //
    "bus",            //
    "train",          //
    "motorcycle",     //
    "bicycle",        //
};

/// Number of entries in CITY_SCAPES_CLASSES and CITY_SCAPES_COLOR_CHART.
static constexpr int64_t CITY_SCAPES_NUM_CLASSES = 19;

/// One color triple per class; entry i is listed alongside class i of CITY_SCAPES_CLASSES.
static const std::vector<std::array<int, 3>> CITY_SCAPES_COLOR_CHART = {
    {128, 64, 128},   // road
    {244, 35, 232},   // sidewalk
    {70, 70, 70},     // building
    {102, 102, 156},  // wall
    {190, 153, 153},  // fence
    {153, 153, 153},  // pole
    {250, 170, 30},   // traffic light
    {220, 220, 0},    // traffic sign
    {107, 142, 35},   // vegetation
    {152, 251, 152},  // terrain
    {70, 130, 180},   // sky
    {220, 20, 60},    // person
    {255, 0, 0},      // rider
    {0, 0, 142},      // car
    {0, 0, 70},       // truck
    {0, 60, 100},     // bus
    {0, 80, 100},     // train
    {0, 0, 230},      // motorcycle
    {119, 11, 32},    // bicycle
};
10561067
} // namespace Ort

src/OrtSessionHandler.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,11 @@ OrtSessionHandler::OrtSessionHandlerIml::~OrtSessionHandlerIml()
189189

190190
void OrtSessionHandler::OrtSessionHandlerIml::initSession()
191191
{
192+
#if ENABLE_DEBUG
192193
m_env = Ort::Env(ORT_LOGGING_LEVEL_WARNING, "test");
194+
#else
195+
m_env = Ort::Env(ORT_LOGGING_LEVEL_ERROR, "test");
196+
#endif
193197
Ort::SessionOptions sessionOptions;
194198

195199
// TODO: need to take care of the following line as it is related to CPU
@@ -269,8 +273,8 @@ void OrtSessionHandler::OrtSessionHandlerIml::initModelInfo()
269273
}
270274
}
271275

272-
std::vector<OrtSessionHandler::DataOutputType> OrtSessionHandler::OrtSessionHandlerIml::
273-
operator()(const std::vector<float*>& inputData)
276+
std::vector<OrtSessionHandler::DataOutputType>
277+
OrtSessionHandler::OrtSessionHandlerIml::operator()(const std::vector<float*>& inputData)
274278
{
275279
if (m_numInputs != inputData.size()) {
276280
throw std::runtime_error("Mismatch size of input data\n");

0 commit comments

Comments
 (0)