#define _CRT_SECURE_NO_WARNINGS
#include <math.h>

#include <algorithm>
#include <array>
#include <cmath>
#include <fstream>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>

#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>
// #include <cuda_provider_factory.h>
#include <onnxruntime_cxx_api.h>

using namespace cv;
using namespace std;
using namespace Ort;
14+
// A single detection in network-input pixel coordinates.
// (x1, y1) is the minimum corner and (x2, y2) the maximum corner of the box.
struct BoxInfo
{
    float x1;     // left edge
    float y1;     // top edge
    float x2;     // right edge
    float y2;     // bottom edge
    float score;  // best class score for this box
    int label;    // index of the best-scoring class
};
24+
25+ class NanoDet_Plus
26+ {
27+ public:
28+ NanoDet_Plus (string model_path, string classesFile, float nms_threshold, float objThreshold);
29+ void detect (Mat& cv_image);
30+ private:
31+ float score_threshold = 0.5 ;
32+ float nms_threshold = 0.5 ;
33+ vector<string> class_names;
34+ int num_class;
35+
36+ Mat resize_image (Mat srcimg, int *newh, int *neww, int *top, int *left);
37+ vector<float > input_image_;
38+ void normalize_ (Mat img);
39+ void softmax_ (const float * x, float * y, int length);
40+ void generate_proposal (vector<BoxInfo>& generate_boxes, const float * preds);
41+ void nms (vector<BoxInfo>& input_boxes);
42+ const bool keep_ratio = false ;
43+ int inpWidth;
44+ int inpHeight;
45+ int reg_max;
46+ const int num_stages = 4 ;
47+ const int stride[4 ] = { 8 ,16 ,32 ,64 };
48+ const float mean[3 ] = { 103.53 , 116.28 , 123.675 };
49+ const float stds[3 ] = { 57.375 , 57.12 , 58.395 };
50+
51+ Env env = Env(ORT_LOGGING_LEVEL_ERROR, " nanodetplus" );
52+ Ort::Session *ort_session = nullptr ;
53+ SessionOptions sessionOptions = SessionOptions();
54+ vector<char *> input_names;
55+ vector<char *> output_names;
56+ vector<vector<int64_t >> input_node_dims; // >=1 outputs
57+ vector<vector<int64_t >> output_node_dims; // >=1 outputs
58+ };
59+
60+ NanoDet_Plus::NanoDet_Plus (string model_path, string classesFile, float nms_threshold, float objThreshold)
61+ {
62+ ifstream ifs (classesFile.c_str ());
63+ string line;
64+ while (getline (ifs, line)) this ->class_names .push_back (line);
65+ this ->num_class = class_names.size ();
66+ this ->nms_threshold = nms_threshold;
67+ this ->score_threshold = objThreshold;
68+
69+ std::wstring widestr = std::wstring (model_path.begin (), model_path.end ());
70+ // OrtStatus* status = OrtSessionOptionsAppendExecutionProvider_CUDA(sessionOptions, 0);
71+ sessionOptions.SetGraphOptimizationLevel (ORT_ENABLE_BASIC);
72+ ort_session = new Session (env, widestr.c_str (), sessionOptions);
73+ size_t numInputNodes = ort_session->GetInputCount ();
74+ size_t numOutputNodes = ort_session->GetOutputCount ();
75+ AllocatorWithDefaultOptions allocator;
76+ for (int i = 0 ; i < numInputNodes; i++)
77+ {
78+ input_names.push_back (ort_session->GetInputName (i, allocator));
79+ Ort::TypeInfo input_type_info = ort_session->GetInputTypeInfo (i);
80+ auto input_tensor_info = input_type_info.GetTensorTypeAndShapeInfo ();
81+ auto input_dims = input_tensor_info.GetShape ();
82+ input_node_dims.push_back (input_dims);
83+ }
84+ for (int i = 0 ; i < numOutputNodes; i++)
85+ {
86+ output_names.push_back (ort_session->GetOutputName (i, allocator));
87+ Ort::TypeInfo output_type_info = ort_session->GetOutputTypeInfo (i);
88+ auto output_tensor_info = output_type_info.GetTensorTypeAndShapeInfo ();
89+ auto output_dims = output_tensor_info.GetShape ();
90+ output_node_dims.push_back (output_dims);
91+ /* for (int j = 0; j < output_dims.size(); j++)
92+ {
93+ cout << output_dims[j] << ",";
94+ }
95+ cout << endl;*/
96+ }
97+ this ->inpHeight = input_node_dims[0 ][2 ];
98+ this ->inpWidth = input_node_dims[0 ][3 ];
99+ this ->reg_max = (output_node_dims[0 ][output_node_dims[0 ].size () - 1 ] - this ->num_class ) / 4 - 1 ;
100+ }
101+
102+ Mat NanoDet_Plus::resize_image (Mat srcimg, int *newh, int *neww, int *top, int *left)
103+ {
104+ int srch = srcimg.rows , srcw = srcimg.cols ;
105+ *newh = this ->inpHeight ;
106+ *neww = this ->inpWidth ;
107+ Mat dstimg;
108+ if (this ->keep_ratio && srch != srcw) {
109+ float hw_scale = (float )srch / srcw;
110+ if (hw_scale > 1 ) {
111+ *newh = this ->inpHeight ;
112+ *neww = int (this ->inpWidth / hw_scale);
113+ resize (srcimg, dstimg, Size (*neww, *newh), INTER_AREA);
114+ *left = int ((this ->inpWidth - *neww) * 0.5 );
115+ copyMakeBorder (dstimg, dstimg, 0 , 0 , *left, this ->inpWidth - *neww - *left, BORDER_CONSTANT, 0 );
116+ }
117+ else {
118+ *newh = (int )this ->inpHeight * hw_scale;
119+ *neww = this ->inpWidth ;
120+ resize (srcimg, dstimg, Size (*neww, *newh), INTER_AREA);
121+ *top = (int )(this ->inpHeight - *newh) * 0.5 ;
122+ copyMakeBorder (dstimg, dstimg, *top, this ->inpHeight - *newh - *top, 0 , 0 , BORDER_CONSTANT, 0 );
123+ }
124+ }
125+ else {
126+ resize (srcimg, dstimg, Size (*neww, *newh), INTER_AREA);
127+ }
128+ return dstimg;
129+ }
130+
131+ void NanoDet_Plus::normalize_ (Mat img)
132+ {
133+ // img.convertTo(img, CV_32F);
134+ int row = img.rows ;
135+ int col = img.cols ;
136+ this ->input_image_ .resize (row * col * img.channels ());
137+ for (int c = 0 ; c < 3 ; c++)
138+ {
139+ for (int i = 0 ; i < row; i++)
140+ {
141+ for (int j = 0 ; j < col; j++)
142+ {
143+ float pix = img.ptr <uchar>(i)[j * 3 + c];
144+ // this->input_image_[c * row * col + i * col + j] = (pix / 255.0 - mean[c] / 255.0) / (stds[c] / 255.0);
145+ this ->input_image_ [c * row * col + i * col + j] = (pix - mean[c]) / stds[c];
146+ }
147+ }
148+ }
149+ }
150+
151+ void NanoDet_Plus::softmax_ (const float * x, float * y, int length)
152+ {
153+ float sum = 0 ;
154+ int i = 0 ;
155+ for (i = 0 ; i < length; i++)
156+ {
157+ y[i] = exp (x[i]);
158+ sum += y[i];
159+ }
160+ for (i = 0 ; i < length; i++)
161+ {
162+ y[i] /= sum;
163+ }
164+ }
165+
166+ void NanoDet_Plus::generate_proposal (vector<BoxInfo>& generate_boxes, const float * preds)
167+ {
168+ const int reg_1max = reg_max + 1 ;
169+ const int len = this ->num_class + 4 * reg_1max;
170+ for (int n = 0 ; n < this ->num_stages ; n++)
171+ {
172+ const int stride_ = this ->stride [n];
173+ const int num_grid_y = (int )ceil ((float )this ->inpHeight / stride_);
174+ const int num_grid_x = (int )ceil ((float )this ->inpWidth / stride_);
175+ // //cout << "num_grid_x=" << num_grid_x << ",num_grid_y=" << num_grid_y << endl;
176+
177+ for (int i = 0 ; i < num_grid_y; i++)
178+ {
179+ for (int j = 0 ; j < num_grid_x; j++)
180+ {
181+ int max_ind = 0 ;
182+ float max_score = 0 ;
183+ for (int k = 0 ; k < num_class; k++)
184+ {
185+ if (preds[k] > max_score)
186+ {
187+ max_score = preds[k];
188+ max_ind = k;
189+ }
190+ }
191+ if (max_score >= score_threshold)
192+ {
193+ const float * pbox = preds + this ->num_class ;
194+ float dis_pred[4 ];
195+ float * y = new float [reg_1max];
196+ for (int k = 0 ; k < 4 ; k++)
197+ {
198+ softmax_ (pbox + k * reg_1max, y, reg_1max);
199+ float dis = 0 .f ;
200+ for (int l = 0 ; l < reg_1max; l++)
201+ {
202+ dis += l * y[l];
203+ }
204+ dis_pred[k] = dis * stride_;
205+ }
206+ delete[] y;
207+ /* float pb_cx = (j + 0.5f) * stride_ - 0.5;
208+ float pb_cy = (i + 0.5f) * stride_ - 0.5;*/
209+ float pb_cx = j * stride_ ;
210+ float pb_cy = i * stride_;
211+ float x0 = pb_cx - dis_pred[0 ];
212+ float y0 = pb_cy - dis_pred[1 ];
213+ float x1 = pb_cx + dis_pred[2 ];
214+ float y1 = pb_cy + dis_pred[3 ];
215+ generate_boxes.push_back (BoxInfo{ x0, y0, x1, y1, max_score, max_ind });
216+ }
217+ preds += len;
218+ }
219+ }
220+ }
221+
222+ }
223+
224+ void NanoDet_Plus::nms (vector<BoxInfo>& input_boxes)
225+ {
226+ sort (input_boxes.begin (), input_boxes.end (), [](BoxInfo a, BoxInfo b) { return a.score > b.score ; });
227+ vector<float > vArea (input_boxes.size ());
228+ for (int i = 0 ; i < int (input_boxes.size ()); ++i)
229+ {
230+ vArea[i] = (input_boxes.at (i).x2 - input_boxes.at (i).x1 + 1 )
231+ * (input_boxes.at (i).y2 - input_boxes.at (i).y1 + 1 );
232+ }
233+
234+ vector<bool > isSuppressed (input_boxes.size (), false );
235+ for (int i = 0 ; i < int (input_boxes.size ()); ++i)
236+ {
237+ if (isSuppressed[i]) { continue ; }
238+ for (int j = i + 1 ; j < int (input_boxes.size ()); ++j)
239+ {
240+ if (isSuppressed[j]) { continue ; }
241+ float xx1 = (max)(input_boxes[i].x1 , input_boxes[j].x1 );
242+ float yy1 = (max)(input_boxes[i].y1 , input_boxes[j].y1 );
243+ float xx2 = (min)(input_boxes[i].x2 , input_boxes[j].x2 );
244+ float yy2 = (min)(input_boxes[i].y2 , input_boxes[j].y2 );
245+
246+ float w = (max)(float (0 ), xx2 - xx1 + 1 );
247+ float h = (max)(float (0 ), yy2 - yy1 + 1 );
248+ float inter = w * h;
249+ float ovr = inter / (vArea[i] + vArea[j] - inter);
250+
251+ if (ovr >= this ->nms_threshold )
252+ {
253+ isSuppressed[j] = true ;
254+ }
255+ }
256+ }
257+ // return post_nms;
258+ int idx_t = 0 ;
259+ input_boxes.erase (remove_if (input_boxes.begin (), input_boxes.end (), [&idx_t , &isSuppressed](const BoxInfo& f) { return isSuppressed[idx_t ++]; }), input_boxes.end ());
260+ }
261+
262+ void NanoDet_Plus::detect (Mat& srcimg)
263+ {
264+ int newh = 0 , neww = 0 , top = 0 , left = 0 ;
265+ Mat cv_image = srcimg.clone ();
266+ Mat dst = this ->resize_image (cv_image, &newh, &neww, &top, &left);
267+ this ->normalize_ (dst);
268+ array<int64_t , 4 > input_shape_{ 1 , 3 , this ->inpHeight , this ->inpWidth };
269+
270+ auto allocator_info = MemoryInfo::CreateCpu (OrtDeviceAllocator, OrtMemTypeCPU);
271+ Value input_tensor_ = Value::CreateTensor<float >(allocator_info, input_image_.data (), input_image_.size (), input_shape_.data (), input_shape_.size ());
272+
273+ // ¿ªÊ¼ÍÆÀí
274+ vector<Value> ort_outputs = ort_session->Run (RunOptions{ nullptr }, &input_names[0 ], &input_tensor_, 1 , output_names.data (), output_names.size ()); // ¿ªÊ¼ÍÆÀí
275+ // ///generate proposals
276+ vector<BoxInfo> generate_boxes;
277+ const float * preds = ort_outputs[0 ].GetTensorMutableData <float >();
278+ generate_proposal (generate_boxes, preds);
279+
280+ // // Perform non maximum suppression to eliminate redundant overlapping boxes with
281+ // // lower confidences
282+ nms (generate_boxes);
283+ float ratioh = (float )cv_image.rows / newh;
284+ float ratiow = (float )cv_image.cols / neww;
285+ for (size_t i = 0 ; i < generate_boxes.size (); ++i)
286+ {
287+ int xmin = (int )max ((generate_boxes[i].x1 - left)*ratiow, 0 .f );
288+ int ymin = (int )max ((generate_boxes[i].y1 - top)*ratioh, 0 .f );
289+ int xmax = (int )min ((generate_boxes[i].x2 - left)*ratiow, (float )cv_image.cols );
290+ int ymax = (int )min ((generate_boxes[i].y2 - top)*ratioh, (float )cv_image.rows );
291+ rectangle (srcimg, Point (xmin, ymin), Point (xmax, ymax), Scalar (0 , 0 , 255 ), 2 );
292+ string label = format (" %.2f" , generate_boxes[i].score );
293+ label = this ->class_names [generate_boxes[i].label ] + " :" + label;
294+ putText (srcimg, label, Point (xmin, ymin - 5 ), FONT_HERSHEY_SIMPLEX, 0.75 , Scalar (0 , 255 , 0 ), 1 );
295+ }
296+ }
297+
298+ int main ()
299+ {
300+ NanoDet_Plus mynet (" onnxmodel/nanodet-plus-m_320.onnx" , " onnxmodel/coco.names" , 0.5 , 0.5 ); // / choice = ["picodet_m_320_coco.onnx", "picodet_m_416_coco.onnx", "picodet_s_320_coco.onnx", "picodet_s_416_coco.onnx"]
301+ string imgpath = " imgs/person.jpg" ;
302+ Mat srcimg = imread (imgpath);
303+ mynet.detect (srcimg);
304+
305+ static const string kWinName = " Deep learning object detection in ONNXRuntime" ;
306+ namedWindow (kWinName , WINDOW_NORMAL);
307+ imshow (kWinName , srcimg);
308+ waitKey (0 );
309+ destroyAllWindows ();
310+ }
0 commit comments