Fix outputs order of tensorrt (#18)
* fix trt output order

* Update trt_backend.cc
jiangjiajun committed Jul 14, 2022
1 parent de7c06a commit 90061e1
Showing 4 changed files with 50 additions and 24 deletions.
2 changes: 1 addition & 1 deletion external/paddle2onnx.cmake
@@ -43,7 +43,7 @@ else()
endif(WIN32)

set(PADDLE2ONNX_URL_BASE "https://bj.bcebos.com/paddle2onnx/libs/")
set(PADDLE2ONNX_VERSION "0.9.9")
set(PADDLE2ONNX_VERSION "1.0.0rc1")
if(WIN32)
set(PADDLE2ONNX_FILE "paddle2onnx-win-x64-${PADDLE2ONNX_VERSION}.zip")
elseif(APPLE)
57 changes: 38 additions & 19 deletions fastdeploy/backends/tensorrt/trt_backend.cc
@@ -52,7 +52,7 @@ std::vector<int> toVec(const nvinfer1::Dims& dim) {
return out;
}

bool TrtBackend::InitFromTrt(const std::string& trt_engine_file,
bool TrtBackend::InitFromTrt(const std::string& trt_engine_file,
const TrtBackendOption& option) {
if (initialized_) {
FDERROR << "TrtBackend is already initlized, cannot initialize again."
@@ -139,17 +139,6 @@ bool TrtBackend::InitFromOnnx(const std::string& model_file,
}
cudaSetDevice(option.gpu_id);

if (option.serialize_file != "") {
std::ifstream fin(option.serialize_file, std::ios::binary | std::ios::in);
if (fin) {
FDLogger() << "Detect serialized TensorRT Engine file in "
<< option.serialize_file << ", will load it directly."
<< std::endl;
fin.close();
return InitFromTrt(option.serialize_file);
}
}

std::string onnx_content = "";
if (!from_memory_buffer) {
std::ifstream fin(model_file.c_str(), std::ios::binary | std::ios::in);
@@ -167,6 +156,29 @@ bool TrtBackend::InitFromOnnx(const std::string& model_file,
onnx_content = model_file;
}

// Record the original output order of the ONNX model, because the
// outputs of the converted TensorRT network may come back in a different order.
outputs_order_.clear();
auto onnx_reader =
paddle2onnx::OnnxReader(onnx_content.c_str(), onnx_content.size());
for (int i = 0; i < onnx_reader.NumOutputs(); ++i) {
std::string name(
onnx_reader.output_names[i],
onnx_reader.output_names[i] + strlen(onnx_reader.output_names[i]));
outputs_order_[name] = i;
}

if (option.serialize_file != "") {
std::ifstream fin(option.serialize_file, std::ios::binary | std::ios::in);
if (fin) {
FDLogger() << "Detect serialized TensorRT Engine file in "
<< option.serialize_file << ", will load it directly."
<< std::endl;
fin.close();
return InitFromTrt(option.serialize_file);
}
}

if (!CreateTrtEngine(onnx_content, option)) {
return false;
}
@@ -251,13 +263,20 @@ void TrtBackend::AllocateBufferInDynamicShape(
for (size_t i = 0; i < outputs_desc_.size(); ++i) {
auto idx = engine_->getBindingIndex(outputs_desc_[i].name.c_str());
auto output_dims = context_->getBindingDimensions(idx);
(*outputs)[i].dtype = GetFDDataType(outputs_desc_[i].dtype);
(*outputs)[i].shape.assign(output_dims.d,
output_dims.d + output_dims.nbDims);
(*outputs)[i].name = outputs_desc_[i].name;
(*outputs)[i].data.resize(volume(output_dims) *
TrtDataTypeSize(outputs_desc_[i].dtype));
if ((*outputs)[i].Nbytes() >

// find the original index of output
auto iter = outputs_order_.find(outputs_desc_[i].name);
FDASSERT(iter != outputs_order_.end(),
"Cannot find output:" + outputs_desc_[i].name +
" of tensorrt network from the original model.");
auto ori_idx = iter->second;
(*outputs)[ori_idx].dtype = GetFDDataType(outputs_desc_[i].dtype);
(*outputs)[ori_idx].shape.assign(output_dims.d,
output_dims.d + output_dims.nbDims);
(*outputs)[ori_idx].name = outputs_desc_[i].name;
(*outputs)[ori_idx].data.resize(volume(output_dims) *
TrtDataTypeSize(outputs_desc_[i].dtype));
if ((*outputs)[ori_idx].Nbytes() >
outputs_buffer_[outputs_desc_[i].name].nbBytes()) {
outputs_buffer_[outputs_desc_[i].name].resize(output_dims);
bindings_[idx] = outputs_buffer_[outputs_desc_[i].name].data();
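The change above boils down to two steps: record each ONNX output name with its declared index, then write every TensorRT result into the slot of that original index instead of relying on binding order. The sketch below illustrates the idea with simplified stand-in types (OutputInfo and Tensor are hypothetical, not the FastDeploy API):

#include <cassert>
#include <map>
#include <string>
#include <vector>

// Hypothetical stand-ins for the real backend types.
struct OutputInfo { std::string name; };
struct Tensor { std::string name; std::vector<float> data; };

// Step 1: remember the output order declared by the ONNX model.
std::map<std::string, int> RecordOutputOrder(
    const std::vector<std::string>& onnx_output_names) {
  std::map<std::string, int> order;
  for (size_t i = 0; i < onnx_output_names.size(); ++i) {
    order[onnx_output_names[i]] = static_cast<int>(i);
  }
  return order;
}

// Step 2: place each TensorRT output into the slot of its original index,
// whatever order the engine enumerates its bindings in.
void RestoreOrder(const std::vector<OutputInfo>& trt_outputs,
                  const std::map<std::string, int>& order,
                  std::vector<Tensor>* outputs) {
  outputs->resize(trt_outputs.size());
  for (size_t i = 0; i < trt_outputs.size(); ++i) {
    auto iter = order.find(trt_outputs[i].name);
    assert(iter != order.end());  // every engine output must exist in the ONNX model
    (*outputs)[iter->second].name = trt_outputs[i].name;
    // dtype, shape and data would be copied into (*outputs)[iter->second] here
  }
}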
13 changes: 10 additions & 3 deletions fastdeploy/backends/tensorrt/trt_backend.h
@@ -28,8 +28,8 @@
#include "fastdeploy/backends/tensorrt/common/parserOnnxConfig.h"
#include "fastdeploy/backends/tensorrt/common/sampleUtils.h"

#include "NvInfer.h"
#include <cuda_runtime_api.h>
#include "NvInfer.h"

namespace fastdeploy {
using namespace samplesCommon;
@@ -69,7 +69,7 @@ class TrtBackend : public BaseBackend {
bool InitFromOnnx(const std::string& model_file,
const TrtBackendOption& option = TrtBackendOption(),
bool from_memory_buffer = false);
bool InitFromTrt(const std::string& trt_engine_file,
bool InitFromTrt(const std::string& trt_engine_file,
const TrtBackendOption& option = TrtBackendOption());

bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs);
@@ -89,11 +89,18 @@ class TrtBackend : public BaseBackend {
std::map<std::string, DeviceBuffer> inputs_buffer_;
std::map<std::string, DeviceBuffer> outputs_buffer_;

// When a model has more than one output, TensorRT may order the
// network outputs differently from the original ONNX model.
// This map records the original output order so that the right
// order can be restored after inference.
std::map<std::string, int> outputs_order_;

void GetInputOutputInfo();
void AllocateBufferInDynamicShape(const std::vector<FDTensor>& inputs,
std::vector<FDTensor>* outputs);
bool CreateTrtEngine(const std::string& onnx_model,
const TrtBackendOption& option);
};

} // namespace fastdeploy
} // namespace fastdeploy
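To make the header comment above concrete, here is a toy example (the output names are invented for illustration): if the ONNX model declares outputs {"scores", "boxes"} but the built engine enumerates its bindings as {"boxes", "scores"}, copying results in binding order would swap the two tensors. Looking each binding up in a map like outputs_order_ restores the declared positions:

#include <iostream>
#include <map>
#include <string>
#include <vector>

int main() {
  // Order declared by the (hypothetical) ONNX model.
  std::map<std::string, int> outputs_order = {{"scores", 0}, {"boxes", 1}};
  // Order the TensorRT engine happens to enumerate its bindings in.
  std::vector<std::string> binding_order = {"boxes", "scores"};

  std::vector<std::string> restored(binding_order.size());
  for (size_t i = 0; i < binding_order.size(); ++i) {
    restored[outputs_order[binding_order[i]]] = binding_order[i];
  }
  std::cout << restored[0] << " " << restored[1] << std::endl;  // prints: scores boxes
  return 0;
}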
2 changes: 1 addition & 1 deletion model_zoo/vision/yolov5/cpp/CMakeLists.txt
@@ -5,7 +5,7 @@ CMAKE_MINIMUM_REQUIRED (VERSION 3.16)
# add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)

# Path to the downloaded and extracted FastDeploy library
set(FASTDEPLOY_INSTALL_DIR ${PROJECT_SOURCE_DIR}/fastdeploy-linux-x64-0.0.3/)
set(FASTDEPLOY_INSTALL_DIR ${PROJECT_SOURCE_DIR}/fastdeploy-linux-x64-0.3.0/)

include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)

