Commit aa1f1e5

Support TensorRT reformat-free I/O
1 parent e66dd4c commit aa1f1e5

3 files changed: +55 -15 lines changed

src/backends/tensorrt/plan_backend.cc

Lines changed: 7 additions & 9 deletions
@@ -490,12 +490,11 @@ PlanBackend::Context::InitializeInputBinding(
 
   MemoryFormat fmt =
       ConvertTrtFmtToFmt(engine_->getBindingFormat(binding_index));
-  if (fmt != MemoryFormat::LINEAR) {
+  if (fmt == MemoryFormat::INVALID) {
     return Status(
         RequestStatusCode::INVALID_ARG,
-        "unexpected tensor format " + MemoryFormat_Name(fmt) +
-            " for input '" + input_name +
-            "'. Only LINEAR memory format is supported at present.");
+        "unexpected tensor format " + MemoryFormat_Name(fmt) + " for input '" +
+            input_name + "'.");
   }
 
   nvinfer1::Dims engine_dims = engine_->getBindingDimensions(binding_index);
@@ -507,7 +506,7 @@ PlanBackend::Context::InitializeInputBinding(
   if (!(is_control && is_dynamic_)) {
     RETURN_IF_ERROR(CompareDimsSupported(
         name_, input_name, engine_dims, model_config_dims, support_batching,
-        is_dynamic_));
+        is_dynamic_, fmt));
   } else {
     Status status = ValidateControlDimsDynamic(engine_dims, support_batching);
     if (!status.IsOk()) {
@@ -704,12 +703,11 @@ PlanBackend::Context::InitializeConfigOutputBindings(
 
   MemoryFormat fmt =
       ConvertTrtFmtToFmt(engine_->getBindingFormat(binding_index));
-  if (fmt != MemoryFormat::LINEAR) {
+  if (fmt == MemoryFormat::INVALID) {
     return Status(
         RequestStatusCode::INVALID_ARG,
         "unexpected tensor format " + MemoryFormat_Name(fmt) +
-            " for output '" + io.name() +
-            "'. Only LINEAR memory format is supported at present.");
+            " for output '" + io.name() + "'.");
   }
 
   const DimsList& model_config_dims =
@@ -718,7 +716,7 @@ PlanBackend::Context::InitializeConfigOutputBindings(
   nvinfer1::Dims engine_dims = engine_->getBindingDimensions(binding_index);
   RETURN_IF_ERROR(CompareDimsSupported(
       name_, io.name(), engine_dims, model_config_dims, support_batching,
-      is_dynamic_));
+      is_dynamic_, fmt));
 
   int64_t byte_size;
   if (!is_dynamic_) {

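For reference, a standalone sketch (not part of this commit) of what the relaxed check in InitializeInputBinding() and InitializeConfigOutputBindings() changes: it uses a local stand-in for the MemoryFormat enum from plan_utils.h and prints which binding formats the old check (reject anything non-LINEAR) and the new check (reject only INVALID) would accept.

#include <iostream>
#include <string>
#include <utility>
#include <vector>

// Local stand-in mirroring the MemoryFormat enum in plan_utils.h.
enum class MemoryFormat { LINEAR, CHW2, CHW4, HWC8, CHW16, CHW32, INVALID };

int
main()
{
  const std::vector<std::pair<MemoryFormat, std::string>> formats = {
      {MemoryFormat::LINEAR, "LINEAR"},
      {MemoryFormat::CHW4, "CHW4"},
      {MemoryFormat::HWC8, "HWC8"},
      {MemoryFormat::INVALID, "INVALID"}};
  for (const auto& f : formats) {
    // Old check: any non-LINEAR binding format was an error.
    const bool old_reject = (f.first != MemoryFormat::LINEAR);
    // New check: only a format that cannot be mapped is an error.
    const bool new_reject = (f.first == MemoryFormat::INVALID);
    std::cout << f.second << ": before=" << (old_reject ? "reject" : "accept")
              << ", after=" << (new_reject ? "reject" : "accept") << std::endl;
  }
  return 0;
}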
src/backends/tensorrt/plan_utils.cc

Lines changed: 44 additions & 4 deletions
@@ -56,7 +56,7 @@ ConvertTrtFmtToFmt(nvinfer1::TensorFormat trt_fmt)
     case nvinfer1::TensorFormat::kCHW4:
       return MemoryFormat::CHW4;
     case nvinfer1::TensorFormat::kHWC8:
-      return MemoryFormat::HCW8;
+      return MemoryFormat::HWC8;
     case nvinfer1::TensorFormat::kCHW16:
       return MemoryFormat::CHW16;
     case nvinfer1::TensorFormat::kCHW32:
@@ -76,8 +76,8 @@ MemoryFormat_Name(MemoryFormat fmt)
       return "CHW2";
     case MemoryFormat::CHW4:
       return "CHW4";
-    case MemoryFormat::HCW8:
-      return "HCW8";
+    case MemoryFormat::HWC8:
+      return "HWC8";
     case MemoryFormat::CHW16:
       return "CHW16";
     case MemoryFormat::CHW32:
@@ -89,6 +89,36 @@ MemoryFormat_Name(MemoryFormat fmt)
   return "INVALID";
 }
 
+int
+MemoryFormat_VectorSize(MemoryFormat fmt)
+{
+  unsigned int vector_size = 1;
+  switch (fmt) {
+    case MemoryFormat::LINEAR:
+      vector_size = 1;
+      break;
+    case MemoryFormat::CHW2:
+      vector_size = 2;
+      break;
+    case MemoryFormat::CHW4:
+      vector_size = 4;
+      break;
+    case MemoryFormat::HWC8:
+      vector_size = 8;
+      break;
+    case MemoryFormat::CHW16:
+      vector_size = 16;
+      break;
+    case MemoryFormat::CHW32:
+      vector_size = 32;
+      break;
+    default:
+      vector_size = 1;  // In the default case, assume LINEAR
+      break;
+  }
+  return vector_size;
+}
+
 std::pair<bool, nvinfer1::DataType>
 ConvertDataTypeToTrtType(const DataType& dtype)
 {
@@ -132,7 +162,8 @@ Status
 CompareDimsSupported(
     const std::string& model_name, const std::string& binding_name,
     const nvinfer1::Dims& model_dims, const DimsList& dims,
-    const bool supports_batching, const bool is_dynamic)
+    const bool supports_batching, const bool is_dynamic,
+    const MemoryFormat fmt)
 {
   // If the model configuration expects batching support in the model,
   // then the first dimension must be -1.
@@ -166,6 +197,15 @@ CompareDimsSupported(
       continue;
     }
 
+    // Pad channel dimension if necessary.
+    if (i == dims.size() - 3) {
+      int vector_size = MemoryFormat_VectorSize(fmt);
+      if (vector_size > 1) {
+        model_dim =
+            model_dim + vector_size - ((model_dim + vector_size) % vector_size);
+      }
+    }
+
     if (model_dim != dims[i]) {
       return Status(
           RequestStatusCode::INVALID_ARG,

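The channel-padding step added to CompareDimsSupported() can be exercised on its own. Below is a standalone sketch, not part of the commit; the helper name PadChannelDim is made up for illustration, and vector_size plays the role of MemoryFormat_VectorSize(fmt).

#include <iostream>

// Evaluates the padding expression used in CompareDimsSupported() for the
// channel dimension of a vectorized format.
static int
PadChannelDim(int model_dim, int vector_size)
{
  if (vector_size > 1) {
    model_dim = model_dim + vector_size - ((model_dim + vector_size) % vector_size);
  }
  return model_dim;
}

int
main()
{
  std::cout << PadChannelDim(3, 4) << std::endl;    // 4  : C=3 under CHW4
  std::cout << PadChannelDim(13, 32) << std::endl;  // 32 : C=13 under CHW32
  std::cout << PadChannelDim(5, 1) << std::endl;    // 5  : LINEAR, left unchanged
  return 0;
}

As the outputs show, 3 is compared as 4 under CHW4 and 13 as 32 under CHW32, while LINEAR values pass through unchanged.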
src/backends/tensorrt/plan_utils.h

Lines changed: 4 additions & 2 deletions
@@ -41,7 +41,7 @@ enum class MemoryFormat {
   // Four wide channel vectorized row major format.
   CHW4,
   // Eight channel format where C is padded to a multiple of 8.
-  HCW8,
+  HWC8,
   // Sixteen wide channel vectorized row major format.
   CHW16,
   // Thirty-two wide channel vectorized row major format.
@@ -53,6 +53,7 @@ enum class MemoryFormat {
 MemoryFormat ConvertTrtFmtToFmt(nvinfer1::TensorFormat trt_fmt);
 
 const std::string MemoryFormat_Name(MemoryFormat fmt);
+int MemoryFormat_VectorSize(MemoryFormat fmt);
 
 DataType ConvertTrtTypeToDataType(nvinfer1::DataType trt_type);
 
@@ -72,7 +73,8 @@ Status ValidateDimension(
 Status CompareDimsSupported(
     const std::string& model_name, const std::string& tensor_name,
     const nvinfer1::Dims& model_dims, const DimsList& dims,
-    const bool supports_batching, const bool is_dynamic);
+    const bool supports_batching, const bool is_dynamic,
+    const MemoryFormat fmt);
 
 Status ValidateControlDimsDynamic(
     const nvinfer1::Dims& dims, const bool support_batching);

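The header comment on the renamed HWC8 entry says C is padded to a multiple of 8. Below is a standalone sketch, not part of the commit, of what that padding implies for the element count of such a buffer; the helper name and the round-up expression are assumptions based on that comment, not repository code.

#include <cstdint>
#include <iostream>

// Element count of a tensor bound with the HWC8 format, where the channel
// dimension is rounded up to a multiple of 8 (per the plan_utils.h comment).
static int64_t
Hwc8ElementCount(int64_t n, int64_t c, int64_t h, int64_t w)
{
  const int64_t padded_c = ((c + 7) / 8) * 8;
  return n * h * w * padded_c;
}

int
main()
{
  // A 1x3x224x224 (NCHW) tensor bound as HWC8 holds 1 * 224 * 224 * 8 elements.
  std::cout << Hwc8ElementCount(1, 3, 224, 224) << std::endl;  // 401408
  return 0;
}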