forked from triton-inference-server/server
Commit
David Goodwin committed Nov 14, 2018
1 parent 215afdf · commit 653dfd8
Showing 45 changed files with 871 additions and 180 deletions.
@@ -0,0 +1,154 @@
// Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "src/servables/tensorrt/autofill.h"

#include <NvInfer.h>
#include "src/core/constants.h"
#include "src/core/logging.h"
#include "src/core/model_config.h"
#include "src/servables/tensorrt/loader.h"
#include "src/servables/tensorrt/plan_utils.h"
#include "tensorflow/core/lib/io/path.h"
#include "tensorflow/core/platform/env.h"

namespace nvidia { namespace inferenceserver {

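// Create() inspects a model repository entry holding a TensorRT plan:
// it expects a single version directory containing a single serialized
// plan file, loads that plan, and records the engine's max batch size
// and input/output bindings for later use by Fix().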
tensorflow::Status
AutoFillPlan::Create(
    const std::string& model_name, const std::string& model_path,
    std::unique_ptr<AutoFillPlan>* autofill)
{
  std::set<std::string> version_dirs;
  TF_RETURN_IF_ERROR(GetSubdirs(model_path, &version_dirs));

  // There must be at least one version directory that we can inspect
  // to attempt to determine the platform. For now we only handle the
  // case where there is one version directory.
  if (version_dirs.size() != 1) {
    return tensorflow::errors::Internal(
        "unable to autofill for '", model_name, "' due to multiple versions");
  }

  const auto version_path =
      tensorflow::io::JoinPath(model_path, *(version_dirs.begin()));

  // There must be a single plan file within the version directory...
  std::set<std::string> plan_files;
  TF_RETURN_IF_ERROR(GetFiles(version_path, &plan_files));
  if (plan_files.size() != 1) {
    return tensorflow::errors::Internal(
        "unable to autofill for '", model_name, "', unable to find plan file");
  }

  const std::string plan_file = *(plan_files.begin());
  const auto plan_path = tensorflow::io::JoinPath(version_path, plan_file);

  tensorflow::string plan_data_str;
  TF_RETURN_IF_ERROR(tensorflow::ReadFileToString(
      tensorflow::Env::Default(), plan_path, &plan_data_str));
  std::vector<char> plan_data(plan_data_str.begin(), plan_data_str.end());

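  // Deserialize the plan data into a TensorRT runtime and engine so the
  // engine can be inspected. On failure, destroy whatever was partially
  // created before returning.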
  nvinfer1::IRuntime* runtime = nullptr;
  nvinfer1::ICudaEngine* engine = nullptr;
  if (!LoadPlan(plan_data, &runtime, &engine).ok()) {
    if (engine != nullptr) {
      engine->destroy();
    }
    if (runtime != nullptr) {
      runtime->destroy();
    }
    return tensorflow::errors::Internal(
        "unable to autofill for '", model_name,
        "', unable to create TensorRT runtime and engine");
  }

  const int32_t max_batch_size = engine->getMaxBatchSize();

  // Inputs and outputs.
  ModelConfig config;
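  // Each engine binding corresponds to a model input or output. With an
  // implicit-batch engine, getBindingDimensions() reports the
  // per-instance shape; the batch dimension is not included.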
  for (int i = 0; i < engine->getNbBindings(); ++i) {
    if (engine->bindingIsInput(i)) {
      ModelInput* config_input = config.add_input();
      config_input->set_name(engine->getBindingName(i));
      config_input->set_data_type(
          ConvertDatatype(engine->getBindingDataType(i)));
      nvinfer1::Dims dims = engine->getBindingDimensions(i);
      for (int didx = 0; didx < dims.nbDims; ++didx) {
        config_input->mutable_dims()->Add(dims.d[didx]);
      }
    } else {
      ModelOutput* config_output = config.add_output();
      config_output->set_name(engine->getBindingName(i));
      config_output->set_data_type(
          ConvertDatatype(engine->getBindingDataType(i)));
      nvinfer1::Dims dims = engine->getBindingDimensions(i);
      for (int didx = 0; didx < dims.nbDims; ++didx) {
        config_output->mutable_dims()->Add(dims.d[didx]);
      }
    }
  }

  engine->destroy();
  runtime->destroy();

  autofill->reset(
      new AutoFillPlan(model_name, plan_file, max_batch_size, config));
  return tensorflow::Status::OK();
}

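// Fix() merges the autofilled values into 'config', setting only the
// fields the user left unspecified so that an existing configuration is
// never overwritten.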
tensorflow::Status
AutoFillPlan::Fix(ModelConfig* config)
{
  config->set_platform(kTensorRTPlanPlatform);

  // Set name if not already set.
  if (config->name().empty()) {
    config->set_name(model_name_);
  }

  if (config->default_model_filename().empty()) {
    config->set_default_model_filename(plan_filename_);
  }

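  // A max_batch_size of 0 means the field was not specified, so use the
  // value reported by the engine.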
  if (config->max_batch_size() == 0) {
    config->set_max_batch_size(max_batch_size_);
  }

  // Inputs
  if (config->input().size() == 0) {
    config->mutable_input()->CopyFrom(config_.input());
  }

  // Outputs
  if (config->output().size() == 0) {
    config->mutable_output()->CopyFrom(config_.output());
  }

  return tensorflow::Status::OK();
}

}}  // namespace nvidia::inferenceserver
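
For context, here is a minimal sketch of how a caller might drive this autofill class. AutoFillPlan::Create and AutoFillPlan::Fix come from the diff above; the model name, model path, and the main() scaffolding are hypothetical and assume this repository's include paths and protobuf types:

#include <iostream>
#include <memory>
#include <string>

#include "src/servables/tensorrt/autofill.h"

int main()
{
  // Hypothetical repository entry: model_path contains one version
  // directory holding one serialized TensorRT plan file.
  const std::string model_name = "my_plan_model";
  const std::string model_path = "/models/my_plan_model";

  std::unique_ptr<nvidia::inferenceserver::AutoFillPlan> autofill;
  tensorflow::Status status = nvidia::inferenceserver::AutoFillPlan::Create(
      model_name, model_path, &autofill);
  if (!status.ok()) {
    std::cerr << status.error_message() << std::endl;
    return 1;
  }

  // Start from an empty configuration; Fix() fills in the platform,
  // name, default filename, max batch size, and inputs/outputs.
  nvidia::inferenceserver::ModelConfig config;
  status = autofill->Fix(&config);
  if (status.ok()) {
    std::cout << config.DebugString() << std::endl;
  }
  return status.ok() ? 0 : 1;
}

The printed configuration roughly corresponds to what would go in the model's config.pbtxt: the platform string, plus one input/output entry per engine binding with its data type and dims.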