[BYOC][TRT] Allocate GPU data buffers and transfer data when needed #6872

Merged: 3 commits, Nov 7, 2020
Changes from 1 commit
Rename AllocateDeviceBuffer, update docstrings
Trevor Morris committed Nov 7, 2020
commit 5a02216002455bee1896d20e99b78594c8ad6744
9 changes: 4 additions & 5 deletions src/runtime/contrib/tensorrt/tensorrt_builder.cc
@@ -177,10 +177,10 @@ TensorRTEngineAndContext TensorRTBuilder::BuildEngine() {
   // Allocate I/O buffers on GPU for TVM inputs which are on a different context.
   std::vector<runtime::NDArray> device_buffers(engine->getNbBindings());
   for (size_t i = 0; i < network_input_names_.size(); ++i) {
-    AllocateDeviceBufferIfNeeded(engine, network_input_names_[i], &device_buffers);
+    AllocateDeviceBuffer(engine, network_input_names_[i], &device_buffers);
   }
   for (size_t i = 0; i < network_output_names_.size(); ++i) {
-    AllocateDeviceBufferIfNeeded(engine, network_output_names_[i], &device_buffers);
+    AllocateDeviceBuffer(engine, network_output_names_[i], &device_buffers);
   }
   return {engine, context, network_input_names_, network_output_names_, device_buffers};
 }
@@ -231,9 +231,8 @@ void TensorRTBuilder::CleanUp() {
   }
 }

-void TensorRTBuilder::AllocateDeviceBufferIfNeeded(nvinfer1::ICudaEngine* engine,
-                                                   const std::string& name,
-                                                   std::vector<runtime::NDArray>* device_buffers) {
+void TensorRTBuilder::AllocateDeviceBuffer(nvinfer1::ICudaEngine* engine, const std::string& name,
+                                           std::vector<runtime::NDArray>* device_buffers) {
   const uint32_t entry_id = entry_id_map_[name];
   if (data_entry_[entry_id]->ctx.device_type != kDLGPU) {
     const int binding_index = engine->getBindingIndex(name.c_str());
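The hunk above is truncated in this view, ending just inside the renamed function. For context, here is a plausible completion sketched from the visible lines: when the tensor's context is not the GPU, an empty GPU NDArray is allocated for that binding. runtime::NDArray::Empty and the {kDLGPU, 0} context literal are assumptions based on TVM's runtime API of this period, not the verbatim remainder of the file.

void TensorRTBuilder::AllocateDeviceBuffer(nvinfer1::ICudaEngine* engine, const std::string& name,
                                           std::vector<runtime::NDArray>* device_buffers) {
  const uint32_t entry_id = entry_id_map_[name];
  // Tensors already on the GPU are bound to TensorRT directly; only off-GPU
  // tensors need a staging buffer allocated here.
  if (data_entry_[entry_id]->ctx.device_type != kDLGPU) {
    const int binding_index = engine->getBindingIndex(name.c_str());
    std::vector<int64_t> shape(data_entry_[entry_id]->shape,
                               data_entry_[entry_id]->shape + data_entry_[entry_id]->ndim);
    device_buffers->at(binding_index) =
        runtime::NDArray::Empty(shape, data_entry_[entry_id]->dtype, {kDLGPU, 0});
  }
}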
8 changes: 5 additions & 3 deletions src/runtime/contrib/tensorrt/tensorrt_builder.h
@@ -123,9 +123,11 @@ class TensorRTBuilder {
   /*! \brief Clean up resources used to create engine. */
   void CleanUp();

-  /*! \brief If the input DLTensor is not on the GPU, allocate a buffer for it. */
-  void AllocateDeviceBufferIfNeeded(nvinfer1::ICudaEngine* engine, const std::string& name,
-                                    std::vector<runtime::NDArray>* device_buffers);
+  /*! \brief Allocate a GPU buffer for an input or output DLTensor, but only if its context is
+   * not already the GPU. Inputs that are already on the GPU can be passed directly to TensorRT
+   * and do not need a buffer. */
+  void AllocateDeviceBuffer(nvinfer1::ICudaEngine* engine, const std::string& name,
+                            std::vector<runtime::NDArray>* device_buffers);

   /*! \brief Maps a node to its outputs. */
   std::unordered_map<int, std::vector<TensorRTOpInput>> node_output_map_;
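The new docstring covers the allocation half; the "transfer data when needed" half of the PR title happens at inference time, where off-GPU tensors are staged through these buffers before TensorRT's bindings array is populated. A minimal sketch of that pattern, assuming TVM's NDArray::CopyFrom(const DLTensor*); the loop shape and the input_names variable are illustrative, not lifted from the PR:

// Hedged sketch: populate TensorRT's bindings, copying host data to the GPU only when needed.
std::vector<void*> bindings(engine->getNbBindings(), nullptr);
for (const std::string& name : input_names) {
  const int idx = engine->getBindingIndex(name.c_str());
  const DLTensor* entry = data_entry_[entry_id_map_[name]];
  if (entry->ctx.device_type == kDLGPU) {
    bindings[idx] = entry->data;                // already on GPU: bind directly, zero-copy
  } else {
    device_buffers[idx].CopyFrom(entry);        // stage host data into the GPU buffer
    bindings[idx] = device_buffers[idx]->data;  // bind the staging buffer instead
  }
}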
3 changes: 2 additions & 1 deletion src/runtime/contrib/tensorrt/tensorrt_runtime.cc
@@ -165,7 +165,8 @@ class TensorRTRuntime : public JSONRuntimeBase {

  private:
   /*!
-   * \brief Build TensorRT engine from JSON representation.
+   * \brief Build the TensorRT engine from the JSON representation and cache it. If the engine
+   * is already built, do nothing.
    */
   void BuildEngine() {
     if (trt_engine_cache_.count(symbol_name_)) return;
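The guard shown in the hunk makes BuildEngine idempotent: each subgraph's engine is built once and then looked up by symbol_name_ on later calls. Schematically, under the assumption that a builder converts the JSON graph into TensorRT layers (the construction details are not visible in this diff, and CreateBuilder is a hypothetical helper):

void BuildEngine() {
  if (trt_engine_cache_.count(symbol_name_)) return;  // engine already built: reuse the cache entry
  TensorRTBuilder builder = CreateBuilder();          // hypothetical helper; details not in this diff
  // ... convert each JSON graph node into TensorRT layers ...
  trt_engine_cache_[symbol_name_] = builder.BuildEngine();
}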