Fix cuda graph support when max_batch_size is 0 (triton-inference-ser…
madhu-nvda committed Feb 11, 2021
1 parent f3c661d commit 43ea7e4
Showing 1 changed file with 10 additions and 1 deletion.
11 changes: 10 additions & 1 deletion src/backends/tensorrt/plan_backend.cc
@@ -1673,7 +1673,13 @@ PlanBackend::InitializeGraphSpecs(
   // default build for batch sizes 1, 2, 3, 4, 6, 8, 12, 16, 'max_batch_size'.
   // If preferred batch size is specified, then the batch sizes will be
   // 1, preferred batch sizes, 'max_batch_size'.
-  std::set<int> cuda_graph_batch_sizes{1};
+  std::set<int> cuda_graph_batch_sizes;
+  if (Config().max_batch_size() == 0) {
+    cuda_graph_batch_sizes = {0};
+  } else {
+    cuda_graph_batch_sizes = {1};
+  }
+
   if (Config().has_dynamic_batching()) {
     for (const auto bs : Config().dynamic_batching().preferred_batch_size()) {
       cuda_graph_batch_sizes.emplace(bs);
@@ -1687,6 +1693,9 @@
       }
     } else {
       cuda_graph_batch_sizes = {1, 2, 3, 4, 6, 8, 12, 16};
+      if (Config().max_batch_size() == 0) {
+        cuda_graph_batch_sizes.emplace(0);
+      }
     }
     if (Config().max_batch_size() > 0) {
       cuda_graph_batch_sizes.emplace(Config().max_batch_size());
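
For readers skimming the diff, the sketch below restates the selection logic as a standalone function. It is illustrative only: SelectCudaGraphBatchSizes is a hypothetical name, the Triton model-config accessors (Config().max_batch_size(), Config().has_dynamic_batching(), the preferred batch sizes) are replaced by plain parameters, and the code hidden between the two hunks is omitted.

// Minimal standalone sketch of the batch-size selection logic in the diff
// above. SelectCudaGraphBatchSizes is a hypothetical free function written
// for illustration; the real backend reads these values from the model
// config rather than taking them as parameters.
#include <iostream>
#include <set>
#include <vector>

std::set<int>
SelectCudaGraphBatchSizes(
    int max_batch_size, bool has_dynamic_batching,
    const std::vector<int>& preferred_batch_sizes)
{
  // A model with max_batch_size == 0 does not support batching, so the only
  // meaningful "batch size" to capture a graph for is 0; otherwise start at 1.
  std::set<int> cuda_graph_batch_sizes;
  if (max_batch_size == 0) {
    cuda_graph_batch_sizes = {0};
  } else {
    cuda_graph_batch_sizes = {1};
  }

  if (has_dynamic_batching) {
    // With dynamic batching, capture a graph for each preferred batch size.
    for (const auto bs : preferred_batch_sizes) {
      cuda_graph_batch_sizes.emplace(bs);
    }
  } else {
    // Default set when no batching preferences are configured.
    cuda_graph_batch_sizes = {1, 2, 3, 4, 6, 8, 12, 16};
    if (max_batch_size == 0) {
      cuda_graph_batch_sizes.emplace(0);
    }
  }

  // Only add 'max_batch_size' itself when the model actually batches.
  if (max_batch_size > 0) {
    cuda_graph_batch_sizes.emplace(max_batch_size);
  }
  return cuda_graph_batch_sizes;
}

int
main()
{
  // Non-batching model: prints 0 1 2 3 4 6 8 12 16 (batch size 0 included).
  for (const auto bs : SelectCudaGraphBatchSizes(0, false, {})) {
    std::cout << bs << ' ';
  }
  std::cout << '\n';

  // Batching model with max_batch_size 8 and preferred sizes 2 and 4:
  // prints 1 2 4 8.
  for (const auto bs : SelectCudaGraphBatchSizes(8, true, {2, 4})) {
    std::cout << bs << ' ';
  }
  std::cout << '\n';
  return 0;
}

The net effect, as the commit title says, is that a model configured with max_batch_size 0 now gets batch size 0 in the capture set instead of the old hard-coded starting value of 1.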
