Merge pull request #1796 from handong0123:loading-unloading-model-threads

tensorflow-copybara · tensorflow-copybara · commit d81cbb3a8f52 · 2021-01-15T14:23:05.000-08:00
PiperOrigin-RevId: 352083787
diff --git a/tensorflow_serving/model_servers/main.cc b/tensorflow_serving/model_servers/main.cc
@@ -105,6 +105,18 @@ int main(int argc, char** argv) {
       tensorflow::Flag("model_base_path", &options.model_base_path,
                        "path to export (ignored if --model_config_file flag "
                        "is set, otherwise required)"),
+      tensorflow::Flag("num_load_threads", &options.num_load_threads,
+                       "The number of threads in the thread-pool used to load "
+                       "servables. If set as 0, we don't use a thread-pool, "
+                       "and servable loads are performed serially in the "
+                       "manager's main work loop, which may cause Serving "
+                       "requests to be delayed. Default: 0"),
+      tensorflow::Flag("num_unload_threads", &options.num_unload_threads,
+                       "The number of threads in the thread-pool used to "
+                       "unload servables. If set as 0, we don't use a "
+                       "thread-pool, and servable unloads are performed "
+                       "serially in the manager's main work loop, which may "
+                       "cause Serving requests to be delayed. Default: 0"),
       tensorflow::Flag("max_num_load_retries", &options.max_num_load_retries,
                        "maximum number of times it retries loading a model "
                        "after the first failure, before giving up. "
diff --git a/tensorflow_serving/model_servers/server.cc b/tensorflow_serving/model_servers/server.cc
@@ -289,6 +289,8 @@ Status Server::BuildAndStart(const Options& server_options) {
   options.custom_model_config_loader = &LoadCustomModelConfig;
   options.aspired_version_policy =
       std::unique_ptr<AspiredVersionPolicy>(new AvailabilityPreservingPolicy);
+  options.num_load_threads = server_options.num_load_threads;
+  options.num_unload_threads = server_options.num_unload_threads;
   options.max_num_load_retries = server_options.max_num_load_retries;
   options.load_retry_interval_micros =
       server_options.load_retry_interval_micros;
diff --git a/tensorflow_serving/model_servers/server.h b/tensorflow_serving/model_servers/server.h
@@ -60,6 +60,8 @@ class Server {
     float per_process_gpu_memory_fraction = 0;
     tensorflow::string batching_parameters_file;
     tensorflow::string model_name;
+    tensorflow::int32 num_load_threads = 0;
+    tensorflow::int32 num_unload_threads = 0;
     tensorflow::int32 max_num_load_retries = 5;
     tensorflow::int64 load_retry_interval_micros = 1LL * 60 * 1000 * 1000;
     tensorflow::int32 file_system_poll_wait_seconds = 1;