Skip to content

Commit d81cbb3

Browse files
Merge pull request #1796 from handong0123:loading-unloading-model-threads
PiperOrigin-RevId: 352083787
2 parents 50a7ef3 + 238d8f4 commit d81cbb3

File tree

3 files changed

+16
-0
lines changed

3 files changed

+16
-0
lines changed

tensorflow_serving/model_servers/main.cc

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -105,6 +105,18 @@ int main(int argc, char** argv) {
105105
tensorflow::Flag("model_base_path", &options.model_base_path,
106106
"path to export (ignored if --model_config_file flag "
107107
"is set, otherwise required)"),
108+
tensorflow::Flag("num_load_threads", &options.num_load_threads,
109+
"The number of threads in the thread-pool used to load "
110+
"servables. If set as 0, we don't use a thread-pool, "
111+
"and servable loads are performed serially in the "
112+
"manager's main work loop, may casue the Serving "
113+
"request to be delayed. Default: 0"),
114+
tensorflow::Flag("num_unload_threads", &options.num_unload_threads,
115+
"The number of threads in the thread-pool used to "
116+
"unload servables. If set as 0, we don't use a "
117+
"thread-pool, and servable loads are performed serially "
118+
"in the manager's main work loop, may casue the Serving "
119+
"request to be delayed. Default: 0"),
108120
tensorflow::Flag("max_num_load_retries", &options.max_num_load_retries,
109121
"maximum number of times it retries loading a model "
110122
"after the first failure, before giving up. "

tensorflow_serving/model_servers/server.cc

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -289,6 +289,8 @@ Status Server::BuildAndStart(const Options& server_options) {
289289
options.custom_model_config_loader = &LoadCustomModelConfig;
290290
options.aspired_version_policy =
291291
std::unique_ptr<AspiredVersionPolicy>(new AvailabilityPreservingPolicy);
292+
options.num_load_threads = server_options.num_load_threads;
293+
options.num_unload_threads = server_options.num_unload_threads;
292294
options.max_num_load_retries = server_options.max_num_load_retries;
293295
options.load_retry_interval_micros =
294296
server_options.load_retry_interval_micros;

tensorflow_serving/model_servers/server.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -60,6 +60,8 @@ class Server {
6060
float per_process_gpu_memory_fraction = 0;
6161
tensorflow::string batching_parameters_file;
6262
tensorflow::string model_name;
63+
tensorflow::int32 num_load_threads = 0;
64+
tensorflow::int32 num_unload_threads = 0;
6365
tensorflow::int32 max_num_load_retries = 5;
6466
tensorflow::int64 load_retry_interval_micros = 1LL * 60 * 1000 * 1000;
6567
tensorflow::int32 file_system_poll_wait_seconds = 1;

0 commit comments

Comments
 (0)