Skip to content

Commit 238d8f4

Browse files
committed
Add two flags (num_load_threads and num_unload_threads) to the server. Setting these flags solves the problem of serving requests being delayed when the model is switched.
1 parent 803dd42 commit 238d8f4

File tree

6 files changed

+22
-6
lines changed

6 files changed

+22
-6
lines changed

tensorflow_serving/core/aspired_versions_manager.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -110,13 +110,13 @@ class AspiredVersionsManager : public Manager,
110110
///
111111
/// If set as 0, we don't use a thread-pool, and servable loads are
112112
/// performed serially in the manager's main work loop.
113-
uint32 num_load_threads = 1;
113+
uint32 num_load_threads = 0;
114114

115115
/// The number of threads in the thread-pool used to unload servables.
116116
///
117117
/// If set as 0, we don't use a thread-pool, and servable unloads are
118118
/// performed serially in the manager's main work loop.
119-
uint32 num_unload_threads = 1;
119+
uint32 num_unload_threads = 0;
120120

121121
/// Maximum number of times we retry loading a servable, after the first
122122
/// failure, before we give up.

tensorflow_serving/core/basic_manager.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -118,12 +118,12 @@ class BasicManager : public Manager {
118118
// The number of threads in the thread-pool used to load servables.
119119
//
120120
// If set as 0, we don't use a thread-pool, and LoadServable() blocks.
121-
uint32 num_load_threads = 1;
121+
uint32 num_load_threads = 0;
122122

123123
// The number of threads in the thread-pool used to unload servables.
124124
//
125125
// If set as 0, we don't use a thread-pool, and UnloadServable() blocks.
126-
uint32 num_unload_threads = 1;
126+
uint32 num_unload_threads = 0;
127127

128128
// EventBus to publish servable state changes. This is optional, if unset,
129129
// we don't publish.

tensorflow_serving/core/caching_manager.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,12 +54,12 @@ class CachingManager : public Manager {
5454
// The number of threads in the thread-pool used to load servables.
5555
//
5656
// If set as 0, we don't use a thread-pool, and LoadServable() blocks.
57-
uint32 num_load_threads = 1;
57+
uint32 num_load_threads = 0;
5858

5959
// The number of threads in the thread-pool used to unload servables.
6060
//
6161
// If set as 0, we don't use a thread-pool.
62-
uint32 num_unload_threads = 1;
62+
uint32 num_unload_threads = 0;
6363

6464
// EventBus to publish servable state changes. This is optional, if unset,
6565
// we don't publish.

tensorflow_serving/model_servers/main.cc

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,18 @@ int main(int argc, char** argv) {
105105
tensorflow::Flag("model_base_path", &options.model_base_path,
106106
"path to export (ignored if --model_config_file flag "
107107
"is set, otherwise required)"),
108+
tensorflow::Flag("num_load_threads", &options.num_load_threads,
109+
"The number of threads in the thread-pool used to load servables. "
110+
"If set as 0, we don't use a thread-pool, and servable loads are "
111+
"performed serially in the manager's main work loop, "
112+
"which may cause serving requests to be delayed. "
113+
"Default: 0"),
114+
tensorflow::Flag("num_unload_threads", &options.num_unload_threads,
115+
"The number of threads in the thread-pool used to unload servables. "
116+
"If set as 0, we don't use a thread-pool, and servable unloads are "
117+
"performed serially in the manager's main work loop, "
118+
"which may cause serving requests to be delayed. "
119+
"Default: 0"),
108120
tensorflow::Flag("max_num_load_retries", &options.max_num_load_retries,
109121
"maximum number of times it retries loading a model "
110122
"after the first failure, before giving up. "

tensorflow_serving/model_servers/server.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,8 @@ Status Server::BuildAndStart(const Options& server_options) {
289289
options.custom_model_config_loader = &LoadCustomModelConfig;
290290
options.aspired_version_policy =
291291
std::unique_ptr<AspiredVersionPolicy>(new AvailabilityPreservingPolicy);
292+
options.num_load_threads = server_options.num_load_threads;
293+
options.num_unload_threads = server_options.num_unload_threads;
292294
options.max_num_load_retries = server_options.max_num_load_retries;
293295
options.load_retry_interval_micros =
294296
server_options.load_retry_interval_micros;

tensorflow_serving/model_servers/server.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,8 @@ class Server {
6060
float per_process_gpu_memory_fraction = 0;
6161
tensorflow::string batching_parameters_file;
6262
tensorflow::string model_name;
63+
tensorflow::int32 num_load_threads = 0;
64+
tensorflow::int32 num_unload_threads = 0;
6365
tensorflow::int32 max_num_load_retries = 5;
6466
tensorflow::int64 load_retry_interval_micros = 1LL * 60 * 1000 * 1000;
6567
tensorflow::int32 file_system_poll_wait_seconds = 1;

0 commit comments

Comments
 (0)