Skip to content

Commit 157ec72

Browse files
authored
HTTP live connections on server shutdown (triton-inference-server#6986)
* Wait for HTTP connection when shutting down
* Add test for shutdown with live HTTP connection
* Use TRITONSERVER_ServerSetExitTimeout() API
* Variable name update
* Stop HTTP service immediately after all connections close
* Remove unused include
* Remove accept new connection check
* Adjust test for 'Remove accept new connection check' and add testing for countdown restart
* Fix non exit timeout supported endpoints
* Improve existing shutdown test reliability on extra http shutdown delay
* Fix gap between decided to close socket and actually close socket
* Start rejecting new connections once shutdown polling start
* Group checking logic
1 parent 726b764 commit 157ec72

File tree

5 files changed

+153
-8
lines changed

5 files changed

+153
-8
lines changed

qa/L0_lifecycle/lifecycle_test.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2662,6 +2662,7 @@ def callback(user_data, result, error):
26622662

26632663
# Send signal to shutdown the server
26642664
os.kill(int(os.environ["SERVER_PID"]), signal.SIGINT)
2665+
time.sleep(0.5)
26652666

26662667
# Send more requests and should be rejected
26672668
try:
@@ -2721,6 +2722,7 @@ def callback(user_data, result, error):
27212722

27222723
# Send signal to shutdown the server
27232724
os.kill(int(os.environ["SERVER_PID"]), signal.SIGINT)
2725+
time.sleep(0.5)
27242726

27252727
# Send requests with different characteristic
27262728
# 1: New sequence with new sequence ID
@@ -2808,6 +2810,7 @@ def callback(user_data, result, error):
28082810

28092811
# Send signal to shutdown the server
28102812
os.kill(int(os.environ["SERVER_PID"]), signal.SIGINT)
2813+
time.sleep(0.5)
28112814

28122815
# Send more requests and should be rejected
28132816
try:
@@ -3360,6 +3363,48 @@ def test_shutdown_while_loading(self):
33603363
# The server will shutdown after this sub-test exits. The server must shutdown
33613364
# without any hang or runtime error.
33623365

3366+
def test_shutdown_with_live_connection(self):
    """Check that an open HTTP connection keeps working during shutdown.

    The test opens an HTTP client connection, sends SIGINT to the server
    process named by the SERVER_PID environment variable, and verifies that
    the already-established connection can still run inference. After the
    connection is closed, it reads the server log named by the SERVER_LOG
    environment variable and asserts that core shutdown began and that the
    exit timeout countdown was logged from its initial value only once.
    """
    model_name = "add_sub"
    model_shape = (16,)

    input_data = np.ones(shape=model_shape, dtype=np.float32)
    inputs = [
        httpclient.InferInput("INPUT0", model_shape, "FP32"),
        httpclient.InferInput("INPUT1", model_shape, "FP32"),
    ]
    inputs[0].set_data_from_numpy(input_data)
    inputs[1].set_data_from_numpy(input_data)

    # start connection
    conn = httpclient.InferenceServerClient("localhost:8000", verbose=True)
    conn.infer(model_name, inputs)

    # shutdown the server, then give it time to begin its shutdown sequence
    os.kill(int(os.environ["SERVER_PID"]), signal.SIGINT)
    time.sleep(2)

    # connection should still work
    conn.infer(model_name, inputs)

    # close connection, then give the server time to notice and move on
    conn.close()
    time.sleep(2)

    # check exit timeout countdown did not restart
    # NOTE(review): "Timeout 30: " assumes the server runs with a 30-second
    # exit timeout - confirm against the server launch arguments.
    with open(os.environ["SERVER_LOG"]) as f:
        server_log = f.read()
    self.assertIn(
        "Waiting for in-flight requests to complete.",
        server_log,
        "precondition not met - core shutdown did not begin",
    )
    self.assertEqual(
        server_log.count("Timeout 30: "),
        1,
        "exit timeout countdown restart detected",
    )
3407+
33633408

33643409
if __name__ == "__main__":
33653410
unittest.main()

qa/L0_lifecycle/test.sh

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2102,6 +2102,35 @@ if [ "$ACTUAL_LOAD_UNLOAD_ORDER" != "$EXPECTED_LOAD_UNLOAD_ORDER" ]; then
21022102
RET=1
21032103
fi
21042104

2105+
LOG_IDX=$((LOG_IDX+1))

# LifeCycleTest.test_shutdown_with_live_connection
# Build a fresh model repository containing only the Python add_sub model,
# laid out as models/add_sub/1/model.py.
rm -rf models
mkdir models
cp -r ../python_models/add_sub models/ && (cd models/add_sub && \
    mkdir 1 && mv model.py 1)

SERVER_ARGS="--model-repository=$(pwd)/models"
SERVER_LOG="./inference_server_$LOG_IDX.log"
# run_server is defined outside this section; a SERVER_PID of "0" is treated
# below as a failed start.
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat "$SERVER_LOG"
    exit 1
fi

# Disable errexit around the test so a failure is recorded in RET instead of
# aborting the script. The test reads SERVER_PID (to signal the server) and
# SERVER_LOG (to inspect the server output) from its environment.
set +e
# NOTE(review): $LC_TEST is left unquoted because its definition is not
# visible here - confirm it is a single path before quoting.
SERVER_PID="$SERVER_PID" SERVER_LOG="$SERVER_LOG" python $LC_TEST LifeCycleTest.test_shutdown_with_live_connection >>"$CLIENT_LOG" 2>&1
if [ $? -ne 0 ]; then
    cat "$CLIENT_LOG"
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi
set -e

kill "$SERVER_PID"
wait "$SERVER_PID"
2133+
21052134
if [ $RET -eq 0 ]; then
21062135
echo -e "\n***\n*** Test Passed\n***"
21072136
else

src/http_server.cc

Lines changed: 44 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,7 @@ HTTPServer::Start()
212212
evhtp_enable_flag(htp_, EVHTP_FLAG_ENABLE_REUSEPORT);
213213
}
214214
evhtp_set_gencb(htp_, HTTPServer::Dispatch, this);
215+
evhtp_set_pre_accept_cb(htp_, HTTPServer::NewConnection, this);
215216
evhtp_use_threads_wexit(htp_, NULL, NULL, thread_cnt_, NULL);
216217
if (evhtp_bind_socket(htp_, address_.c_str(), port_, 1024) != 0) {
217218
return TRITONSERVER_ErrorNew(
@@ -235,8 +236,22 @@ HTTPServer::Start()
235236
}
236237

237238
TRITONSERVER_Error*
238-
HTTPServer::Stop()
239+
HTTPServer::Stop(uint32_t* exit_timeout_secs, const std::string& service_name)
239240
{
241+
{
242+
std::lock_guard<std::mutex> lock(conn_mu_);
243+
accepting_new_conn_ = false;
244+
}
245+
if (exit_timeout_secs != nullptr) {
246+
// Note: conn_cnt_ can only decrease
247+
while (*exit_timeout_secs > 0 && conn_cnt_ > 0) {
248+
LOG_INFO << "Timeout " << *exit_timeout_secs << ": Found " << conn_cnt_
249+
<< " " << service_name << " service connections";
250+
std::this_thread::sleep_for(std::chrono::seconds(1));
251+
(*exit_timeout_secs)--;
252+
}
253+
}
254+
240255
if (worker_.joinable()) {
241256
// Notify event loop to break via fd write
242257
send(fds_[1], (const char*)&evbase_, sizeof(event_base*), 0);
@@ -249,7 +264,6 @@ HTTPServer::Stop()
249264
event_base_free(evbase_);
250265
return nullptr;
251266
}
252-
253267
return TRITONSERVER_ErrorNew(
254268
TRITONSERVER_ERROR_UNAVAILABLE, "HTTP server is not running.");
255269
}
@@ -267,6 +281,34 @@ HTTPServer::Dispatch(evhtp_request_t* req, void* arg)
267281
(static_cast<HTTPServer*>(arg))->Handle(req);
268282
}
269283

284+
evhtp_res
HTTPServer::NewConnection(evhtp_connection_t* conn, void* arg)
{
  // Pre-accept callback: 'arg' is the HTTPServer instance that was supplied
  // when the callback was registered.
  auto* const self = static_cast<HTTPServer*>(arg);

  // Decide admission and bump the live-connection counter under one lock so
  // the flag check and the increment cannot be separated.
  bool admitted = false;
  {
    std::lock_guard<std::mutex> guard(self->conn_mu_);
    admitted = self->accepting_new_conn_;
    if (admitted) {
      ++self->conn_cnt_;
    }
  }
  if (!admitted) {
    return EVHTP_RES_SERVUNAVAIL;  // reset connection
  }

  // Only admitted connections get the fini hook, so EndConnection decrements
  // the counter exactly for the connections counted above.
  evhtp_connection_set_hook(
      conn, evhtp_hook_on_connection_fini,
      (evhtp_hook)(void*)HTTPServer::EndConnection, arg);
  return EVHTP_RES_OK;
}
300+
301+
evhtp_res
HTTPServer::EndConnection(evhtp_connection_t* conn, void* arg)
{
  // Connection-fini hook: 'arg' is the owning HTTPServer. Drop one from the
  // live-connection counter while holding the connection mutex.
  auto* const self = static_cast<HTTPServer*>(arg);
  std::lock_guard<std::mutex> guard(self->conn_mu_);
  --self->conn_cnt_;
  return EVHTP_RES_OK;
}
311+
270312
#ifdef TRITON_ENABLE_METRICS
271313

272314
void

src/http_server.h

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include <list>
3232
#include <map>
3333
#include <memory>
34+
#include <mutex>
3435
#include <queue>
3536
#include <string>
3637
#include <thread>
@@ -80,15 +81,18 @@ class HTTPServer {
8081
virtual ~HTTPServer() { IGNORE_ERR(Stop()); }
8182

8283
TRITONSERVER_Error* Start();
83-
TRITONSERVER_Error* Stop();
84+
TRITONSERVER_Error* Stop(
85+
uint32_t* exit_timeout_secs = nullptr,
86+
const std::string& service_name = "HTTP");
8487

8588
protected:
8689
explicit HTTPServer(
8790
const int32_t port, const bool reuse_port, const std::string& address,
8891
const std::string& header_forward_pattern, const int thread_cnt)
8992
: port_(port), reuse_port_(reuse_port), address_(address),
9093
header_forward_pattern_(header_forward_pattern),
91-
thread_cnt_(thread_cnt), header_forward_regex_(header_forward_pattern_)
94+
thread_cnt_(thread_cnt), header_forward_regex_(header_forward_pattern_),
95+
conn_cnt_(0), accepting_new_conn_(true)
9296
{
9397
}
9498

@@ -100,6 +104,9 @@ class HTTPServer {
100104

101105
static void StopCallback(evutil_socket_t sock, short events, void* arg);
102106

107+
static evhtp_res NewConnection(evhtp_connection_t* conn, void* arg);
108+
static evhtp_res EndConnection(evhtp_connection_t* conn, void* arg);
109+
103110
int32_t port_;
104111
bool reuse_port_;
105112
std::string address_;
@@ -112,6 +119,10 @@ class HTTPServer {
112119
std::thread worker_;
113120
evutil_socket_t fds_[2];
114121
event* break_ev_;
122+
123+
std::mutex conn_mu_;
124+
uint32_t conn_cnt_;
125+
bool accepting_new_conn_;
115126
};
116127

117128
#ifdef TRITON_ENABLE_METRICS

src/main.cc

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2018-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1+
// Copyright 2018-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22
//
33
// Redistribution and use in source and binary forms, with or without
44
// modification, are permitted provided that the following conditions
@@ -300,13 +300,13 @@ StartEndpoints(
300300
}
301301

302302
bool
303-
StopEndpoints()
303+
StopEndpoints(uint32_t* exit_timeout_secs)
304304
{
305305
bool ret = true;
306306

307307
#ifdef TRITON_ENABLE_HTTP
308308
if (g_http_service) {
309-
TRITONSERVER_Error* err = g_http_service->Stop();
309+
TRITONSERVER_Error* err = g_http_service->Stop(exit_timeout_secs);
310310
if (err != nullptr) {
311311
LOG_TRITONSERVER_ERROR(err, "failed to stop HTTP service");
312312
ret = false;
@@ -316,6 +316,17 @@ StopEndpoints()
316316
}
317317
#endif // TRITON_ENABLE_HTTP
318318

319+
return ret;
320+
}
321+
322+
bool
323+
StopEndpoints()
324+
{
325+
bool ret = true;
326+
327+
// TODO: Add support for 'exit_timeout_secs' to the endpoints below and move
328+
// them to the 'StopEndpoints(uint32_t* exit_timeout_secs)' function above.
329+
319330
#ifdef TRITON_ENABLE_GRPC
320331
if (g_grpc_service) {
321332
TRITONSERVER_Error* err = g_grpc_service->Stop();
@@ -509,6 +520,11 @@ main(int argc, char** argv)
509520
triton::server::signal_exit_cv_.wait_for(lock, wait_timeout);
510521
}
511522

523+
// Stop the HTTP[, gRPC, and metrics] endpoints, and update exit timeout.
524+
uint32_t exit_timeout_secs = g_triton_params.exit_timeout_secs_;
525+
StopEndpoints(&exit_timeout_secs);
526+
TRITONSERVER_ServerSetExitTimeout(server_ptr, exit_timeout_secs);
527+
512528
TRITONSERVER_Error* stop_err = TRITONSERVER_ServerStop(server_ptr);
513529

514530
// If unable to gracefully stop the server then Triton threads and
@@ -519,8 +535,10 @@ main(int argc, char** argv)
519535
exit(1);
520536
}
521537

522-
// Stop tracing and the HTTP, GRPC, and metrics endpoints.
538+
// Stop gRPC and metrics endpoints that do not yet support exit timeout.
523539
StopEndpoints();
540+
541+
// Stop tracing.
524542
StopTracing(&trace_manager);
525543

526544
#ifdef TRITON_ENABLE_ASAN

0 commit comments

Comments
 (0)