Skip to content

Commit

Permalink
Add timeout to client APIs and tests (#6546)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbkyang-nvi authored Nov 16, 2023
1 parent 1c20826 commit fb5bc9f
Show file tree
Hide file tree
Showing 5 changed files with 474 additions and 47 deletions.
3 changes: 0 additions & 3 deletions Dockerfile.sdk
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:23.10-py3-min
ARG TRITON_CLIENT_REPO_SUBDIR=clientrepo
ARG TRITON_COMMON_REPO_TAG=main
ARG TRITON_CORE_REPO_TAG=main
ARG TRITON_BACKEND_REPO_TAG=main
ARG TRITON_THIRD_PARTY_REPO_TAG=main
ARG TRITON_MODEL_ANALYZER_REPO_TAG=main
ARG TRITON_ENABLE_GPU=ON
Expand Down Expand Up @@ -107,7 +106,6 @@ RUN rm -f /usr/bin/python && \
ARG TRITON_CLIENT_REPO_SUBDIR
ARG TRITON_COMMON_REPO_TAG
ARG TRITON_CORE_REPO_TAG
ARG TRITON_BACKEND_REPO_TAG
ARG TRITON_THIRD_PARTY_REPO_TAG
ARG TRITON_ENABLE_GPU
ARG JAVA_BINDINGS_MAVEN_VERSION
Expand All @@ -123,7 +121,6 @@ RUN cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \
-DTRITON_VERSION=`cat /workspace/TRITON_VERSION` \
-DTRITON_COMMON_REPO_TAG=${TRITON_COMMON_REPO_TAG} \
-DTRITON_CORE_REPO_TAG=${TRITON_CORE_REPO_TAG} \
-DTRITON_BACKEND_REPO_TAG=${TRITON_BACKEND_REPO_TAG} \
-DTRITON_THIRD_PARTY_REPO_TAG=${TRITON_THIRD_PARTY_REPO_TAG} \
-DTRITON_ENABLE_CC_HTTP=ON -DTRITON_ENABLE_CC_GRPC=ON \
-DTRITON_ENABLE_PYTHON_HTTP=ON -DTRITON_ENABLE_PYTHON_GRPC=ON \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@

import numpy as np
import test_util as tu
import tritongrpcclient as grpcclient
import tritonhttpclient as httpclient
from tritonclientutils import InferenceServerException
import tritonclient.grpc as grpcclient
import tritonclient.http as httpclient
from tritonclient.utils import InferenceServerException


class UserData:
Expand All @@ -54,10 +54,12 @@ def callback(user_data, result, error):
user_data._completed_requests.put(result)


class ClientTimeoutTest(tu.TestResultCollector):
class ClientInferTimeoutTest(tu.TestResultCollector):
def setUp(self):
    """Prepare per-test fixtures: the target model, its input payload,
    and the shared short timeout used by the timeout tests."""
    # Small client-side timeout, in seconds; the timeout tests expect it
    # to expire before the (deliberately slow) server responds.
    self.INFER_SMALL_INTERVAL = 2.0  # seconds for a timeout
    # Identity model exercised by every test in this class.
    self.model_name_ = "custom_identity_int32"
    # Single INT32 element to send as INPUT0, plus its byte size.
    self.input0_data_ = np.array([[10]], dtype=np.int32)
    self.input0_data_byte_size_ = 32

def _prepare_request(self, protocol):
if protocol == "grpc":
Expand Down Expand Up @@ -118,7 +120,7 @@ def test_grpc_async_infer(self):
inputs=self.inputs_,
callback=partial(callback, user_data),
outputs=self.outputs_,
client_timeout=2,
client_timeout=self.INFER_SMALL_INTERVAL,
)
data_item = user_data._completed_requests.get()
if type(data_item) == InferenceServerException:
Expand Down Expand Up @@ -190,7 +192,9 @@ def test_http_infer(self):
# response. Expect an exception for small timeout values.
with self.assertRaises(socket.timeout) as cm:
triton_client = httpclient.InferenceServerClient(
url="localhost:8000", verbose=True, network_timeout=2.0
url="localhost:8000",
verbose=True,
network_timeout=self.INFER_SMALL_INTERVAL,
)
_ = triton_client.infer(
model_name=self.model_name_, inputs=self.inputs_, outputs=self.outputs_
Expand All @@ -216,7 +220,9 @@ def test_http_async_infer(self):
# response. Expect an exception for small timeout values.
with self.assertRaises(socket.timeout) as cm:
triton_client = httpclient.InferenceServerClient(
url="localhost:8000", verbose=True, network_timeout=2.0
url="localhost:8000",
verbose=True,
network_timeout=self.INFER_SMALL_INTERVAL,
)
async_request = triton_client.async_infer(
model_name=self.model_name_, inputs=self.inputs_, outputs=self.outputs_
Expand Down
Loading

0 comments on commit fb5bc9f

Please sign in to comment.