Skip to content

Commit

Permalink
Add timeout to client APIs and tests (#6546)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbkyang-nvi authored Nov 16, 2023
1 parent 1c20826 commit fb5bc9f
Show file tree
Hide file tree
Showing 5 changed files with 474 additions and 47 deletions.
3 changes: 0 additions & 3 deletions Dockerfile.sdk
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:23.10-py3-min
ARG TRITON_CLIENT_REPO_SUBDIR=clientrepo
ARG TRITON_COMMON_REPO_TAG=main
ARG TRITON_CORE_REPO_TAG=main
ARG TRITON_BACKEND_REPO_TAG=main
ARG TRITON_THIRD_PARTY_REPO_TAG=main
ARG TRITON_MODEL_ANALYZER_REPO_TAG=main
ARG TRITON_ENABLE_GPU=ON
Expand Down Expand Up @@ -107,7 +106,6 @@ RUN rm -f /usr/bin/python && \
ARG TRITON_CLIENT_REPO_SUBDIR
ARG TRITON_COMMON_REPO_TAG
ARG TRITON_CORE_REPO_TAG
ARG TRITON_BACKEND_REPO_TAG
ARG TRITON_THIRD_PARTY_REPO_TAG
ARG TRITON_ENABLE_GPU
ARG JAVA_BINDINGS_MAVEN_VERSION
Expand All @@ -123,7 +121,6 @@ RUN cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \
-DTRITON_VERSION=`cat /workspace/TRITON_VERSION` \
-DTRITON_COMMON_REPO_TAG=${TRITON_COMMON_REPO_TAG} \
-DTRITON_CORE_REPO_TAG=${TRITON_CORE_REPO_TAG} \
-DTRITON_BACKEND_REPO_TAG=${TRITON_BACKEND_REPO_TAG} \
-DTRITON_THIRD_PARTY_REPO_TAG=${TRITON_THIRD_PARTY_REPO_TAG} \
-DTRITON_ENABLE_CC_HTTP=ON -DTRITON_ENABLE_CC_GRPC=ON \
-DTRITON_ENABLE_PYTHON_HTTP=ON -DTRITON_ENABLE_PYTHON_GRPC=ON \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@

import numpy as np
import test_util as tu
import tritongrpcclient as grpcclient
import tritonhttpclient as httpclient
from tritonclientutils import InferenceServerException
import tritonclient.grpc as grpcclient
import tritonclient.http as httpclient
from tritonclient.utils import InferenceServerException


class UserData:
Expand All @@ -54,10 +54,12 @@ def callback(user_data, result, error):
user_data._completed_requests.put(result)


class ClientTimeoutTest(tu.TestResultCollector):
class ClientInferTimeoutTest(tu.TestResultCollector):
def setUp(self):
    """Prepare per-test fixtures: the target model, its input payload,
    and the shared short timeout used by the timeout tests."""
    # Small client-side timeout, in seconds; the timeout tests expect it
    # to expire before the (deliberately slow) server responds.
    self.INFER_SMALL_INTERVAL = 2.0  # seconds for a timeout
    # Identity model exercised by every test in this class.
    self.model_name_ = "custom_identity_int32"
    # Single INT32 element to send as INPUT0, plus its byte size.
    self.input0_data_ = np.array([[10]], dtype=np.int32)
    self.input0_data_byte_size_ = 32

def _prepare_request(self, protocol):
if protocol == "grpc":
Expand Down Expand Up @@ -118,7 +120,7 @@ def test_grpc_async_infer(self):
inputs=self.inputs_,
callback=partial(callback, user_data),
outputs=self.outputs_,
client_timeout=2,
client_timeout=self.INFER_SMALL_INTERVAL,
)
data_item = user_data._completed_requests.get()
if type(data_item) == InferenceServerException:
Expand Down Expand Up @@ -190,7 +192,9 @@ def test_http_infer(self):
# response. Expect an exception for small timeout values.
with self.assertRaises(socket.timeout) as cm:
triton_client = httpclient.InferenceServerClient(
url="localhost:8000", verbose=True, network_timeout=2.0
url="localhost:8000",
verbose=True,
network_timeout=self.INFER_SMALL_INTERVAL,
)
_ = triton_client.infer(
model_name=self.model_name_, inputs=self.inputs_, outputs=self.outputs_
Expand All @@ -216,7 +220,9 @@ def test_http_async_infer(self):
# response. Expect an exception for small timeout values.
with self.assertRaises(socket.timeout) as cm:
triton_client = httpclient.InferenceServerClient(
url="localhost:8000", verbose=True, network_timeout=2.0
url="localhost:8000",
verbose=True,
network_timeout=self.INFER_SMALL_INTERVAL,
)
async_request = triton_client.async_infer(
model_name=self.model_name_, inputs=self.inputs_, outputs=self.outputs_
Expand Down
Loading

0 comments on commit fb5bc9f

Please sign in to comment.