
Add timeout to client apis and tests #6546

Merged: 15 commits, Nov 16, 2023
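The diff below touches the SDK image build and the client timeout tests. As background, the Python clients expose the timeouts these tests exercise in two places: the gRPC client takes a per-request client_timeout on infer()/async_infer(), while the HTTP client takes network_timeout and connection_timeout on the client object. The following is a minimal usage sketch, not part of the diff; the local server addresses, the placeholder model name "my_model", and the tensor shape are assumptions for illustration.

```python
# Sketch of the client-side timeouts exercised by the tests in this PR.
# Assumes tritonclient is installed, a server is listening on the default
# gRPC (8001) and HTTP (8000) ports, and "my_model" is a placeholder model
# that accepts an INT32 input named "INPUT0".
import socket

import numpy as np
import tritonclient.grpc as grpcclient
import tritonclient.http as httpclient
from tritonclient.utils import InferenceServerException

input_data = np.array([[10]], dtype=np.int32)

# gRPC: the timeout is passed per request via client_timeout (seconds).
grpc_client = grpcclient.InferenceServerClient(url="localhost:8001")
grpc_input = grpcclient.InferInput("INPUT0", list(input_data.shape), "INT32")
grpc_input.set_data_from_numpy(input_data)
try:
    grpc_client.infer(model_name="my_model", inputs=[grpc_input], client_timeout=2.0)
except InferenceServerException as e:
    print("gRPC request failed or timed out:", e)

# HTTP: the timeouts are set on the client object via network_timeout /
# connection_timeout (seconds) and apply to every request it issues.
http_client = httpclient.InferenceServerClient(
    url="localhost:8000", network_timeout=2.0, connection_timeout=2.0
)
http_input = httpclient.InferInput("INPUT0", list(input_data.shape), "INT32")
http_input.set_data_from_numpy(input_data)
try:
    http_client.infer(model_name="my_model", inputs=[http_input])
except socket.timeout as e:
    print("HTTP request timed out:", e)
```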
3 changes: 0 additions & 3 deletions Dockerfile.sdk

@@ -34,7 +34,6 @@ ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:23.10-py3-min
 ARG TRITON_CLIENT_REPO_SUBDIR=clientrepo
 ARG TRITON_COMMON_REPO_TAG=main
 ARG TRITON_CORE_REPO_TAG=main
-ARG TRITON_BACKEND_REPO_TAG=main

Comment from the Contributor Author on the removed line: don't need backend repo in client build

 ARG TRITON_THIRD_PARTY_REPO_TAG=main
 ARG TRITON_MODEL_ANALYZER_REPO_TAG=main
 ARG TRITON_ENABLE_GPU=ON
@@ -107,7 +106,6 @@ RUN rm -f /usr/bin/python && \
 ARG TRITON_CLIENT_REPO_SUBDIR
 ARG TRITON_COMMON_REPO_TAG
 ARG TRITON_CORE_REPO_TAG
-ARG TRITON_BACKEND_REPO_TAG
 ARG TRITON_THIRD_PARTY_REPO_TAG
 ARG TRITON_ENABLE_GPU
 ARG JAVA_BINDINGS_MAVEN_VERSION
@@ -123,7 +121,6 @@ RUN cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \
     -DTRITON_VERSION=`cat /workspace/TRITON_VERSION` \
     -DTRITON_COMMON_REPO_TAG=${TRITON_COMMON_REPO_TAG} \
     -DTRITON_CORE_REPO_TAG=${TRITON_CORE_REPO_TAG} \
-    -DTRITON_BACKEND_REPO_TAG=${TRITON_BACKEND_REPO_TAG} \
     -DTRITON_THIRD_PARTY_REPO_TAG=${TRITON_THIRD_PARTY_REPO_TAG} \
     -DTRITON_ENABLE_CC_HTTP=ON -DTRITON_ENABLE_CC_GRPC=ON \
     -DTRITON_ENABLE_PYTHON_HTTP=ON -DTRITON_ENABLE_PYTHON_GRPC=ON \
(second changed file: the client timeout test)

@@ -37,9 +37,9 @@

 import numpy as np
 import test_util as tu
-import tritongrpcclient as grpcclient
-import tritonhttpclient as httpclient
-from tritonclientutils import InferenceServerException
+import tritonclient.grpc as grpcclient
+import tritonclient.http as httpclient
+from tritonclient.utils import InferenceServerException


 class UserData:
@@ -54,10 +54,12 @@ def callback(user_data, result, error):
         user_data._completed_requests.put(result)


-class ClientTimeoutTest(tu.TestResultCollector):
+class ClientInferTimeoutTest(tu.TestResultCollector):
     def setUp(self):
         self.model_name_ = "custom_identity_int32"
         self.input0_data_ = np.array([[10]], dtype=np.int32)
+        self.input0_data_byte_size_ = 32
+        self.INFER_SMALL_INTERVAL = 2.0  # seconds for a timeout

     def _prepare_request(self, protocol):
         if protocol == "grpc":
@@ -118,7 +120,7 @@ def test_grpc_async_infer(self):
             inputs=self.inputs_,
             callback=partial(callback, user_data),
             outputs=self.outputs_,
-            client_timeout=2,
+            client_timeout=self.INFER_SMALL_INTERVAL,
         )
         data_item = user_data._completed_requests.get()
         if type(data_item) == InferenceServerException:
@@ -190,7 +192,9 @@ def test_http_infer(self):
         # response. Expect an exception for small timeout values.
         with self.assertRaises(socket.timeout) as cm:
             triton_client = httpclient.InferenceServerClient(
-                url="localhost:8000", verbose=True, network_timeout=2.0
+                url="localhost:8000",
+                verbose=True,
+                network_timeout=self.INFER_SMALL_INTERVAL,
             )
             _ = triton_client.infer(
                 model_name=self.model_name_, inputs=self.inputs_, outputs=self.outputs_
@@ -216,7 +220,9 @@ def test_http_async_infer(self):
         # response. Expect an exception for small timeout values.
         with self.assertRaises(socket.timeout) as cm:
             triton_client = httpclient.InferenceServerClient(
-                url="localhost:8000", verbose=True, network_timeout=2.0
+                url="localhost:8000",
+                verbose=True,
+                network_timeout=self.INFER_SMALL_INTERVAL,
             )
             async_request = triton_client.async_infer(
                 model_name=self.model_name_, inputs=self.inputs_, outputs=self.outputs_
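For the asynchronous gRPC path in test_grpc_async_infer above, the timeout surfaces through the callback rather than as an exception on the calling thread. Below is a minimal sketch of that pattern reconstructed from the test fragments; the UserData body, the local URL, and the placeholder model and output names are assumptions.

```python
# Sketch of the async gRPC pattern used by test_grpc_async_infer: a callback
# pushes either the InferResult or the error into a queue, and the caller
# inspects whichever arrives. Assumes a local server and a placeholder model.
import queue
from functools import partial

import numpy as np
import tritonclient.grpc as grpcclient
from tritonclient.utils import InferenceServerException


class UserData:
    def __init__(self):
        self._completed_requests = queue.Queue()


def callback(user_data, result, error):
    # Exactly one of result/error is set; forward it to the caller's queue.
    if error:
        user_data._completed_requests.put(error)
    else:
        user_data._completed_requests.put(result)


client = grpcclient.InferenceServerClient(url="localhost:8001")
infer_input = grpcclient.InferInput("INPUT0", [1, 1], "INT32")
infer_input.set_data_from_numpy(np.array([[10]], dtype=np.int32))

user_data = UserData()
client.async_infer(
    model_name="my_model",  # placeholder model name
    inputs=[infer_input],
    callback=partial(callback, user_data),
    client_timeout=2.0,  # seconds; small values are expected to time out
)

data_item = user_data._completed_requests.get()
if isinstance(data_item, InferenceServerException):
    print("Request failed or timed out:", data_item)
else:
    print("Got result:", data_item.as_numpy("OUTPUT0"))  # placeholder output name
```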