Add timeout to client apis and tests #6546
Merged
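As context for the diff below: this PR threads a client_timeout argument (in seconds) through the gRPC client's non-inference APIs. A minimal sketch of the behavior the new tests assert, assuming a Triton server on localhost:8001 that is configured to respond slowly (as the test server appears to be); the interval values are illustrative:

import tritonclient.grpc as grpcclient
from tritonclient.utils import InferenceServerException

# Assumes a Triton server on the default gRPC port whose responses
# are delayed enough for a tiny deadline to expire.
client = grpcclient.InferenceServerClient(url="localhost:8001")

try:
    # client_timeout is in seconds and applies per call.
    client.is_server_live(client_timeout=0.1)
except InferenceServerException as e:
    print(e)  # message contains "Deadline Exceeded"

# A generous timeout behaves like an untimed call.
assert client.is_server_live(client_timeout=5.0)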
Changes from 10 of 15 commits (all by jbkyang-nvi):
8136e11 add grpc test
18a059e update to working python tests
524ce16 temp commit for client api tests
753a71e update tests
491e7f1 fix utils bug
cd6b206 add python test for everything
1d33f34 finalize testing and make utils take float instead of only int
587885d address redundant includes
8963361 changed location of delay
2cefaf6 update test
3ad2472 merge main
12e6497 addressed more comments
5dcec9e Merge branch 'main' into kyang-timeout-to-client-apis
8662299 Merge branch 'main' into kyang-timeout-to-client-apis
7dcfcda fix extra dependencies
@@ -37,9 +37,9 @@
 import numpy as np
 import test_util as tu
-import tritongrpcclient as grpcclient
-import tritonhttpclient as httpclient
-from tritonclientutils import InferenceServerException
+import tritonclient.grpc as grpcclient
+import tritonclient.http as httpclient
+from tritonclient.utils import InferenceServerException


 class UserData:
@@ -58,6 +58,299 @@ class ClientTimeoutTest(tu.TestResultCollector):
     def setUp(self):
         self.model_name_ = "custom_identity_int32"
         self.input0_data_ = np.array([[10]], dtype=np.int32)
+        self.input0_data_byte_size_ = 32
+        self.SMALL_INTERVAL = 0.1  # seconds for a timeout
+        self.INFER_SMALL_INTERVAL = 2.0  # seconds for a timeout
+        self.NORMAL_INTERVAL = 5.0  # seconds for server to load then receive request
+
+    def test_grpc_server_live(self):
[Review comment on this line: Move them to a new non_infer_client_timeout_test.py?]
+        triton_client = grpcclient.InferenceServerClient(
+            url="localhost:8001", verbose=True
+        )
+        with self.assertRaises(InferenceServerException) as cm:
+            _ = triton_client.is_server_live(client_timeout=self.SMALL_INTERVAL)
+        self.assertIn("Deadline Exceeded", str(cm.exception))
+        self.assertTrue(
+            triton_client.is_server_live(client_timeout=self.NORMAL_INTERVAL)
+        )
+
+    def test_grpc_is_server_ready(self):
+        triton_client = grpcclient.InferenceServerClient(
+            url="localhost:8001", verbose=True
+        )
+        with self.assertRaises(InferenceServerException) as cm:
+            _ = triton_client.is_server_ready(client_timeout=self.SMALL_INTERVAL)
+        self.assertIn("Deadline Exceeded", str(cm.exception))
+        self.assertTrue(
+            triton_client.is_server_ready(client_timeout=self.NORMAL_INTERVAL)
+        )
+
+    def test_grpc_is_model_ready(self):
+        triton_client = grpcclient.InferenceServerClient(
+            url="localhost:8001", verbose=True
+        )
+        with self.assertRaises(InferenceServerException) as cm:
+            _ = triton_client.is_model_ready(
+                model_name=self.model_name_, client_timeout=self.SMALL_INTERVAL
+            )
+        self.assertIn("Deadline Exceeded", str(cm.exception))
+        self.assertTrue(
+            triton_client.is_model_ready(
+                model_name=self.model_name_, client_timeout=self.NORMAL_INTERVAL
+            )
+        )
+
+    def test_grpc_get_server_metadata(self):
+        triton_client = grpcclient.InferenceServerClient(
+            url="localhost:8001", verbose=True
+        )
+        with self.assertRaises(InferenceServerException) as cm:
+            _ = triton_client.get_server_metadata(client_timeout=self.SMALL_INTERVAL)
+        self.assertIn("Deadline Exceeded", str(cm.exception))
+
+        triton_client.get_server_metadata(client_timeout=self.NORMAL_INTERVAL)
+
+    def test_grpc_get_model_metadata(self):
+        triton_client = grpcclient.InferenceServerClient(
+            url="localhost:8001", verbose=True
+        )
+        with self.assertRaises(InferenceServerException) as cm:
+            _ = triton_client.get_model_metadata(
+                model_name=self.model_name_, client_timeout=self.SMALL_INTERVAL
+            )
+        self.assertIn("Deadline Exceeded", str(cm.exception))
+        triton_client.get_model_metadata(
+            model_name=self.model_name_, client_timeout=self.NORMAL_INTERVAL
+        )
+
+    def test_grpc_get_model_config(self):
+        triton_client = grpcclient.InferenceServerClient(
+            url="localhost:8001", verbose=True
+        )
+        with self.assertRaises(InferenceServerException) as cm:
+            _ = triton_client.get_model_config(
+                model_name=self.model_name_, client_timeout=self.SMALL_INTERVAL
+            )
+        self.assertIn("Deadline Exceeded", str(cm.exception))
+        triton_client.get_model_config(
+            model_name=self.model_name_, client_timeout=self.NORMAL_INTERVAL
+        )
+
+    def test_grpc_model_repository_index(self):
+        triton_client = grpcclient.InferenceServerClient(
+            url="localhost:8001", verbose=True
+        )
+        with self.assertRaises(InferenceServerException) as cm:
+            _ = triton_client.get_model_repository_index(
+                client_timeout=self.SMALL_INTERVAL
+            )
+        self.assertIn("Deadline Exceeded", str(cm.exception))
+        triton_client.get_model_repository_index(client_timeout=self.NORMAL_INTERVAL)
+
+    def test_grpc_load_model(self):
+        triton_client = grpcclient.InferenceServerClient(
+            url="localhost:8001", verbose=True
+        )
+        triton_client.unload_model(model_name=self.model_name_)
+        with self.assertRaises(InferenceServerException) as cm:
+            _ = triton_client.load_model(
+                model_name=self.model_name_, client_timeout=self.SMALL_INTERVAL
+            )
+        self.assertIn("Deadline Exceeded", str(cm.exception))
+        triton_client.unload_model(
+            model_name=self.model_name_, client_timeout=self.NORMAL_INTERVAL
+        )
+        triton_client.load_model(
+            model_name=self.model_name_, client_timeout=self.NORMAL_INTERVAL
+        )
+
+    def test_grpc_unload_model(self):
+        triton_client = grpcclient.InferenceServerClient(
+            url="localhost:8001", verbose=True
+        )
+        with self.assertRaises(InferenceServerException) as cm:
+            _ = triton_client.unload_model(
+                model_name=self.model_name_, client_timeout=self.SMALL_INTERVAL
+            )
+        self.assertIn("Deadline Exceeded", str(cm.exception))
+        triton_client.load_model(model_name=self.model_name_)
+        triton_client.unload_model(
+            model_name=self.model_name_, client_timeout=self.NORMAL_INTERVAL
+        )
+        triton_client.load_model(model_name=self.model_name_)
+
+    def test_grpc_get_inference_statistics(self):
+        triton_client = grpcclient.InferenceServerClient(
+            url="localhost:8001", verbose=True
+        )
+        with self.assertRaises(InferenceServerException) as cm:
+            _ = triton_client.get_inference_statistics(
+                model_name=self.model_name_, client_timeout=self.SMALL_INTERVAL
+            )
+        self.assertIn("Deadline Exceeded", str(cm.exception))
+        triton_client.get_inference_statistics(
+            model_name=self.model_name_, client_timeout=self.NORMAL_INTERVAL
+        )
+
+    def test_grpc_update_trace_settings(self):
+        triton_client = grpcclient.InferenceServerClient(
+            url="localhost:8001", verbose=True
+        )
+        with self.assertRaises(InferenceServerException) as cm:
+            _ = triton_client.update_trace_settings(
+                model_name=self.model_name_, client_timeout=self.SMALL_INTERVAL
+            )
+        self.assertIn("Deadline Exceeded", str(cm.exception))
+        triton_client.update_trace_settings(
+            model_name=self.model_name_, client_timeout=self.NORMAL_INTERVAL
+        )
+
+    def test_grpc_get_trace_settings(self):
+        triton_client = grpcclient.InferenceServerClient(
+            url="localhost:8001", verbose=True
+        )
+        with self.assertRaises(InferenceServerException) as cm:
+            _ = triton_client.get_trace_settings(
+                model_name=self.model_name_, client_timeout=self.SMALL_INTERVAL
+            )
+        self.assertIn("Deadline Exceeded", str(cm.exception))
+        triton_client.get_trace_settings(
+            model_name=self.model_name_, client_timeout=self.NORMAL_INTERVAL
+        )
+
+    def test_grpc_update_log_settings(self):
+        triton_client = grpcclient.InferenceServerClient(
+            url="localhost:8001", verbose=True
+        )
+        settings = {}
+        with self.assertRaises(InferenceServerException) as cm:
+            _ = triton_client.update_log_settings(
+                settings=settings, client_timeout=self.SMALL_INTERVAL
+            )
+        self.assertIn("Deadline Exceeded", str(cm.exception))
+        triton_client.update_log_settings(
+            settings=settings, client_timeout=self.NORMAL_INTERVAL
+        )
+
+    def test_grpc_get_log_settings(self):
+        triton_client = grpcclient.InferenceServerClient(
+            url="localhost:8001", verbose=True
+        )
+        with self.assertRaises(InferenceServerException) as cm:
+            _ = triton_client.get_log_settings(
+                as_json=True, client_timeout=self.SMALL_INTERVAL
+            )
+        self.assertIn("Deadline Exceeded", str(cm.exception))
+        triton_client.get_log_settings(
+            as_json=True, client_timeout=self.NORMAL_INTERVAL
+        )
+
+    def test_grpc_get_system_shared_memory_status(self):
+        triton_client = grpcclient.InferenceServerClient(
+            url="localhost:8001", verbose=True
+        )
+        with self.assertRaises(InferenceServerException) as cm:
+            _ = triton_client.get_system_shared_memory_status(
+                client_timeout=self.SMALL_INTERVAL
+            )
+        self.assertIn("Deadline Exceeded", str(cm.exception))
+        triton_client.get_system_shared_memory_status(
+            client_timeout=self.NORMAL_INTERVAL
+        )
+
+    def test_grpc_register_system_shared_memory(self):
+        triton_client = grpcclient.InferenceServerClient(
+            url="localhost:8001", verbose=True
+        )
+        triton_client.unregister_system_shared_memory()
+        import tritonclient.utils.shared_memory as shm
+
+        shm_ip0_handle = shm.create_shared_memory_region(
+            "input0_data", "/input_simple", self.input0_data_byte_size_
+        )
+        shm.set_shared_memory_region(shm_ip0_handle, [self.input0_data_])
+        with self.assertRaises(InferenceServerException) as cm:
+            _ = triton_client.register_system_shared_memory(
+                "input0_data",
+                "/input_simple",
+                self.input0_data_byte_size_,
+                client_timeout=self.SMALL_INTERVAL,
+            )
+        self.assertIn("Deadline Exceeded", str(cm.exception))
+        triton_client.unregister_system_shared_memory()
+        triton_client.register_system_shared_memory(
+            "input0_data",
+            "/input_simple",
+            self.input0_data_byte_size_,
+            client_timeout=self.NORMAL_INTERVAL,
+        )
+        triton_client.unregister_system_shared_memory()
+
+    def test_grpc_unregister_system_shared_memory(self):
+        triton_client = grpcclient.InferenceServerClient(
+            url="localhost:8001", verbose=True
+        )
+        with self.assertRaises(InferenceServerException) as cm:
+            _ = triton_client.unregister_system_shared_memory(
+                client_timeout=self.SMALL_INTERVAL
+            )
+        self.assertIn("Deadline Exceeded", str(cm.exception))
+        triton_client.unregister_system_shared_memory(
+            client_timeout=self.NORMAL_INTERVAL
+        )
+
+    def test_grpc_get_cuda_shared_memory_status(self):
+        triton_client = grpcclient.InferenceServerClient(
+            url="localhost:8001", verbose=True
+        )
+        with self.assertRaises(InferenceServerException) as cm:
+            _ = triton_client.get_cuda_shared_memory_status(
+                client_timeout=self.SMALL_INTERVAL
+            )
+        self.assertIn("Deadline Exceeded", str(cm.exception))
+        triton_client.get_cuda_shared_memory_status(client_timeout=self.NORMAL_INTERVAL)
+
+    def test_grpc_register_cuda_shared_memory(self):
+        triton_client = grpcclient.InferenceServerClient(
+            url="localhost:8001", verbose=True
+        )
+        import tritonclient.utils.cuda_shared_memory as cshm
+
+        input_data = np.array([[10]], dtype=np.int32)
+        byteSize = input_data.itemsize * input_data.size
+        shm_op0_handle = cshm.create_shared_memory_region(
+            "dummy_data", byte_size=byteSize, device_id=0
+        )
+        cshm.set_shared_memory_region(shm_op0_handle, [input_data])
+        with self.assertRaises(InferenceServerException) as cm:
+            _ = triton_client.register_cuda_shared_memory(
+                "dummy_data",
+                cshm.get_raw_handle(shm_op0_handle),
+                device_id=0,
+                byte_size=byteSize,
+                client_timeout=self.SMALL_INTERVAL,
+            )
+        self.assertIn("Deadline Exceeded", str(cm.exception))
+        triton_client.unregister_cuda_shared_memory()
+        triton_client.register_cuda_shared_memory(
+            "dummy_data",
+            cshm.get_raw_handle(shm_op0_handle),
+            device_id=0,
+            byte_size=byteSize,
+            client_timeout=self.NORMAL_INTERVAL,
+        )
+        cshm.destroy_shared_memory_region(shm_op0_handle)
+
+    def test_grpc_unregister_cuda_shared_memory(self):
+        triton_client = grpcclient.InferenceServerClient(
+            url="localhost:8001", verbose=True
+        )
+        with self.assertRaises(InferenceServerException) as cm:
+            _ = triton_client.unregister_cuda_shared_memory(
+                client_timeout=self.SMALL_INTERVAL
+            )
+        self.assertIn("Deadline Exceeded", str(cm.exception))
+        triton_client.unregister_cuda_shared_memory(client_timeout=self.NORMAL_INTERVAL)

     def _prepare_request(self, protocol):
         if protocol == "grpc":
@@ -118,7 +411,7 @@ def test_grpc_async_infer(self):
             inputs=self.inputs_,
             callback=partial(callback, user_data),
             outputs=self.outputs_,
-            client_timeout=2,
+            client_timeout=self.INFER_SMALL_INTERVAL,
         )
         data_item = user_data._completed_requests.get()
         if type(data_item) == InferenceServerException:
@@ -190,7 +483,9 @@ def test_http_infer(self):
         # response. Expect an exception for small timeout values.
         with self.assertRaises(socket.timeout) as cm:
             triton_client = httpclient.InferenceServerClient(
-                url="localhost:8000", verbose=True, network_timeout=2.0
+                url="localhost:8000",
+                verbose=True,
+                network_timeout=self.INFER_SMALL_INTERVAL,
             )
             _ = triton_client.infer(
                 model_name=self.model_name_, inputs=self.inputs_, outputs=self.outputs_
@@ -216,7 +511,9 @@ def test_http_async_infer(self):
         # response. Expect an exception for small timeout values.
         with self.assertRaises(socket.timeout) as cm:
             triton_client = httpclient.InferenceServerClient(
-                url="localhost:8000", verbose=True, network_timeout=2.0
+                url="localhost:8000",
+                verbose=True,
+                network_timeout=self.INFER_SMALL_INTERVAL,
             )
             async_request = triton_client.async_infer(
                 model_name=self.model_name_, inputs=self.inputs_, outputs=self.outputs_
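Unlike the gRPC client, the HTTP client takes its timeout at construction time: network_timeout applies to the underlying socket, so a slow response raises socket.timeout rather than an InferenceServerException. A minimal sketch of the pattern the two HTTP tests above exercise (the URL and the metadata call are illustrative stand-ins for the delayed inference used in the tests):

import socket

import tritonclient.http as httpclient

# network_timeout (seconds) is set once on the client, not per call.
client = httpclient.InferenceServerClient(
    url="localhost:8000", network_timeout=2.0
)
try:
    # Any request that outlives the timeout raises socket.timeout.
    client.get_server_metadata()
except socket.timeout as e:
    print("request timed out:", e)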
[Review comment: don't need backend repo in client build]