Add testing for GPU tensor error handling (triton-inference-server#5871)
* Add testing for GPU tensor error handling

* Fix up

* Remove exit 0

* Fix jetson

* Fix up
Tabrizian authored Jun 29, 2023
1 parent 1fe247d commit 4ba3871
Showing 2 changed files with 81 additions and 1 deletion.
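
Both new tests exercise the same client-side pattern: register a CUDA shared-memory region for the model output that is smaller than the output requires, then assert that the inference request fails with a descriptive size error. Below is a minimal sketch of that pattern, assuming a Triton server at localhost:8000 serving the identity_bool model used in the diff that follows:

    import numpy as np
    import tritonclient.http as httpclient
    import tritonclient.utils.cuda_shared_memory as cuda_shared_memory
    from tritonclient.utils import InferenceServerException, np_to_triton_dtype

    with httpclient.InferenceServerClient("localhost:8000") as client:
        input_data = np.array([[True] * 1000], dtype=bool)
        inputs = [httpclient.InferInput("INPUT0", input_data.shape,
                                        np_to_triton_dtype(input_data.dtype))]
        inputs[0].set_data_from_numpy(input_data)

        # Register a 1-byte CUDA region for an output that needs 1000 bytes.
        handle = cuda_shared_memory.create_shared_memory_region(
            "output0_data", byte_size=1, device_id=0)
        client.register_cuda_shared_memory(
            "output0_data", cuda_shared_memory.get_raw_handle(handle), 0, 1)
        output = httpclient.InferRequestedOutput("OUTPUT0")
        output.set_shared_memory("output0_data", 1)

        # The request is expected to fail with a size error.
        try:
            client.infer("identity_bool", inputs, outputs=[output])
        except InferenceServerException as e:
            print(e)  # "... should be at least 1000 bytes to hold the results"

        client.unregister_cuda_shared_memory()
        cuda_shared_memory.destroy_shared_memory_region(handle)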
73 changes: 73 additions & 0 deletions qa/L0_backend_python/python_test.py
@@ -40,6 +40,8 @@
from tritonclient.utils import *
import tritonclient.http as httpclient

TEST_JETSON = bool(int(os.environ.get('TEST_JETSON', 0)))


class PythonTest(tu.TestResultCollector):

@@ -59,6 +61,14 @@ def _infer_help(self, model_name, shape, data_type):
        output0 = result.as_numpy('OUTPUT0')
        self.assertTrue(np.all(input_data_0 == output0))

    def _create_cuda_region(self, client, size, name):
        # Allocate a CUDA shared-memory region of `size` bytes on GPU 0,
        # register it with the server under `name`, and return the handle so
        # the caller can destroy it after the test.
        import tritonclient.utils.cuda_shared_memory as cuda_shared_memory
        shm0_handle = cuda_shared_memory.create_shared_memory_region(
            name, byte_size=size, device_id=0)
        client.register_cuda_shared_memory(
            name, cuda_shared_memory.get_raw_handle(shm0_handle), 0, size)
        return shm0_handle

    def _optional_input_infer(self, model_name, has_input0, has_input1):
        with httpclient.InferenceServerClient("localhost:8000") as client:
            shape = (1,)
@@ -144,6 +154,69 @@ def test_growth_error(self):
        with self._shm_leak_detector.Probe() as shm_probe:
            self._infer_help(model_name, shape, dtype)

    # GPU tensors are not supported on jetson
    # CUDA Shared memory is not supported on jetson
    if not TEST_JETSON:

        def test_gpu_tensor_error(self):
            import tritonclient.utils.cuda_shared_memory as cuda_shared_memory
            model_name = 'identity_bool'
            with httpclient.InferenceServerClient("localhost:8000") as client:
                input_data = np.array([[True] * 1000], dtype=bool)
                inputs = [
                    httpclient.InferInput("INPUT0", input_data.shape,
                                          np_to_triton_dtype(input_data.dtype))
                ]
                inputs[0].set_data_from_numpy(input_data)

                requested_outputs = [httpclient.InferRequestedOutput('OUTPUT0')]

                # intentionally create a shared memory region with not enough size.
                client.unregister_cuda_shared_memory()
                shm0_handle = self._create_cuda_region(client, 1,
                                                       'output0_data')

                requested_outputs[0].set_shared_memory('output0_data', 1)
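                # OUTPUT0 needs 1000 bytes (1000 one-byte BOOL elements), so
                # the 1-byte region registered above is too small and the
                # request below must fail.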
                with self.assertRaises(InferenceServerException) as ex:
                    client.infer(model_name, inputs, outputs=requested_outputs)
                self.assertIn(
                    "should be at least 1000 bytes to hold the results",
                    str(ex.exception))
                client.unregister_cuda_shared_memory()
                cuda_shared_memory.destroy_shared_memory_region(shm0_handle)

        def test_dlpack_tensor_error(self):
            import tritonclient.utils.cuda_shared_memory as cuda_shared_memory
            model_name = 'dlpack_identity'
            with httpclient.InferenceServerClient("localhost:8000") as client:
                input_data = np.array([[1] * 1000], dtype=np.float32)
                inputs = [
                    httpclient.InferInput("INPUT0", input_data.shape,
                                          np_to_triton_dtype(input_data.dtype))
                ]

                requested_outputs = [httpclient.InferRequestedOutput('OUTPUT0')]
                input_data_size = input_data.itemsize * input_data.size
                client.unregister_cuda_shared_memory()
                input_region = self._create_cuda_region(client, input_data_size,
                                                        'input0_data')
                inputs[0].set_shared_memory('input0_data', input_data_size)
                cuda_shared_memory.set_shared_memory_region(
                    input_region, [input_data])

                # Intentionally create a small region to trigger an error
                shm0_handle = self._create_cuda_region(client, 1,
                                                       'output0_data')
                requested_outputs[0].set_shared_memory('output0_data', 1)
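                # OUTPUT0 needs 4000 bytes (1000 FP32 elements * 4 bytes), so
                # the 1-byte output region is again too small.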

                with self.assertRaises(InferenceServerException) as ex:
                    client.infer(model_name, inputs, outputs=requested_outputs)
                self.assertIn(
                    "should be at least 4000 bytes to hold the results",
                    str(ex.exception))
                client.unregister_cuda_shared_memory()
                cuda_shared_memory.destroy_shared_memory_region(shm0_handle)

    def test_async_infer(self):
        model_name = "identity_uint8"
        request_parallelism = 4
9 changes: 8 additions & 1 deletion qa/L0_backend_python/test.sh
@@ -53,7 +53,7 @@ SERVER_ARGS="$BASE_SERVER_ARGS --backend-config=python,shm-default-byte-size=524
PYTHON_BACKEND_BRANCH=$PYTHON_BACKEND_REPO_TAG
CLIENT_PY=./python_test.py
CLIENT_LOG="./client.log"
EXPECTED_NUM_TESTS="9"
EXPECTED_NUM_TESTS="11"
TEST_RESULT_FILE='test_results.txt'
SERVER_LOG="./inference_server.log"
source ../common/util.sh
@@ -128,9 +128,16 @@ mkdir -p models/string_fixed/1/
cp ../python_models/string_fixed/model.py ./models/string_fixed/1/
cp ../python_models/string_fixed/config.pbtxt ./models/string_fixed

mkdir -p models/dlpack_identity/1/
cp ../python_models/dlpack_identity/model.py ./models/dlpack_identity/1/
cp ../python_models/dlpack_identity/config.pbtxt ./models/dlpack_identity
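# dlpack_identity is used by the new test_dlpack_tensor_error case in python_test.py.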

# Skip torch install on Jetson since it is already installed.
if [ "$TEST_JETSON" == "0" ]; then
    pip3 install torch==1.13.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
else
    # GPU tensor tests are disabled on jetson
    EXPECTED_NUM_TESTS=9
fi

prev_num_pages=`get_shm_pages`
