diff --git a/qa/L0_backend_python/python_test.py b/qa/L0_backend_python/python_test.py
index ccd4d985b3..ba4dc25ecb 100644
--- a/qa/L0_backend_python/python_test.py
+++ b/qa/L0_backend_python/python_test.py
@@ -40,6 +40,8 @@ from tritonclient.utils import *
 import tritonclient.http as httpclient
 
+TEST_JETSON = bool(int(os.environ.get('TEST_JETSON', 0)))
+
 class PythonTest(tu.TestResultCollector):
@@ -59,6 +61,14 @@ def _infer_help(self, model_name, shape, data_type):
             output0 = result.as_numpy('OUTPUT0')
             self.assertTrue(np.all(input_data_0 == output0))
 
+    def _create_cuda_region(self, client, size, name):
+        import tritonclient.utils.cuda_shared_memory as cuda_shared_memory
+        shm0_handle = cuda_shared_memory.create_shared_memory_region(
+            name, byte_size=size, device_id=0)
+        client.register_cuda_shared_memory(
+            name, cuda_shared_memory.get_raw_handle(shm0_handle), 0, size)
+        return shm0_handle
+
     def _optional_input_infer(self, model_name, has_input0, has_input1):
         with httpclient.InferenceServerClient("localhost:8000") as client:
             shape = (1,)
@@ -144,6 +154,69 @@ def test_growth_error(self):
         with self._shm_leak_detector.Probe() as shm_probe:
             self._infer_help(model_name, shape, dtype)
 
+    # GPU tensors are not supported on jetson
+    # CUDA Shared memory is not supported on jetson
+    if not TEST_JETSON:
+
+        def test_gpu_tensor_error(self):
+            import tritonclient.utils.cuda_shared_memory as cuda_shared_memory
+            model_name = 'identity_bool'
+            with httpclient.InferenceServerClient("localhost:8000") as client:
+                input_data = np.array([[True] * 1000], dtype=bool)
+                inputs = [
+                    httpclient.InferInput("INPUT0", input_data.shape,
+                                          np_to_triton_dtype(input_data.dtype))
+                ]
+                inputs[0].set_data_from_numpy(input_data)
+
+                requested_outputs = [httpclient.InferRequestedOutput('OUTPUT0')]
+
+                # intentionally create a shared memory region with not enough size.
+                client.unregister_cuda_shared_memory()
+                shm0_handle = self._create_cuda_region(client, 1,
+                                                       'output0_data')
+
+                requested_outputs[0].set_shared_memory('output0_data', 1)
+                with self.assertRaises(InferenceServerException) as ex:
+                    client.infer(model_name, inputs, outputs=requested_outputs)
+                self.assertIn(
+                    "should be at least 1000 bytes to hold the results",
+                    str(ex.exception))
+                client.unregister_cuda_shared_memory()
+                cuda_shared_memory.destroy_shared_memory_region(shm0_handle)
+
+        def test_dlpack_tensor_error(self):
+            import tritonclient.utils.cuda_shared_memory as cuda_shared_memory
+            model_name = 'dlpack_identity'
+            with httpclient.InferenceServerClient("localhost:8000") as client:
+                input_data = np.array([[1] * 1000], dtype=np.float32)
+                inputs = [
+                    httpclient.InferInput("INPUT0", input_data.shape,
+                                          np_to_triton_dtype(input_data.dtype))
+                ]
+
+                requested_outputs = [httpclient.InferRequestedOutput('OUTPUT0')]
+                input_data_size = input_data.itemsize * input_data.size
+                client.unregister_cuda_shared_memory()
+                input_region = self._create_cuda_region(client, input_data_size,
+                                                        'input0_data')
+                inputs[0].set_shared_memory('input0_data', input_data_size)
+                cuda_shared_memory.set_shared_memory_region(
+                    input_region, [input_data])
+
+                # Intentionally create a small region to trigger an error
+                shm0_handle = self._create_cuda_region(client, 1,
+                                                       'output0_data')
+                requested_outputs[0].set_shared_memory('output0_data', 1)
+
+                with self.assertRaises(InferenceServerException) as ex:
+                    client.infer(model_name, inputs, outputs=requested_outputs)
+                self.assertIn(
+                    "should be at least 4000 bytes to hold the results",
+                    str(ex.exception))
+                client.unregister_cuda_shared_memory()
+                cuda_shared_memory.destroy_shared_memory_region(shm0_handle)
+
     def test_async_infer(self):
         model_name = "identity_uint8"
         request_parallelism = 4
diff --git a/qa/L0_backend_python/test.sh b/qa/L0_backend_python/test.sh
index 587d1b8e13..539d3b14d8 100755
--- a/qa/L0_backend_python/test.sh
+++ b/qa/L0_backend_python/test.sh
@@ -53,7 +53,7 @@ SERVER_ARGS="$BASE_SERVER_ARGS --backend-config=python,shm-default-byte-size=524
 PYTHON_BACKEND_BRANCH=$PYTHON_BACKEND_REPO_TAG
 CLIENT_PY=./python_test.py
 CLIENT_LOG="./client.log"
-EXPECTED_NUM_TESTS="9"
+EXPECTED_NUM_TESTS="11"
 TEST_RESULT_FILE='test_results.txt'
 SERVER_LOG="./inference_server.log"
 source ../common/util.sh
@@ -128,9 +128,16 @@ mkdir -p models/string_fixed/1/
 cp ../python_models/string_fixed/model.py ./models/string_fixed/1/
 cp ../python_models/string_fixed/config.pbtxt ./models/string_fixed
 
+mkdir -p models/dlpack_identity/1/
+cp ../python_models/dlpack_identity/model.py ./models/dlpack_identity/1/
+cp ../python_models/dlpack_identity/config.pbtxt ./models/dlpack_identity
+
 # Skip torch install on Jetson since it is already installed.
 if [ "$TEST_JETSON" == "0" ]; then
     pip3 install torch==1.13.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
+else
+    # GPU tensor tests are disabled on jetson
+    EXPECTED_NUM_TESTS=9
 fi
 
 prev_num_pages=`get_shm_pages`
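For reference (not part of the patch): a minimal client-side sketch of the CUDA shared-memory output flow the new tests exercise, but with a region sized to hold the full result so inference succeeds. The API calls, model name, and server address are taken from the tests above; it assumes the server is running with identity_bool loaded and a GPU available.

import numpy as np
import tritonclient.http as httpclient
import tritonclient.utils.cuda_shared_memory as cuda_shared_memory
from tritonclient.utils import np_to_triton_dtype

model_name = 'identity_bool'  # assumed to be loaded, as in test_gpu_tensor_error
input_data = np.array([[True] * 1000], dtype=bool)
byte_size = input_data.size * input_data.itemsize  # 1000 bytes

with httpclient.InferenceServerClient("localhost:8000") as client:
    client.unregister_cuda_shared_memory()

    # Create a CUDA shared-memory region large enough for the output and
    # register it with the server (same calls as _create_cuda_region above).
    shm_handle = cuda_shared_memory.create_shared_memory_region(
        'output0_data', byte_size=byte_size, device_id=0)
    client.register_cuda_shared_memory(
        'output0_data', cuda_shared_memory.get_raw_handle(shm_handle), 0,
        byte_size)

    inputs = [
        httpclient.InferInput("INPUT0", input_data.shape,
                              np_to_triton_dtype(input_data.dtype))
    ]
    inputs[0].set_data_from_numpy(input_data)
    outputs = [httpclient.InferRequestedOutput('OUTPUT0')]
    outputs[0].set_shared_memory('output0_data', byte_size)

    # With a correctly sized region this should not raise; the output tensor
    # is written into 'output0_data' on the GPU.
    client.infer(model_name, inputs, outputs=outputs)

    client.unregister_cuda_shared_memory()
    cuda_shared_memory.destroy_shared_memory_region(shm_handle)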