Commit

Add shared memory leak detection to Python backend tests (triton-inference-server#4122)

* Add shared memory leak detection to Python backend tests

* Add BLS tests to shared memory leak detection

* Remove delays for the tests that involved growing the region

* Update copyrights
Tabrizian committed Apr 1, 2022
1 parent 99349b1 commit a40edf1
Showing 10 changed files with 423 additions and 296 deletions.
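
Every test touched by this commit follows the same pattern: the inference calls are wrapped in a shm_util.ShmLeakDetector().Probe() context manager so that any growth in shared memory across the call is reported as a failure. shm_util.py itself is not among the hunks shown on this page, so the sketch below is only an illustration of that before/after probing idea, built from the Python standard library; the class name SimpleShmLeakDetector, the /dev/shm scan, and the assertion are assumptions rather than the repository's actual implementation.

# Illustrative sketch only -- not the ShmLeakDetector shipped in qa/common/shm_util.py.
# Idea: snapshot shared memory usage before the test body runs, snapshot it again
# afterwards, and flag any growth as a potential leak.
import glob
import os
from contextlib import contextmanager


def _shm_bytes(root="/dev/shm"):
    # Sum the sizes of the files currently backing the shared memory regions.
    return sum(
        os.path.getsize(path)
        for path in glob.glob(os.path.join(root, "*"))
        if os.path.isfile(path))


class SimpleShmLeakDetector:

    @contextmanager
    def Probe(self):
        before = _shm_bytes()
        yield
        after = _shm_bytes()
        assert after <= before, \
            "possible shared memory leak: {} bytes before, {} bytes after".format(
                before, after)

A test then wraps its client calls in "with self._shm_leak_detector.Probe():", which is exactly the shape the diffs below adopt.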
11 changes: 11 additions & 0 deletions Dockerfile.QA
@@ -154,8 +154,19 @@ RUN cd ${TRITONTMP_DIR}/tritonbuild/identity && \
-DTRITON_BACKEND_REPO_TAG:STRING=${TRITON_BACKEND_REPO_TAG} .. && \
make -j16 install

# L0_backend_python tests require triton_shm_monitor
RUN cd ${TRITONTMP_DIR}/tritonbuild/python && \
rm -rf install build && mkdir build && cd build && \
cmake -DCMAKE_INSTALL_PREFIX:PATH=${TRITONTMP_DIR}/tritonbuild/python/install \
-DTRITON_COMMON_REPO_TAG:STRING=${TRITON_COMMON_REPO_TAG} \
-DTRITON_CORE_REPO_TAG:STRING=${TRITON_CORE_REPO_TAG} \
-DTRITON_BACKEND_REPO_TAG:STRING=${TRITON_BACKEND_REPO_TAG} .. && \
make -j18 triton-shm-monitor install

RUN cp ${TRITONTMP_DIR}/tritonbuild/identity/install/backends/identity/libtriton_identity.so \
qa/L0_lifecycle/. && \
cp ${TRITONTMP_DIR}/tritonbuild/python/install/backends/python/triton_shm_monitor*.so \
qa/common/. && \
mkdir -p qa/L0_perf_nomodel/custom_models/custom_zero_1_float32/1 && \
mkdir -p qa/L0_perf_pyclients/custom_models/custom_zero_1_int32/1 && \
mkdir -p qa/L0_infer_shm && \
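
The build step above copies triton_shm_monitor*.so into qa/common/, which is where the Python test utilities load it from. As a quick sanity check, one could confirm that the copied extension actually imports before running the L0_backend_python suites. The snippet below is illustrative only and is not part of this commit; it assumes it is run from the QA tree root, and it assumes nothing about the module's API beyond its name:

# Hypothetical sanity check: verify that the triton_shm_monitor extension copied
# into qa/common/ can be imported. The module name comes from the Dockerfile above;
# the working-directory handling is an assumption.
import importlib
import sys

sys.path.append("qa/common")

try:
    importlib.import_module("triton_shm_monitor")
    print("triton_shm_monitor imported successfully")
except ImportError as exc:
    print("triton_shm_monitor is not importable: {}".format(exc))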
84 changes: 47 additions & 37 deletions qa/L0_backend_python/ensemble/ensemble_test.py
@@ -1,4 +1,4 @@
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
# Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
@@ -28,6 +28,7 @@
sys.path.append("../../common")

import test_util as tu
import shm_util
import tritonclient.http as httpclient
from tritonclient.utils import *
import numpy as np
@@ -36,49 +37,58 @@

class EnsembleTest(tu.TestResultCollector):

def setUp(self):
self._shm_leak_detector = shm_util.ShmLeakDetector()

def test_ensemble(self):
model_name = "ensemble"
shape = [16]
with httpclient.InferenceServerClient("localhost:8000") as client:
input_data_0 = np.random.random(shape).astype(np.float32)
input_data_1 = np.random.random(shape).astype(np.float32)
inputs = [
httpclient.InferInput("INPUT0", input_data_0.shape,
np_to_triton_dtype(input_data_0.dtype)),
httpclient.InferInput("INPUT1", input_data_1.shape,
np_to_triton_dtype(input_data_1.dtype))
]
inputs[0].set_data_from_numpy(input_data_0)
inputs[1].set_data_from_numpy(input_data_1)
result = client.infer(model_name, inputs)
output0 = result.as_numpy('OUTPUT0')
output1 = result.as_numpy('OUTPUT1')
self.assertIsNotNone(output0)
self.assertIsNotNone(output1)
with self._shm_leak_detector.Probe() as shm_probe:
with httpclient.InferenceServerClient("localhost:8000") as client:
input_data_0 = np.random.random(shape).astype(np.float32)
input_data_1 = np.random.random(shape).astype(np.float32)
inputs = [
httpclient.InferInput(
"INPUT0", input_data_0.shape,
np_to_triton_dtype(input_data_0.dtype)),
httpclient.InferInput(
"INPUT1", input_data_1.shape,
np_to_triton_dtype(input_data_1.dtype))
]
inputs[0].set_data_from_numpy(input_data_0)
inputs[1].set_data_from_numpy(input_data_1)
result = client.infer(model_name, inputs)
output0 = result.as_numpy('OUTPUT0')
output1 = result.as_numpy('OUTPUT1')
self.assertIsNotNone(output0)
self.assertIsNotNone(output1)

self.assertTrue(np.allclose(output0, 2 * input_data_0))
self.assertTrue(np.allclose(output1, 2 * input_data_1))
self.assertTrue(np.allclose(output0, 2 * input_data_0))
self.assertTrue(np.allclose(output1, 2 * input_data_1))

model_name = "ensemble_gpu"
with httpclient.InferenceServerClient("localhost:8000") as client:
input_data_0 = np.random.random(shape).astype(np.float32)
input_data_1 = np.random.random(shape).astype(np.float32)
inputs = [
httpclient.InferInput("INPUT0", input_data_0.shape,
np_to_triton_dtype(input_data_0.dtype)),
httpclient.InferInput("INPUT1", input_data_1.shape,
np_to_triton_dtype(input_data_1.dtype))
]
inputs[0].set_data_from_numpy(input_data_0)
inputs[1].set_data_from_numpy(input_data_1)
result = client.infer(model_name, inputs)
output0 = result.as_numpy('OUTPUT0')
output1 = result.as_numpy('OUTPUT1')
self.assertIsNotNone(output0)
self.assertIsNotNone(output1)
with self._shm_leak_detector.Probe() as shm_probe:
with httpclient.InferenceServerClient("localhost:8000") as client:
input_data_0 = np.random.random(shape).astype(np.float32)
input_data_1 = np.random.random(shape).astype(np.float32)
inputs = [
httpclient.InferInput(
"INPUT0", input_data_0.shape,
np_to_triton_dtype(input_data_0.dtype)),
httpclient.InferInput(
"INPUT1", input_data_1.shape,
np_to_triton_dtype(input_data_1.dtype))
]
inputs[0].set_data_from_numpy(input_data_0)
inputs[1].set_data_from_numpy(input_data_1)
result = client.infer(model_name, inputs)
output0 = result.as_numpy('OUTPUT0')
output1 = result.as_numpy('OUTPUT1')
self.assertIsNotNone(output0)
self.assertIsNotNone(output1)

self.assertTrue(np.allclose(output0, 2 * input_data_0))
self.assertTrue(np.allclose(output1, 2 * input_data_1))
self.assertTrue(np.allclose(output0, 2 * input_data_0))
self.assertTrue(np.allclose(output1, 2 * input_data_1))


if __name__ == '__main__':
52 changes: 29 additions & 23 deletions qa/L0_backend_python/io/io_test.py
@@ -1,4 +1,4 @@
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
# Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
@@ -29,6 +29,7 @@
sys.path.append("../../common")

import test_util as tu
import shm_util
import tritonclient.http as httpclient
from tritonclient.utils import *
import numpy as np
@@ -37,30 +38,35 @@

class IOTest(tu.TestResultCollector):

def setUp(self):
self._shm_leak_detector = shm_util.ShmLeakDetector()

def test_ensemble_io(self):
model_name = "ensemble_io"
with httpclient.InferenceServerClient("localhost:8000") as client:
input0 = np.random.random([1000]).astype(np.float32)
for model_1_in_gpu in [True, False]:
for model_2_in_gpu in [True, False]:
for model_3_in_gpu in [True, False]:
gpu_output = np.asarray(
[model_1_in_gpu, model_2_in_gpu, model_3_in_gpu],
dtype=bool)
inputs = [
httpclient.InferInput(
"INPUT0", input0.shape,
np_to_triton_dtype(input0.dtype)),
httpclient.InferInput(
"GPU_OUTPUT", gpu_output.shape,
np_to_triton_dtype(gpu_output.dtype))
]
inputs[0].set_data_from_numpy(input0)
inputs[1].set_data_from_numpy(gpu_output)
result = client.infer(model_name, inputs)
output0 = result.as_numpy('OUTPUT0')
self.assertIsNotNone(output0)
self.assertTrue(np.all(output0 == input0))
with self._shm_leak_detector.Probe() as shm_probe:
with httpclient.InferenceServerClient("localhost:8000") as client:
input0 = np.random.random([1000]).astype(np.float32)
for model_1_in_gpu in [True, False]:
for model_2_in_gpu in [True, False]:
for model_3_in_gpu in [True, False]:
gpu_output = np.asarray([
model_1_in_gpu, model_2_in_gpu, model_3_in_gpu
],
dtype=bool)
inputs = [
httpclient.InferInput(
"INPUT0", input0.shape,
np_to_triton_dtype(input0.dtype)),
httpclient.InferInput(
"GPU_OUTPUT", gpu_output.shape,
np_to_triton_dtype(gpu_output.dtype))
]
inputs[0].set_data_from_numpy(input0)
inputs[1].set_data_from_numpy(gpu_output)
result = client.infer(model_name, inputs)
output0 = result.as_numpy('OUTPUT0')
self.assertIsNotNone(output0)
self.assertTrue(np.all(output0 == input0))


if __name__ == '__main__':
156 changes: 82 additions & 74 deletions qa/L0_backend_python/lifecycle/lifecycle_test.py
@@ -1,4 +1,4 @@
# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
# Copyright 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
@@ -28,6 +28,7 @@
sys.path.append("../../common")

import test_util as tu
import shm_util
from functools import partial
import tritonclient.http as httpclient
import tritonclient.grpc as grpcclient
@@ -52,6 +53,9 @@ def callback(user_data, result, error):

class LifecycleTest(tu.TestResultCollector):

def setUp(self):
self._shm_leak_detector = shm_util.ShmLeakDetector()

def test_batch_error(self):
# The execute_error model returns an error for the first request and
# successfully processes the second request. This is making sure that
Expand All @@ -63,87 +67,91 @@ def test_batch_error(self):
triton_client = grpcclient.InferenceServerClient("localhost:8001")
triton_client.start_stream(callback=partial(callback, user_data))

input_datas = []
for i in range(number_of_requests):
input_data = np.random.randn(*shape).astype(np.float32)
input_datas.append(input_data)
inputs = [
grpcclient.InferInput("IN", input_data.shape,
np_to_triton_dtype(input_data.dtype))
]
inputs[0].set_data_from_numpy(input_data)
triton_client.async_stream_infer(model_name=model_name,
inputs=inputs)

for i in range(number_of_requests):
result = user_data._completed_requests.get()
if i == 0:
self.assertIs(type(result), InferenceServerException)
continue

print(result)
output_data = result.as_numpy("OUT")
self.assertIsNotNone(output_data, "error: expected 'OUT'")
self.assertTrue(
np.array_equal(output_data, input_datas[i]),
"error: expected output {} to match input {}".format(
output_data, input_datas[i]))
with self._shm_leak_detector.Probe() as shm_probe:
input_datas = []
for i in range(number_of_requests):
input_data = np.random.randn(*shape).astype(np.float32)
input_datas.append(input_data)
inputs = [
grpcclient.InferInput("IN", input_data.shape,
np_to_triton_dtype(input_data.dtype))
]
inputs[0].set_data_from_numpy(input_data)
triton_client.async_stream_infer(model_name=model_name,
inputs=inputs)

for i in range(number_of_requests):
result = user_data._completed_requests.get()
if i == 0:
self.assertIs(type(result), InferenceServerException)
continue

print(result)
output_data = result.as_numpy("OUT")
self.assertIsNotNone(output_data, "error: expected 'OUT'")
self.assertTrue(
np.array_equal(output_data, input_datas[i]),
"error: expected output {} to match input {}".format(
output_data, input_datas[i]))

def test_infer_pymodel_error(self):
model_name = "wrong_model"
shape = [2, 2]
with httpclient.InferenceServerClient("localhost:8000") as client:
input_data = (16384 * np.random.randn(*shape)).astype(np.uint32)
inputs = [
httpclient.InferInput("IN", input_data.shape,
np_to_triton_dtype(input_data.dtype))
]
inputs[0].set_data_from_numpy(input_data)
try:
client.infer(model_name, inputs)
except InferenceServerException as e:
print(e.message())
self.assertTrue(
e.message().startswith(
"Failed to process the request(s) for model instance"),
"Exception message is not correct")
else:
self.assertTrue(
False,
"Wrong exception raised or did not raise an exception")

with self._shm_leak_detector.Probe() as shm_probe:
with httpclient.InferenceServerClient("localhost:8000") as client:
input_data = (16384 * np.random.randn(*shape)).astype(np.uint32)
inputs = [
httpclient.InferInput("IN", input_data.shape,
np_to_triton_dtype(input_data.dtype))
]
inputs[0].set_data_from_numpy(input_data)
try:
client.infer(model_name, inputs)
except InferenceServerException as e:
print(e.message())
self.assertTrue(
e.message().startswith(
"Failed to process the request(s) for model instance"
), "Exception message is not correct")
else:
self.assertTrue(
False,
"Wrong exception raised or did not raise an exception")

def test_incorrect_execute_return(self):
model_name = 'execute_return_error'
shape = [1, 1]
with httpclient.InferenceServerClient("localhost:8000") as client:
input_data = (5 * np.random.randn(*shape)).astype(np.float32)
inputs = [
httpclient.InferInput("INPUT", input_data.shape,
np_to_triton_dtype(input_data.dtype))
]
inputs[0].set_data_from_numpy(input_data)

# The first request to this model will return None.
with self.assertRaises(InferenceServerException) as e:
client.infer(model_name, inputs)

self.assertTrue(
str(e.exception).startswith(
"Failed to process the request(s) for model instance "
"'execute_return_error_0', message: Expected a list in the "
"execute return"), "Exception message is not correct.")

# The second inference request will return a list of None objects
# instead of Python InferenceResponse objects.
with self.assertRaises(InferenceServerException) as e:
client.infer(model_name, inputs)

self.assertTrue(
str(e.exception).startswith(
"Failed to process the request(s) for model instance "
"'execute_return_error_0', message: Expected an "
"'InferenceResponse' object in the execute function return"
" list"), "Exception message is not correct.")
with self._shm_leak_detector.Probe() as shm_probe:
with httpclient.InferenceServerClient("localhost:8000") as client:
input_data = (5 * np.random.randn(*shape)).astype(np.float32)
inputs = [
httpclient.InferInput("INPUT", input_data.shape,
np_to_triton_dtype(input_data.dtype))
]
inputs[0].set_data_from_numpy(input_data)

# The first request to this model will return None.
with self.assertRaises(InferenceServerException) as e:
client.infer(model_name, inputs)

self.assertTrue(
str(e.exception).startswith(
"Failed to process the request(s) for model instance "
"'execute_return_error_0', message: Expected a list in the "
"execute return"), "Exception message is not correct.")

# The second inference request will return a list of None objects
# instead of Python InferenceResponse objects.
with self.assertRaises(InferenceServerException) as e:
client.infer(model_name, inputs)

self.assertTrue(
str(e.exception).startswith(
"Failed to process the request(s) for model instance "
"'execute_return_error_0', message: Expected an "
"'InferenceResponse' object in the execute function return"
" list"), "Exception message is not correct.")


if __name__ == '__main__':