Commit

Add shared memory leak detection to Python backend tests (triton-inference-server#4122)

* Add shared memory leak detection to Python backend tests

* Add BLS tests to shared memory leak detection

* Remove delays for the tests that involved growing the region

* Update copyrights
Tabrizian committed Apr 1, 2022
1 parent 99349b1 commit a40edf1
Showing 10 changed files with 423 additions and 296 deletions.
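
Every test touched by this commit follows the same pattern: the inference calls are wrapped in a shm_util.ShmLeakDetector().Probe() context manager so that any growth in shared memory across the call is reported as a failure. shm_util.py itself is not among the hunks shown on this page, so the sketch below is only an illustration of that before/after probing idea, built from the Python standard library; the class name SimpleShmLeakDetector, the /dev/shm scan, and the assertion are assumptions rather than the repository's actual implementation.

# Illustrative sketch only -- not the ShmLeakDetector shipped in qa/common/shm_util.py.
# Idea: snapshot shared memory usage before the test body runs, snapshot it again
# afterwards, and flag any growth as a potential leak.
import glob
import os
from contextlib import contextmanager


def _shm_bytes(root="/dev/shm"):
    # Sum the sizes of the files currently backing the shared memory regions.
    return sum(
        os.path.getsize(path)
        for path in glob.glob(os.path.join(root, "*"))
        if os.path.isfile(path))


class SimpleShmLeakDetector:

    @contextmanager
    def Probe(self):
        before = _shm_bytes()
        yield
        after = _shm_bytes()
        assert after <= before, \
            "possible shared memory leak: {} bytes before, {} bytes after".format(
                before, after)

A test then wraps its client calls in "with self._shm_leak_detector.Probe():", which is exactly the shape the diffs below adopt.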
11 changes: 11 additions & 0 deletions Dockerfile.QA
@@ -154,8 +154,19 @@ RUN cd ${TRITONTMP_DIR}/tritonbuild/identity && \
-DTRITON_BACKEND_REPO_TAG:STRING=${TRITON_BACKEND_REPO_TAG} .. && \
make -j16 install

# L0_backend_python tests require triton_shm_monitor
RUN cd ${TRITONTMP_DIR}/tritonbuild/python && \
rm -rf install build && mkdir build && cd build && \
cmake -DCMAKE_INSTALL_PREFIX:PATH=${TRITONTMP_DIR}/tritonbuild/python/install \
-DTRITON_COMMON_REPO_TAG:STRING=${TRITON_COMMON_REPO_TAG} \
-DTRITON_CORE_REPO_TAG:STRING=${TRITON_CORE_REPO_TAG} \
-DTRITON_BACKEND_REPO_TAG:STRING=${TRITON_BACKEND_REPO_TAG} .. && \
make -j18 triton-shm-monitor install

RUN cp ${TRITONTMP_DIR}/tritonbuild/identity/install/backends/identity/libtriton_identity.so \
qa/L0_lifecycle/. && \
cp ${TRITONTMP_DIR}/tritonbuild/python/install/backends/python/triton_shm_monitor*.so \
qa/common/. && \
mkdir -p qa/L0_perf_nomodel/custom_models/custom_zero_1_float32/1 && \
mkdir -p qa/L0_perf_pyclients/custom_models/custom_zero_1_int32/1 && \
mkdir -p qa/L0_infer_shm && \
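
The build step above copies triton_shm_monitor*.so into qa/common/, which is where the Python test utilities load it from. As a quick sanity check, one could confirm that the copied extension actually imports before running the L0_backend_python suites. The snippet below is illustrative only and is not part of this commit; it assumes it is run from the QA tree root, and it assumes nothing about the module's API beyond its name:

# Hypothetical sanity check: verify that the triton_shm_monitor extension copied
# into qa/common/ can be imported. The module name comes from the Dockerfile above;
# the working-directory handling is an assumption.
import importlib
import sys

sys.path.append("qa/common")

try:
    importlib.import_module("triton_shm_monitor")
    print("triton_shm_monitor imported successfully")
except ImportError as exc:
    print("triton_shm_monitor is not importable: {}".format(exc))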
84 changes: 47 additions & 37 deletions qa/L0_backend_python/ensemble/ensemble_test.py
@@ -1,4 +1,4 @@
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
# Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
@@ -28,6 +28,7 @@
sys.path.append("../../common")

import test_util as tu
import shm_util
import tritonclient.http as httpclient
from tritonclient.utils import *
import numpy as np
@@ -36,49 +37,58 @@

class EnsembleTest(tu.TestResultCollector):

def setUp(self):
self._shm_leak_detector = shm_util.ShmLeakDetector()

def test_ensemble(self):
model_name = "ensemble"
shape = [16]
with httpclient.InferenceServerClient("localhost:8000") as client:
input_data_0 = np.random.random(shape).astype(np.float32)
input_data_1 = np.random.random(shape).astype(np.float32)
inputs = [
httpclient.InferInput("INPUT0", input_data_0.shape,
np_to_triton_dtype(input_data_0.dtype)),
httpclient.InferInput("INPUT1", input_data_1.shape,
np_to_triton_dtype(input_data_1.dtype))
]
inputs[0].set_data_from_numpy(input_data_0)
inputs[1].set_data_from_numpy(input_data_1)
result = client.infer(model_name, inputs)
output0 = result.as_numpy('OUTPUT0')
output1 = result.as_numpy('OUTPUT1')
self.assertIsNotNone(output0)
self.assertIsNotNone(output1)
with self._shm_leak_detector.Probe() as shm_probe:
with httpclient.InferenceServerClient("localhost:8000") as client:
input_data_0 = np.random.random(shape).astype(np.float32)
input_data_1 = np.random.random(shape).astype(np.float32)
inputs = [
httpclient.InferInput(
"INPUT0", input_data_0.shape,
np_to_triton_dtype(input_data_0.dtype)),
httpclient.InferInput(
"INPUT1", input_data_1.shape,
np_to_triton_dtype(input_data_1.dtype))
]
inputs[0].set_data_from_numpy(input_data_0)
inputs[1].set_data_from_numpy(input_data_1)
result = client.infer(model_name, inputs)
output0 = result.as_numpy('OUTPUT0')
output1 = result.as_numpy('OUTPUT1')
self.assertIsNotNone(output0)
self.assertIsNotNone(output1)

self.assertTrue(np.allclose(output0, 2 * input_data_0))
self.assertTrue(np.allclose(output1, 2 * input_data_1))
self.assertTrue(np.allclose(output0, 2 * input_data_0))
self.assertTrue(np.allclose(output1, 2 * input_data_1))

model_name = "ensemble_gpu"
with httpclient.InferenceServerClient("localhost:8000") as client:
input_data_0 = np.random.random(shape).astype(np.float32)
input_data_1 = np.random.random(shape).astype(np.float32)
inputs = [
httpclient.InferInput("INPUT0", input_data_0.shape,
np_to_triton_dtype(input_data_0.dtype)),
httpclient.InferInput("INPUT1", input_data_1.shape,
np_to_triton_dtype(input_data_1.dtype))
]
inputs[0].set_data_from_numpy(input_data_0)
inputs[1].set_data_from_numpy(input_data_1)
result = client.infer(model_name, inputs)
output0 = result.as_numpy('OUTPUT0')
output1 = result.as_numpy('OUTPUT1')
self.assertIsNotNone(output0)
self.assertIsNotNone(output1)
with self._shm_leak_detector.Probe() as shm_probe:
with httpclient.InferenceServerClient("localhost:8000") as client:
input_data_0 = np.random.random(shape).astype(np.float32)
input_data_1 = np.random.random(shape).astype(np.float32)
inputs = [
httpclient.InferInput(
"INPUT0", input_data_0.shape,
np_to_triton_dtype(input_data_0.dtype)),
httpclient.InferInput(
"INPUT1", input_data_1.shape,
np_to_triton_dtype(input_data_1.dtype))
]
inputs[0].set_data_from_numpy(input_data_0)
inputs[1].set_data_from_numpy(input_data_1)
result = client.infer(model_name, inputs)
output0 = result.as_numpy('OUTPUT0')
output1 = result.as_numpy('OUTPUT1')
self.assertIsNotNone(output0)
self.assertIsNotNone(output1)

self.assertTrue(np.allclose(output0, 2 * input_data_0))
self.assertTrue(np.allclose(output1, 2 * input_data_1))
self.assertTrue(np.allclose(output0, 2 * input_data_0))
self.assertTrue(np.allclose(output1, 2 * input_data_1))


if __name__ == '__main__':
52 changes: 29 additions & 23 deletions qa/L0_backend_python/io/io_test.py
@@ -1,4 +1,4 @@
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
# Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
@@ -29,6 +29,7 @@
sys.path.append("../../common")

import test_util as tu
import shm_util
import tritonclient.http as httpclient
from tritonclient.utils import *
import numpy as np
@@ -37,30 +38,35 @@

class IOTest(tu.TestResultCollector):

def setUp(self):
self._shm_leak_detector = shm_util.ShmLeakDetector()

def test_ensemble_io(self):
model_name = "ensemble_io"
with httpclient.InferenceServerClient("localhost:8000") as client:
input0 = np.random.random([1000]).astype(np.float32)
for model_1_in_gpu in [True, False]:
for model_2_in_gpu in [True, False]:
for model_3_in_gpu in [True, False]:
gpu_output = np.asarray(
[model_1_in_gpu, model_2_in_gpu, model_3_in_gpu],
dtype=bool)
inputs = [
httpclient.InferInput(
"INPUT0", input0.shape,
np_to_triton_dtype(input0.dtype)),
httpclient.InferInput(
"GPU_OUTPUT", gpu_output.shape,
np_to_triton_dtype(gpu_output.dtype))
]
inputs[0].set_data_from_numpy(input0)
inputs[1].set_data_from_numpy(gpu_output)
result = client.infer(model_name, inputs)
output0 = result.as_numpy('OUTPUT0')
self.assertIsNotNone(output0)
self.assertTrue(np.all(output0 == input0))
with self._shm_leak_detector.Probe() as shm_probe:
with httpclient.InferenceServerClient("localhost:8000") as client:
input0 = np.random.random([1000]).astype(np.float32)
for model_1_in_gpu in [True, False]:
for model_2_in_gpu in [True, False]:
for model_3_in_gpu in [True, False]:
gpu_output = np.asarray([
model_1_in_gpu, model_2_in_gpu, model_3_in_gpu
],
dtype=bool)
inputs = [
httpclient.InferInput(
"INPUT0", input0.shape,
np_to_triton_dtype(input0.dtype)),
httpclient.InferInput(
"GPU_OUTPUT", gpu_output.shape,
np_to_triton_dtype(gpu_output.dtype))
]
inputs[0].set_data_from_numpy(input0)
inputs[1].set_data_from_numpy(gpu_output)
result = client.infer(model_name, inputs)
output0 = result.as_numpy('OUTPUT0')
self.assertIsNotNone(output0)
self.assertTrue(np.all(output0 == input0))


if __name__ == '__main__':
156 changes: 82 additions & 74 deletions qa/L0_backend_python/lifecycle/lifecycle_test.py
@@ -1,4 +1,4 @@
# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
# Copyright 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
@@ -28,6 +28,7 @@
sys.path.append("../../common")

import test_util as tu
import shm_util
from functools import partial
import tritonclient.http as httpclient
import tritonclient.grpc as grpcclient
@@ -52,6 +53,9 @@ def callback(user_data, result, error):

class LifecycleTest(tu.TestResultCollector):

def setUp(self):
self._shm_leak_detector = shm_util.ShmLeakDetector()

def test_batch_error(self):
# The execute_error model returns an error for the first request and
# successfully processes the second request. This is making sure that
Expand All @@ -63,87 +67,91 @@ def test_batch_error(self):
triton_client = grpcclient.InferenceServerClient("localhost:8001")
triton_client.start_stream(callback=partial(callback, user_data))

input_datas = []
for i in range(number_of_requests):
input_data = np.random.randn(*shape).astype(np.float32)
input_datas.append(input_data)
inputs = [
grpcclient.InferInput("IN", input_data.shape,
np_to_triton_dtype(input_data.dtype))
]
inputs[0].set_data_from_numpy(input_data)
triton_client.async_stream_infer(model_name=model_name,
inputs=inputs)

for i in range(number_of_requests):
result = user_data._completed_requests.get()
if i == 0:
self.assertIs(type(result), InferenceServerException)
continue

print(result)
output_data = result.as_numpy("OUT")
self.assertIsNotNone(output_data, "error: expected 'OUT'")
self.assertTrue(
np.array_equal(output_data, input_datas[i]),
"error: expected output {} to match input {}".format(
output_data, input_datas[i]))
with self._shm_leak_detector.Probe() as shm_probe:
input_datas = []
for i in range(number_of_requests):
input_data = np.random.randn(*shape).astype(np.float32)
input_datas.append(input_data)
inputs = [
grpcclient.InferInput("IN", input_data.shape,
np_to_triton_dtype(input_data.dtype))
]
inputs[0].set_data_from_numpy(input_data)
triton_client.async_stream_infer(model_name=model_name,
inputs=inputs)

for i in range(number_of_requests):
result = user_data._completed_requests.get()
if i == 0:
self.assertIs(type(result), InferenceServerException)
continue

print(result)
output_data = result.as_numpy("OUT")
self.assertIsNotNone(output_data, "error: expected 'OUT'")
self.assertTrue(
np.array_equal(output_data, input_datas[i]),
"error: expected output {} to match input {}".format(
output_data, input_datas[i]))

def test_infer_pymodel_error(self):
model_name = "wrong_model"
shape = [2, 2]
with httpclient.InferenceServerClient("localhost:8000") as client:
input_data = (16384 * np.random.randn(*shape)).astype(np.uint32)
inputs = [
httpclient.InferInput("IN", input_data.shape,
np_to_triton_dtype(input_data.dtype))
]
inputs[0].set_data_from_numpy(input_data)
try:
client.infer(model_name, inputs)
except InferenceServerException as e:
print(e.message())
self.assertTrue(
e.message().startswith(
"Failed to process the request(s) for model instance"),
"Exception message is not correct")
else:
self.assertTrue(
False,
"Wrong exception raised or did not raise an exception")

with self._shm_leak_detector.Probe() as shm_probe:
with httpclient.InferenceServerClient("localhost:8000") as client:
input_data = (16384 * np.random.randn(*shape)).astype(np.uint32)
inputs = [
httpclient.InferInput("IN", input_data.shape,
np_to_triton_dtype(input_data.dtype))
]
inputs[0].set_data_from_numpy(input_data)
try:
client.infer(model_name, inputs)
except InferenceServerException as e:
print(e.message())
self.assertTrue(
e.message().startswith(
"Failed to process the request(s) for model instance"
), "Exception message is not correct")
else:
self.assertTrue(
False,
"Wrong exception raised or did not raise an exception")

def test_incorrect_execute_return(self):
model_name = 'execute_return_error'
shape = [1, 1]
with httpclient.InferenceServerClient("localhost:8000") as client:
input_data = (5 * np.random.randn(*shape)).astype(np.float32)
inputs = [
httpclient.InferInput("INPUT", input_data.shape,
np_to_triton_dtype(input_data.dtype))
]
inputs[0].set_data_from_numpy(input_data)

# The first request to this model will return None.
with self.assertRaises(InferenceServerException) as e:
client.infer(model_name, inputs)

self.assertTrue(
str(e.exception).startswith(
"Failed to process the request(s) for model instance "
"'execute_return_error_0', message: Expected a list in the "
"execute return"), "Exception message is not correct.")

# The second inference request will return a list of None objects
# instead of Python InferenceResponse objects.
with self.assertRaises(InferenceServerException) as e:
client.infer(model_name, inputs)

self.assertTrue(
str(e.exception).startswith(
"Failed to process the request(s) for model instance "
"'execute_return_error_0', message: Expected an "
"'InferenceResponse' object in the execute function return"
" list"), "Exception message is not correct.")
with self._shm_leak_detector.Probe() as shm_probe:
with httpclient.InferenceServerClient("localhost:8000") as client:
input_data = (5 * np.random.randn(*shape)).astype(np.float32)
inputs = [
httpclient.InferInput("INPUT", input_data.shape,
np_to_triton_dtype(input_data.dtype))
]
inputs[0].set_data_from_numpy(input_data)

# The first request to this model will return None.
with self.assertRaises(InferenceServerException) as e:
client.infer(model_name, inputs)

self.assertTrue(
str(e.exception).startswith(
"Failed to process the request(s) for model instance "
"'execute_return_error_0', message: Expected a list in the "
"execute return"), "Exception message is not correct.")

# The second inference request will return a list of None objects
# instead of Python InferenceResponse objects.
with self.assertRaises(InferenceServerException) as e:
client.infer(model_name, inputs)

self.assertTrue(
str(e.exception).startswith(
"Failed to process the request(s) for model instance "
"'execute_return_error_0', message: Expected an "
"'InferenceResponse' object in the execute function return"
" list"), "Exception message is not correct.")


if __name__ == '__main__':