#!/bin/bash
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# ===========================================================================
# qa/L0_backend_python/custom_metrics/test.sh  (new file in this patch)
#
# Stages the custom_metrics unit-test model, starts tritonserver, and runs
# the generic python_unittest.py client against it.  The model itself runs a
# unittest suite exercising the Python custom-metrics API.
# ===========================================================================

CLIENT_PY=../python_unittest.py
CLIENT_LOG="./client.log"
EXPECTED_NUM_TESTS="1"
TEST_RESULT_FILE='test_results.txt'
source ../../common/util.sh

TRITON_DIR=${TRITON_DIR:="/opt/tritonserver"}
SERVER=${TRITON_DIR}/bin/tritonserver
BACKEND_DIR=${TRITON_DIR}/backends
SERVER_ARGS="--model-repository=`pwd`/models --backend-directory=${BACKEND_DIR} --log-verbose=1"
SERVER_LOG="./inference_server.log"

RET=0
# Start from a clean slate: logs, the staged model repo, and result files.
rm -fr *.log ./models *.txt

# Stage the unit-test model into the repository the server will load.
mkdir -p models/custom_metrics/1/
cp ../../python_models/custom_metrics/model.py models/custom_metrics/1/
cp ../../python_models/custom_metrics/config.pbtxt models/custom_metrics

run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e

# python_unittest.py discovers the suite via the MODEL_NAME env var.
export MODEL_NAME='custom_metrics'
python3 $CLIENT_PY >> $CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** 'Custom Metrics' test FAILED. \n***"
    cat $CLIENT_LOG
    RET=1
else
    check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
    if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
fi

set -e

kill $SERVER_PID
wait $SERVER_PID

if [ $RET -eq 1 ]; then
    cat $CLIENT_LOG
    cat $SERVER_LOG
    echo -e "\n***\n*** Custom Metrics test FAILED. \n***"
else
    echo -e "\n***\n*** Custom Metrics test PASSED. \n***"
fi

exit $RET

# ===========================================================================
# Section appended to qa/L0_backend_python/examples/test.sh by this patch.
#
# FIX(review): the original hunk set RET=1 on a failed server start but then
# fell through, running the client against a dead server and finally calling
# `kill $SERVER_PID` with SERVER_PID=0 — `kill 0` signals the entire process
# group, which would kill the test harness itself.  The client run and the
# kill/wait are now guarded behind a successful start.
# ===========================================================================

# Custom Metrics
CLIENT_LOG="./custom_metrics_client.log"
mkdir -p models/custom_metrics/1
cp examples/custom_metrics/model.py models/custom_metrics/1/model.py
cp examples/custom_metrics/config.pbtxt models/custom_metrics/config.pbtxt
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    RET=1
else
    set +e
    python3 examples/custom_metrics/client.py > $CLIENT_LOG
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Failed to verify Custom Metrics example. \n***"
        RET=1
    fi

    # The example client prints PASS on success; require it in the log.
    grep "PASS" $CLIENT_LOG
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** Failed to verify Custom Metrics example. \n***"
        cat $CLIENT_LOG
        RET=1
    fi
    set -e

    kill $SERVER_PID
    wait $SERVER_PID
fi

# ===========================================================================
# Lines added to qa/L0_backend_python/test.sh by this patch: run the
# custom_metrics sub-test in its own directory and propagate its failure.
# ===========================================================================

(cd custom_metrics && bash -ex test.sh)
if [ $? -ne 0 ]; then
    RET=1
fi
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Model configuration for the Python-backend custom-metrics unit-test model
# (qa/python_models/custom_metrics/model.py).  The model takes no inputs;
# execute() runs a unittest suite and reports its pass/fail result via
# OUTPUT0.
name: "custom_metrics"
backend: "python"

output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    # NOTE(review): the model emits a single-element result; dims [ 16 ]
    # follows the python_unittest model convention — confirm the backend
    # does not enforce this shape.
    dims: [ 16 ]
  }
]

# Three CPU instances so the metrics APIs are exercised concurrently from
# multiple model instances sharing one server metrics registry.
instance_group [
  {
    count: 3
    kind: KIND_CPU
  }
]
import unittest

import numpy as np
import requests

import triton_python_backend_utils as pb_utils


class PBCustomMetricsTest(unittest.TestCase):
    """In-server unit tests for the Python backend custom-metrics API.

    The suite is executed inside the model's ``execute`` (see
    ``TritonPythonModel`` below) so that ``pb_utils.MetricFamily`` /
    ``Metric`` run in a real backend stub process, and the results are
    cross-checked against the server's Prometheus metrics endpoint.
    """

    def _get_metrics(self):
        """Scrape and return the server's Prometheus metrics text.

        Assumes the default metrics port 8002 on localhost; raises
        ``requests.HTTPError`` on a non-2xx response.
        """
        metrics_url = "http://localhost:8002/metrics"
        r = requests.get(metrics_url)
        r.raise_for_status()
        return r.text

    def _metric_api_helper(self, metric, kind):
        """Exercise value()/increment()/set() for a counter or gauge.

        Args:
            metric: a freshly created ``pb_utils.Metric`` (value must be 0).
            kind: ``'counter'`` or ``'gauge'`` — counters must reject
                negative increments and ``set()``.
        """
        # Adding logger to test if custom metrics and logging work together
        # as they use the same message queue.
        logger = pb_utils.Logger

        # The value should be 0.0 before the test
        self.assertEqual(metric.value(), 0.0)

        # Test increment positive value
        increment = 2023.0
        metric.increment(increment)
        self.assertEqual(metric.value(), increment)
        logger.log_info("Incremented metric to : {}".format(metric.value()))

        # Test increment negative value
        decrement = -23.5
        if kind == 'counter':
            # Counter should not accept negative values
            with self.assertRaises(pb_utils.TritonModelException):
                metric.increment(decrement)
        else:
            metric.increment(decrement)
            self.assertEqual(metric.value(), increment + decrement)
            logger.log_info(
                "Decremented metric to : {}".format(metric.value()))

        # Test set value
        value = 999.9
        if kind == 'counter':
            # Counter does not support set
            with self.assertRaises(pb_utils.TritonModelException):
                metric.set(value)
        else:
            metric.set(value)
            self.assertEqual(metric.value(), value)
            logger.log_info("Set metric to : {}".format(metric.value()))

    def _dup_metric_helper(self, labels=None):
        """Verify duplicate Metric objects share one underlying metric.

        Args:
            labels: label dict passed to both ``Metric`` constructors;
                ``None`` means no labels.
        """
        # FIX: the original used a mutable default argument (labels={}),
        # a classic Python anti-pattern — the dict is shared across calls.
        if labels is None:
            labels = {}

        # Adding logger to test if custom metrics and logging work together
        # as they use the same message queue.
        logger = pb_utils.Logger

        description = "dup metric"
        metric_family = pb_utils.MetricFamily(
            name="test_dup_metric",
            description=description,
            kind=pb_utils.MetricFamily.COUNTER)

        # Verify dupe metrics reference same underlying metric
        metric1 = metric_family.Metric(labels=labels)
        metric2 = metric_family.Metric(labels=labels)

        # The value should be 0 before the test
        self.assertEqual(metric1.value(), 0.0)
        self.assertEqual(metric2.value(), 0.0)

        # Increment metric 1, check metric 2 == metric 1
        increment = 7.5
        metric1.increment(increment)
        self.assertEqual(metric1.value(), metric2.value())
        logger.log_info("Incremented metric1 to : {}".format(metric1.value()))
        logger.log_info("Incremented metric2 to : {}".format(metric2.value()))

        # Assert custom metric/family remains when there's still a
        # reference to it
        del metric1
        metrics = self._get_metrics()
        self.assertIn(description, metrics)

    def test_counter_e2e(self):
        """Counter kind end to end: API behavior plus Prometheus export."""
        metric_family = pb_utils.MetricFamily(
            name="test_counter_e2e",
            description="test metric counter kind end to end",
            kind=pb_utils.MetricFamily.COUNTER)
        labels = {"example1": "counter_label1", "example2": "counter_label2"}
        metric = metric_family.Metric(labels=labels)
        self._metric_api_helper(metric, 'counter')

        # The exported sample must carry both labels in sorted-key order.
        pattern = ('test_counter_e2e{example1="counter_label1",'
                   'example2="counter_label2"}')
        metrics = self._get_metrics()
        self.assertIn(pattern, metrics)

    def test_gauge_e2e(self):
        """Gauge kind end to end: API behavior plus Prometheus export."""
        metric_family = pb_utils.MetricFamily(
            name="test_gauge_e2e",
            description="test metric gauge kind end to end",
            kind=pb_utils.MetricFamily.GAUGE)
        labels = {"example1": "counter_label1", "example2": "counter_label2"}
        metric = metric_family.Metric(labels=labels)
        self._metric_api_helper(metric, 'gauge')

        pattern = ('test_gauge_e2e{example1="counter_label1",'
                   'example2="counter_label2"}')
        metrics = self._get_metrics()
        self.assertIn(pattern, metrics)

    def test_dup_metric_family_diff_kind(self):
        """A family name cannot be re-registered with a different kind."""
        metric_family1 = pb_utils.MetricFamily(
            name="test_dup_metric_family_diff_kind",
            description="test metric family with same name but different kind",
            kind=pb_utils.MetricFamily.COUNTER)
        # FIX: the original asserted `assertIsNone(metric_family2)` inside
        # this `with` block, after the raising constructor call — that line
        # was unreachable dead code (the exception aborts the block first).
        with self.assertRaises(pb_utils.TritonModelException):
            pb_utils.MetricFamily(
                name="test_dup_metric_family_diff_kind",
                description=
                "test metric family with same name but different kind",
                kind=pb_utils.MetricFamily.GAUGE)

        self.assertIsNotNone(metric_family1)

    def test_dup_metric_family_diff_description(self):
        """A duplicate family name keeps the original description."""
        # Test that a duplicate metric family name will still return the
        # original metric family even if the description is changed
        metric_family1 = pb_utils.MetricFamily(
            name="test_dup_metric_family_diff_description",
            description="first description",
            kind=pb_utils.MetricFamily.COUNTER)
        metric_family2 = pb_utils.MetricFamily(
            name="test_dup_metric_family_diff_description",
            description="second description",
            kind=pb_utils.MetricFamily.COUNTER)

        metric2 = metric_family2.Metric()
        self.assertEqual(metric2.value(), 0)

        # Delete metric_family1 and check if metric_family2 still
        # references it
        del metric_family1
        pattern = 'test_dup_metric_family_diff_description first description'
        metrics = self._get_metrics()
        self.assertIn(pattern, metrics)

        # The first description will be kept if adding a duplicate metric
        # family name with a different description
        pattern = 'test_dup_metric_family_diff_description second description'
        self.assertNotIn(pattern, metrics)

    def test_dup_metric_family(self):
        """Duplicate family registration reuses the original registry entry."""
        metric_family1 = pb_utils.MetricFamily(
            name="test_dup_metric_family",
            description="dup description",
            kind=pb_utils.MetricFamily.COUNTER)
        metric_family2 = pb_utils.MetricFamily(
            name="test_dup_metric_family",
            description="dup description",
            kind=pb_utils.MetricFamily.COUNTER)

        metric_key = "custom_metric_key"
        metric1 = metric_family1.Metric(labels={metric_key: "label1"})
        metric2 = metric_family2.Metric(labels={metric_key: "label2"})

        self.assertEqual(metric1.value(), 0)
        self.assertEqual(metric2.value(), 0)

        # One HELP/TYPE pair, two labeled samples — i.e. a single family.
        patterns = [
            '# HELP test_dup_metric_family dup description',
            '# TYPE test_dup_metric_family counter',
            'test_dup_metric_family{custom_metric_key="label2"} 0',
            'test_dup_metric_family{custom_metric_key="label1"} 0'
        ]
        metrics = self._get_metrics()
        for pattern in patterns:
            self.assertIn(pattern, metrics)

    def test_dup_metric_labels(self):
        # Test that adding a duplicate metric will refer to the same
        # underlying metric, and all instances will be updated
        labels = {"example1": "label1", "example2": "label2"}
        self._dup_metric_helper(labels)

    def test_dup_metric_empty_labels(self):
        # Test that adding a duplicate metric will refer to the same
        # underlying metric, and all instances will be updated
        self._dup_metric_helper()


class TritonPythonModel:
    """Model entry point: runs the suite above once per request."""

    def execute(self, requests):
        """Run the unittest suite and report overall success in OUTPUT0.

        Returns one InferenceResponse per request whose OUTPUT0 holds 1.0
        when every test passed and 0.0 otherwise.
        """
        responses = []
        for _ in requests:
            # Run the unittest and store the results in InferenceResponse.
            test = unittest.main('model', exit=False)
            responses.append(
                pb_utils.InferenceResponse([
                    pb_utils.Tensor(
                        'OUTPUT0',
                        # FIX: use float32 to match OUTPUT0's declared
                        # TYPE_FP32 in config.pbtxt (was np.float16).
                        np.array([test.result.wasSuccessful()],
                                 dtype=np.float32))
                ]))
        return responses