Skip to content

Commit

Permalink
Add testing for python custom metrics API
Browse files Browse the repository at this point in the history
  • Loading branch information
krishung5 committed Apr 20, 2023
1 parent 9060065 commit fa74da3
Show file tree
Hide file tree
Showing 4 changed files with 336 additions and 0 deletions.
86 changes: 86 additions & 0 deletions qa/L0_backend_python/custom_metrics/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
#!/bin/bash
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Runs the Python backend custom metrics API unit tests: starts a local
# tritonserver with the 'custom_metrics' model, drives the in-model
# unittest suite through python_unittest.py, and verifies the results.

CLIENT_PY=../python_unittest.py
CLIENT_LOG="./client.log"
EXPECTED_NUM_TESTS="1"
TEST_RESULT_FILE='test_results.txt'
source ../../common/util.sh

TRITON_DIR=${TRITON_DIR:="/opt/tritonserver"}
SERVER=${TRITON_DIR}/bin/tritonserver
BACKEND_DIR=${TRITON_DIR}/backends
SERVER_ARGS="--model-repository=`pwd`/models --backend-directory=${BACKEND_DIR} --log-verbose=1"
SERVER_LOG="./inference_server.log"

RET=0
# Start from a clean slate: stale logs, model repo, and result files.
rm -fr *.log ./models *.txt

# Assemble the model repository for the test model.
mkdir -p models/custom_metrics/1/
cp ../../python_models/custom_metrics/model.py models/custom_metrics/1/
cp ../../python_models/custom_metrics/config.pbtxt models/custom_metrics

run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e

export MODEL_NAME='custom_metrics'
python3 $CLIENT_PY >> $CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    # BUGFIX: this message previously read "'bls' $BLS_KIND test FAILED" --
    # a copy/paste leftover from the BLS test ($BLS_KIND is never set here).
    echo -e "\n***\n*** Custom Metrics test FAILED. \n***"
    cat $CLIENT_LOG
    RET=1
else
    check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
    if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
fi

set -e

kill $SERVER_PID
wait $SERVER_PID

if [ $RET -eq 1 ]; then
    cat $CLIENT_LOG
    cat $SERVER_LOG
    echo -e "\n***\n*** Custom Metrics test FAILED. \n***"
else
    echo -e "\n***\n*** Custom Metrics test PASSED. \n***"
fi

exit $RET
5 changes: 5 additions & 0 deletions qa/L0_backend_python/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,11 @@ if [ "$TEST_JETSON" == "0" ]; then
fi
fi

# Run the custom metrics subtest in a subshell so the `cd` does not leak
# into the rest of this script; -x traces commands, -e aborts on error.
(cd custom_metrics && bash -ex test.sh)
if [ $? -ne 0 ]; then
RET=1
fi

if [ $RET -eq 0 ]; then
echo -e "\n***\n*** Test Passed\n***"
else
Expand Down
38 changes: 38 additions & 0 deletions qa/python_models/custom_metrics/config.pbtxt
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Model configuration for the 'custom_metrics' QA model, served by the
# Python backend. The model takes no inputs; it runs its embedded unit
# tests on execute() and reports the outcome in OUTPUT0.
name: "custom_metrics"
backend: "python"

output [
{
name: "OUTPUT0"
data_type: TYPE_FP32
# NOTE(review): dims is declared as [ 16 ] but the model emits a single
# element -- confirm the intended output shape.
dims: [ 16 ]
}
]

# A single CPU instance is sufficient for this test.
instance_group [{ kind: KIND_CPU }]
207 changes: 207 additions & 0 deletions qa/python_models/custom_metrics/model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import numpy as np
import unittest
import triton_python_backend_utils as pb_utils
import requests


class PBBLSMemoryTest(unittest.TestCase):
    """Unit tests for the Python backend custom metrics API.

    NOTE(review): the class name appears to be a copy/paste leftover from
    the BLS memory tests -- these tests exercise MetricFamily/Metric, not
    BLS memory. Name kept to avoid interface churn; consider renaming.

    Assumes a Triton server is serving its metrics endpoint on
    localhost:8002.
    """

    def _get_metrics(self):
        """Return the server's Prometheus metrics report as text.

        Raises requests.HTTPError if the metrics endpoint responds with an
        error status.
        """
        metrics_url = "http://localhost:8002/metrics"
        r = requests.get(metrics_url)
        r.raise_for_status()
        return r.text

    def _metric_api_helper(self, metric, kind):
        """Exercise value()/increment()/set() on `metric`.

        `kind` is 'counter' or 'gauge'; counters must reject negative
        increments and set().
        """
        # The value should be 0.0 before the test
        self.assertEqual(metric.value(), 0.0)

        # Test increment positive value
        increment = 2023.0
        metric.increment(increment)
        self.assertEqual(metric.value(), increment)

        # Test increment negative value
        decrement = -23.5
        if kind == 'counter':
            # Counter should not accept negative values
            with self.assertRaises(pb_utils.TritonModelException):
                metric.increment(decrement)
        else:
            metric.increment(decrement)
            self.assertEqual(metric.value(), increment + decrement)

        # Test set value
        value = 999.9
        if kind == 'counter':
            # Counter does not support set
            with self.assertRaises(pb_utils.TritonModelException):
                metric.set(value)
        else:
            metric.set(value)
            self.assertEqual(metric.value(), value)

    def _dup_metric_helper(self, labels=None):
        """Verify duplicate metrics reference one underlying metric.

        BUGFIX: `labels` previously used a mutable default argument ({});
        replaced with the None-sentinel idiom. Behavior is unchanged.
        """
        labels = {} if labels is None else labels
        description = "dup metric"
        metric_family = pb_utils.MetricFamily(name="test_dup_metric",
                                              description=description,
                                              kind=pb_utils.COUNTER)

        # Verify dupe metrics reference same underlying metric
        metric1 = metric_family.Metric(labels=labels)
        metric2 = metric_family.Metric(labels=labels)

        # The value should be 0 before the test
        self.assertEqual(metric1.value(), 0.0)
        self.assertEqual(metric2.value(), 0.0)

        # Increment metric 1, check metric 2 == metric 1
        increment = 7.5
        metric1.increment(increment)
        self.assertEqual(metric1.value(), metric2.value())

        # Assert custom metric/family remains when there's still a reference to it
        del metric1
        metrics = self._get_metrics()
        self.assertIn(description, metrics)

    def test_counter_e2e(self):
        """Counter metric end-to-end: API behavior plus metrics-report text."""
        metric_family = pb_utils.MetricFamily(
            name="test_counter_e2e",
            description="test metric counter kind end to end",
            kind=pb_utils.COUNTER)
        labels = {"example1": "counter_label1", "example2": "counter_label2"}
        metric = metric_family.Metric(labels=labels)
        self._metric_api_helper(metric, 'counter')

        pattern = 'test_counter_e2e{example1="counter_label1",example2="counter_label2"}'
        metrics = self._get_metrics()
        self.assertIn(pattern, metrics)

    def test_gauge_e2e(self):
        """Gauge metric end-to-end: API behavior plus metrics-report text."""
        metric_family = pb_utils.MetricFamily(
            name="test_gauge_e2e",
            description="test metric gauge kind end to end",
            kind=pb_utils.GAUGE)
        labels = {"example1": "counter_label1", "example2": "counter_label2"}
        metric = metric_family.Metric(labels=labels)
        self._metric_api_helper(metric, 'gauge')

        pattern = 'test_gauge_e2e{example1="counter_label1",example2="counter_label2"}'
        metrics = self._get_metrics()
        self.assertIn(pattern, metrics)

    def test_dup_metric_family_diff_kind(self):
        # Test that a duplicate metric family can't be added with a conflicting type/kind
        metric_family1 = pb_utils.MetricFamily(
            name="test_dup_metric_family_diff_kind",
            description="test metric family with same name but different kind",
            kind=pb_utils.COUNTER)
        with self.assertRaises(pb_utils.TritonModelException):
            # Construction itself must raise; the unused binding the original
            # code kept here has been dropped.
            pb_utils.MetricFamily(
                name="test_dup_metric_family_diff_kind",
                description=
                "test metric family with same name but different kind",
                kind=pb_utils.GAUGE)

    def test_dup_metric_family_diff_description(self):
        # Test that a duplicate metric family name will still return the
        # original metric family even if the description is changed
        metric_family1 = pb_utils.MetricFamily(
            name="test_dup_metric_family_diff_description",
            description="first description",
            kind=pb_utils.COUNTER)
        metric_family2 = pb_utils.MetricFamily(
            name="test_dup_metric_family_diff_description",
            description="second description",
            kind=pb_utils.COUNTER)
        metric2 = metric_family2.Metric()
        # Delete metric_family1 and check if metric_family2 still references it
        del metric_family1
        pattern = 'test_dup_metric_family_diff_description first description'
        metrics = self._get_metrics()
        self.assertIn(pattern, metrics)
        # The first description will be kept if adding a duplicate metric
        # family name with a different description
        pattern = 'test_dup_metric_family_diff_description second description'
        self.assertNotIn(pattern, metrics)

    def test_dup_metric_family(self):
        # Test that adding a duplicate metric family will reuse the original
        # and not add another entry to registry
        metric_family1 = pb_utils.MetricFamily(name="test_dup_metric_family",
                                               description="dup description",
                                               kind=pb_utils.COUNTER)
        metric_family2 = pb_utils.MetricFamily(name="test_dup_metric_family",
                                               description="dup description",
                                               kind=pb_utils.COUNTER)

        metric_key = "custom_metric_key"
        metric1 = metric_family1.Metric(labels={metric_key: "label1"})
        metric2 = metric_family2.Metric(labels={metric_key: "label2"})

        patterns = [
            '# HELP test_dup_metric_family dup description',
            '# TYPE test_dup_metric_family counter',
            'test_dup_metric_family{custom_metric_key="label2"} 0',
            'test_dup_metric_family{custom_metric_key="label1"} 0'
        ]
        metrics = self._get_metrics()
        for pattern in patterns:
            self.assertIn(pattern, metrics)

        del metric_family1
        del metric_family2

    def test_dup_metric_labels(self):
        # Test that adding a duplicate metric will refer to the same
        # underlying metric, and all instances will be updated
        labels = {"example1": "label1", "example2": "label2"}
        self._dup_metric_helper(labels)

    def test_dup_metric_empty_labels(self):
        # Test that adding a duplicate metric will refer to the same
        # underlying metric, and all instances will be updated
        self._dup_metric_helper()


class TritonPythonModel:
    """Test-driver model: runs the metrics unittest suite on execute().

    OUTPUT0 carries 1.0 when every unit test passed, 0.0 otherwise.
    """

    def execute(self, requests):
        """Run the unittest suite once per request.

        Returns a list with one InferenceResponse per request.
        """
        responses = []
        for _ in requests:
            # Run the unittest and store the results in InferenceResponse.
            test = unittest.main('model', exit=False)
            # BUGFIX: the output tensor was built with dtype=np.float16,
            # which conflicts with the TYPE_FP32 declared for OUTPUT0 in
            # config.pbtxt; use float32 to match the model config.
            # NOTE(review): config declares dims [ 16 ] but only one element
            # is produced -- confirm the intended output shape.
            result = np.array([test.result.wasSuccessful()], dtype=np.float32)
            responses.append(
                pb_utils.InferenceResponse(
                    [pb_utils.Tensor('OUTPUT0', result)]))
        return responses

0 comments on commit fa74da3

Please sign in to comment.