Add L0_response_stats test (triton-inference-server#4759)
* Add L0_response_stats test

* Add per response stat support for GRPC server

* Add testing for GRPC per response stat

* Add custom delay and failing inference to test

* Add no response count and test

Co-authored-by: kthui <18255193+kthui@users.noreply.github.com>
Tabrizian and kthui authored Sep 20, 2022
1 parent 2bbd868 commit 5e258d6
Showing 5 changed files with 403 additions and 1 deletion.
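
For context, a minimal sketch of how the per-response statistics introduced by this commit can be queried once the server is up. This is an illustrative snippet, assuming a local Triton server with the square_int32 model loaded, and uses the same tritonclient calls that the test below exercises:

    import tritonclient.grpc as grpcclient
    import tritonclient.http as httpclient

    # GRPC: ask for the statistics converted to a JSON-like dict.
    grpc_client = grpcclient.InferenceServerClient("localhost:8001")
    stats = grpc_client.get_inference_statistics(model_name="square_int32",
                                                 as_json=True)

    # HTTP: the statistics are returned as a parsed JSON dict already.
    http_client = httpclient.InferenceServerClient("localhost:8000")
    stats = http_client.get_inference_statistics(model_name="square_int32")

    # Each model_stats entry now carries per-response statistics plus a
    # count of requests that produced no response at all.
    model_stats = stats['model_stats'][0]
    print(model_stats['response_stats'], model_stats['no_response_count'])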
4 changes: 4 additions & 0 deletions Dockerfile.QA
@@ -247,6 +247,10 @@ RUN mkdir -p qa/L0_decoupled/models/simple_repeat/1 && \
    mkdir -p qa/L0_decoupled/models/repeat_square/1 && \
    mkdir -p qa/L0_decoupled/models/nested_square/1

RUN mkdir -p qa/L0_response_stats/models/square_int32/1 && \
    cp backends/square/libtriton_square.so \
        qa/L0_response_stats/models/square_int32/1/.

RUN cp -r qa/L0_decoupled/models qa/L0_decoupled/python_models/ && \
    cp /workspace/tritonbuild/python/examples/decoupled/repeat_model.py \
        qa/L0_decoupled/python_models/repeat_int32/1/. && \
64 changes: 64 additions & 0 deletions qa/L0_response_stats/models/square_int32/config.pbtxt
@@ -0,0 +1,64 @@
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

name: "square_int32"
backend: "square"
max_batch_size: 0
model_transaction_policy {
  decoupled: True
}
input [
  {
    name: "IN"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]
output [
  {
    name: "OUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]
parameters: {
  key: "CUSTOM_INFER_DELAY_NS"
  value: {
    string_value: "800000"
  }
}
parameters: {
  key: "CUSTOM_OUTPUT_DELAY_NS"
  value: {
    string_value: "200000"
  }
}
parameters: {
  key: "CUSTOM_FAIL_COUNT"
  value: {
    string_value: "2"
  }
}
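
The three custom parameters above drive the expectations in the test that follows: every response of a request should accumulate at least CUSTOM_INFER_DELAY_NS of compute-infer time and CUSTOM_OUTPUT_DELAY_NS of compute-output time, and the last CUSTOM_FAIL_COUNT responses of each request are forced to fail. A small sketch of the resulting arithmetic, mirroring the constants used in the test:

    # Minimum per-response durations implied by the config above (ns).
    min_infer_delay_ns = 800000    # CUSTOM_INFER_DELAY_NS
    min_output_delay_ns = 200000   # CUSTOM_OUTPUT_DELAY_NS
    num_fail_infer = 2             # CUSTOM_FAIL_COUNT

    # A successful response spends at least the sum of both delays.
    min_success_ns = min_infer_delay_ns + min_output_delay_ns  # 1,000,000 ns

    # For a request with n responses, the first n - num_fail_infer succeed
    # and the final num_fail_infer fail.
    n = 5
    success_indexes = list(range(n - num_fail_infer))   # [0, 1, 2]
    fail_indexes = list(range(n - num_fail_infer, n))   # [3, 4]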
181 changes: 181 additions & 0 deletions qa/L0_response_stats/response_stats_test.py
@@ -0,0 +1,181 @@
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from collections import defaultdict
import time
import sys

sys.path.append("../common")

import tritonclient.grpc as grpcclient
import tritonclient.http as httpclient
import test_util as tu
import queue
import numpy as np
from functools import partial
from tritonclient.utils import InferenceServerException
import unittest


class UserData:

    def __init__(self):
        self._completed_requests = queue.Queue()


def callback(user_data, result, error):
    if error:
        user_data._completed_requests.put(error)
    else:
        user_data._completed_requests.put(result)


class ResponseStatsTest(tu.TestResultCollector):

    def setUp(self):
        # We can only use the GRPC streaming interface because we are testing a
        # decoupled model.
        self._client = grpcclient.InferenceServerClient("localhost:8001")
        self._model_name = 'square_int32'
        self._user_data = UserData()
        self._client.start_stream(callback=partial(callback, self._user_data))
        self._http_client = httpclient.InferenceServerClient('localhost:8000')

    def _wait_until_responses_complete(self, number_of_responses):
        user_data = self._user_data
        recv_count = 0
        while recv_count < number_of_responses:
            data_item = user_data._completed_requests.get()
            if type(data_item) == InferenceServerException:
                raise data_item

            recv_count += 1

    def _send_request(self, number_of_responses):
        value_data = np.array([number_of_responses], dtype=np.int32)
        inputs = []
        inputs.append(grpcclient.InferInput('IN', value_data.shape, "INT32"))

        inputs[0].set_data_from_numpy(value_data)
        outputs = []
        outputs.append(grpcclient.InferRequestedOutput('OUT'))

        self._client.async_stream_infer(model_name=self._model_name,
                                        inputs=inputs,
                                        outputs=outputs)
        if number_of_responses > 0:
            self._wait_until_responses_complete(number_of_responses)
        else:
            # No responses are expected, so just give the request time to
            # finish before the statistics are checked.
            time.sleep(2)

    def _check_duration(self, duration_dict, expect_count, expect_duration_ns):
        self.assertEqual(duration_dict['count'], expect_count)
        self.assertGreaterEqual(duration_dict['ns'], expect_duration_ns)

    def _check_response_stats(self, response_dict):
        # response_dict maps the number of responses sent for a request to
        # the number of requests that produced that many responses
        clients = [self._http_client, self._client]

        for client in clients:
            if type(client) == grpcclient.InferenceServerClient:
                statistics = client.get_inference_statistics(
                    model_name=self._model_name, as_json=True)
            else:
                statistics = client.get_inference_statistics(
                    model_name=self._model_name)
            # Both clients return the statistics as a JSON-like dict here.
            model_stats = statistics['model_stats']
            self.assertEqual(len(model_stats), 1)
            response_stats = model_stats[0]['response_stats']
            no_response_count = model_stats[0]['no_response_count']
            # Requests that produced no responses are tracked separately in
            # no_response_count and do not add a response_stats entry.
            self.assertEqual(len(response_stats),
                             len([n for n in response_dict if n > 0]))

            if 0 in response_dict:
                self.assertEqual(no_response_count, response_dict[0])

            min_infer_delay_ns = 800000
            min_output_delay_ns = 200000
            num_fail_infer = 2

            for response_stat in response_stats:
                self.assertIn(len(response_stat['responses']), response_dict)
                response_count = response_dict[len(response_stat['responses'])]

                indexes = set()
                for i in range(len(response_stat['responses'])):
                    response = response_stat['responses'][i]
                    indexes.add(response['index'])
                    # All but the last num_fail_infer responses of a request
                    # succeed; the model fails the rest (CUSTOM_FAIL_COUNT).
                    if i + num_fail_infer < len(response_stat['responses']):
                        self._check_duration(
                            response['success'], response_count,
                            min_infer_delay_ns + min_output_delay_ns)
                        self._check_duration(response['compute_infer'],
                                             response_count,
                                             min_infer_delay_ns)
                        self._check_duration(response['compute_output'],
                                             response_count,
                                             min_output_delay_ns)
                        self._check_duration(response['fail'], 0, 0)
                    else:
                        self._check_duration(response['success'], 0, 0)
                        self._check_duration(response['compute_infer'], 0, 0)
                        self._check_duration(response['compute_output'], 0, 0)
                        self._check_duration(
                            response['fail'], response_count,
                            min_infer_delay_ns + min_output_delay_ns)

                expected_indexes = set(range(len(response_stat['responses'])))
                self.assertEqual(indexes, expected_indexes)

    def test_response_stats(self):
        number_of_responses = 5
        response_dict = defaultdict(int)
        response_dict[number_of_responses] += 1
        self._send_request(number_of_responses)
        self._check_response_stats(response_dict)

        number_of_responses = 6
        response_dict[number_of_responses] += 1
        self._send_request(number_of_responses)
        self._check_response_stats(response_dict)

        number_of_responses = 5
        response_dict[number_of_responses] += 1
        self._send_request(number_of_responses)
        self._check_response_stats(response_dict)

        number_of_responses = 0
        response_dict[number_of_responses] += 1
        self._send_request(number_of_responses)
        self._check_response_stats(response_dict)

    def tearDown(self):
        self._client.close()


if __name__ == '__main__':
    unittest.main()
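
For reference, an illustrative shape of the statistics structure the test asserts against, inferred from the dictionary accesses above (the values are made up, not actual server output):

    example_model_stats = {
        'no_response_count': 1,  # requests that sent no response at all
        'response_stats': [{
            'responses': [{
                'index': 0,  # position of the response within its request
                'success': {'count': 2, 'ns': 2000000},
                'fail': {'count': 0, 'ns': 0},
                'compute_infer': {'count': 2, 'ns': 1600000},
                'compute_output': {'count': 2, 'ns': 400000},
            }],
        }],
    }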
86 changes: 86 additions & 0 deletions qa/L0_response_stats/test.sh
@@ -0,0 +1,86 @@
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
echo -e "Repository version must be specified"
echo -e "\n***\n*** Test Failed\n***"
exit 1
fi
if [ ! -z "$TEST_REPO_ARCH" ]; then
REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

source ../common/util.sh

RET=0
rm -rf *.log

CLIENT_LOG="./response_stat.log"
TEST_PY=./response_stats_test.py
TEST_RESULT_FILE='test_results.txt'
SERVER_ARGS="--model-repository=`pwd`/models"
SERVER_LOG="./inference_server.log"
export TRITON_DIR=${TRITON_DIR:="/opt/tritonserver"}
SERVER=${TRITON_DIR}/bin/tritonserver

run_server
if [ "$SERVER_PID" == "0" ]; then
echo -e "\n***\n*** Failed to start $SERVER\n***"
cat $SERVER_LOG
exit 1
fi

set +e
python $TEST_PY >>$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** Response Stats test Failed\n***"
    cat $CLIENT_LOG
    RET=1
else
    check_test_results $TEST_RESULT_FILE 1
    if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    cat $SERVER_LOG
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET