Skip to content

Commit

Permalink
updated qa tests to expect the new max_batch_size (triton-inference-s…
Browse files Browse the repository at this point in the history
…erver#4092)

* updated qa tests to expect the new max_batch_size

default-max-batch-size is now a global backend config

documented default-max-batch-size feature

* parsing backend config options is now global backend aware
  • Loading branch information
nv-kmcgill53 authored Apr 13, 2022
1 parent db3ca6a commit 0df01c5
Show file tree
Hide file tree
Showing 3 changed files with 185 additions and 6 deletions.
9 changes: 9 additions & 0 deletions docs/model_configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,15 @@ For models that do not support batching, or do not support batching in
the specific ways described above, *max_batch_size* must be set to
zero.

When a model is using the auto-complete feature, a default maximum batch size may
be set by using the `--backend-config=default-max-batch-size=<int>` command line argument.
This allows all models which are capable of batching and which make use of
[Auto Generated Model configuration](#auto-generated-model-configuration) to have
a default maximum batch size. This value is set to 4 by default. While none of the
officially supported Triton backends implement this feature, backend developers
may make use of this value by obtaining it from the TRITONBACKEND_BackendConfig API.


### Inputs and Outputs

Each model input and output must specify a name, datatype, and shape.
Expand Down
161 changes: 161 additions & 0 deletions qa/L0_backend_config/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
#!/bin/bash
# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# L0_backend_config: verify that --backend-config default-max-batch-size
# values (global and per-backend) are parsed and applied correctly, and
# that malformed --backend-config options make the server exit with a
# parse error.

# Repository version: taken from the container env var unless overridden
# by the first positional argument.
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
if [ ! -z "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

# Build a one-model repository containing only the model version
# directory (no config.pbtxt) so the server must auto-complete the
# model configuration.
rm -rf ./models/
mkdir -p ./models/no_config
cp -r /data/inferenceserver/${REPO_VERSION}/qa_model_repository/savedmodel_float32_float32_float32/1 ./models/no_config/


SERVER=/opt/tritonserver/bin/tritonserver
SERVER_TIMEOUT=20
source ../common/util.sh

SERVER_LOG_BASE="./inference_server"
rm -f $SERVER_LOG_BASE*

# --log-verbose=1 is required: the assertions below grep verbose log
# statements emitted while the backend config is resolved.
COMMON_ARGS="--model-repository=`pwd`/models --strict-model-config=false --log-verbose=1 "

# Malformed --backend-config options; the server must refuse to start.
NEGATIVE_PARSE_ARGS=("--backend-config=,default-max-batch-size=3 $COMMON_ARGS" \
                     "--backend-config=default-max-batch-size= $COMMON_ARGS" \
                     "--backend-config=default-max-batch-size $COMMON_ARGS" \
                     "--backend-config=tensorflow,default-max-batch-size= $COMMON_ARGS" \
                     "--backend-config=tensorflow,default-max-batch-size $COMMON_ARGS" \
)

# Well-formed options: backend-specific, global, and global+specific
# (the backend-specific value must win over the global one).
POSITIVE_DEFAULT_ARGS=$COMMON_ARGS
POSITIVE_TEST_ARGS=("--backend-config=tensorflow,default-max-batch-size=5 $COMMON_ARGS" \
                    "--backend-config=default-max-batch-size=6 $COMMON_ARGS" \
                    "--backend-config=default-max-batch-size=7 --backend-config=tensorflow,default-max-batch-size=8 $COMMON_ARGS" \
)

# These integers correspond to the expected default-max-batch-size which gets set
# in the POSITIVE_TEST_ARGS
POSITIVE_TEST_ANSWERS=(5 6 8)

RET=0

# Positive test: with no --backend-config at all, the built-in default
# of 4 must be logged.
SERVER_ARGS=$POSITIVE_DEFAULT_ARGS
SERVER_LOG=$SERVER_LOG_BASE.backend_config_positive_default.log
run_server

if [ "$SERVER_PID" == "0" ]; then
    echo -e "*** FAILED: Server failed to start $SERVER\n"
    RET=1
else
    RESULT_LOG_LINE=$(grep "Adding default backend config setting:" "$SERVER_LOG")
    if [ "$RESULT_LOG_LINE" != "" ]; then
        # Pick out the logged value of the default-max-batch-size which
        # gets passed into model creation. The log line has the shape
        # "...] ... : default-max-batch-size,<value>".
        RESOLVED_DEFAULT_MAX_BATCH_SIZE=$(awk -v line="$RESULT_LOG_LINE" 'BEGIN {split(line, a, "]"); split(a[2], b, ": "); split(b[2], c, ","); print c[2]}')

        if [ "$RESOLVED_DEFAULT_MAX_BATCH_SIZE" != "4" ]; then
            echo -e "*** FAILED: Found default-max-batch-size not equal to the expected default-max-batch-size. Expected: default-max-batch-size,4, Found: $RESOLVED_DEFAULT_MAX_BATCH_SIZE \n"
            RET=1
        fi
    else
        echo -e "*** FAILED: No log statement stating default max batch size\n"
        RET=1
    fi

    kill $SERVER_PID
    wait $SERVER_PID
fi

# Positive tests: each explicit setting must show up as an overwritten
# default with the expected value.
for ((i=0; i < ${#POSITIVE_TEST_ARGS[@]}; i++)); do
    SERVER_ARGS=${POSITIVE_TEST_ARGS[$i]}
    SERVER_LOG=$SERVER_LOG_BASE.backend_config_positive_$i.log
    run_server

    if [ "$SERVER_PID" == "0" ]; then
        echo -e "*** FAILED: Server failed to start $SERVER\n"
        RET=1
    else
        RESULT_LOG_LINE=$(grep "Found overwritten default setting:" "$SERVER_LOG")
        if [ "$RESULT_LOG_LINE" != "" ]; then
            # Pick out the logged value of the default-max-batch-size which
            # gets passed into model creation (same log shape as above).
            RESOLVED_DEFAULT_MAX_BATCH_SIZE=$(awk -v line="$RESULT_LOG_LINE" 'BEGIN {split(line, a, "]"); split(a[2], b, ": "); split(b[2], c, ","); print c[2]}')

            if [ "$RESOLVED_DEFAULT_MAX_BATCH_SIZE" != "${POSITIVE_TEST_ANSWERS[$i]}" ]; then
                echo -e "*** FAILED: Found default-max-batch-size not equal to the expected default-max-batch-size. Expected: ${POSITIVE_TEST_ANSWERS[$i]}, Found: $RESOLVED_DEFAULT_MAX_BATCH_SIZE \n"
                RET=1
            fi
        else
            echo -e "*** FAILED: No log statement stating default max batch size\n"
            RET=1
        fi

        kill $SERVER_PID
        wait $SERVER_PID
    fi
done

# Negative tests
# Failing because the --backend-config syntax is incorrect: the server
# must not start, and the log must contain the expected parse error.
for ((i=0; i < ${#NEGATIVE_PARSE_ARGS[@]}; i++)); do
    SERVER_ARGS=${NEGATIVE_PARSE_ARGS[$i]}
    SERVER_LOG=$SERVER_LOG_BASE.backend_config_negative_parse$i.log
    run_server

    if [ "$SERVER_PID" == "0" ]; then
        # -e is required so grep treats the leading "--" as a pattern,
        # not as an option.
        if ! grep -e "--backend-config option format is" "$SERVER_LOG"; then
            echo -e "*** FAILED: Expected invalid backend config parse message but found other error.\n"
            RET=1
        fi
    else
        echo -e "*** FAILED: Expected server to exit with error, but found running.\n"
        RET=1
        kill $SERVER_PID
        wait $SERVER_PID
    fi
done

# Print test outcome
if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET

21 changes: 15 additions & 6 deletions src/main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1169,24 +1169,33 @@ ParseRateLimiterResourceOption(const std::string arg)
std::tuple<std::string, std::string, std::string>
ParseBackendConfigOption(const std::string arg)
{
// Format is "<backend_name>,<setting>=<value>"
// Format is "<backend_name>,<setting>=<value>" for specific
// config/settings and "<setting>=<value>" for backend agnostic
// configs/settings
int delim_name = arg.find(",");
int delim_setting = arg.find("=", delim_name + 1);

// Check that the expected "," / "=" delimiters were found
if ((delim_name < 0) || (delim_setting < 0)) {
std::string name_string = std::string();
if (delim_name > 0) {
name_string = arg.substr(0, delim_name);
} else if (delim_name == 0) {
std::cerr << "No backend specified. --backend-config option format is "
<< "<backend name>,<setting>=<value> or "
<< "<setting>=<value>. Got " << arg << std::endl;
exit(1);
} // else global backend config

if (delim_setting < 0) {
std::cerr << "--backend-config option format is '<backend "
"name>,<setting>=<value>'. Got "
<< arg << std::endl;
exit(1);
}

std::string name_string = arg.substr(0, delim_name);
std::string setting_string =
arg.substr(delim_name + 1, delim_setting - delim_name - 1);
std::string value_string = arg.substr(delim_setting + 1);

if (name_string.empty() || setting_string.empty() || value_string.empty()) {
if (setting_string.empty() || value_string.empty()) {
std::cerr << "--backend-config option format is '<backend "
"name>,<setting>=<value>'. Got "
<< arg << std::endl;
Expand Down

0 comments on commit 0df01c5

Please sign in to comment.