
Commit

Fix perf test warmup to ensure all model instances ready (triton-inference-server#3324)

* Fix perf test warmup to ensure all model instances ready

* Disable
deadeyegoodwin authored Sep 13, 2021
1 parent e65d6c6 commit f49f7a8
Showing 4 changed files with 35 additions and 28 deletions.
10 changes: 6 additions & 4 deletions qa/L0_perf_deeprecommender/run_test.sh
@@ -1,5 +1,5 @@
#!/bin/bash
-# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
+# Copyright 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
@@ -80,12 +80,14 @@ for STATIC_BATCH in $STATIC_BATCH_SIZES; do
set +e

# Run the model once to warm up. Some frameworks do
-# optimization on the first requests.
-$PERF_CLIENT -v -i ${PERF_CLIENT_PROTOCOL} -m $MODEL_NAME -p5000 -b${STATIC_BATCH}
+# optimization on the first requests. Must warmup similar
+# to actual run so that all instances are ready
+$PERF_CLIENT -v -i ${PERF_CLIENT_PROTOCOL} -m $MODEL_NAME -p5000 \
+    -b${STATIC_BATCH} --concurrency-range ${CONCURRENCY}

$PERF_CLIENT -v -i ${PERF_CLIENT_PROTOCOL} -m $MODEL_NAME -p5000 \
    -b${STATIC_BATCH} --concurrency-range ${CONCURRENCY} \
-    -f ${NAME}.csv >> ${NAME}.log 2>&1
+    -f ${NAME}.csv 2>&1 | tee ${NAME}.log
if (( $? != 0 )); then
RET=1
fi
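Why the warmup now mirrors the measured run: with several model instances configured, a single warmup request exercises only one instance, so the remaining instances would hit their first-request optimization cost inside the timed window. Below is a minimal standalone sketch of the pattern this commit adopts; the model name, protocol, and default values are illustrative, not taken from the repo.

#!/bin/bash
# Sketch: warm up with the SAME batch size and concurrency as the
# measured run so every model instance serves traffic before timing.
PERF_CLIENT=${PERF_CLIENT:="perf_client"}
MODEL_NAME=${MODEL_NAME:="mymodel"}   # illustrative model name
STATIC_BATCH=${STATIC_BATCH:=1}
CONCURRENCY=${CONCURRENCY:=4}

# Warmup pass: identical load shape, output discarded.
$PERF_CLIENT -v -i grpc -m $MODEL_NAME -p5000 \
    -b${STATIC_BATCH} --concurrency-range ${CONCURRENCY}

# Measured pass: same flags, plus CSV capture that is kept.
$PERF_CLIENT -v -i grpc -m $MODEL_NAME -p5000 \
    -b${STATIC_BATCH} --concurrency-range ${CONCURRENCY} \
    -f ${MODEL_NAME}.csv 2>&1 | tee ${MODEL_NAME}.log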
4 changes: 2 additions & 2 deletions qa/L0_perf_nomodel/run_test.sh
@@ -47,7 +47,7 @@ TRITON_DIR=${TRITON_DIR:="/opt/tritonserver"}
ARCH=${ARCH:="x86_64"}
SERVER=${TRITON_DIR}/bin/tritonserver
BACKEND_DIR=${TRITON_DIR}/backends
-SERVER_ARGS="--model-repository=`pwd`/models --backend-directory=${BACKEND_DIR}"
+SERVER_ARGS="--model-repository=`pwd`/models --backend-directory=${BACKEND_DIR}"
source ../common/util.sh

# DATADIR is already set in environment variable for aarch64
@@ -161,7 +161,7 @@ for BACKEND in $BACKENDS; do
-i ${PERF_CLIENT_PROTOCOL} -m ${MODEL_NAME} \
-b${STATIC_BATCH} -t${CONCURRENCY} \
--shape ${INPUT_NAME}:${SHAPE} \
-    -f ${RESULTDIR}/${NAME}.csv >> ${RESULTDIR}/${NAME}.log 2>&1
+    -f ${RESULTDIR}/${NAME}.csv 2>&1 | tee ${RESULTDIR}/${NAME}.log
if [ $? -ne 0 ]; then
RET=1
fi
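A side effect of the logging change in these scripts: with the old form "-f ... >> log 2>&1" the following $? test saw perf_client's exit code, but in a pipeline ending in "| tee log" it sees tee's exit code instead. The tests still work in practice because tee rarely fails, but bash offers two standard remedies if strictness is wanted; a sketch under that assumption (mymodel is an illustrative name, and neither remedy is part of this commit):

#!/bin/bash
# Remedy 1: pipefail makes the pipeline's status reflect any failing stage.
set -o pipefail
perf_client -v -m mymodel -p5000 2>&1 | tee run.log
if (( $? != 0 )); then
    echo "perf_client failed" >&2
fi

# Remedy 2: read the first stage's status directly from PIPESTATUS.
set +o pipefail
perf_client -v -m mymodel -p5000 2>&1 | tee run.log
if (( ${PIPESTATUS[0]} != 0 )); then
    echo "perf_client failed" >&2
fi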
10 changes: 6 additions & 4 deletions qa/L0_perf_resnet/run_test.sh
@@ -34,7 +34,7 @@ REPORTER=../common/reporter.py
TRITON_DIR=${TRITON_DIR:="/opt/tritonserver"}
SERVER=${TRITON_DIR}/bin/tritonserver
BACKEND_DIR=${TRITON_DIR}/backends
-SERVER_ARGS="--model-repository=`pwd`/models --backend-directory=${BACKEND_DIR} ${BACKEND_CONFIG}"
+SERVER_ARGS="--model-repository=`pwd`/models --backend-directory=${BACKEND_DIR} ${BACKEND_CONFIG}"
source ../common/util.sh

# Select the single GPU that will be available to the inference
@@ -60,7 +60,7 @@ if (( $SERVER_PID == 0 )); then
exit 1
fi

-# Onnx and onnx-trt models are very slow on Jetson Xavier. So we need to increase the time period for perf_client
+# Onnx and onnx-trt models are very slow on Jetson.
MEASUREMENT_WINDOW=5000
if [ "$ARCH" == "aarch64" ]; then
PERF_CLIENT=${TRITON_DIR}/clients/bin/perf_client
@@ -74,11 +74,13 @@ fi
set +e

# Run the model once to warm up. Some frameworks do optimization on the first requests.
-$PERF_CLIENT -v -i ${PERF_CLIENT_PROTOCOL} -m $MODEL_NAME -p${MEASUREMENT_WINDOW} -b${STATIC_BATCH}
+# Must warmup similar to actual run so that all instances are ready
+$PERF_CLIENT -v -i ${PERF_CLIENT_PROTOCOL} -m $MODEL_NAME -p${MEASUREMENT_WINDOW} \
+    -b${STATIC_BATCH} --concurrency-range ${CONCURRENCY}

$PERF_CLIENT -v -i ${PERF_CLIENT_PROTOCOL} -m $MODEL_NAME -p${MEASUREMENT_WINDOW} \
    -b${STATIC_BATCH} --concurrency-range ${CONCURRENCY} \
-    -f ${NAME}.csv >> ${NAME}.log 2>&1
+    -f ${NAME}.csv 2>&1 | tee ${NAME}.log
if (( $? != 0 )); then
RET=1
fi
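The warmup-then-measure pair is now duplicated across the deeprecommender, nomodel, and resnet scripts. A hypothetical shared helper, sketched here only as a refactoring idea (no such function exists in the repo's qa/common/util.sh as far as this diff shows):

# Hypothetical helper for qa/common/util.sh -- not part of this commit.
# Runs perf_client twice with identical load: first a discarded warmup
# so all model instances are ready, then the real run with CSV and log
# capture. Returns the measured run's exit status via PIPESTATUS.
run_perf_with_warmup() {
    local model=$1 window=$2 batch=$3 concurrency=$4 name=$5
    $PERF_CLIENT -v -i ${PERF_CLIENT_PROTOCOL} -m ${model} -p${window} \
        -b${batch} --concurrency-range ${concurrency}
    $PERF_CLIENT -v -i ${PERF_CLIENT_PROTOCOL} -m ${model} -p${window} \
        -b${batch} --concurrency-range ${concurrency} \
        -f ${name}.csv 2>&1 | tee ${name}.log
    return ${PIPESTATUS[0]}
}

Each call site would then collapse to a single line such as: run_perf_with_warmup ${MODEL_NAME} 5000 ${STATIC_BATCH} ${CONCURRENCY} ${NAME} || RET=1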
39 changes: 21 additions & 18 deletions qa/L0_perf_resnet/test.sh
@@ -202,22 +202,25 @@ for MODEL_NAME in $OPTIMIZED_MODEL_NAMES; do
done
done

+# FIXME Disable the following due to
+# https://jirasw.nvidia.com/browse/DLIS-2933.
+#
# Needs this additional test configuration for comparing against TFS.
-if [ "$ARCH" == "x86_64" ]; then
-    MODEL_NAME=${TF_MODEL_NAME}
-    REPO=$REPODIR/perf_model_store
-    STATIC_BATCH=128
-    INSTANCE_CNT=1
-    CONCURRENCY=1
-    FRAMEWORK=$(echo ${MODEL_NAME} | cut -d '_' -f 3)
-    MODEL_NAME=${MODEL_NAME} \
-        MODEL_FRAMEWORK=${FRAMEWORK} \
-        MODEL_PATH="$REPO/${MODEL_NAME}" \
-        STATIC_BATCH=${STATIC_BATCH} \
-        PERF_CLIENT_PROTOCOL="grpc" \
-        INSTANCE_CNT=${INSTANCE_CNT} \
-        CONCURRENCY=${CONCURRENCY} \
-        ARCH=${ARCH} \
-        BACKEND_CONFIG=" --backend-config=tensorflow,version=2" \
-        bash -x run_test.sh
-fi
+#if [ "$ARCH" == "x86_64" ]; then
+#    MODEL_NAME=${TF_MODEL_NAME}
+#    REPO=$REPODIR/perf_model_store
+#    STATIC_BATCH=128
+#    INSTANCE_CNT=1
+#    CONCURRENCY=1
+#    FRAMEWORK=$(echo ${MODEL_NAME} | cut -d '_' -f 3)
+#    MODEL_NAME=${MODEL_NAME} \
+#        MODEL_FRAMEWORK=${FRAMEWORK} \
+#        MODEL_PATH="$REPO/${MODEL_NAME}" \
+#        STATIC_BATCH=${STATIC_BATCH} \
+#        PERF_CLIENT_PROTOCOL="grpc" \
+#        INSTANCE_CNT=${INSTANCE_CNT} \
+#        CONCURRENCY=${CONCURRENCY} \
+#        ARCH=${ARCH} \
+#        BACKEND_CONFIG=" --backend-config=tensorflow,version=2" \
+#        bash -x run_test.sh
+#fi
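For context on the disabled block: test.sh configures run_test.sh entirely through one-shot environment assignments, which scope each variable to that single invocation. A minimal sketch of both sides of that contract (the default values on the callee side are illustrative, not read from the repo):

# Caller side (pattern used in test.sh): variables live only for this call.
MODEL_NAME=${TF_MODEL_NAME} \
    STATIC_BATCH=128 \
    CONCURRENCY=1 \
    PERF_CLIENT_PROTOCOL="grpc" \
    bash -x run_test.sh

# Callee side (pattern used in run_test.sh): consume with defaults so
# the script can also run standalone.
MODEL_NAME=${MODEL_NAME:="mymodel"}
STATIC_BATCH=${STATIC_BATCH:=1}
CONCURRENCY=${CONCURRENCY:=1}
PERF_CLIENT_PROTOCOL=${PERF_CLIENT_PROTOCOL:="grpc"}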
