
Commit

Fix perf test warmup to ensure all model instances ready (triton-inference-server#3324)

* Fix perf test warmup to ensure all model instances ready

* Disable
deadeyegoodwin authored Sep 13, 2021
1 parent e65d6c6 commit f49f7a8
Showing 4 changed files with 35 additions and 28 deletions.
10 changes: 6 additions & 4 deletions qa/L0_perf_deeprecommender/run_test.sh
@@ -1,5 +1,5 @@
#!/bin/bash
-# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
+# Copyright 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
@@ -80,12 +80,14 @@ for STATIC_BATCH in $STATIC_BATCH_SIZES; do
set +e

# Run the model once to warm up. Some frameworks do
-# optimization on the first requests.
-$PERF_CLIENT -v -i ${PERF_CLIENT_PROTOCOL} -m $MODEL_NAME -p5000 -b${STATIC_BATCH}
+# optimization on the first requests. Must warmup similar
+# to actual run so that all instances are ready
+$PERF_CLIENT -v -i ${PERF_CLIENT_PROTOCOL} -m $MODEL_NAME -p5000 \
+    -b${STATIC_BATCH} --concurrency-range ${CONCURRENCY}

$PERF_CLIENT -v -i ${PERF_CLIENT_PROTOCOL} -m $MODEL_NAME -p5000 \
    -b${STATIC_BATCH} --concurrency-range ${CONCURRENCY} \
-    -f ${NAME}.csv >> ${NAME}.log 2>&1
+    -f ${NAME}.csv 2>&1 | tee ${NAME}.log
if (( $? != 0 )); then
RET=1
fi
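Why the warmup now mirrors the measured run: with several model instances configured, a single warmup request exercises only one instance, so the remaining instances would hit their first-request optimization cost inside the timed window. Below is a minimal standalone sketch of the pattern this commit adopts; the model name, protocol, and default values are illustrative, not taken from the repo.

#!/bin/bash
# Sketch: warm up with the SAME batch size and concurrency as the
# measured run so every model instance serves traffic before timing.
PERF_CLIENT=${PERF_CLIENT:="perf_client"}
MODEL_NAME=${MODEL_NAME:="mymodel"}   # illustrative model name
STATIC_BATCH=${STATIC_BATCH:=1}
CONCURRENCY=${CONCURRENCY:=4}

# Warmup pass: identical load shape, output discarded.
$PERF_CLIENT -v -i grpc -m $MODEL_NAME -p5000 \
    -b${STATIC_BATCH} --concurrency-range ${CONCURRENCY}

# Measured pass: same flags, plus CSV capture that is kept.
$PERF_CLIENT -v -i grpc -m $MODEL_NAME -p5000 \
    -b${STATIC_BATCH} --concurrency-range ${CONCURRENCY} \
    -f ${MODEL_NAME}.csv 2>&1 | tee ${MODEL_NAME}.log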
4 changes: 2 additions & 2 deletions qa/L0_perf_nomodel/run_test.sh
@@ -47,7 +47,7 @@ TRITON_DIR=${TRITON_DIR:="/opt/tritonserver"}
ARCH=${ARCH:="x86_64"}
SERVER=${TRITON_DIR}/bin/tritonserver
BACKEND_DIR=${TRITON_DIR}/backends
-SERVER_ARGS="--model-repository=`pwd`/models --backend-directory=${BACKEND_DIR}"
+SERVER_ARGS="--model-repository=`pwd`/models --backend-directory=${BACKEND_DIR}"
source ../common/util.sh

# DATADIR is already set in environment variable for aarch64
@@ -161,7 +161,7 @@ for BACKEND in $BACKENDS; do
-i ${PERF_CLIENT_PROTOCOL} -m ${MODEL_NAME} \
-b${STATIC_BATCH} -t${CONCURRENCY} \
--shape ${INPUT_NAME}:${SHAPE} \
-    -f ${RESULTDIR}/${NAME}.csv >> ${RESULTDIR}/${NAME}.log 2>&1
+    -f ${RESULTDIR}/${NAME}.csv 2>&1 | tee ${RESULTDIR}/${NAME}.log
if [ $? -ne 0 ]; then
RET=1
fi
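A side effect of the logging change in these scripts: with the old form "-f ... >> log 2>&1" the following $? test saw perf_client's exit code, but in a pipeline ending in "| tee log" it sees tee's exit code instead. The tests still work in practice because tee rarely fails, but bash offers two standard remedies if strictness is wanted; a sketch under that assumption (mymodel is an illustrative name, and neither remedy is part of this commit):

#!/bin/bash
# Remedy 1: pipefail makes the pipeline's status reflect any failing stage.
set -o pipefail
perf_client -v -m mymodel -p5000 2>&1 | tee run.log
if (( $? != 0 )); then
    echo "perf_client failed" >&2
fi

# Remedy 2: read the first stage's status directly from PIPESTATUS.
set +o pipefail
perf_client -v -m mymodel -p5000 2>&1 | tee run.log
if (( ${PIPESTATUS[0]} != 0 )); then
    echo "perf_client failed" >&2
fi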
10 changes: 6 additions & 4 deletions qa/L0_perf_resnet/run_test.sh
@@ -34,7 +34,7 @@ REPORTER=../common/reporter.py
TRITON_DIR=${TRITON_DIR:="/opt/tritonserver"}
SERVER=${TRITON_DIR}/bin/tritonserver
BACKEND_DIR=${TRITON_DIR}/backends
-SERVER_ARGS="--model-repository=`pwd`/models --backend-directory=${BACKEND_DIR} ${BACKEND_CONFIG}"
+SERVER_ARGS="--model-repository=`pwd`/models --backend-directory=${BACKEND_DIR} ${BACKEND_CONFIG}"
source ../common/util.sh

# Select the single GPU that will be available to the inference
@@ -60,7 +60,7 @@ if (( $SERVER_PID == 0 )); then
exit 1
fi

-# Onnx and onnx-trt models are very slow on Jetson Xavier. So we need to increase the time period for perf_client
+# Onnx and onnx-trt models are very slow on Jetson.
MEASUREMENT_WINDOW=5000
if [ "$ARCH" == "aarch64" ]; then
PERF_CLIENT=${TRITON_DIR}/clients/bin/perf_client
@@ -74,11 +74,13 @@ fi
set +e

# Run the model once to warm up. Some frameworks do optimization on the first requests.
-$PERF_CLIENT -v -i ${PERF_CLIENT_PROTOCOL} -m $MODEL_NAME -p${MEASUREMENT_WINDOW} -b${STATIC_BATCH}
+# Must warmup similar to actual run so that all instances are ready
+$PERF_CLIENT -v -i ${PERF_CLIENT_PROTOCOL} -m $MODEL_NAME -p${MEASUREMENT_WINDOW} \
+    -b${STATIC_BATCH} --concurrency-range ${CONCURRENCY}

$PERF_CLIENT -v -i ${PERF_CLIENT_PROTOCOL} -m $MODEL_NAME -p${MEASUREMENT_WINDOW} \
    -b${STATIC_BATCH} --concurrency-range ${CONCURRENCY} \
-    -f ${NAME}.csv >> ${NAME}.log 2>&1
+    -f ${NAME}.csv 2>&1 | tee ${NAME}.log
if (( $? != 0 )); then
RET=1
fi
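The warmup-then-measure pair is now duplicated across the deeprecommender, nomodel, and resnet scripts. A hypothetical shared helper, sketched here only as a refactoring idea (no such function exists in the repo's qa/common/util.sh as far as this diff shows):

# Hypothetical helper for qa/common/util.sh -- not part of this commit.
# Runs perf_client twice with identical load: first a discarded warmup
# so all model instances are ready, then the real run with CSV and log
# capture. Returns the measured run's exit status via PIPESTATUS.
run_perf_with_warmup() {
    local model=$1 window=$2 batch=$3 concurrency=$4 name=$5
    $PERF_CLIENT -v -i ${PERF_CLIENT_PROTOCOL} -m ${model} -p${window} \
        -b${batch} --concurrency-range ${concurrency}
    $PERF_CLIENT -v -i ${PERF_CLIENT_PROTOCOL} -m ${model} -p${window} \
        -b${batch} --concurrency-range ${concurrency} \
        -f ${name}.csv 2>&1 | tee ${name}.log
    return ${PIPESTATUS[0]}
}

Each call site would then collapse to a single line such as: run_perf_with_warmup ${MODEL_NAME} 5000 ${STATIC_BATCH} ${CONCURRENCY} ${NAME} || RET=1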
39 changes: 21 additions & 18 deletions qa/L0_perf_resnet/test.sh
@@ -202,22 +202,25 @@ for MODEL_NAME in $OPTIMIZED_MODEL_NAMES; do
done
done

+# FIXME Disable the following due to
+# https://jirasw.nvidia.com/browse/DLIS-2933.
+#
# Needs this additional test configuration for comparing against TFS.
-if [ "$ARCH" == "x86_64" ]; then
-    MODEL_NAME=${TF_MODEL_NAME}
-    REPO=$REPODIR/perf_model_store
-    STATIC_BATCH=128
-    INSTANCE_CNT=1
-    CONCURRENCY=1
-    FRAMEWORK=$(echo ${MODEL_NAME} | cut -d '_' -f 3)
-    MODEL_NAME=${MODEL_NAME} \
-        MODEL_FRAMEWORK=${FRAMEWORK} \
-        MODEL_PATH="$REPO/${MODEL_NAME}" \
-        STATIC_BATCH=${STATIC_BATCH} \
-        PERF_CLIENT_PROTOCOL="grpc" \
-        INSTANCE_CNT=${INSTANCE_CNT} \
-        CONCURRENCY=${CONCURRENCY} \
-        ARCH=${ARCH} \
-        BACKEND_CONFIG=" --backend-config=tensorflow,version=2" \
-        bash -x run_test.sh
-fi
+#if [ "$ARCH" == "x86_64" ]; then
+#    MODEL_NAME=${TF_MODEL_NAME}
+#    REPO=$REPODIR/perf_model_store
+#    STATIC_BATCH=128
+#    INSTANCE_CNT=1
+#    CONCURRENCY=1
+#    FRAMEWORK=$(echo ${MODEL_NAME} | cut -d '_' -f 3)
+#    MODEL_NAME=${MODEL_NAME} \
+#        MODEL_FRAMEWORK=${FRAMEWORK} \
+#        MODEL_PATH="$REPO/${MODEL_NAME}" \
+#        STATIC_BATCH=${STATIC_BATCH} \
+#        PERF_CLIENT_PROTOCOL="grpc" \
+#        INSTANCE_CNT=${INSTANCE_CNT} \
+#        CONCURRENCY=${CONCURRENCY} \
+#        ARCH=${ARCH} \
+#        BACKEND_CONFIG=" --backend-config=tensorflow,version=2" \
+#        bash -x run_test.sh
+#fi
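For context on the disabled block: test.sh configures run_test.sh entirely through one-shot environment assignments, which scope each variable to that single invocation. A minimal sketch of both sides of that contract (the default values on the callee side are illustrative, not read from the repo):

# Caller side (pattern used in test.sh): variables live only for this call.
MODEL_NAME=${TF_MODEL_NAME} \
    STATIC_BATCH=128 \
    CONCURRENCY=1 \
    PERF_CLIENT_PROTOCOL="grpc" \
    bash -x run_test.sh

# Callee side (pattern used in run_test.sh): consume with defaults so
# the script can also run standalone.
MODEL_NAME=${MODEL_NAME:="mymodel"}
STATIC_BATCH=${STATIC_BATCH:=1}
CONCURRENCY=${CONCURRENCY:=1}
PERF_CLIENT_PROTOCOL=${PERF_CLIENT_PROTOCOL:="grpc"}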
