Skip to content

Commit

Permalink
Update PA test to have programmable delay (#4911)
Browse files Browse the repository at this point in the history
  • Loading branch information
tgerdesnv authored Sep 22, 2022
1 parent a6227b5 commit f36806d
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 19 deletions.
35 changes: 22 additions & 13 deletions qa/L0_perf_analyzer_ground_truth/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,12 @@ function check_grpc_time {
done
}

# Create input_data.json to communicate the requested model delay
# $1: desired model delay
# Write input_data.json containing the delay value the mock model should
# sleep for before responding.
# $1: desired model delay (seconds), embedded as INPUT0 in the JSON payload
function create_input_data {
  printf '{"data":[{"INPUT0" : [%s]}]}\n' "${1}" > input_data.json
}

# Setup server
export CUDA_VISIBLE_DEVICES=0
SERVER=/opt/tritonserver/bin/tritonserver
Expand Down Expand Up @@ -123,24 +129,27 @@ set +e
RET=0
PROTOCOLS="http grpc"
OUTPUT_FILE="results"
EXPECTED_RESULT="90.00"
MODEL_DELAYS=(0.05 0.5)
TOLERANCE="0.05"
STABILITY_THRESHOLD="15"

for protocol in ${PROTOCOLS}; do
for model in ${MODELS}; do
echo "================================================================"
echo "[PERMUTATION] Protocol=${protocol} Model=${model}"
echo "================================================================"
for model_delay in ${MODEL_DELAYS[@]}; do
create_input_data ${model_delay}
EXPECTED_RESULT=$(python3 -c "print(1 / ${model_delay})")
for protocol in ${PROTOCOLS}; do
for model in ${MODELS}; do
echo "================================================================"
echo "[PERMUTATION] Protocol=${protocol} Model=${model}"
echo "================================================================"

${PERF_ANALYZER} -v -i ${protocol} -m ${model} -f ${OUTPUT_FILE} -s ${STABILITY_THRESHOLD} | tee ${CLIENT_LOG} 2>&1
check_perf_analyzer_error $?
${PERF_ANALYZER} -v -i ${protocol} --concurrency-range 2 --input-data input_data.json -m ${model} -f ${OUTPUT_FILE} | tee ${CLIENT_LOG} 2>&1
check_perf_analyzer_error $?

check_performance ${OUTPUT_FILE} ${EXPECTED_RESULT} ${TOLERANCE}
check_performance ${OUTPUT_FILE} ${EXPECTED_RESULT} ${TOLERANCE}

if [ "${protocol}" == "grpc" ]; then
check_grpc_time ${CLIENT_LOG}
fi
if [ "${protocol}" == "grpc" ]; then
check_grpc_time ${CLIENT_LOG}
fi
done;
done;
done;

Expand Down
20 changes: 14 additions & 6 deletions qa/python_models/ground_truth/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,20 @@ class TritonPythonModel:

def execute(self, requests):
"""
Identity model in Python backend.
Mock Model that uses the input data to determine how long to wait
before returning identity data
"""
time.sleep(0.01)
assert(len(requests) == 1)
delay = 0
request = requests[0]
responses = []
for request in requests:
input_tensor = pb_utils.get_input_tensor_by_name(request, "INPUT0")
out_tensor = pb_utils.Tensor("OUTPUT0", input_tensor.as_numpy())
responses.append(pb_utils.InferenceResponse([out_tensor]))

delay_tensor = pb_utils.get_input_tensor_by_name(request, "INPUT0")
delay_as_numpy = delay_tensor.as_numpy()
delay = float(delay_as_numpy[0][0])

out_tensor = pb_utils.Tensor("OUTPUT0", delay_as_numpy)
responses.append(pb_utils.InferenceResponse([out_tensor]))

time.sleep(delay)
return responses

0 comments on commit f36806d

Please sign in to comment.