Skip to content

Commit

Permalink
Fix long-running stress test (triton-inference-server#3346)
Browse files Browse the repository at this point in the history
* Change tensor shape for plan models

* Increase timeout

* Increase sequence idle time

* Add CI output link

* Fix for sequence_no_start

* Use last_seq_choices for sequence-only cases

* Use dict to remember the last-used model for no-end cases

Co-authored-by: Kris Hung <krish@krish-dt.nvidia.com>
  • Loading branch information
krishung5 and Kris Hung authored Sep 13, 2021
1 parent f49f7a8 commit 4301e4b
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 35 deletions.
3 changes: 3 additions & 0 deletions qa/L0_long_running_stress/crashing_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ def crashing_client(model_name,
triton_client,
tensor_shape=(1000,),
input_name="INPUT0"):
if "plan" in model_name:
tensor_shape = (32,)

in0 = np.random.random(tensor_shape).astype(dtype)
if "libtorch" in model_name:
input_name = "INPUT__0"
Expand Down
88 changes: 54 additions & 34 deletions qa/L0_long_running_stress/stress.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,8 @@
import Queue as queue

FLAGS = None
CORRELATION_ID_BLOCK_SIZE = 100
DEFAULT_TIMEOUT_MS = 10000
CORRELATION_ID_BLOCK_SIZE = 1024 * 1024
DEFAULT_TIMEOUT_MS = 25000
SEQUENCE_LENGTH_MEAN = 16
SEQUENCE_LENGTH_STDEV = 8
BACKENDS = os.environ.get('BACKENDS', "graphdef savedmodel onnx plan")
Expand Down Expand Up @@ -163,7 +163,8 @@ def check_sequence_async(client_metadata,
now_ms = int(round(time.time() * 1000))
if (now_ms - seq_start_ms) > timeout_ms:
raise TimeoutException(
"Timeout expired for {}".format(sequence_name))
"Timeout expired for {}, got {} ms".format(
sequence_name, (now_ms - seq_start_ms)))

result = results.as_numpy(
output_name)[0] if "nobatch" in trial else results.as_numpy(
Expand Down Expand Up @@ -553,19 +554,18 @@ def stress_thread(name, seed, test_duration, correlation_id_base,
# inference requests. Also create some rare-use contexts that
# are used to make requests with rarely-used correlation IDs.
#
# Need to remember the last choice for each context since we
# don't want some choices to follow others since that gives
# results not expected. See below for details.
Need to remember whether the last sequence case run on each model
was a no-end case, since we don't want some choices to follow others,
as that gives unexpected results. See below for details.
common_cnt = 2
rare_cnt = 8
last_choices = []
is_last_used_no_end = {}

for c in range(common_cnt + rare_cnt):
client_metadata_list.append(
(grpcclient.InferenceServerClient("localhost:8001",
verbose=FLAGS.verbose),
correlation_id_base + c))
last_choices.append(None)

rare_idx = 0
start_time = time.time()
Expand All @@ -587,7 +587,8 @@ def stress_thread(name, seed, test_duration, correlation_id_base,
# scheduler
if choice < 0.33:
count_test_case("sequence_no_end", test_case_count)
last_choices[client_idx] = "sequence_no_end"
is_last_used_no_end[model_name] = True
last_choice = "sequence_no_end"
sequence_no_end(
client_metadata_list[client_idx],
rng,
Expand All @@ -600,7 +601,8 @@ def stress_thread(name, seed, test_duration, correlation_id_base,
sequence_request_count=sequence_request_count)
elif choice < 0.66:
count_test_case("sequence_valid_no_end", test_case_count)
last_choices[client_idx] = "sequence_valid_no_end"
is_last_used_no_end[model_name] = True
last_choice = "sequence_valid_no_end"
sequence_valid_no_end(
client_metadata_list[client_idx],
rng,
Expand All @@ -613,7 +615,8 @@ def stress_thread(name, seed, test_duration, correlation_id_base,
sequence_request_count=sequence_request_count)
else:
count_test_case("sequence_valid_valid", test_case_count)
last_choices[client_idx] = "sequence_valid_valid"
is_last_used_no_end[model_name] = False
last_choice = "sequence_valid_valid"
sequence_valid_valid(
client_metadata_list[client_idx],
rng,
Expand All @@ -630,30 +633,45 @@ def stress_thread(name, seed, test_duration, correlation_id_base,
# Common context...
client_idx = 0
client_metadata = client_metadata_list[client_idx]
last_choice = last_choices[client_idx]

choice = rng.rand()

# no-start cannot follow no-end since the server will
# just assume that the no-start is a continuation of
# the no-end sequence instead of being a sequence
# missing a start flag.
if ((last_choice != "sequence_no_end") and
(last_choice != "sequence_valid_no_end") and
(choice < 0.01)):
count_test_case("sequence_no_start", test_case_count)
last_choices[client_idx] = "sequence_no_start"
sequence_no_start(
client_metadata,
rng,
trial,
model_name,
dtype,
sequence_name=name,
sequence_request_count=sequence_request_count)
if model_name in is_last_used_no_end:
if ((not is_last_used_no_end[model_name]) and
(choice < 0.01)):
count_test_case("sequence_no_start", test_case_count)
is_last_used_no_end[model_name] = False
last_choice = "sequence_no_start"
sequence_no_start(
client_metadata,
rng,
trial,
model_name,
dtype,
sequence_name=name,
sequence_request_count=sequence_request_count)
else:
if choice < 0.01:
count_test_case("sequence_no_start",
test_case_count)
is_last_used_no_end[model_name] = False
last_choice = "sequence_no_start"
sequence_no_start(
client_metadata,
rng,
trial,
model_name,
dtype,
sequence_name=name,
sequence_request_count=sequence_request_count)
elif choice < 0.05:
count_test_case("sequence_no_end", test_case_count)
last_choices[client_idx] = "sequence_no_end"
is_last_used_no_end[model_name] = True
last_choice = "sequence_no_end"
sequence_no_end(
client_metadata,
rng,
Expand All @@ -666,7 +684,8 @@ def stress_thread(name, seed, test_duration, correlation_id_base,
sequence_request_count=sequence_request_count)
elif choice < 0.10:
count_test_case("sequence_valid_no_end", test_case_count)
last_choices[client_idx] = "sequence_valid_no_end"
is_last_used_no_end[model_name] = True
last_choice = "sequence_valid_no_end"
sequence_valid_no_end(
client_metadata,
rng,
Expand All @@ -679,7 +698,8 @@ def stress_thread(name, seed, test_duration, correlation_id_base,
sequence_request_count=sequence_request_count)
elif choice < 0.15:
count_test_case("sequence_valid_valid", test_case_count)
last_choices[client_idx] = "sequence_valid_valid"
is_last_used_no_end[model_name] = False
last_choice = "sequence_valid_valid"
sequence_valid_valid(
client_metadata,
rng,
Expand All @@ -692,7 +712,8 @@ def stress_thread(name, seed, test_duration, correlation_id_base,
sequence_request_count=sequence_request_count)
else:
count_test_case("sequence_valid", test_case_count)
last_choices[client_idx] = "sequence_valid"
is_last_used_no_end[model_name] = False
last_choice = "sequence_valid"
sequence_valid(
client_metadata,
rng,
Expand All @@ -710,26 +731,25 @@ def stress_thread(name, seed, test_duration, correlation_id_base,

if choice < 0.3:
count_test_case("timeout_client", test_case_count)
last_choices[client_idx] = "timeout_client"
last_choice = "timeout_client"
timeout_client(
client_metadata=client_metadata_list[client_idx],
sequence_name=name,
sequence_request_count=sequence_request_count)
elif choice < 0.7:
count_test_case("resnet_model_request", test_case_count)
last_choices[client_idx] = "resnet_model_request"
last_choice = "resnet_model_request"
resnet_model_request(
sequence_name=name,
sequence_request_count=sequence_request_count)
else:
count_test_case("crashing_client", test_case_count)
last_choices[client_idx] = "crashing_client"
last_choice = "crashing_client"
crashing_client(
sequence_name=name,
sequence_request_count=sequence_request_count)
except Exception as ex:
count_failed_test_case(last_choices[client_idx],
failed_test_case_count)
count_failed_test_case(last_choice, failed_test_case_count)
_thread_exceptions_mutex.acquire()
try:
_thread_exceptions.append(traceback.format_exc())
Expand Down
5 changes: 5 additions & 0 deletions qa/L0_long_running_stress/stress_mail.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,16 +28,21 @@
import sys
sys.path.append("../common")

import os
import nightly_email_helper

import glob
from datetime import date

CI_JOB_ID = os.environ.get('CI_JOB_ID', '')

if __name__ == '__main__':
today = date.today().strftime("%Y-%m-%d")
subject = "Triton Long-Running Stress Test Summary: " + today
stress_report = "stress_report.txt"
link = "https://gitlab-master.nvidia.com/dl/dgx/tritonserver/-/jobs/" + CI_JOB_ID
write_up = "<p>The table below includes results from long-running stress test. Please refer to the description of each test case to see what different kinds of inference requests were sent. Request concurrency is set to 8.</p>"
write_up += "<p>Please check the CI output webpage for the details of the failures: " + link + "</p>"
html_content = "<html><head></head><body><pre style=\"font-size:11pt;font-family:Arial, sans-serif;\">" + write_up + "</pre><pre style=\"font-size:11pt;font-family:Consolas;\">"
with open(stress_report, "r") as f:
html_content += f.read() + "\n"
Expand Down
6 changes: 5 additions & 1 deletion qa/L0_long_running_stress/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,11 @@ source ../common/util.sh
RET=0

# If BACKENDS not specified, set to all
BACKENDS=${BACKENDS:="graphdef savedmodel onnx plan libtorch"}
BACKENDS=${BACKENDS:="graphdef savedmodel onnx libtorch"}
export BACKENDS

export CI_JOB_ID=${CI_JOB_ID}

MODEL_DIR=models

rm -fr *.log *.txt *.serverlog models && mkdir models
Expand All @@ -76,6 +78,7 @@ for MODEL in $MODELS; do
cp -r $MODEL $MODEL_DIR/. && \
(cd $MODEL_DIR/$(basename $MODEL) && \
sed -i "s/^max_batch_size:.*/max_batch_size: 2/" config.pbtxt && \
sed -i "s/max_sequence_idle_microseconds:.*/max_sequence_idle_microseconds: 7000000/" config.pbtxt && \
sed -i "s/kind: KIND_GPU/kind: KIND_GPU\\ncount: 2/" config.pbtxt && \
sed -i "s/kind: KIND_CPU/kind: KIND_CPU\\ncount: 2/" config.pbtxt)
done
Expand All @@ -89,6 +92,7 @@ done
for MODEL in $MODELS; do
cp -r $MODEL $MODEL_DIR/. && \
(cd $MODEL_DIR/$(basename $MODEL) && \
sed -i "s/max_sequence_idle_microseconds:.*/max_sequence_idle_microseconds: 7000000/" config.pbtxt && \
sed -i "s/kind: KIND_GPU/kind: KIND_GPU\\ncount: 2/" config.pbtxt && \
sed -i "s/kind: KIND_CPU/kind: KIND_CPU\\ncount: 2/" config.pbtxt)
done
Expand Down

0 comments on commit 4301e4b

Please sign in to comment.