Add L0_storage_swiftstack test (triton-inference-server#2934)

* Add L0_storage_swiftstack test
* Rename test
ileixe · Jun 1, 2021 · e1b2f8b · e1b2f8b
1 parent f2f1aa1
commit e1b2f8b
Show file tree

Hide file tree

Showing 2 changed files with 358 additions and 0 deletions.
diff --git a/qa/L0_storage_swiftstack/infer_test.py b/qa/L0_storage_swiftstack/infer_test.py
@@ -0,0 +1,164 @@
+# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import sys
+sys.path.append("../common")
+
+from builtins import range
+from future.utils import iteritems
+import unittest
+import numpy as np
+import infer_util as iu
+import test_util as tu
+import os
+
+
+class InferTest(tu.TestResultCollector):
+
+    def _full_exact(self, input_dtype, output0_dtype, output1_dtype,
+                    output0_raw, output1_raw, swap):
+
+        def _infer_exact_helper(tester,
+                                pf,
+                                tensor_shape,
+                                batch_size,
+                                input_dtype,
+                                output0_dtype,
+                                output1_dtype,
+                                output0_raw=True,
+                                output1_raw=True,
+                                model_version=None,
+                                swap=False,
+                                outputs=("OUTPUT0", "OUTPUT1"),
+                                use_http=True,
+                                use_grpc=True,
+                                skip_request_id_check=False,
+                                use_streaming=True,
+                                correlation_id=0):
+            for bs in (1, batch_size):
+                iu.infer_exact(tester,
+                               pf, (bs,) + tensor_shape,
+                               bs,
+                               input_dtype,
+                               output0_dtype,
+                               output1_dtype,
+                               output0_raw=output0_raw,
+                               output1_raw=output1_raw,
+                               model_version=model_version,
+                               swap=swap,
+                               outputs=outputs,
+                               use_http=use_http,
+                               use_grpc=use_grpc,
+                               skip_request_id_check=skip_request_id_check,
+                               use_streaming=use_streaming,
+                               correlation_id=correlation_id)
+
+        input_size = 16
+
+        if tu.validate_for_tf_model(input_dtype, output0_dtype, output1_dtype,
+                                    (input_size,), (input_size,),
+                                    (input_size,)):
+            for pf in ["graphdef", "savedmodel"]:
+                _infer_exact_helper(self,
+                                    pf, (input_size,),
+                                    8,
+                                    input_dtype,
+                                    output0_dtype,
+                                    output1_dtype,
+                                    output0_raw=output0_raw,
+                                    output1_raw=output1_raw,
+                                    swap=swap)
+
+        if tu.validate_for_trt_model(input_dtype, output0_dtype, output1_dtype,
+                                     (input_size, 1, 1), (input_size, 1, 1),
+                                     (input_size, 1, 1)):
+            if input_dtype == np.int8:
+                _infer_exact_helper(self,
+                                    'plan', (input_size, 1, 1),
+                                    8,
+                                    input_dtype,
+                                    output0_dtype,
+                                    output1_dtype,
+                                    output0_raw=output0_raw,
+                                    output1_raw=output1_raw,
+                                    swap=swap)
+            else:
+                _infer_exact_helper(self,
+                                    'plan', (input_size,),
+                                    8,
+                                    input_dtype,
+                                    output0_dtype,
+                                    output1_dtype,
+                                    output0_raw=output0_raw,
+                                    output1_raw=output1_raw,
+                                    swap=swap)
+
+        if tu.validate_for_onnx_model(input_dtype, output0_dtype, output1_dtype,
+                                      (input_size,), (input_size,),
+                                      (input_size,)):
+            _infer_exact_helper(self,
+                                'onnx', (input_size,),
+                                8,
+                                input_dtype,
+                                output0_dtype,
+                                output1_dtype,
+                                output0_raw=output0_raw,
+                                output1_raw=output1_raw,
+                                swap=swap)
+
+        if tu.validate_for_libtorch_model(input_dtype, output0_dtype,
+                                          output1_dtype, (input_size,),
+                                          (input_size,), (input_size,)):
+            _infer_exact_helper(self,
+                                'libtorch', (input_size,),
+                                8,
+                                input_dtype,
+                                output0_dtype,
+                                output1_dtype,
+                                output0_raw=output0_raw,
+                                output1_raw=output1_raw,
+                                swap=swap)
+
+    def test_raw_fff(self):
+        self._full_exact(np.float32,
+                         np.float32,
+                         np.float32,
+                         output0_raw=True,
+                         output1_raw=True,
+                         swap=True)
+
+
+    def test_class_fff(self):
+        self._full_exact(np.float32,
+                         np.float32,
+                         np.float32,
+                         output0_raw=False,
+                         output1_raw=False,
+                         swap=True)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/qa/L0_storage_swiftstack/test.sh b/qa/L0_storage_swiftstack/test.sh
@@ -0,0 +1,194 @@
+#!/bin/bash
+# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# Repository version: the first positional argument when one is supplied,
+# otherwise NVIDIA_TRITON_SERVER_VERSION from the environment.
+REPO_VERSION=${1-${NVIDIA_TRITON_SERVER_VERSION}}
+if [[ -z "${REPO_VERSION}" ]]; then
+    echo -e "Repository version must be specified"
+    echo -e "\n***\n*** Test Failed\n***"
+    exit 1
+fi
+
+# Expose only GPU 0 to the processes started by this script.
+export CUDA_VISIBLE_DEVICES=0
+
+# Drop any AWS settings inherited from the environment; the SwiftStack values
+# are exported further down.
+unset AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY AWS_DEFAULT_REGION
+
+# Plugin referenced by the [plugins] section of the config written below.
+pip3 install --no-deps awscli-plugin-endpoint
+
+# cli_legacy_plugin_path = /usr/local/lib/python3.8/site-packages
+
+mkdir -p ~/.aws
+# Swiftstack S3 credentials are necessary for this test. They are passed in
+# through the SWIFTSTACK_* environment variables.
+cat > ~/.aws/config <<EOF
+[plugins]
+endpoint = awscli_plugin_endpoint
+
+[default]
+aws_access_key_id = $SWIFTSTACK_ACCESS_KEY_ID
+aws_secret_access_key = $SWIFTSTACK_SECRET_ACCESS_KEY
+region = $SWIFTSTACK_DEFAULT_REGION
+
+s3 =
+    endpoint_url = https://pbss.s8k.io
+    signature_version = s3v4
+    payload_signing_enabled = true
+
+EOF
+
+# Same credentials again as AWS_* environment variables.
+export AWS_ACCESS_KEY_ID=${SWIFTSTACK_ACCESS_KEY_ID}
+export AWS_SECRET_ACCESS_KEY=${SWIFTSTACK_SECRET_ACCESS_KEY}
+export AWS_DEFAULT_REGION=${SWIFTSTACK_DEFAULT_REGION}
+
+# Bucket URL in the form used by the aws CLI commands in this script; the
+# bucket name is unique per CI pipeline.
+BUCKET_URL="s3://triton-bucket-${CI_PIPELINE_ID}"
+
+# The same bucket, in the endpoint-qualified form passed to Triton server as
+# the model repository.
+S3_REPO_URL="s3://https://pbss.s8k.io:443/triton-bucket-${CI_PIPELINE_ID}"
+
+# Best-effort removal of a bucket left over from an earlier failed run;
+# failures here are ignored.
+if aws s3 rm ${BUCKET_URL} --recursive --include "*"; then
+    aws s3 rb ${BUCKET_URL} || true
+fi
+
+# Make S3 test bucket
+aws s3 mb ${BUCKET_URL}
+
+# Server binary and settings defined ahead of sourcing the shared helpers.
+SERVER=/opt/tritonserver/bin/tritonserver
+SERVER_TIMEOUT=420
+
+CLIENT_LOG_BASE="./client"
+SERVER_LOG_BASE="./inference_server"
+INFER_TEST=infer_test.py
+# infer_test.py defines two test methods.
+EXPECTED_NUM_TESTS="2"
+source ../common/util.sh
+
+# Start from clean logs and a passing status.
+rm -f ${SERVER_LOG_BASE}* ${CLIENT_LOG_BASE}*
+RET=0
+
+SERVER_LOG=${SERVER_LOG_BASE}.log
+CLIENT_LOG=${CLIENT_LOG_BASE}.log
+
+# Fresh local staging directory for the models.
+rm -rf models && mkdir -p models
+
+# Make sure the bucket holds nothing before the upload.
+aws s3 rm ${BUCKET_URL}/ --recursive --include "*"
+
+# Now start model tests
+
+# Stage the float32 model of each framework, then append a GPU instance group
+# to every staged config.pbtxt of that framework.
+for FW in graphdef savedmodel onnx libtorch plan; do
+    cp -r /data/inferenceserver/${REPO_VERSION}/qa_model_repository/${FW}_float32_float32_float32/ models/
+    for MC in $(ls models/${FW}*/config.pbtxt); do
+        echo "instance_group [ { kind: KIND_GPU }]" >> ${MC}
+    done
+done
+
+# Upload the staged models/ directory into the S3 bucket.
+aws s3 cp models/ ${BUCKET_URL}/ --recursive --include "*"
+
+# Test without polling
+SERVER_ARGS="--model-repository=$S3_REPO_URL --exit-timeout-secs=120"
+
+run_server
+if [ "$SERVER_PID" == "0" ]; then
+    echo -e "\n***\n*** Failed to start $SERVER\n***"
+    cat $SERVER_LOG
+    exit 1
+fi
+
+set +e
+
+python $INFER_TEST >$CLIENT_LOG 2>&1
+if [ $? -ne 0 ]; then
+    cat $CLIENT_LOG
+    echo -e "\n***\n*** Test Failed\n***"
+    RET=1
+else
+    check_test_results $CLIENT_LOG $EXPECTED_NUM_TESTS
+    if [ $? -ne 0 ]; then
+        cat $CLIENT_LOG
+        echo -e "\n***\n*** Test Result Verification Failed\n***"
+        RET=1
+    fi
+fi
+
+set -e
+
+kill $SERVER_PID
+wait $SERVER_PID
+
+# Clean up bucket contents
+aws s3 rm $BUCKET_URL/ --recursive --include "*"
+
+
+# Test with polling enabled
+SERVER_ARGS="--model-repository=$S3_REPO_URL --exit-timeout-secs=120 --model-control-mode=poll"
+
+run_server
+if [ "$SERVER_PID" == "0" ]; then
+    echo -e "\n***\n*** Failed to start $SERVER\n***"
+    cat $SERVER_LOG
+    exit 1
+fi
+
+# copy contents of /models into S3 bucket and wait for them to be loaded.
+aws s3 cp models/ $BUCKET_URL/ --recursive --include "*"
+sleep 420
+
+set +e
+
+python $INFER_TEST >$CLIENT_LOG 2>&1
+if [ $? -ne 0 ]; then
+    cat $CLIENT_LOG
+    echo -e "\n***\n*** Test Failed\n***"
+    RET=1
+else
+    check_test_results $CLIENT_LOG $EXPECTED_NUM_TESTS
+    if [ $? -ne 0 ]; then
+        cat $CLIENT_LOG
+        echo -e "\n***\n*** Test Result Verification Failed\n***"
+        RET=1
+    fi
+fi
+
+set -e
+
+kill $SERVER_PID
+wait $SERVER_PID
+
+# Empty the test bucket, then delete the bucket itself.
+aws s3 rm ${BUCKET_URL}/ --recursive --include "*"
+aws s3 rb ${BUCKET_URL}
+
+# Announce success only when no earlier step recorded a failure; the exit
+# status carries the overall result either way.
+if [[ ${RET} -eq 0 ]]; then
+  echo -e "\n***\n*** Test Passed\n***"
+fi
+
+exit ${RET}