Auto-formatting #5979

Merged: 2 commits, merged on Jun 22, 2023

Changes from 1 commit
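The hunks below are whitespace and line-wrapping changes only. As a rough illustration of how such output is produced, here is a minimal sketch using yapf's Python API; the formatter and style are assumptions, since this view of the PR does not name the tool (clang-format would play the analogous role for the C++ files).

# Sketch only: yapf is assumed to be the formatter; the PR view does not name it.
from yapf.yapflib.yapf_api import FormatCode

unformatted = (
    "endpoint_url = '{}{}:{}'.format(\n"
    "    protocol, uri.host_name, uri.host_port)\n")

# FormatCode returns the reformatted source and a flag indicating whether anything changed.
formatted, changed = FormatCode(unformatted, style_config='pep8')
print(formatted)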
5 changes: 3 additions & 2 deletions compose.py
@@ -130,6 +130,7 @@ def add_requested_repoagents(ddir, dockerfile_name, repoagents):
with open(os.path.join(ddir, dockerfile_name), "a") as dfile:
dfile.write(df)


def add_requested_caches(ddir, dockerfile_name, caches):
df = "# Copying over caches \n"
for cache in caches:
@@ -143,6 +144,7 @@ def add_requested_caches(ddir, dockerfile_name, caches):
with open(os.path.join(ddir, dockerfile_name), "a") as dfile:
dfile.write(df)


def end_dockerfile(ddir, dockerfile_name, argmap):
# Install additional dependencies
df = ""
@@ -372,8 +374,7 @@ def create_argmap(images, skip_pull):
'--cache',
action='append',
required=False,
help=
'Include <cache-name> in the generated Docker image. The flag may '
help='Include <cache-name> in the generated Docker image. The flag may '
'be specified multiple times.')
parser.add_argument(
'--skip-pull',
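For context, the --cache option touched above relies on argparse's action='append', which collects repeated occurrences of the flag into a list. A small standalone sketch; the cache names are made up:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    '--cache',
    action='append',
    required=False,
    help='Include <cache-name> in the generated Docker image. The flag may '
    'be specified multiple times.')

# Each repeated --cache value is appended to a single list.
args = parser.parse_args(['--cache', 'pytorch', '--cache', 'tensorflow'])
print(args.cache)  # ['pytorch', 'tensorflow']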
7 changes: 3 additions & 4 deletions deploy/mlflow-triton-plugin/mlflow_triton/config.py
@@ -48,13 +48,12 @@ def __init__(self):
protocol = "http://"
endpoint_url = None
if uri.host_name != "" and uri.host_port != "":
endpoint_url = '{}{}:{}'.format(
protocol, uri.host_name, uri.host_port)
endpoint_url = '{}{}:{}'.format(protocol, uri.host_name,
uri.host_port)

import boto3
# boto3 handles AWS credentials
self['s3'] = boto3.client(
's3', endpoint_url=endpoint_url)
self['s3'] = boto3.client('s3', endpoint_url=endpoint_url)
self['s3_bucket'] = uri.bucket
self['s3_prefix'] = uri.prefix
self['triton_model_repo'] = 's3://{}'.format(
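The two hunks above only re-wrap the S3 client construction. For reference, a self-contained sketch of that pattern; the host name and port are placeholders, not values taken from the plugin:

import boto3

protocol = "http://"
host_name = "localhost"  # placeholder
host_port = "9000"       # placeholder

endpoint_url = None
if host_name != "" and host_port != "":
    endpoint_url = '{}{}:{}'.format(protocol, host_name, host_port)

# boto3 resolves AWS credentials from the environment as usual;
# endpoint_url=None falls back to the default S3 endpoint.
s3 = boto3.client('s3', endpoint_url=endpoint_url)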
38 changes: 21 additions & 17 deletions deploy/mlflow-triton-plugin/mlflow_triton/deployments.py
@@ -188,11 +188,12 @@ def list_deployments(self):
d['name'],
_MLFLOW_META_FILENAME)
if 's3' in self.server_config:
meta_dict = ast.literal_eval(self.server_config['s3'].get_object(
Bucket=self.server_config['s3_bucket'],
Key=os.path.join(
self.server_config['s3_prefix'], d['name'], _MLFLOW_META_FILENAME),
)['Body'].read().decode('utf-8'))
meta_dict = ast.literal_eval(
self.server_config['s3'].get_object(
Bucket=self.server_config['s3_bucket'],
Key=os.path.join(self.server_config['s3_prefix'],
d['name'], _MLFLOW_META_FILENAME),
)['Body'].read().decode('utf-8'))
elif os.path.isfile(mlflow_meta_path):
meta_dict = self._get_mlflow_meta_dict(d['name'])
else:
@@ -280,12 +281,13 @@ def _generate_mlflow_meta_file(self, name, flavor, model_uri):
self.server_config['s3'].put_object(
Body=json.dumps(meta_dict, indent=4).encode('utf-8'),
Bucket=self.server_config["s3_bucket"],
Key=os.path.join(
self.server_config['s3_prefix'], name, _MLFLOW_META_FILENAME),
Key=os.path.join(self.server_config['s3_prefix'], name,
_MLFLOW_META_FILENAME),
)
else:
with open(os.path.join(triton_deployment_dir, _MLFLOW_META_FILENAME),
"w") as outfile:
with open(
os.path.join(triton_deployment_dir, _MLFLOW_META_FILENAME),
"w") as outfile:
json.dump(meta_dict, outfile, indent=4)

print("Saved", _MLFLOW_META_FILENAME, "to", triton_deployment_dir)
@@ -295,11 +297,12 @@ def _get_mlflow_meta_dict(self, name):
_MLFLOW_META_FILENAME)

if 's3' in self.server_config:
mlflow_meta_dict = ast.literal_eval(self.server_config['s3'].get_object(
Bucket=self.server_config['s3_bucket'],
Key=os.path.join(
self.server_config['s3_prefix'], name, _MLFLOW_META_FILENAME),
)['Body'].read().decode('utf-8'))
mlflow_meta_dict = ast.literal_eval(
self.server_config['s3'].get_object(
Bucket=self.server_config['s3_bucket'],
Key=os.path.join(self.server_config['s3_prefix'], name,
_MLFLOW_META_FILENAME),
)['Body'].read().decode('utf-8'))
else:
with open(mlflow_meta_path, 'r') as metafile:
mlflow_meta_dict = json.load(metafile)
@@ -392,7 +395,8 @@ def _copy_files_to_triton_repo(self, artifact_path, name, flavor):
s3_path = os.path.join(
self.server_config['s3_prefix'],
copy_paths[key]['to'].replace(
self.server_config['triton_model_repo'], '').strip('/'),
self.server_config['triton_model_repo'],
'').strip('/'),
filename,
)

@@ -413,8 +417,8 @@ def _copy_files_to_triton_repo(self, artifact_path, name, flavor):
if os.path.isdir(copy_paths[key]['from']):
if os.path.isdir(copy_paths[key]['to']):
shutil.rmtree(copy_paths[key]['to'])
shutil.copytree(
copy_paths[key]['from'], copy_paths[key]['to'])
shutil.copytree(copy_paths[key]['from'],
copy_paths[key]['to'])
else:
if not os.path.isdir(copy_paths[key]['to']):
os.makedirs(copy_paths[key]['to'])
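All of the deployments.py hunks are line-length rewrites around the same S3-backed metadata path. A sketch of the read side of that path follows; the bucket, prefix, deployment, and file names are hypothetical:

import ast
import os

import boto3

# Hypothetical values, for illustration only.
bucket = 'my-model-bucket'
prefix = 'triton-repo'
name = 'my_model'
_MLFLOW_META_FILENAME = 'mlflow-meta.json'

s3 = boto3.client('s3')
# ast.literal_eval on the file body mirrors what the plugin code above does.
meta_dict = ast.literal_eval(
    s3.get_object(
        Bucket=bucket,
        Key=os.path.join(prefix, name, _MLFLOW_META_FILENAME),
    )['Body'].read().decode('utf-8'))
print(meta_dict)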
@@ -27,6 +27,7 @@

#include <iostream>
#include <string>

#include "triton/core/tritonserver.h"

#define RETURN_IF_ERR(X) \
@@ -27,24 +27,23 @@
#include <rapidjson/document.h>
#include <rapidjson/error/en.h>
#include <unistd.h>

#include <chrono>
#include <cstring>
#include <future>
#include <iostream>
#include <opencv2/dnn.hpp>
#include <string>
#include <thread>
#include <unordered_map>
#include <vector>

#include "triton/core/tritonserver.h"

#include "common.h"

#include <opencv2/dnn.hpp>
#include "opencv2/core.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/opencv.hpp"
#include "triton/core/tritonserver.h"

#ifdef TRITON_ENABLE_GPU
#include <cuda_runtime_api.h>
@@ -108,7 +108,7 @@ def execute(self, requests):
with self.inflight_thread_count_lck:
self.inflight_thread_count += 1
thread1.start()

logger = pb_utils.Logger
logger.log("Execute-Specific Msg!", logger.INFO)
logger.log_info("Execute-Info Msg!")
@@ -57,15 +57,14 @@ def execute(self, requests):

for request in requests:
thread = threading.Thread(target=self.response_thread,
args=(request.get_response_sender(),
pb_utils.get_input_tensor_by_name(
request, 'IN').as_numpy()))
args=(request.get_response_sender(),
pb_utils.get_input_tensor_by_name(
request, 'IN').as_numpy()))
thread.daemon = True
with self.inflight_thread_count_lck:
self.inflight_thread_count += 1
thread.start()


return None

def response_thread(self, response_sender, in_value):
@@ -95,22 +94,22 @@ def response_thread(self, response_sender, in_value):
response,
flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL)
else:
output_tensors = [pb_utils.Tensor('OUT', output0.as_numpy())]
output_tensors = [
pb_utils.Tensor('OUT', output0.as_numpy())
]
response = pb_utils.InferenceResponse(
output_tensors=output_tensors)
response_sender.send(response)

response_count += 1

if in_value != response_count-1:
error_message = (
"Expected {} responses, got {}".format(
in_value, len(infer_responses)-1))
response = pb_utils.InferenceResponse(
error=error_message)
if in_value != response_count - 1:
error_message = ("Expected {} responses, got {}".format(
in_value,
len(infer_responses) - 1))
response = pb_utils.InferenceResponse(error=error_message)
response_sender.send(
response,
flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL)
response, flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL)
else:
response_sender.send(
flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL)
5 changes: 3 additions & 2 deletions qa/L0_backend_python/logging/logging_test.py
@@ -31,10 +31,10 @@
import numpy as np
import test_util as tu


from tritonclient.utils import *
import tritonclient.http as httpclient


class LogTest(tu.TestResultCollector):

def test_log_output(self):
@@ -43,13 +43,14 @@ def test_log_output(self):
input_data = np.array([[1.0]], dtype=np.float32)
inputs = [
httpclient.InferInput("INPUT0", input_data.shape,
np_to_triton_dtype(input_data.dtype))
np_to_triton_dtype(input_data.dtype))
]
inputs[0].set_data_from_numpy(input_data)
result = client.infer(model_name, inputs)
output0 = result.as_numpy('OUTPUT0')
self.assertIsNotNone(output0)
self.assertTrue(np.all(output0 == input_data))


if __name__ == '__main__':
unittest.main()
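The test above is only re-indented. For anyone reading it out of context, an equivalent standalone call with the Triton HTTP client looks roughly like this; the server URL and model name are placeholders, not values from the test harness:

import numpy as np
import tritonclient.http as httpclient
from tritonclient.utils import np_to_triton_dtype

# Placeholders: the real test derives its model name and connection from its harness.
client = httpclient.InferenceServerClient(url='localhost:8000')

input_data = np.array([[1.0]], dtype=np.float32)
inputs = [
    httpclient.InferInput("INPUT0", input_data.shape,
                          np_to_triton_dtype(input_data.dtype))
]
inputs[0].set_data_from_numpy(input_data)

result = client.infer('identity_fp32', inputs)
print(result.as_numpy('OUTPUT0'))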
5 changes: 2 additions & 3 deletions qa/L0_backend_python/python_test.py
@@ -182,9 +182,8 @@ def test_async_infer(self):

# Make sure the requests ran in parallel.
stats = client.get_inference_statistics(model_name)
test_cond = (len(stats['model_stats'])
!= 1) or (stats['model_stats'][0]['name']
!= model_name)
test_cond = (len(stats['model_stats']) != 1) or (
stats['model_stats'][0]['name'] != model_name)
self.assertFalse(
test_cond,
"error: expected statistics for {}".format(model_name))
2 changes: 1 addition & 1 deletion qa/L0_backend_python/python_unittest.py
@@ -53,7 +53,7 @@ def _run_unittest(self, model_name):

def test_python_unittest(self):
model_name = os.environ['MODEL_NAME']
bls_kind = os.environ.get('BLS_KIND', 'non_decoupled')
bls_kind = os.environ.get('BLS_KIND', 'non_decoupled')

if bls_kind == "decoupled":
# Skip the shared memory probe for decoupled models for now as
6 changes: 4 additions & 2 deletions qa/L0_cuda_graph/trt_cuda_graph_test.py
@@ -36,7 +36,7 @@


class TrtCudaGraphTest(tu.TestResultCollector):
MODELNAME= "plan"
MODELNAME = "plan"

def setUp(self):
self.dtype_ = np.float32
@@ -50,7 +50,8 @@ def _check_infer(self, tensor_shape, batch_size=1):
else:
full_shape = tensor_shape
iu.infer_exact(self,
self.model_name_, full_shape,
self.model_name_,
full_shape,
batch_size,
self.dtype_,
self.dtype_,
@@ -140,6 +141,7 @@ def test_range_dynamic_shape(self):
def test_nobatch_fixed_shape(self):
self._check_infer((16,), 0)


if __name__ == '__main__':
if len(sys.argv) > 2:
TrtCudaGraphTest.MODELNAME = sys.argv.pop()