
Commit 6d20feb

Merge branch 'master' into master
2 parents bea187d + 7884a39 commit 6d20feb

19 files changed (+112, -35 lines)

.gitignore

Lines changed: 1 addition & 0 deletions
@@ -12,6 +12,7 @@ dist/
 frontend/server/src/main/java/org/pytorch/serve/grpc/
 *.pem
 *.backup
+docs/sphinx/src/

 # Postman files
 test/artifacts/

benchmarks/benchmark-ab.py

Lines changed: 12 additions & 5 deletions
@@ -104,6 +104,7 @@ def benchmark(test_plan, url, gpus, exec_env, concurrency, requests, batch_size,
     docker_torchserve_start()

     check_torchserve_health()
+    warm_up()
     run_benchmark()
     generate_report()

@@ -123,12 +124,18 @@ def check_torchserve_health():
        time.sleep(3)
    failure_exit("Could not connect to Torchserve instance at " + execution_params['inference_url'])

-
-def run_benchmark():
+def warm_up():
     register_model()

-    click.secho("\n\nExecuting Apache Bench tests ...", fg='green')
-    click.secho("*Executing inference performance test...", fg='green')
+    click.secho("\n\nExecuting warm-up ...", fg='green')
+    ab_cmd = f"ab -c {execution_params['concurrency']} -n {execution_params['requests']//10} -k -p {TMP_DIR}/benchmark/input -T " \
+             f"{execution_params['content_type']} {execution_params['inference_url']}/{execution_params['inference_model_url']} > {result_file}"
+
+    execute(ab_cmd, wait=True)
+
+
+def run_benchmark():
+    click.secho("\n\nExecuting inference performance tests ...", fg='green')
     ab_cmd = f"ab -c {execution_params['concurrency']} -n {execution_params['requests']} -k -p {TMP_DIR}/benchmark/input -T " \
              f"{execution_params['content_type']} {execution_params['inference_url']}/{execution_params['inference_model_url']} > {result_file}"

@@ -480,4 +487,4 @@ def failure_exit(msg):


 if __name__ == '__main__':
-    benchmark()
+    benchmark()
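The warm-up step added above runs a fraction of the configured requests through Apache Bench before the measured run, so that first-request costs (model loading, caches) do not skew the benchmark numbers. A minimal sketch of that pattern, with a hypothetical `build_ab_cmd` helper standing in for the script's inline f-strings (parameter names mirror the script's `execution_params` dict):

```python
# Sketch of the warm-up-then-measure pattern; helper names are hypothetical.
def build_ab_cmd(params, num_requests, result_file="/tmp/benchmark/result.txt"):
    """Assemble an Apache Bench command line for a given request count."""
    return (
        f"ab -c {params['concurrency']} -n {num_requests} -k "
        f"-p /tmp/benchmark/input -T {params['content_type']} "
        f"{params['inference_url']}/{params['inference_model_url']} > {result_file}"
    )

def warm_up_cmd(params):
    # The warm-up sends a tenth of the configured requests; integer division
    # keeps ab's -n flag an integer (requests/10 would yield e.g. 100.0).
    return build_ab_cmd(params, params['requests'] // 10)
```

The same command builder then serves the measured run by passing the full `params['requests']` count.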

docs/README.md

Lines changed: 27 additions & 12 deletions
@@ -1,36 +1,51 @@
 # TorchServe

-TorchServe is a flexible and easy to use tool for serving PyTorch models.
+TorchServe is a performant, flexible and easy to use tool for serving PyTorch eager mode and torchscripted models.

 ## Basic Features

 * [Serving Quick Start](https://github.com/pytorch/serve/blob/master/README.md#serve-a-model) - Basic server usage tutorial
 * [Model Archive Quick Start](https://github.com/pytorch/serve/tree/master/model-archiver#creating-a-model-archive) - Tutorial that shows you how to package a model archive file.
 * [Installation](https://github.com/pytorch/serve/blob/master/README.md#install-torchserve) - Installation procedures
-* [Serving Models](server.md) - Explains how to use torchserve
-* [REST API](rest_api.md) - Specification on the API endpoint for TorchServe
+* [Serving Models](server.md) - Explains how to use TorchServe
+* [REST API](rest_api.md) - Specification on the API endpoint for TorchServe
+* [gRPC API](grpc_api.md) - TorchServe supports gRPC APIs for both inference and management calls
 * [Packaging Model Archive](https://github.com/pytorch/serve/tree/master/model-archiver#torch-model-archiver-for-torchserve) - Explains how to package model archive file, use `model-archiver`.
 * [Inference API](inference_api.md) - How to check for the health of a deployed model and get inferences
 * [Management API](management_api.md) - How to manage and scale models
 * [Logging](logging.md) - How to configure logging
 * [Metrics](metrics.md) - How to configure metrics
-* [Metrics API](metrics_api.md) - How to configure metrics API
+* [Prometheus and Grafana metrics](metrics_api.md) - How to configure the metrics API with Prometheus formatted metrics in a Grafana dashboard
+* [Captum Explanations](https://github.com/pytorch/serve/blob/master/captum/Captum_visualization_for_bert.ipynb) - Built in support for Captum explanations for both text and images
 * [Batch inference with TorchServe](batch_inference_with_ts.md) - How to create and serve a model with batch inference in TorchServe
 * [Workflows](workflows.md) - How to create workflows to compose PyTorch models and Python functions in sequential and parallel pipelines
-* [Model Zoo](model_zoo.md) - List of pre-trained model archives ready to be served for inference with TorchServe.
-* [Examples](https://github.com/pytorch/serve/tree/master/examples) - Many examples of how to package and deploy models and workflows with TorchServe

-## Advanced Features

-* [Advanced configuration](configuration.md) - Describes advanced TorchServe configurations.
-* [Custom Service](custom_service.md) - Describes how to develop custom inference services.
-* [Unit Tests](https://github.com/pytorch/serve/tree/master/ts/tests#testing-torchserve) - Housekeeping unit tests for TorchServe.
-* [Benchmark](https://github.com/pytorch/serve/tree/master/benchmarks#torchserve-model-server-benchmarking) - Use JMeter to run TorchServe through the paces and collect benchmark data.
-* [TorchServe on Kubernetes](https://github.com/pytorch/serve/blob/master/kubernetes/README.md#torchserve-on-kubernetes) - Demonstrates a TorchServe deployment in Kubernetes using Helm Chart.

 ## Default Handlers

 * [Image Classifier](https://github.com/pytorch/serve/blob/master/ts/torch_handler/image_classifier.py) - This handler takes an image and returns the name of object in that image
 * [Text Classifier](https://github.com/pytorch/serve/blob/master/ts/torch_handler/text_classifier.py) - This handler takes a text (string) as input and returns the classification text based on the model vocabulary
 * [Object Detector](https://github.com/pytorch/serve/blob/master/ts/torch_handler/object_detector.py) - This handler takes an image and returns list of detected classes and bounding boxes respectively
 * [Image Segmenter](https://github.com/pytorch/serve/blob/master/ts/torch_handler/image_segmenter.py) - This handler takes an image and returns output shape as [CL H W], CL - number of classes, H - height and W - width
+
+## Examples
+
+* [HuggingFace Language Model](https://github.com/pytorch/serve/blob/master/examples/Huggingface_Transformers/Transformer_handler_generalized.py) - This handler takes an input sentence and can return sequence classifications, token classifications or Q&A answers
+* [Multi Modal Framework](https://github.com/pytorch/serve/blob/master/examples/MMF-activity-recognition/handler.py) - Build and deploy a classifier that combines text, audio and video input data
+* [Dual Translation Workflow](https://github.com/pytorch/serve/tree/master/examples/Workflows/nmt_tranformers_pipeline) -
+* [Model Zoo](model_zoo.md) - List of pre-trained model archives ready to be served for inference with TorchServe.
+* [Examples](https://github.com/pytorch/serve/tree/master/examples) - Many examples of how to package and deploy models with TorchServe
+* [Workflow Examples](https://github.com/pytorch/serve/tree/master/examples/Workflows) - Examples of how to compose models in a workflow with TorchServe
+
+## Advanced Features
+
+* [Advanced configuration](configuration.md) - Describes advanced TorchServe configurations.
+* [A/B test models](https://github.com/pytorch/serve/blob/master/docs/use_cases.md#serve-models-for-ab-testing) - A/B test your models for regressions before shipping them to production
+* [Custom Service](custom_service.md) - Describes how to develop custom inference services.
+* [Encrypted model serving](management_api.md#encrypted-model-serving) - S3 server side model encryption via KMS
+* [Snapshot serialization](https://github.com/pytorch/serve/blob/master/plugins/docs/ddb_endpoint.md) - Serialize model artifacts to AWS DynamoDB
+* [Benchmarking and Profiling](https://github.com/pytorch/serve/tree/master/benchmarks#torchserve-model-server-benchmarking) - Use JMeter or Apache Bench to benchmark your models and TorchServe itself
+* [TorchServe on Kubernetes](https://github.com/pytorch/serve/blob/master/kubernetes/README.md#torchserve-on-kubernetes) - Demonstrates a TorchServe deployment in Kubernetes using Helm Chart, supported in both Azure Kubernetes Service and Google Kubernetes Engine
+* [mlflow-torchserve](https://github.com/mlflow/mlflow-torchserve) - Deploy MLflow pipeline models into TorchServe
+* [Kubeflow pipelines](https://github.com/kubeflow/pipelines/tree/master/samples/contrib/pytorch-samples) - Kubeflow pipelines and Google Vertex AI Managed pipelines

docs/Troubleshooting.md

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-## Troubleshooting guide.
+## Troubleshooting Guide
 Refer to this section for common issues faced while deploying your PyTorch models using TorchServe and their corresponding troubleshooting steps.

 * [Deployment and config issues](#deployment-and-config-issues)

docs/apis.rst

Lines changed: 10 additions & 0 deletions
@@ -0,0 +1,10 @@
+.. toctree::
+   :caption: APIs:
+
+   grpc_api
+   inference_api
+   management_api
+   metrics_api
+   rest_api
+   workflow_inference_api
+   workflow_management_api

docs/code_coverage.md

Lines changed: 2 additions & 0 deletions
@@ -1,3 +1,5 @@
+# Code Coverage
+
 ## To check branch stability run the sanity suite as follows
 - Install dependencies (if not already installed)
   For CPU:

docs/conf.py

Lines changed: 2 additions & 2 deletions
@@ -78,7 +78,7 @@


 # The master toctree document.
-master_doc = 'index'
+master_doc = 'contents'

 # General information about the project.
 project = 'PyTorch/Serve'
@@ -129,7 +129,7 @@
 #
 html_theme_options = {
     'pytorch_project': 'audio',
-    'collapse_navigation': False,
+    'collapse_navigation': True,
     'display_version': True,
     'logo_only': True,
 }
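The conf.py changes above hook the new docs layout into Sphinx's navigation: `master_doc` names the root toctree document, so switching it from 'index' to 'contents' makes the new contents.rst the entry point, while `collapse_navigation: True` collapses sidebar sections outside the currently open page. An illustrative fragment follows; only `master_doc`, `project`, and `html_theme_options` values come from the diff, the comments are standard Sphinx semantics:

```python
# Illustrative Sphinx conf.py fragment (values taken from the diff above).
project = 'PyTorch/Serve'

# The master toctree document: Sphinx builds the navigation tree
# starting from contents.rst rather than index.rst after this commit.
master_doc = 'contents'

html_theme_options = {
    'pytorch_project': 'audio',
    'collapse_navigation': True,   # collapse sidebar entries outside the open page
    'display_version': True,
    'logo_only': True,
}
```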

docs/contents.rst

Lines changed: 30 additions & 0 deletions
@@ -0,0 +1,30 @@
+.. toctree::
+   :maxdepth: 0
+   :numbered:
+   :caption: Contents:
+   :titlesonly:
+
+   index
+   Troubleshooting
+   batch_inference_with_ts
+   code_coverage
+   configuration
+   custom_service
+   default_handlers
+   logging
+   metrics
+   model_zoo
+   request_envelopes
+   server
+   snapshot
+   sphinx/requirements
+   torchserve_on_win_native
+   torchserve_on_wsl
+   use_cases
+   workflows
+
+.. toctree::
+   :maxdepth: 0
+   :caption: APIs
+
+   apis

docs/custom_service.md

Lines changed: 3 additions & 1 deletion
@@ -1,3 +1,5 @@
+# Custom Service
+
 ## Contents of this Document

 * [Custom handlers](#custom-handlers)
@@ -380,7 +382,7 @@ class ModelHandler(object):
         self.device = torch.device("cuda:" + str(properties.get("gpu_id")) if torch.cuda.is_available() else "cpu")
 ```

-# Installing model specific python dependencies
+## Installing model specific python dependencies

 Custom models/handlers may depend on different python packages which are not installed by-default as a part of `TorchServe` setup.

docs/default_handlers.md

Lines changed: 3 additions & 3 deletions
@@ -38,16 +38,16 @@ For more details see [examples](https://github.com/pytorch/serve/tree/master/exa

 For a more comprehensive list of available handlers make sure to check out the [examples page](https://github.com/pytorch/serve/tree/master/examples)

-# Common features
+## Common features

-## index_to_name.json
+### index_to_name.json

 `image_classifier`, `text_classifier` and `object_detector` can all automatically map from numeric classes (0,1,2...) to friendly strings. To do this, simply include in your model archive a file, `index_to_name.json`, that contains a mapping of class number (as a string) to friendly name (also as a string). You can see some examples here:
 - [image_classifier](https://github.com/pytorch/serve/tree/master/examples/image_classifier/index_to_name.json)
 - [text_classifier](https://github.com/pytorch/serve/tree/master/examples/text_classification/index_to_name.json)
 - [object_detector](https://github.com/pytorch/serve/tree/master/examples/object_detector/index_to_name.json)

-# Contributing
+### Contributing
 We welcome new contributed handlers, if your usecase isn't covered by one of the existing default handlers please follow the below steps to contribute it
 1. Write a new class derived from [BaseHandler](https://github.com/pytorch/serve/blob/master/ts/torch_handler/base_handler.py). Add it as a separate file in `ts/torch_handler/`
 2. Update `model-archiver/model_packaging.py` to add in your classes name
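The `index_to_name.json` convention described in this hunk can be sketched in a few lines. The helper names below are hypothetical, but the file format (class number as a string mapped to a friendly name) follows the docs text:

```python
import json

def load_index_to_name(path):
    """Load a mapping like {"0": "cat", "1": "dog"} from index_to_name.json."""
    with open(path) as f:
        return json.load(f)

def map_predictions(indices, index_to_name):
    # Fall back to the raw index (as a string) when no friendly name exists.
    return [index_to_name.get(str(i), str(i)) for i in indices]
```

A handler would load the mapping once in its initialize step and apply it to each batch of predicted class indices.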
