Merge branch 'main' into feat/evict-req-on-client-disconnect-streaming-case
Lightning-AI · Sep 21, 2024 · e060e39 · e060e39
2 parents f08ed4b + 44e0fe9
commit e060e39
Show file tree

Hide file tree

Showing 33 changed files with 948 additions and 130 deletions.
diff --git a/.azure/gpu-tests.yml b/.azure/gpu-tests.yml
@@ -86,7 +86,11 @@ jobs:
         displayName: "Statistics"
 
       - bash: |
-          pip install jsonargparse torch torchvision -U -q --find-links=${TORCH_URL}
+          pip install torch torchvision -U -q --find-links=${TORCH_URL} -r _requirements/perf.txt
           export PYTHONPATH=$PWD && python tests/parity_fastapi/main.py
+        displayName: "Run FastAPI parity tests"
 
-        displayName: "Run parity tests"
+      - bash: |
+          pip install gpustat wget -U -q
+          bash tests/perf_test/bert/run_test.sh
+        displayName: "Run GPU perf test"
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
diff --git a/.github/workflows/ci-checks.yml b/.github/workflows/ci-checks.yml
@@ -18,14 +18,14 @@ jobs:
 #      actions-ref: main
 
   check-schema:
-    uses: Lightning-AI/utilities/.github/workflows/check-schema.yml@v0.11.6
+    uses: Lightning-AI/utilities/.github/workflows/check-schema.yml@v0.11.7
     with:
       azure-dir: ""
 
   check-package:
-    uses: Lightning-AI/utilities/.github/workflows/check-package.yml@v0.11.6
+    uses: Lightning-AI/utilities/.github/workflows/check-package.yml@v0.11.7
     with:
-      actions-ref: v0.11.6
+      actions-ref: v0.11.7
       import-name: "litserve"
       artifact-name: dist-packages-${{ github.sha }}
       testing-matrix: |

diff --git a/.github/workflows/ci-parity.yml b/.github/workflows/ci-parity.yml
@@ -26,8 +26,11 @@ jobs:
       - name: Install LitServe
         run: |
           pip --version
-          pip install . torchvision jsonargparse uvloop -U -q -r _requirements/test.txt -U -q
+          pip install . torchvision jsonargparse uvloop tenacity -U -q -r _requirements/test.txt -U -q
           pip list
 
-      - name: Tests
+      - name: Parity test
         run: export PYTHONPATH=$PWD && python tests/parity_fastapi/main.py
+
+      - name: Streaming speed test
+        run: bash tests/perf_test/stream/run_test.sh
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -59,7 +59,7 @@ repos:
           - pydocstyle
 
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.5.6
+    rev: v0.6.3
     hooks:
       - id: ruff-format
         args: ["--preview"]

diff --git a/README.md b/README.md
@@ -12,7 +12,7 @@ Easy. Flexible. Enterprise-scale.
 
 ----
 
-**LitServe** is an easy-to-use, flexible serving engine for AI models built on FastAPI. Features like batching, streaming, and GPU autoscaling eliminate the need to rebuild a FastAPI server per model.  
+**LitServe** is an easy-to-use, flexible serving engine for AI models built on FastAPI. It augments FastAPI with features like batching, streaming, and GPU autoscaling, eliminating the need to rebuild a FastAPI server per model.  
 
 LitServe is at least [2x faster](#performance) than plain FastAPI due to AI-specific multi-worker handling.    
 
@@ -148,11 +148,11 @@ Use LitServe to deploy any model or AI service: (Gen AI, classical ML, embedding
 <strong>LLMs:</strong>           <a target="_blank" href="https://lightning.ai/lightning-ai/studios/deploy-a-private-llama-3-8b-api">Llama 3 (8B)</a>, <a target="_blank" href="https://lightning.ai/lightning-ai/studios/openai-fault-tolerant-proxy-server">LLM Proxy server</a>, <a target="_blank" href="https://lightning.ai/lightning-ai/studios/deploy-ai-agent-with-tool-use">Agent with tool use</a>
 <strong>RAG:</strong>            <a target="_blank" href="https://lightning.ai/lightning-ai/studios/deploy-a-private-llama-3-1-rag-api">RAG API (LlamaIndex)</a>
 <strong>NLP:</strong>            <a target="_blank" href="https://lightning.ai/lightning-ai/studios/deploy-any-hugging-face-model-instantly">Hugging face</a>, <a target="_blank" href="https://lightning.ai/lightning-ai/studios/deploy-a-hugging-face-bert-model">BERT</a>, <a target="_blank" href="https://lightning.ai/lightning-ai/studios/deploy-text-embedding-api-with-litserve">Text embedding API</a>
-<strong>Multimodal:</strong>     <a target="_blank" href="https://lightning.ai/lightning-ai/studios/deploy-open-ai-clip-with-litserve">OpenAI Clip</a>, <a target="_blank" href="https://lightning.ai/lightning-ai/studios/deploy-a-multi-modal-llm-with-minicpm">MiniCPM</a>, <a target="_blank" href="https://lightning.ai/lightning-ai/studios/deploy-phi3-5-vision-api-with-litserve">Phi-3.5 Vision Instruct</a>, <a target="_blank" href="https://lightning.ai/bhimrajyadav/studios/deploy-and-chat-with-qwen2-vl-using-litserve">Qwen2-VL</a>
+<strong>Multimodal:</strong>     <a target="_blank" href="https://lightning.ai/lightning-ai/studios/deploy-open-ai-clip-with-litserve">OpenAI Clip</a>, <a target="_blank" href="https://lightning.ai/lightning-ai/studios/deploy-a-multi-modal-llm-with-minicpm">MiniCPM</a>, <a target="_blank" href="https://lightning.ai/lightning-ai/studios/deploy-phi3-5-vision-api-with-litserve">Phi-3.5 Vision Instruct</a>, <a target="_blank" href="https://lightning.ai/bhimrajyadav/studios/deploy-and-chat-with-qwen2-vl-using-litserve">Qwen2-VL</a>, <a target="_blank" href="https://lightning.ai/lightning-ai/studios/deploy-a-multi-modal-llm-with-pixtral">Pixtral</a>
 <strong>Audio:</strong>          <a target="_blank" href="https://lightning.ai/lightning-ai/studios/deploy-open-ai-s-whisper-model">Whisper</a>, <a target="_blank" href="https://lightning.ai/lightning-ai/studios/deploy-an-music-generation-api-with-meta-s-audio-craft">AudioCraft</a>, <a target="_blank" href="https://lightning.ai/lightning-ai/studios/deploy-an-audio-generation-api">StableAudio</a>, <a target="_blank" href="https://lightning.ai/lightning-ai/studios/deploy-a-noise-cancellation-api-with-deepfilternet">Noise cancellation (DeepFilterNet)</a>
 <strong>Vision:</strong>         <a target="_blank" href="https://lightning.ai/lightning-ai/studios/deploy-a-private-api-for-stable-diffusion-2">Stable diffusion 2</a>, <a target="_blank" href="https://lightning.ai/lightning-ai/studios/deploy-an-image-generation-api-with-auraflow">AuraFlow</a>, <a target="_blank" href="https://lightning.ai/lightning-ai/studios/deploy-an-image-generation-api-with-flux">Flux</a>, <a target="_blank" href="https://lightning.ai/lightning-ai/studios/deploy-a-super-resolution-image-api-with-aura-sr">Image Super Resolution (Aura SR)</a>,
                 <a target="_blank" href="https://lightning.ai/bhimrajyadav/studios/deploy-background-removal-api-with-litserve">Background Removal</a>, <a target="_blank" href="https://lightning.ai/lightning-ai/studios/deploy-a-controlled-image-generation-api-controlnet">Control Stable Diffusion (ControlNet)</a>
-<strong>Speech:</strong>         <a target="_blank" href="https://lightning.ai/lightning-ai/studios/deploy-a-voice-clone-api-coqui-xtts-v2-model">Text-speech (XTTS V2)</a>
+<strong>Speech:</strong>         <a target="_blank" href="https://lightning.ai/lightning-ai/studios/deploy-a-voice-clone-api-coqui-xtts-v2-model">Text-speech (XTTS V2)</a>, <a target="_blank" href="https://lightning.ai/bhimrajyadav/studios/deploy-a-speech-generation-api-using-parler-tts-powered-by-litserve">Parler-TTS</a>
 <strong>Classical ML:</strong>   <a target="_blank" href="https://lightning.ai/lightning-ai/studios/deploy-random-forest-with-litserve">Random forest</a>, <a target="_blank" href="https://lightning.ai/lightning-ai/studios/deploy-xgboost-with-litserve">XGBoost</a>
 <strong>Miscellaneous:</strong>  <a target="_blank" href="https://lightning.ai/lightning-ai/studios/deploy-an-media-conversion-api-with-ffmpeg">Media conversion API (ffmpeg)</a>, <a target="_blank" href="https://lightning.ai/lightning-ai/studios/deploy-both-pytorch-and-tensorflow-in-a-single-api">PyTorch + TensorFlow in one API</a>
 </pre>

diff --git a/_requirements/perf.txt b/_requirements/perf.txt
@@ -0,0 +1,3 @@
+uvloop
+tenacity
+jsonargparse
diff --git a/_requirements/test.txt b/_requirements/test.txt
@@ -1,7 +1,7 @@
 coverage[toml] >=7.5.3
 pytest >=8.0
 pytest-cov
-mypy ==1.11.1
+mypy ==1.11.2
 pytest-asyncio
 asgi-lifespan
 python-multipart

diff --git a/requirements.txt b/requirements.txt
@@ -1,3 +1,3 @@
 fastapi >=0.100
 httpx
-uvicorn >=0.29.0
+uvicorn[standard] >=0.29.0
diff --git a/src/litserve/__init__.py b/src/litserve/__init__.py
@@ -16,5 +16,6 @@
 from litserve.server import LitServer, Request, Response
 from litserve import test_examples
 from litserve.specs.openai import OpenAISpec
+from litserve.callbacks import Callback
 
-__all__ = ["LitAPI", "LitServer", "Request", "Response", "test_examples", "OpenAISpec"]
+__all__ = ["LitAPI", "LitServer", "Request", "Response", "test_examples", "OpenAISpec", "Callback"]
diff --git a/src/litserve/callbacks/__init__.py b/src/litserve/callbacks/__init__.py
@@ -0,0 +1,3 @@
+from .base import Callback, CallbackRunner, EventTypes, NoopCallback
+
+__all__ = ["Callback", "CallbackRunner", "EventTypes", "NoopCallback"]
diff --git a/src/litserve/callbacks/base.py b/src/litserve/callbacks/base.py
@@ -0,0 +1,80 @@
+import dataclasses
+import logging
+from abc import ABC
+from typing import List, Union
+
+logger = logging.getLogger(__name__)
+
+
+@dataclasses.dataclass
+class EventTypes:
+    """Names of the lifecycle events that a callback can react to.
+
+    Every value is the name of the ``Callback`` method that handles the event, so
+    it can be passed as ``event_name`` to ``CallbackRunner.trigger_event``.
+    """
+
+    # NOTE(review): none of these attributes is annotated, so the dataclass
+    # decorator registers no fields; they are plain class-level string constants.
+    BEFORE_SETUP = "on_before_setup"
+    AFTER_SETUP = "on_after_setup"
+    BEFORE_DECODE_REQUEST = "on_before_decode_request"
+    AFTER_DECODE_REQUEST = "on_after_decode_request"
+    BEFORE_ENCODE_RESPONSE = "on_before_encode_response"
+    AFTER_ENCODE_RESPONSE = "on_after_encode_response"
+    BEFORE_PREDICT = "on_before_predict"
+    AFTER_PREDICT = "on_after_predict"
+    ON_SERVER_START = "on_server_start"
+    ON_SERVER_END = "on_server_end"
+
+
+class Callback(ABC):
+    """Base class for callbacks; every hook is a no-op unless overridden.
+
+    Each hook accepts arbitrary positional and keyword arguments and has no body
+    beyond its docstring, so subclasses only override the hooks they need. The
+    class derives from ``ABC`` but declares no abstract methods.
+    """
+
+    def on_before_setup(self, *args, **kwargs):
+        """Called before setup is started."""
+
+    def on_after_setup(self, *args, **kwargs):
+        """Called after setup is completed."""
+
+    def on_before_decode_request(self, *args, **kwargs):
+        """Called before request decoding is started."""
+
+    def on_after_decode_request(self, *args, **kwargs):
+        """Called after request decoding is completed."""
+
+    def on_before_encode_response(self, *args, **kwargs):
+        """Called before response encoding is started."""
+
+    def on_after_encode_response(self, *args, **kwargs):
+        """Called after response encoding is completed."""
+
+    def on_before_predict(self, *args, **kwargs):
+        """Called before prediction is started."""
+
+    def on_after_predict(self, *args, **kwargs):
+        """Called after prediction is completed."""
+
+    def on_server_start(self, *args, **kwargs):
+        """Called before server starts."""
+
+    def on_server_end(self, *args, **kwargs):
+        """Called when server terminates."""
+
+
+class CallbackRunner:
+    """Holds a list of callbacks and dispatches named events to each of them."""
+
+    def __init__(self, callbacks: Union[Callback, List[Callback], None] = None):
+        """Create a runner, optionally pre-populated with callbacks.
+
+        Args:
+            callbacks: A single callback, a list or tuple of callbacks, or ``None``
+                to start with no callbacks registered.
+
+        Raises:
+            ValueError: If any supplied object is not a ``Callback`` instance.
+        """
+        self._callbacks = []
+        # Compare against None instead of relying on truthiness, so a callback that
+        # happens to define ``__bool__``/``__len__`` is never silently dropped.
+        if callbacks is not None:
+            self._add_callbacks(callbacks)
+
+    def _add_callbacks(self, callbacks: Union[Callback, List[Callback]]):
+        """Validate and register one or more callbacks.
+
+        Args:
+            callbacks: A single callback, or a list or tuple of callbacks.
+
+        Raises:
+            ValueError: If any element is not a ``Callback`` instance. Validation
+                runs before registration, so nothing is added in that case.
+        """
+        # Tuples are treated like lists; any other object is taken to be a single callback.
+        if not isinstance(callbacks, (list, tuple)):
+            callbacks = [callbacks]
+        for callback in callbacks:
+            if not isinstance(callback, Callback):
+                raise ValueError(f"Invalid callback type: {callback}")
+        self._callbacks.extend(callbacks)
+
+    def trigger_event(self, event_name, *args, **kwargs):
+        """Triggers an event, invoking all registered callbacks for that event.
+
+        Args:
+            event_name: Name of the hook method to look up on each callback.
+            *args: Positional arguments forwarded to the hook.
+            **kwargs: Keyword arguments forwarded to the hook.
+
+        Any ``Exception`` raised while looking up or running a hook is logged with
+        its traceback and does not propagate.
+        """
+        for callback in self._callbacks:
+            try:
+                getattr(callback, event_name)(*args, **kwargs)
+            except Exception:
+                # Handle exceptions to prevent one callback from disrupting others
+                logger.exception("Error in callback '%s' during event '%s'", callback, event_name)
+
+
+class NoopCallback(Callback):
+    """Callback that overrides no hooks, so every event it receives does nothing."""
diff --git a/src/litserve/callbacks/defaults/__init__.py b/src/litserve/callbacks/defaults/__init__.py
@@ -0,0 +1,3 @@
+from litserve.callbacks.defaults.metric_callback import PredictionTimeLogger
+
+__all__ = ["PredictionTimeLogger"]
diff --git a/src/litserve/callbacks/defaults/metric_callback.py b/src/litserve/callbacks/defaults/metric_callback.py
@@ -0,0 +1,21 @@
+import time
+import typing
+from logging import getLogger
+
+from ..base import Callback
+
+if typing.TYPE_CHECKING:
+    from litserve import LitAPI
+
+logger = getLogger(__name__)
+
+
+class PredictionTimeLogger(Callback):
+    """Callback that prints how long each prediction took.
+
+    The start time is stored on the callback instance itself.
+    NOTE(review): overlapping predictions sharing one instance would overwrite each
+    other's start time - confirm how the hooks are invoked.
+    """
+
+    # Class-level default so on_after_predict can tell that no start time was recorded.
+    _start_time = None
+
+    def on_before_predict(self, lit_api: "LitAPI", *args, **kwargs):
+        """Record the time at which the prediction starts.
+
+        Extra positional and keyword arguments are accepted and ignored, matching
+        the ``*args, **kwargs`` signature of the base-class hook.
+        """
+        self._start_time = time.perf_counter()
+
+    def on_after_predict(self, lit_api: "LitAPI", *args, **kwargs):
+        """Print the time elapsed since the matching ``on_before_predict`` call.
+
+        If no start time was recorded, a warning is logged and nothing is printed.
+        """
+        started_at = self._start_time
+        if started_at is None:
+            logger.warning("on_after_predict called without a matching on_before_predict; skipping timing")
+            return
+        elapsed = time.perf_counter() - started_at
+        print(f"Prediction took {elapsed:.2f} seconds", flush=True)