add streaming no-batch speed test (#283)
* add single streaming test

* update docstring

* add tenacity dep

* GH CI metrics
aniketmaurya authored Sep 20, 2024
1 parent 4d2ac63 commit 718682c
Showing 4 changed files with 117 additions and 2 deletions.
7 changes: 5 additions & 2 deletions .github/workflows/ci-parity.yml
@@ -26,8 +26,11 @@ jobs:
       - name: Install LitServe
         run: |
           pip --version
-          pip install . torchvision jsonargparse uvloop -U -q -r _requirements/test.txt -U -q
+          pip install . torchvision jsonargparse uvloop tenacity -U -q -r _requirements/test.txt -U -q
           pip list
-      - name: Tests
+      - name: Parity test
         run: export PYTHONPATH=$PWD && python tests/parity_fastapi/main.py
+
+      - name: Streaming speed test
+        run: bash tests/perf_test/stream/run_test.sh
29 changes: 29 additions & 0 deletions tests/perf_test/stream/run_test.sh
@@ -0,0 +1,29 @@
#!/bin/bash
# Test that the server streams tokens fast enough (thresholds live in benchmark.py)

# Function to clean up the server process on exit
cleanup() {
  pkill -f "python tests/perf_test/stream/stream_speed/server.py"
}

# Trap script exit to run cleanup
trap cleanup EXIT

# Start the server in the background and capture its PID
python tests/perf_test/stream/stream_speed/server.py &
SERVER_PID=$!

echo "Server started with PID $SERVER_PID"

# Run the benchmark script (it polls the server's /health endpoint before measuring)
echo "Preparing to run benchmark.py..."

export PYTHONPATH=$PWD && python tests/perf_test/stream/stream_speed/benchmark.py

# Check if benchmark.py exited successfully
if [ $? -ne 0 ]; then
  echo "benchmark.py failed to run successfully."
  exit 1
else
  echo "benchmark.py ran successfully."
fi
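
To reproduce the CI check locally, assuming LitServe and tenacity are installed, the same script runs from the repository root:

bash tests/perf_test/stream/run_test.sh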
58 changes: 58 additions & 0 deletions tests/perf_test/stream/stream_speed/benchmark.py
@@ -0,0 +1,58 @@
"""Consume 10K tokens from the stream endpoint and measure the speed."""

import logging
import time

import requests
from tenacity import retry, stop_after_attempt

logger = logging.getLogger(__name__)
# Configuration
SERVER_URL = "http://0.0.0.0:8000/predict"
TOTAL_TOKENS = 10000
EXPECTED_TTFT = 0.005 # time to first token

# tokens per second
MAX_SPEED = 3600 # 3600 on GitHub CI, 10000 on M3 Pro

session = requests.Session()


def speed_test():
start = time.time()
resp = session.post(SERVER_URL, stream=True, json={"input": 1})
num_tokens = 0
ttft = None # time to first token
for line in resp.iter_lines():
if not line:
continue
if ttft is None:
ttft = time.time() - start
print(f"Time to first token: {ttft}")
assert ttft < EXPECTED_TTFT, "Expected time to first token to be less than 0.1 seconds"
num_tokens += 1
end = time.time()
resp.raise_for_status()
assert num_tokens == TOTAL_TOKENS, f"Expected {TOTAL_TOKENS} tokens, got {num_tokens}"
speed = num_tokens / (end - start)
return {"speed": speed, "time": end - start}


@retry(stop=stop_after_attempt(10))
def main():
for i in range(10):
try:
resp = requests.get("http://localhost:8000/health")
if resp.status_code == 200:
break
except requests.exceptions.ConnectionError as e:
logger.error(f"Error connecting to server: {e}")
time.sleep(10)
data = speed_test()
speed = data["speed"]
print(data)
assert speed >= MAX_SPEED, f"Expected streaming speed to be greater than {MAX_SPEED}, got {speed}"


if __name__ == "__main__":
main()
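
Note that @retry(stop=stop_after_attempt(10)) re-runs all of main() (the health poll, the speed test, and the assertions) up to 10 times before a failure propagates, which absorbs transient slowness on shared CI runners. The benchmark can also be pointed at an already-running server directly, as the run_test.sh script does:

export PYTHONPATH=$PWD && python tests/perf_test/stream/stream_speed/benchmark.py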
25 changes: 25 additions & 0 deletions tests/perf_test/stream/stream_speed/server.py
@@ -0,0 +1,25 @@
import litserve as ls


class SimpleStreamingAPI(ls.LitAPI):
    def setup(self, device) -> None:
        self.model = None

    def decode_request(self, request):
        return request["input"]

    def predict(self, x):
        # Stream 10K tokens, matching TOTAL_TOKENS in benchmark.py
        yield from range(10000)

    def encode_response(self, output_stream):
        for output in output_stream:
            yield {"output": output}


if __name__ == "__main__":
    api = SimpleStreamingAPI()
    server = ls.LitServer(
        api,
        stream=True,
    )
    server.run(port=8000, generate_client_file=False)
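
For a quick manual check, the streaming endpoint can also be exercised with curl once the server is running (-N disables output buffering so tokens print as they arrive); the /predict route and JSON body below match what benchmark.py sends:

curl -N -X POST http://localhost:8000/predict -H "Content-Type: application/json" -d '{"input": 1}'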
