Containerized serving.py (metavoiceio#17)

* Containerized
* containerized
* containerized 0.1
* assets
* Update README.md
  Signed-off-by: Lucas Hänke de Cansino <lhc@next-boss.eu>
* Update serving.py
  Signed-off-by: Lucas Hänke de Cansino <lhc@next-boss.eu>
* Update .gitignore
  Signed-off-by: Lucas Hänke de Cansino <lhc@next-boss.eu>
* Delete assets/GER_F_SylviaF.flac
  Signed-off-by: Lucas Hänke de Cansino <lhc@next-boss.eu>
* Delete assets/barackobamafederalplaza.flac
  Signed-off-by: Lucas Hänke de Cansino <lhc@next-boss.eu>
* requested changes
* add flash-attn
* update: docker compose with common configs
* feat: add health check endpoint
* feat: make services naming terse
* feat: reduce health check durations
* update: README.md

---------

Signed-off-by: Lucas Hänke de Cansino <lhc@next-boss.eu>
Co-authored-by: l4b4r4b4b4 <l4b4r4b4b4>
Co-authored-by: sid <sid@themetavoice.xyz>
Co-authored-by: Vatsal Aggarwal <vatsal@themetavoice.xyz>
Pkpk11 · Feb 22, 2024 · 33cd288 · 33cd288
1 parent 394b7fc
commit 33cd288
Show file tree

Hide file tree

Showing 6 changed files with 119 additions and 4 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,4 @@
+.vscode
 *.pkl
 *.flac
 *.npz

diff --git a/Dockerfile b/Dockerfile
@@ -0,0 +1,32 @@
+FROM nvidia/cuda:12.1.0-devel-ubuntu22.04 AS base
+
+# Install system dependencies in a single RUN command to reduce layers
+# Combine apt-get update, upgrade, and installation of packages. Clean up in the same layer to reduce image size.
+RUN apt-get update && \
+    apt-get upgrade -y && \
+    apt-get install -y python3.10 python3-pip git wget curl build-essential && \
+    apt-get autoremove -y && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# Install static ffmpeg/ffprobe binaries; the archive is checked against its published MD5 before unpacking
+RUN wget https://johnvansickle.com/ffmpeg/builds/ffmpeg-git-amd64-static.tar.xz &&\
+    wget https://johnvansickle.com/ffmpeg/builds/ffmpeg-git-amd64-static.tar.xz.md5 &&\
+    md5sum -c ffmpeg-git-amd64-static.tar.xz.md5 &&\
+    tar xvf ffmpeg-git-amd64-static.tar.xz &&\
+    mv ffmpeg-git-*-static/ffprobe ffmpeg-git-*-static/ffmpeg /usr/local/bin/ &&\
+    rm -rf ffmpeg-git-*
+
+WORKDIR /app
+
+COPY requirements.txt requirements.txt
+
+RUN pip install --no-cache-dir packaging wheel torch
+RUN pip install --no-cache-dir flash-attn
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY . .
+
+RUN pip install --no-cache-dir -e .
+
+ENTRYPOINT ["python3.10", "fam/llm/serving.py"]
diff --git a/README.md b/README.md
@@ -17,6 +17,18 @@ MetaVoice-1B is a 1.2B parameter base model trained on 100K hours of speech for
 We’re releasing MetaVoice-1B under the Apache 2.0 license, *it can be used without restrictions*.
 
 
+## Quickstart - tl;dr
+
+Web UI
+```bash
+docker-compose up -d ui && docker-compose ps && docker-compose logs -f
+```
+
+Server
+```bash
+docker-compose up -d server && docker-compose ps && docker-compose logs -f
+```
+
 ## Installation  
 
 **Pre-requisites:**

diff --git a/app.py b/app.py
@@ -65,7 +65,7 @@
 MAX_CHARS = 220
 PRESET_VOICES = {
     # female
-    "Bria": "https://cdn.themetavoice.xyz/speakers/bria.mp3",
+    "Bria": "https://cdn.themetavoice.xyz/speakers%2Fbria.mp3",
     # male
     "Alex": "https://cdn.themetavoice.xyz/speakers/alex.mp3",
     "Jacob": "https://cdn.themetavoice.xyz/speakers/jacob.wav",
@@ -230,5 +230,9 @@ def change_voice_selection_layout(choice):
     )
 
 
-demo.queue(default_concurrency_limit=2)
-demo.launch(favicon_path=os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets/favicon.ico"))
+demo.queue()
+demo.launch(
+    favicon_path=os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets/favicon.ico"),
+    server_name="0.0.0.0",  # listen on all interfaces rather than loopback only
+    server_port=7861,  # same port that docker-compose.yml publishes for the ui service
+)
diff --git a/docker-compose.yml b/docker-compose.yml
@@ -0,0 +1,61 @@
+version: "3.5"
+
+networks:
+  metavoice-net:
+    driver: bridge
+
+volumes:
+  hf-cache:
+    driver: local
+
+x-common-settings: &common-settings # shared by every service below via the "<<" merge key
+  volumes:
+    - hf-cache:/.hf-cache # named volume mounted where HF_HOME (below) points
+    - ./assets:/app/assets
+  deploy:
+    replicas: 1
+    resources:
+      reservations:
+        devices:
+          - driver: nvidia
+            count: 1
+            capabilities: [ gpu ]
+  runtime: nvidia
+  ipc: host
+  tty: true # enable colorized logs
+  build:
+    context: .
+  image: metavoice-server:latest # both services build and run this same image
+  networks:
+    - metavoice-net
+  environment:
+    - NVIDIA_VISIBLE_DEVICES=all
+    - HF_HOME=/.hf-cache
+  logging:
+    options:
+      max-size: "100m"
+      max-file: "10"
+
+services:
+  server:
+    <<: *common-settings
+    container_name: metavoice-server
+    command: [ "--port=58004" ] # appended to the image ENTRYPOINT (fam/llm/serving.py)
+    ports:
+      - 58004:58004
+    healthcheck:
+      # -f makes curl exit non-zero on HTTP 4xx/5xx so a failing endpoint is reported as unhealthy
+      test: [ "CMD", "curl", "-f", "http://metavoice-server:58004/health" ]
+      interval: 1m
+      timeout: 10s
+      retries: 20
+  ui:
+    <<: *common-settings
+    container_name: metavoice-ui
+    entrypoint: [ "python3.10", "app.py" ] # replaces the image ENTRYPOINT to start the web UI instead
+    ports:
+      - 7861:7861
+    healthcheck:
+      test: [ "CMD", "curl", "-f", "http://localhost:7861" ] # -f: treat HTTP error statuses as failures
+      interval: 1m
+      timeout: 10s
+      retries: 1
diff --git a/fam/llm/serving.py b/fam/llm/serving.py
@@ -88,6 +88,11 @@ class TTSRequest:
     top_k: Optional[int] = None
 
 
+@app.get("/health")
+async def health_check():  # endpoint polled by the docker-compose healthcheck of the server service
+    return {"status": "ok"}
+
+
 @app.post("/tts", response_class=Response)
 async def text_to_speech(req: Request):
     audiodata = await req.body()
@@ -201,7 +206,7 @@ def _convert_audiodata_to_wav_path(audiodata, wav_tmp):
     # start server
     uvicorn.run(
         app,
-        host="127.0.0.1",
+        host="0.0.0.0",
         port=GlobalState.config.port,
         log_level="info",
     )