Skip to content

Commit

Permalink
Containerized serving.py (metavoiceio#17)
Browse files Browse the repository at this point in the history
* Containerized

* containerized

* containerized 0.1

* assets

* Update README.md

Signed-off-by: Lucas Hänke de Cansino <lhc@next-boss.eu>

* Update serving.py

Signed-off-by: Lucas Hänke de Cansino <lhc@next-boss.eu>

* Update .gitignore

Signed-off-by: Lucas Hänke de Cansino <lhc@next-boss.eu>

* Delete assets/GER_F_SylviaF.flac

Signed-off-by: Lucas Hänke de Cansino <lhc@next-boss.eu>

* Delete assets/barackobamafederalplaza.flac

Signed-off-by: Lucas Hänke de Cansino <lhc@next-boss.eu>

* requested changes

* add flash-attn

* update: docker compose with common configs

* feat: add health check endpoint

* feat: make services naming terse

* feat: reduce health check durations

* update: README.md

---------

Signed-off-by: Lucas Hänke de Cansino <lhc@next-boss.eu>
Co-authored-by: l4b4r4b4b4 <l4b4r4b4b4>
Co-authored-by: sid <sid@themetavoice.xyz>
Co-authored-by: Vatsal Aggarwal <vatsal@themetavoice.xyz>
  • Loading branch information
3 people authored Feb 22, 2024
1 parent 394b7fc commit 33cd288
Show file tree
Hide file tree
Showing 6 changed files with 119 additions and 4 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
.vscode
*.pkl
*.flac
*.npz
Expand Down
32 changes: 32 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Container image for this project: starts from the CUDA 12.1 "devel" image on
# Ubuntu 22.04, installs system tools, a static ffmpeg build and the Python
# dependencies, then runs the serving script by default.
FROM nvidia/cuda:12.1.0-devel-ubuntu22.04 as base

# Install system dependencies in a single RUN command to reduce layers.
# apt-get update, upgrade and package installation are combined, and the apt
# caches are removed in the same layer to reduce image size.
RUN apt-get update && \
apt-get upgrade -y && \
apt-get install -y python3.10 python3-pip git wget curl build-essential && \
apt-get autoremove -y && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

# Install ffmpeg and ffprobe from a prebuilt static archive: download the
# archive and its .md5 file, verify the checksum, move the two binaries into
# /usr/local/bin, then delete the downloaded and extracted files.
# NOTE(review): the checksum comes from the same host as the archive, so it
# detects a corrupted download rather than a tampered one.
# NOTE(review): the "ffmpeg-git" URL looks like a rolling snapshot rather than
# a pinned release - confirm whether reproducible builds are required.
RUN wget https://johnvansickle.com/ffmpeg/builds/ffmpeg-git-amd64-static.tar.xz &&\
wget https://johnvansickle.com/ffmpeg/builds/ffmpeg-git-amd64-static.tar.xz.md5 &&\
md5sum -c ffmpeg-git-amd64-static.tar.xz.md5 &&\
tar xvf ffmpeg-git-amd64-static.tar.xz &&\
mv ffmpeg-git-*-static/ffprobe ffmpeg-git-*-static/ffmpeg /usr/local/bin/ &&\
rm -rf ffmpeg-git-*

WORKDIR /app

# Copy only requirements.txt first so the dependency layers below can be reused
# from the build cache when only application code changes.
COPY requirements.txt requirements.txt

# Install order: build helpers and torch, then flash-attn, then the project's
# requirements.txt.
# NOTE(review): presumably flash-attn's build needs packaging/wheel/torch to be
# installed already - confirm before reordering or merging these steps.
RUN pip install --no-cache-dir packaging wheel torch
RUN pip install --no-cache-dir flash-attn
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the build context into /app.
COPY . .

# Install the project itself in editable mode.
RUN pip install --no-cache-dir -e .

# Default process is the serving script. Because this is the exec form, any
# arguments supplied at run time (e.g. a compose `command`) are appended to it.
ENTRYPOINT ["python3.10", "fam/llm/serving.py"]
12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,18 @@ MetaVoice-1B is a 1.2B parameter base model trained on 100K hours of speech for
We’re releasing MetaVoice-1B under the Apache 2.0 license; *it can be used without restrictions*.


## Quickstart - tl;dr

Web UI
```bash
docker-compose up -d ui && docker-compose ps && docker-compose logs -f
```

Server
```bash
docker-compose up -d server && docker-compose ps && docker-compose logs -f
```

## Installation

**Pre-requisites:**
Expand Down
10 changes: 7 additions & 3 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@
MAX_CHARS = 220
PRESET_VOICES = {
# female
"Bria": "https://cdn.themetavoice.xyz/speakers/bria.mp3",
"Bria": "https://cdn.themetavoice.xyz/speakers%2Fbria.mp3",
# male
"Alex": "https://cdn.themetavoice.xyz/speakers/alex.mp3",
"Jacob": "https://cdn.themetavoice.xyz/speakers/jacob.wav",
Expand Down Expand Up @@ -230,5 +230,9 @@ def change_voice_selection_layout(choice):
)


demo.queue(default_concurrency_limit=2)
demo.launch(favicon_path=os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets/favicon.ico"))
demo.queue()
demo.launch(
favicon_path=os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets/favicon.ico"),
server_name="0.0.0.0",
server_port=7861,
)
61 changes: 61 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Compose definition for two services built from the Dockerfile in this
# directory: `server` (started with --port=58004) and `ui` (runs app.py,
# published on port 7861). Both share the settings in `x-common-settings`.
version: "3.5"

networks:
  metavoice-net:
    driver: bridge

volumes:
  # Named volume mounted at /.hf-cache; HF_HOME below points at the same path.
  hf-cache:
    driver: local

# Settings merged into every service via the `<<: *common-settings` merge key.
x-common-settings: &common-settings
  volumes:
    - hf-cache:/.hf-cache
    - ./assets:/app/assets
  deploy:
    replicas: 1
    resources:
      reservations:
        devices:
          - driver: nvidia
            count: 1
            capabilities: [ gpu ]
  runtime: nvidia
  ipc: host
  tty: true # enable colorized logs
  build:
    context: .
  image: metavoice-server:latest
  networks:
    - metavoice-net
  environment:
    - NVIDIA_VISIBLE_DEVICES=all
    - HF_HOME=/.hf-cache
  logging:
    options:
      max-size: "100m"
      max-file: "10"

services:
  server:
    <<: *common-settings
    container_name: metavoice-server
    # Appended to the image's ENTRYPOINT as an argument.
    command: [ "--port=58004" ]
    ports:
      - 58004:58004
    healthcheck:
      # -f makes curl exit non-zero on HTTP error statuses (>= 400); without
      # it, any HTTP response at all would count as healthy.
      test: [ "CMD", "curl", "-f", "http://metavoice-server:58004/health" ]
      interval: 1m
      timeout: 10s
      retries: 20
  ui:
    <<: *common-settings
    container_name: metavoice-ui
    # Replaces the image's ENTRYPOINT so this container runs app.py instead.
    entrypoint: [ "python3.10", "app.py" ]
    ports:
      - 7861:7861
    healthcheck:
      # -f: treat HTTP error responses as a failed check (see `server`).
      test: [ "CMD", "curl", "-f", "http://localhost:7861" ]
      interval: 1m
      timeout: 10s
      retries: 1
7 changes: 6 additions & 1 deletion fam/llm/serving.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,11 @@ class TTSRequest:
top_k: Optional[int] = None


@app.get("/health")
async def health_check():
    """Health endpoint: reply to ``GET /health`` with a fixed status payload.

    Returns:
        dict: always ``{"status": "ok"}``.
    """
    payload = {"status": "ok"}
    return payload


@app.post("/tts", response_class=Response)
async def text_to_speech(req: Request):
audiodata = await req.body()
Expand Down Expand Up @@ -201,7 +206,7 @@ def _convert_audiodata_to_wav_path(audiodata, wav_tmp):
# start server
uvicorn.run(
app,
host="127.0.0.1",
host="0.0.0.0",
port=GlobalState.config.port,
log_level="info",
)

0 comments on commit 33cd288

Please sign in to comment.