Skip to content

Commit

Permalink
支持edge_tts
Browse files Browse the repository at this point in the history
  • Loading branch information
shell-nlp committed Dec 23, 2024
1 parent d682f61 commit 8ba4a84
Show file tree
Hide file tree
Showing 8 changed files with 183 additions and 103 deletions.
10 changes: 6 additions & 4 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
# FROM docker.rainbond.cc/506610466/cuda:12.2.0-runtime-ubuntu20.04-uv
FROM 506610466/cuda:12.2.0-runtime-ubuntu20.04-uv
# FROM 506610466/cuda:12.2.0-runtime-ubuntu20.04-uv
# Build on top of the prebuilt base image to speed up the build
FROM 506610466/gpt_server:base
COPY ./ /gpt_server
WORKDIR /gpt_server

RUN uv venv --seed && uv sync && uv cache clean && \
echo '[[ -f .venv/bin/activate ]] && source .venv/bin/activate' >> ~/.bashrc
RUN uv sync && uv cache clean
# RUN uv venv --seed && uv sync && uv cache clean && \
# echo '[[ -f .venv/bin/activate ]] && source .venv/bin/activate' >> ~/.bashrc

CMD ["/bin/bash"]
2 changes: 1 addition & 1 deletion Dockerfile.copy
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM docker.rainbond.cc/506610466/gpt_server:latest
FROM hub.geekery.cn/506610466/gpt_server:latest

COPY ./ /gpt_server

Expand Down
20 changes: 20 additions & 0 deletions gpt_server/openai_api_protocol/custom_api_protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,26 @@
from pydantic import Field, BaseModel


# Request body accepted by the text-to-speech endpoint.
# Only `input` is required; every other field carries a default.
class SpeechRequest(BaseModel):
    # Identifier of the TTS backend to use.
    model: str = Field(
        description="One of the available TTS models:",
        default="edge_tts",
    )
    # Text to synthesize (required, no default).
    input: str = Field(
        description="The text to generate audio for. The maximum length is 4096 characters."
    )
    # Voice short name.
    voice: str = Field(
        description="The voice to use when generating the audio",
        default="zh-CN-YunxiNeural",
    )
    # Requested audio container/codec.
    response_format: Optional[str] = Field(
        description="The format of the audio",
        default="mp3",
    )
    # Playback speed multiplier.
    speed: Optional[float] = Field(
        description="The speed of the generated audio. Select a value from 0.25 to 4.0. 1.0 is the default.",
        default=1.0,
    )


class ModerationsRequest(BaseModel):
input: Union[str, List[str]]
model: str
Expand Down
26 changes: 25 additions & 1 deletion gpt_server/serving/openai_api_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from fastapi import Depends, HTTPException
from fastapi.exceptions import RequestValidationError
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse, JSONResponse
from fastapi.responses import StreamingResponse, JSONResponse, FileResponse
from fastapi.security.http import HTTPAuthorizationCredentials, HTTPBearer
import httpx

Expand Down Expand Up @@ -699,7 +699,31 @@ async def generate_completion(payload: Dict[str, Any], worker_addr: str):
CustomEmbeddingsRequest,
RerankRequest,
ModerationsRequest,
SpeechRequest,
)
import edge_tts
import uuid

OUTPUT_DIR = "./edge_tts_cache"


# Lazily populated set of voice short names supported by edge-tts.
_SUPPORTED_VOICES = None


async def _get_supported_voices():
    """Return the set of edge-tts voice short names, fetched once per process."""
    global _SUPPORTED_VOICES
    if _SUPPORTED_VOICES is None:
        # list_voices() queries the remote voice catalogue, so the result is
        # kept for later requests instead of being re-fetched every call.
        voices = await edge_tts.list_voices()
        _SUPPORTED_VOICES = {voice["ShortName"] for voice in voices}
    return _SUPPORTED_VOICES


def _speed_to_rate(speed):
    """Convert a speed multiplier (1.0 = normal) to an edge-tts rate string.

    edge-tts expects a signed percentage relative to normal speed, e.g.
    ``"+50%"`` for 1.5x. ``None`` is treated as 1.0 and the value is clamped
    to the 0.25-4.0 range advertised by ``SpeechRequest.speed``.
    """
    if speed is None:
        speed = 1.0
    speed = min(max(speed, 0.25), 4.0)
    return f"{round((speed - 1.0) * 100):+d}%"


def _remove_file(path):
    """Delete *path*, ignoring the case where it is already gone."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


@app.post("/v1/audio/speech", dependencies=[Depends(check_api_key)])
async def speech(request: SpeechRequest):
    """Synthesize ``request.input`` with edge-tts and return it as an MP3 file.

    Returns a 400 JSON error when ``request.voice`` is not a known edge-tts
    voice. ``request.speed`` is mapped onto the edge-tts ``rate`` option.
    ``request.response_format`` is not consulted: the output is always MP3.
    The temporary audio file is deleted once the response has been sent.
    """
    # Local import keeps this block self-contained; starlette ships with fastapi.
    from starlette.background import BackgroundTask

    if request.voice not in await _get_supported_voices():
        return JSONResponse(
            ErrorResponse(
                message=f"不支持voice:{request.voice}", code=ErrorCode.INVALID_MODEL
            ).dict(),
            status_code=400,
        )

    os.makedirs(OUTPUT_DIR, exist_ok=True)  # no error if it already exists
    filename = f"{uuid.uuid4()}.mp3"
    output_path = os.path.join(OUTPUT_DIR, filename)
    communicate = edge_tts.Communicate(
        text=request.input,
        voice=request.voice,
        rate=_speed_to_rate(request.speed),
    )
    try:
        await communicate.save(output_path)
    except BaseException:
        # Also covers cancellation: never leave a partial file behind.
        _remove_file(output_path)
        raise

    # The background task runs after the body is sent, so files no longer
    # accumulate in OUTPUT_DIR.
    return FileResponse(
        output_path,
        media_type="audio/mpeg",
        filename=filename,
        background=BackgroundTask(_remove_file, output_path),
    )


@app.post("/v1/moderations", dependencies=[Depends(check_api_key)])
Expand Down
5 changes: 5 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ dependencies = [
"qwen_vl_utils",
"evalscope[perf]==0.7.0",
"modelscope==1.20.1",
"edge-tts>=7.0.0",
]

[tool.uv]
Expand All @@ -37,6 +38,10 @@ override-dependencies = [

]

[[tool.uv.index]]
url = "https://pypi.tuna.tsinghua.edu.cn/simple"
default = true

[project.scripts]
gpt_server = "gpt_server.cli:main"

Expand Down
14 changes: 11 additions & 3 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ aiohappyeyeballs==2.4.4
aiohttp==3.11.11
# via
# datasets
# edge-tts
# evalscope
# fschat
# fsspec
Expand Down Expand Up @@ -75,6 +76,7 @@ cachetools==5.5.0
# streamlit
certifi==2024.12.14
# via
# edge-tts
# httpcore
# httpx
# requests
Expand All @@ -84,7 +86,7 @@ cffi==1.17.1
# soundfile
charset-normalizer==3.4.0
# via requests
click==8.1.7
click==8.1.8
# via
# nltk
# ray
Expand Down Expand Up @@ -133,6 +135,8 @@ diskcache==5.6.3
# outlines
distro==1.9.0
# via openai
edge-tts==7.0.0
# via gpt-server (pyproject.toml)
editdistance==0.8.1
# via evalscope
einops==0.8.0
Expand Down Expand Up @@ -256,7 +260,7 @@ interegular==0.3.3
# outlines-core
jieba==0.42.1
# via evalscope
jinja2==3.1.4
jinja2==3.1.5
# via
# altair
# gradio
Expand Down Expand Up @@ -741,6 +745,8 @@ sortedcontainers==2.4.0
# via modelscope
soundfile==0.12.1
# via infinity-emb
srt==3.5.3
# via edge-tts
sse-starlette==2.1.3
# via evalscope
starlette==0.38.6
Expand All @@ -759,6 +765,7 @@ sympy==1.13.1
# torch
tabulate==0.9.0
# via
# edge-tts
# evalscope
# sacrebleu
tenacity==9.0.0
Expand Down Expand Up @@ -865,6 +872,7 @@ typing-extensions==4.12.2
# via
# altair
# anyio
# edge-tts
# fastapi
# gradio
# gradio-client
Expand All @@ -885,7 +893,7 @@ tzdata==2024.2
# via pandas
unicorn==2.1.1
# via evalscope
urllib3==2.2.3
urllib3==2.3.0
# via
# modelscope
# requests
Expand Down
12 changes: 12 additions & 0 deletions tests/test_tts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
"""Manual smoke test for the ``/v1/audio/speech`` endpoint.

Run this file directly against a server listening on ``localhost:8082``.
The request lives behind a ``__main__`` guard so that merely importing the
module (for example during pytest collection) performs no network I/O.
"""

from pathlib import Path

from openai import OpenAI


def main() -> None:
    """Request synthesized speech and save it as ``speech.mp3`` beside this file."""
    # Uses the client interface of the newer openai package.
    client = OpenAI(api_key="EMPTY", base_url="http://localhost:8082/v1")
    speech_file_path = Path(__file__).parent / "speech.mp3"
    response = client.audio.speech.create(
        model="edge_tts",
        voice="zh-CN-YunxiNeural",
        input="你好啊,我是人工智能。",
    )
    response.write_to_file(speech_file_path)


if __name__ == "__main__":
    main()
Loading

0 comments on commit 8ba4a84

Please sign in to comment.