Skip to content

Commit 395a745

Browse files
committed
发布0.6.2
1 parent f5b44c4 commit 395a745

File tree

9 files changed

+3314
-3176
lines changed

9 files changed

+3314
-3176
lines changed

.github/workflows/docker-image.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ jobs:
3333
# 构建 Docker 镜像
3434
- name: Build Docker image
3535
run: |
36-
docker build -t ${{ secrets.DOCKER_USERNAME }}/gpt_server:${{ env.VERSION }} .
36+
docker build -f Dockerfile -t ${{ secrets.DOCKER_USERNAME }}/gpt_server:${{ env.VERSION }} .
3737
# docker tag ${{ secrets.DOCKER_USERNAME }}/gpt_server:${{ env.VERSION }} ${{ secrets.DOCKER_USERNAME }}/gpt_server:latest
3838
# 推送镜像到 Docker Hub
3939
- name: Push Docker image

Dockerfile

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,9 @@ COPY ./ /gpt_server
77
WORKDIR /gpt_server
88
# RUN uv sync && uv cache clean
99
ENV UV_HTTP_TIMEOUT=120 CUDA_HOME=/usr/local/cuda-12.2
10-
RUN uv venv --seed && uv sync && uv cache clean && \
10+
ENV PATH=$CUDA_HOME/bin:$PATH
11+
ENV LD_LIBRARY_PATH=$CUDA_HOME/lib64:$LD_LIBRARY_PATH
12+
RUN uv venv --seed && uv sync -v && uv cache clean && \
1113
echo '[[ -f .venv/bin/activate ]] && source .venv/bin/activate' >> ~/.bashrc
1214
ENV PATH=/gpt_server/.venv/bin:$PATH
1315

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@
126126
* [X] 可视化启动界面(不稳定,对开发人员来说比较鸡肋,后期将弃用!)
127127
* [X] 并行的function call功能(tools)
128128
* [X] 支持 文生图 模型
129+
* [X] 支持 图片编辑 模型
129130
* [ ] 支持 pip install 方式进行安装
130131

131132

gpt_server/script/config_example.yaml

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@ models:
174174
- 6
175175

176176
- flux:
177-
#文生图模型
177+
# 文生图模型
178178
alias: null
179179
enable: true
180180
model_config:
@@ -185,3 +185,16 @@ models:
185185
workers:
186186
- gpus:
187187
- 7
188+
189+
- image-edit:
190+
# 图片编辑模型
191+
alias: null
192+
enable: true
193+
model_config:
194+
model_name_or_path: /home/dev/model/Qwen/Qwen-Image-Edit/
195+
model_type: qwen_image_edit
196+
work_mode: hf
197+
device: gpu
198+
workers:
199+
- gpus:
200+
- 7

gpt_server/serving/openai_api_server.py

Lines changed: 30 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,13 @@
1919

2020
import aiohttp
2121
import fastapi
22-
from fastapi import Depends, HTTPException, responses
22+
from fastapi import Depends, File, HTTPException, Request, responses, Form, UploadFile
2323
from fastapi.exceptions import RequestValidationError
2424
from fastapi.middleware.cors import CORSMiddleware
2525
from fastapi.responses import StreamingResponse, JSONResponse, FileResponse
2626
from fastapi.security.http import HTTPAuthorizationCredentials, HTTPBearer
2727
import httpx
28+
import base64
2829

2930
try:
3031
from pydantic.v1 import BaseSettings, validator
@@ -194,18 +195,18 @@ def create_error_response(code: int, message: str) -> JSONResponse:
194195

195196

196197
@app.exception_handler(RequestValidationError)
197-
async def validation_exception_handler(request, exc):
198+
async def validation_exception_handler(request: Request, exc: RequestValidationError):
198199
return create_error_response(ErrorCode.VALIDATION_TYPE_ERROR, str(exc))
199200

200201

201-
def check_model(request) -> Optional[JSONResponse]:
202+
def check_model(model: str) -> Optional[JSONResponse]:
202203
global model_address_map, models_
203204
ret = None
204205
models = models_
205-
if request.model not in models_:
206+
if model not in models_:
206207
ret = create_error_response(
207208
ErrorCode.INVALID_MODEL,
208-
f"Only {'&&'.join(models)} allowed now, your model {request.model}",
209+
f"Only {'&&'.join(models)} allowed now, your model {model}",
209210
)
210211
return ret
211212

@@ -418,7 +419,7 @@ def get_model_address_map():
418419
)
419420
async def create_chat_completion(request: CustomChatCompletionRequest):
420421
"""Creates a completion for the chat message"""
421-
error_check_ret = check_model(request)
422+
error_check_ret = check_model(request.model)
422423
if error_check_ret is not None:
423424
return error_check_ret
424425
worker_addr = get_worker_address(request.model)
@@ -554,7 +555,7 @@ async def chat_completion_stream_generator(
554555
response_class=responses.ORJSONResponse,
555556
)
556557
async def create_completion(request: CompletionRequest):
557-
error_check_ret = check_model(request)
558+
error_check_ret = check_model(request.model)
558559
if error_check_ret is not None:
559560
return error_check_ret
560561

@@ -714,7 +715,6 @@ async def generate_completion(payload: Dict[str, Any], worker_addr: str):
714715
SpeechRequest,
715716
OpenAISpeechRequest,
716717
ImagesGenRequest,
717-
ImagesEditsRequest,
718718
)
719719

720720

@@ -729,17 +729,27 @@ async def get_images_edits(payload: Dict[str, Any]):
729729

730730

731731
@app.post("/v1/images/edits", dependencies=[Depends(check_api_key)])
732-
async def images_edits(request: ImagesEditsRequest):
732+
async def images_edits(
733+
model: str = Form(...),
734+
image: UploadFile = File(media_type="application/octet-stream"),
735+
prompt: Optional[Union[str, List[str]]] = Form(None),
736+
# negative_prompt: Optional[Union[str, List[str]]] = Form(None),
737+
response_format: Optional[str] = Form("url"),
738+
output_format: Optional[str] = Form("png"),
739+
):
733740
"""图片编辑"""
734-
error_check_ret = check_model(request)
741+
742+
error_check_ret = check_model(model)
735743
if error_check_ret is not None:
736744
return error_check_ret
737745
payload = {
738-
"image": request.image,
739-
"model": request.model,
740-
"prompt": request.prompt,
741-
"output_format": request.output_format,
742-
"response_format": request.response_format,
746+
"image": base64.b64encode(await image.read()).decode(
747+
"utf-8"
748+
), # bytes → Base64 字符串
749+
"model": model,
750+
"prompt": prompt,
751+
"output_format": output_format,
752+
"response_format": response_format,
743753
}
744754
result = await get_images_edits(payload=payload)
745755
return result
@@ -758,7 +768,7 @@ async def get_images_gen(payload: Dict[str, Any]):
758768
@app.post("/v1/images/generations", dependencies=[Depends(check_api_key)])
759769
async def images_generations(request: ImagesGenRequest):
760770
"""文生图"""
761-
error_check_ret = check_model(request)
771+
error_check_ret = check_model(request.model)
762772
if error_check_ret is not None:
763773
return error_check_ret
764774
payload = {
@@ -877,10 +887,6 @@ async def get_transcriptions(payload: Dict[str, Any]):
877887
return json.loads(transcription)
878888

879889

880-
from fastapi import UploadFile, Form
881-
import base64
882-
883-
884890
@app.post(
885891
"/v1/audio/transcriptions",
886892
dependencies=[Depends(check_api_key)],
@@ -915,7 +921,7 @@ async def transcriptions(file: UploadFile, model: str = Form()):
915921
response_class=responses.ORJSONResponse,
916922
)
917923
async def classify(request: ModerationsRequest):
918-
error_check_ret = check_model(request)
924+
error_check_ret = check_model(request.model)
919925
if error_check_ret is not None:
920926
return error_check_ret
921927
request.input = process_input(request.model, request.input)
@@ -958,7 +964,7 @@ async def classify(request: ModerationsRequest):
958964
response_class=responses.ORJSONResponse,
959965
)
960966
async def rerank(request: RerankRequest):
961-
error_check_ret = check_model(request)
967+
error_check_ret = check_model(request.model)
962968
if error_check_ret is not None:
963969
return error_check_ret
964970
request.documents = process_input(request.model, request.documents)
@@ -1009,7 +1015,7 @@ async def create_embeddings(request: CustomEmbeddingsRequest, model_name: str =
10091015
"""Creates embeddings for the text"""
10101016
if request.model is None:
10111017
request.model = model_name
1012-
error_check_ret = check_model(request)
1018+
error_check_ret = check_model(request.model)
10131019
if error_check_ret is not None:
10141020
return error_check_ret
10151021

@@ -1111,7 +1117,7 @@ async def count_tokens(request: APITokenCheckRequest):
11111117
@app.post("/api/v1/chat/completions")
11121118
async def create_chat_completion(request: APIChatCompletionRequest):
11131119
"""Creates a completion for the chat message"""
1114-
error_check_ret = check_model(request)
1120+
error_check_ret = check_model(request.model)
11151121
if error_check_ret is not None:
11161122
return error_check_ret
11171123

pyproject.toml

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[project]
22
name = "gpt_server"
3-
version = "0.6.1"
4-
description = "gpt_server是一个用于生产级部署LLMs或Embedding的开源框架"
3+
version = "0.6.2"
4+
description = "gpt_server是一个用于生产级部署LLMs、Embedding、Reranker、ASR和TTS的开源框架"
55
readme = "README.md"
66
license = { text = "Apache 2.0" }
77
authors = [{ name = "Yu Liu", email = "506610466@qq.com" }]
@@ -12,38 +12,40 @@ dependencies = [
1212
"ffmpy",
1313
"fschat==0.2.36",
1414
"infinity-emb[all]==0.0.76",
15-
"lmdeploy==0.9.2",
15+
"lmdeploy==0.9.2.post1",
1616
"loguru>=0.7.2",
1717
"openai==1.99.1",
1818
"setuptools==75.2.0",
1919
"streamlit==1.39.0",
2020
"torch==2.6.0",
2121
"torchvision==0.20.1",
22-
"vllm==0.10.1",
22+
"vllm",
2323
"qwen_vl_utils",
2424
"evalscope[perf,rag]==0.16.1",
2525
"modelscope==1.26.0",
2626
"edge-tts>=7.0.0",
2727
"funasr>=1.2.6",
28-
"sglang[all]>=0.4.10.post2",
28+
"sglang[all]>=0.5.1.post3",
2929
"flashinfer-python",
3030
"flashtts>=0.1.7",
31-
"diffusers>=0.33.1",
31+
"diffusers>=0.35.1",
32+
"sqlmodel>=0.0.24",
3233
]
3334

3435
[tool.uv]
3536
default-groups = [] # 默认只安装dependencies中的库
3637
override-dependencies = [
37-
"setuptools==75.2.0",
38-
"torchvision==0.22.1",
39-
"torchaudio==2.7.1",
40-
"torch==2.7.0",
41-
"triton",
42-
"transformers==4.53.3", # infinity-emb
38+
"setuptools==77.0.3",
39+
"torchvision==0.23.0",
40+
"torchaudio==2.8.0",
41+
"torch==2.8.0",
42+
"triton==3.4.0",
43+
"transformers==4.56.1", # infinity-emb
4344
"soundfile==0.13.1", # infinity
4445
"xgrammar==0.1.23", # sglang[all]==0.4.5 depends on xgrammar==0.1.17
4546
"flashinfer-python==0.2.10",
4647
"outlines-core==0.2.10", # sglang 和 vllm 的冲突
48+
"peft>=0.17.0" # 和 lmdeploy 冲突
4749
]
4850

4951
[project.scripts]
@@ -53,6 +55,13 @@ gpt_server = "gpt_server.cli:main"
5355
url = "https://pypi.tuna.tsinghua.edu.cn/simple"
5456
default = true
5557

58+
[tool.uv.sources]
59+
vllm = { index = "vllm-custom" }
60+
61+
[[tool.uv.index]]
62+
name = "vllm-custom"
63+
url = "https://wheels.vllm.ai/006e7a34aeb3e905ca4131a3251fe079f0511e2f"
64+
5665
[build-system]
5766
requires = ["setuptools", "wheel"]
5867
build-backend = "setuptools.build_meta"

0 commit comments

Comments
 (0)