Skip to content

Commit 59e5e42

Browse files
committed
优化docker 减小镜像大小
1 parent bcc7baa commit 59e5e42

File tree

6 files changed

+16
-5
lines changed

6 files changed

+16
-5
lines changed

Dockerfile

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
1-
# FROM docker.rainbond.cc/506610466/cuda:12.2.0-runtime-ubuntu20.04-uv
2-
# FROM 506610466/cuda:12.2.0-runtime-ubuntu20.04-uv
1+
# FROM docker.1ms.run/506610466/cuda:12.2.0-runtime-ubuntu20.04-uv
2+
FROM 506610466/cuda:12.2.0-runtime-ubuntu20.04-uv
33
# 从基础镜像开始构建,加快构建速度
4-
FROM 506610466/gpt_server:base
4+
# FROM 506610466/gpt_server:base
5+
RUN apt update -y && apt install -y build-essential && rm -rf /var/lib/apt/lists/*
56
COPY ./ /gpt_server
67
WORKDIR /gpt_server
78
RUN uv sync && uv cache clean
9+
ENV PATH=/gpt_server/.venv/bin:$PATH
810
# RUN uv venv --seed && uv sync && uv cache clean && \
911
# echo '[[ -f .venv/bin/activate ]] && source .venv/bin/activate' >> ~/.bashrc
1012

gpt_server/model_worker/base/model_worker_base.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,8 @@ def run(cls):
224224
parser.add_argument("--kv_cache_quant_policy", type=str, default="0")
225225
# vad_model
226226
parser.add_argument("--vad_model", type=str, default="")
227+
# punc_model
228+
parser.add_argument("--punc_model", type=str, default="")
227229
# log_level
228230
parser.add_argument("--log_level", type=str, default="WARNING")
229231
args = parser.parse_args()
@@ -245,6 +247,8 @@ def run(cls):
245247
os.environ["max_model_len"] = args.max_model_len
246248
if args.vad_model:
247249
os.environ["vad_model"] = args.vad_model
250+
if args.punc_model:
251+
os.environ["punc_model"] = args.punc_model
248252

249253
os.environ["enable_prefix_caching"] = args.enable_prefix_caching
250254
os.environ["gpu_memory_utilization"] = args.gpu_memory_utilization

gpt_server/model_worker/funasr.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,11 @@ def __init__(
3535
device = "cuda"
3636
logger.warning(f"使用{device}加载...")
3737
vad_model = os.environ.get("vad_model", None)
38+
punc_model = os.environ.get("punc_model", None)
3839
self.model = AutoModel(
3940
model=model_path,
4041
vad_model=vad_model,
42+
punc_model=punc_model,
4143
vad_kwargs={"max_single_segment_time": 30000},
4244
device="cuda",
4345
)

gpt_server/utils.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,7 @@ def start_model_worker(config: dict):
163163
"kv_cache_quant_policy", 0
164164
)
165165
vad_model = engine_config.get("vad_model", "")
166+
punc_model = engine_config.get("punc_model", "")
166167

167168
else:
168169
logger.error(
@@ -248,6 +249,8 @@ def start_model_worker(config: dict):
248249
cmd += f" --max_model_len '{max_model_len}'"
249250
if vad_model:
250251
cmd += f" --vad_model '{vad_model}'"
252+
if punc_model:
253+
cmd += f" --punc_model '{punc_model}'"
251254
p = Process(target=run_cmd, args=(cmd,))
252255
p.start()
253256
process.append(p)

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "gpt_server"
3-
version = "0.4.4"
3+
version = "0.4.5"
44
description = "gpt_server是一个用于生产级部署LLMs或Embedding的开源框架。"
55
readme = "README.md"
66
license = { text = "Apache 2.0" }

uv.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)