Commit c53e073

[Misc] Refine ray_serve_deepseek example (#17204)
Signed-off-by: Rui Qiao <ruisearch42@gmail.com>
1 parent a0e619e

File tree: 1 file changed (+29, -25 lines)


examples/online_serving/ray_serve_deepseek.py

Lines changed: 29 additions & 25 deletions
@@ -8,37 +8,41 @@
 """
 
 from ray import serve
-from ray.serve.llm import LLMConfig, LLMRouter, LLMServer
+from ray.serve.llm import LLMConfig, build_openai_app
 
 llm_config = LLMConfig(
-    model_loading_config=dict(
-        model_id="deepseek",
-        # Change to model download path
-        model_source="/path/to/the/model",
-    ),
-    deployment_config=dict(autoscaling_config=dict(
-        min_replicas=1,
-        max_replicas=1,
-    )),
+    model_loading_config={
+        "model_id": "deepseek",
+        # Since DeepSeek model is huge, it is recommended to pre-download
+        # the model to local disk, say /path/to/the/model and specify:
+        # model_source="/path/to/the/model"
+        "model_source": "deepseek-ai/DeepSeek-R1",
+    },
+    deployment_config={
+        "autoscaling_config": {
+            "min_replicas": 1,
+            "max_replicas": 1,
+        }
+    },
     # Change to the accelerator type of the node
     accelerator_type="H100",
-    runtime_env=dict(env_vars=dict(VLLM_USE_V1="1")),
+    runtime_env={"env_vars": {
+        "VLLM_USE_V1": "1"
+    }},
     # Customize engine arguments as needed (e.g. vLLM engine kwargs)
-    engine_kwargs=dict(
-        tensor_parallel_size=8,
-        pipeline_parallel_size=2,
-        gpu_memory_utilization=0.92,
-        dtype="auto",
-        max_num_seqs=40,
-        max_model_len=16384,
-        enable_chunked_prefill=True,
-        enable_prefix_caching=True,
-        trust_remote_code=True,
-    ),
+    engine_kwargs={
+        "tensor_parallel_size": 8,
+        "pipeline_parallel_size": 2,
+        "gpu_memory_utilization": 0.92,
+        "dtype": "auto",
+        "max_num_seqs": 40,
+        "max_model_len": 16384,
+        "enable_chunked_prefill": True,
+        "enable_prefix_caching": True,
+        "trust_remote_code": True,
+    },
 )
 
 # Deploy the application
-deployment = LLMServer.as_deployment(
-    llm_config.get_serve_options(name_prefix="vLLM:")).bind(llm_config)
-llm_app = LLMRouter.as_deployment().bind([deployment])
+llm_app = build_openai_app({"llm_configs": [llm_config]})
 serve.run(llm_app)
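
For context on the new API: build_openai_app wires the LLMConfig into an OpenAI-compatible HTTP app, replacing the manual LLMServer/LLMRouter wiring deleted above. Once the example script is running (for instance via python ray_serve_deepseek.py), the deployment can be queried with any OpenAI client. The sketch below is not part of this commit; it assumes Ray Serve is listening on its default HTTP address (http://localhost:8000) and reuses the model_id "deepseek" from the config in the diff.

# Hypothetical client-side check, not included in this commit.
# Queries the OpenAI-compatible endpoint exposed by build_openai_app,
# assuming Ray Serve's default HTTP address.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="fake-key")

response = client.chat.completions.create(
    model="deepseek",  # matches model_id in model_loading_config above
    messages=[{"role": "user", "content": "Hello!"}],
)
print(response.choices[0].message.content)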
