Skip to content

Commit afc5484

Browse files
schoennenbeck authored and LeiWang1999 committed
[Core] Make scheduling policy settable via EngineArgs (vllm-project#8956)
Signed-off-by: LeiWang1999 <leiwang1999@outlook.com>
1 parent c69905f commit afc5484

File tree

1 file changed

+14
-2
lines changed

1 file changed

+14
-2
lines changed

vllm/engine/arg_utils.py

+14-2
Original file line number | Diff line number | Diff line change
@@ -2,8 +2,8 @@
22
import dataclasses
33
import json
44
from dataclasses import dataclass
5-
from typing import (TYPE_CHECKING, Any, Dict, List, Mapping, Optional, Tuple,
6-
Type, Union)
5+
from typing import (TYPE_CHECKING, Any, Dict, List, Literal, Mapping, Optional,
6+
Tuple, Type, Union)
77

88
import torch
99

@@ -177,6 +177,7 @@ class EngineArgs:
177177
disable_async_output_proc: bool = False
178178
override_neuron_config: Optional[Dict[str, Any]] = None
179179
mm_processor_kwargs: Optional[Dict[str, Any]] = None
180+
scheduling_policy: Literal["fcfs", "priority"] = "fcfs"
180181

181182
def __post_init__(self):
182183
if self.tokenizer is None:
@@ -797,6 +798,16 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
797798
default=None,
798799
help="override or set neuron device configuration.")
799800

801+
parser.add_argument(
802+
'--scheduling-policy',
803+
choices=['fcfs', 'priority'],
804+
default="fcfs",
805+
help='The scheduling policy to use. "fcfs" (first come first served'
806+
', i.e. requests are handled in order of arrival; default) '
807+
'or "priority" (requests are handled based on given '
808+
'priority (lower value means earlier handling) and time of '
809+
'arrival deciding any ties).')
810+
800811
return parser
801812

802813
@classmethod
@@ -1011,6 +1022,7 @@ def create_engine_config(self) -> EngineConfig:
10111022
multi_step_stream_outputs=self.multi_step_stream_outputs,
10121023
send_delta_data=(envs.VLLM_USE_RAY_SPMD_WORKER
10131024
and parallel_config.use_ray),
1025+
policy=self.scheduling_policy,
10141026
)
10151027
lora_config = LoRAConfig(
10161028
max_lora_rank=self.max_lora_rank,

0 commit comments

Comments (0)