Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .buildkite/nightly-benchmarks/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,6 @@ We test the throughput by using `vllm bench serve` with request rate = inf to co
"tensor_parallel_size": 1,
"swap_space": 16,
"disable_log_stats": "",
"disable_log_requests": "",
"load_format": "dummy"
},
"client_parameters": {
Expand Down
1 change: 0 additions & 1 deletion .buildkite/nightly-benchmarks/tests/genai-perf-tests.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
},
"vllm_server_parameters": {
"disable_log_stats": "",
"disable_log_requests": "",
"gpu_memory_utilization": 0.9,
"num_scheduler_steps": 10,
"max_num_seqs": 512,
Expand Down
6 changes: 0 additions & 6 deletions .buildkite/nightly-benchmarks/tests/nightly-tests.json
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
},
"vllm_server_parameters": {
"disable_log_stats": "",
"disable_log_requests": "",
"gpu_memory_utilization": 0.9,
"num_scheduler_steps": 10,
"max_num_seqs": 512,
Expand Down Expand Up @@ -90,7 +89,6 @@
},
"vllm_server_parameters": {
"disable_log_stats": "",
"disable_log_requests": "",
"gpu_memory_utilization": 0.9,
"num_scheduler_steps": 10,
"max_num_seqs": 512,
Expand Down Expand Up @@ -145,7 +143,6 @@
},
"vllm_server_parameters": {
"disable_log_stats": "",
"disable_log_requests": "",
"gpu_memory_utilization": 0.9,
"num_scheduler_steps": 10,
"max_num_seqs": 512,
Expand Down Expand Up @@ -197,7 +194,6 @@
},
"vllm_server_parameters": {
"disable_log_stats": "",
"disable_log_requests": "",
"gpu_memory_utilization": 0.9,
"num_scheduler_steps": 10,
"max_num_seqs": 512,
Expand Down Expand Up @@ -251,7 +247,6 @@
},
"vllm_server_parameters": {
"disable_log_stats": "",
"disable_log_requests": "",
"gpu_memory_utilization": 0.9,
"num_scheduler_steps": 10,
"max_num_seqs": 512,
Expand Down Expand Up @@ -305,7 +300,6 @@
},
"vllm_server_parameters": {
"disable_log_stats": "",
"disable_log_requests": "",
"gpu_memory_utilization": 0.9,
"num_scheduler_steps": 10,
"max_num_seqs": 512,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
"block_size": 128,
"trust_remote_code": "",
"disable_log_stats": "",
"disable_log_requests": "",
"enforce_eager": "",
"max_num_batched_tokens": 2048,
"max_num_seqs": 256,
Expand Down Expand Up @@ -50,7 +49,6 @@
"block_size": 128,
"trust_remote_code": "",
"disable_log_stats": "",
"disable_log_requests": "",
"enforce_eager": "",
"max_num_batched_tokens": 2048,
"max_num_seqs": 256,
Expand Down Expand Up @@ -83,7 +81,6 @@
"block_size": 128,
"trust_remote_code": "",
"disable_log_stats": "",
"disable_log_requests": "",
"enforce_eager": "",
"max_num_batched_tokens": 2048,
"max_num_seqs": 256,
Expand Down Expand Up @@ -117,7 +114,6 @@
"trust_remote_code": "",
"enable_chunked_prefill": "",
"disable_log_stats": "",
"disable_log_requests": "",
"enforce_eager": "",
"max_num_batched_tokens": 2048,
"max_num_seqs": 256,
Expand Down Expand Up @@ -153,7 +149,6 @@
"trust_remote_code": "",
"enable_chunked_prefill": "",
"disable_log_stats": "",
"disable_log_requests": "",
"enforce_eager": "",
"max_num_batched_tokens": 2048,
"max_num_seqs": 256,
Expand Down Expand Up @@ -189,7 +184,6 @@
"trust_remote_code": "",
"enable_chunked_prefill": "",
"disable_log_stats": "",
"disable_log_requests": "",
"enforce_eager": "",
"max_num_batched_tokens": 2048,
"max_num_seqs": 256,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
"block_size": 128,
"trust_remote_code": "",
"disable_log_stats": "",
"disable_log_requests": "",
"enforce_eager": "",
"max_num_batched_tokens": 2048,
"max_num_seqs": 256,
Expand Down Expand Up @@ -50,7 +49,6 @@
"block_size": 128,
"trust_remote_code": "",
"disable_log_stats": "",
"disable_log_requests": "",
"enforce_eager": "",
"max_num_batched_tokens": 2048,
"max_num_seqs": 256,
Expand Down Expand Up @@ -84,7 +82,6 @@
"block_size": 128,
"trust_remote_code": "",
"disable_log_stats": "",
"disable_log_requests": "",
"enforce_eager": "",
"max_num_batched_tokens": 2048,
"max_num_seqs": 256,
Expand Down Expand Up @@ -118,7 +115,6 @@
"trust_remote_code": "",
"enable_chunked_prefill": "",
"disable_log_stats": "",
"disable_log_requests": "",
"enforce_eager": "",
"max_num_batched_tokens": 2048,
"max_num_seqs": 256,
Expand Down Expand Up @@ -154,7 +150,6 @@
"trust_remote_code": "",
"enable_chunked_prefill": "",
"disable_log_stats": "",
"disable_log_requests": "",
"enforce_eager": "",
"max_num_batched_tokens": 2048,
"max_num_seqs": 256,
Expand Down Expand Up @@ -191,7 +186,6 @@
"trust_remote_code": "",
"enable_chunked_prefill": "",
"disable_log_stats": "",
"disable_log_requests": "",
"enforce_eager": "",
"max_num_batched_tokens": 2048,
"max_num_seqs": 256,
Expand Down
5 changes: 0 additions & 5 deletions .buildkite/nightly-benchmarks/tests/serving-tests-cpu.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
"block_size": 128,
"trust_remote_code": "",
"disable_log_stats": "",
"disable_log_requests": "",
"enforce_eager": "",
"max_num_batched_tokens": 2048,
"max_num_seqs": 256,
Expand Down Expand Up @@ -50,7 +49,6 @@
"block_size": 128,
"trust_remote_code": "",
"disable_log_stats": "",
"disable_log_requests": "",
"enforce_eager": "",
"max_num_batched_tokens": 2048,
"max_num_seqs": 256,
Expand Down Expand Up @@ -83,7 +81,6 @@
"block_size": 128,
"trust_remote_code": "",
"disable_log_stats": "",
"disable_log_requests": "",
"enforce_eager": "",
"max_num_batched_tokens": 2048,
"max_num_seqs": 256,
Expand Down Expand Up @@ -117,7 +114,6 @@
"trust_remote_code": "",
"enable_chunked_prefill": "",
"disable_log_stats": "",
"disable_log_requests": "",
"enforce_eager": "",
"max_num_batched_tokens": 2048,
"max_num_seqs": 256,
Expand Down Expand Up @@ -153,7 +149,6 @@
"trust_remote_code": "",
"enable_chunked_prefill": "",
"disable_log_stats": "",
"disable_log_requests": "",
"enforce_eager": "",
"max_num_batched_tokens": 2048,
"max_num_seqs": 256,
Expand Down
6 changes: 1 addition & 5 deletions .buildkite/nightly-benchmarks/tests/serving-tests.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
"tensor_parallel_size": 1,
"swap_space": 16,
"disable_log_stats": "",
"disable_log_requests": "",
"load_format": "dummy"
},
"client_parameters": {
Expand All @@ -26,7 +25,6 @@
"tensor_parallel_size": 4,
"swap_space": 16,
"disable_log_stats": "",
"disable_log_requests": "",
"load_format": "dummy"
},
"client_parameters": {
Expand All @@ -45,7 +43,6 @@
"tensor_parallel_size": 2,
"swap_space": 16,
"disable_log_stats": "",
"disable_log_requests": "",
"load_format": "dummy"
},
"client_parameters": {
Expand All @@ -60,8 +57,7 @@
"test_name": "serving_llama70B_tp4_sharegpt_specdecode",
"qps_list": [2],
"server_parameters": {
"model": "meta-llama/Meta-Llama-3.1-70B-Instruct",
"disable_log_requests": "",
"model": "meta-llama/Meta-Llama-3.1-70B-Instruct",
"tensor_parallel_size": 4,
"swap_space": 16,
"speculative_config": {
Expand Down
1 change: 0 additions & 1 deletion tests/config/test_mp_reducer.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ def test_mp_reducer(monkeypatch):
max_model_len=32,
gpu_memory_utilization=0.1,
disable_log_stats=True,
disable_log_requests=True,
)

async_llm = AsyncLLM.from_engine_args(
Expand Down
2 changes: 1 addition & 1 deletion tests/mq_llm_engine/test_load.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
NUM_REQUESTS = 10000

# Scenarios to test for num generated token.
ENGINE_ARGS = AsyncEngineArgs(model=MODEL, disable_log_requests=True)
ENGINE_ARGS = AsyncEngineArgs(model=MODEL)


@pytest.fixture(scope="function")
Expand Down
4 changes: 1 addition & 3 deletions tests/v1/engine/test_async_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,10 @@
TEXT_ENGINE_ARGS = AsyncEngineArgs(
model="meta-llama/Llama-3.2-1B-Instruct",
enforce_eager=True,
disable_log_requests=True,
)

VISION_ENGINE_ARGS = AsyncEngineArgs(model="Qwen/Qwen2-VL-2B-Instruct",
enforce_eager=True,
disable_log_requests=True)
enforce_eager=True)

TEXT_PROMPT = "Hello my name is Robert and"

Expand Down
1 change: 0 additions & 1 deletion tests/v1/test_async_llm_dp.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
engine_args = AsyncEngineArgs(
model="ibm-research/PowerMoE-3b",
enforce_eager=True,
disable_log_requests=True,
tensor_parallel_size=int(os.getenv("TP_SIZE", 1)),
data_parallel_size=DP_SIZE,
)
Expand Down
30 changes: 26 additions & 4 deletions vllm/engine/arg_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import regex as re
import torch
from pydantic import TypeAdapter, ValidationError
from typing_extensions import TypeIs
from typing_extensions import TypeIs, deprecated

import vllm.envs as envs
from vllm.config import (BlockSize, CacheConfig, CacheDType, CompilationConfig,
Expand Down Expand Up @@ -1704,7 +1704,23 @@ def _set_default_args_v1(self, usage_context: UsageContext,
@dataclass
class AsyncEngineArgs(EngineArgs):
"""Arguments for asynchronous vLLM engine."""
disable_log_requests: bool = False
enable_log_requests: bool = False

@property
@deprecated(
"`disable_log_requests` is deprecated and has been replaced with "
"`enable_log_requests`. This will be removed in v0.12.0. Please use "
"`enable_log_requests` instead.")
def disable_log_requests(self) -> bool:
return not self.enable_log_requests

@disable_log_requests.setter
@deprecated(
"`disable_log_requests` is deprecated and has been replaced with "
"`enable_log_requests`. This will be removed in v0.12.0. Please use "
"`enable_log_requests` instead.")
def disable_log_requests(self, value: bool):
self.enable_log_requests = not value

@staticmethod
def add_cli_args(parser: FlexibleArgumentParser,
Expand All @@ -1715,9 +1731,15 @@ def add_cli_args(parser: FlexibleArgumentParser,
load_general_plugins()
if not async_args_only:
parser = EngineArgs.add_cli_args(parser)
parser.add_argument('--enable-log-requests',
action=argparse.BooleanOptionalAction,
default=AsyncEngineArgs.enable_log_requests,
help='Enable logging requests.')
parser.add_argument('--disable-log-requests',
action='store_true',
help='Disable logging requests.')
action=argparse.BooleanOptionalAction,
default=not AsyncEngineArgs.enable_log_requests,
help='[DEPRECATED] Disable logging requests.',
deprecated=True)
current_platform.pre_register_and_update(parser)
return parser

Expand Down
26 changes: 16 additions & 10 deletions vllm/engine/async_llm_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
from vllm.sequence import ExecuteModelRequest
from vllm.transformers_utils.tokenizer import AnyTokenizer
from vllm.usage.usage_lib import UsageContext
from vllm.utils import Device, weak_bind
from vllm.utils import Device, deprecate_kwargs, weak_bind

logger = init_logger(__name__)
ENGINE_ITERATION_TIMEOUT_S = envs.VLLM_ENGINE_ITERATION_TIMEOUT_S
Expand Down Expand Up @@ -554,22 +554,28 @@ def _get_executor_cls(cls,
return LLMEngine._get_executor_cls(engine_config)

@classmethod
@deprecate_kwargs(
"disable_log_requests",
additional_message=("This argument will have no effect. "
"Use `enable_log_requests` instead."),
)
def from_vllm_config(
cls,
vllm_config: VllmConfig,
start_engine_loop: bool = True,
usage_context: UsageContext = UsageContext.ENGINE_CONTEXT,
stat_loggers: Optional[dict[str, StatLoggerBase]] = None,
disable_log_requests: bool = False,
disable_log_stats: bool = False,
cls,
vllm_config: VllmConfig,
start_engine_loop: bool = True,
usage_context: UsageContext = UsageContext.ENGINE_CONTEXT,
stat_loggers: Optional[dict[str, StatLoggerBase]] = None,
enable_log_requests: bool = False,
disable_log_stats: bool = False,
disable_log_requests: bool = True, # Deprecated, will be removed
) -> "AsyncLLMEngine":
"""Create an AsyncLLMEngine from the EngineArgs."""

return cls(
vllm_config=vllm_config,
executor_class=cls._get_executor_cls(vllm_config),
start_engine_loop=start_engine_loop,
log_requests=not disable_log_requests,
log_requests=enable_log_requests,
log_stats=not disable_log_stats,
usage_context=usage_context,
stat_loggers=stat_loggers,
Expand Down Expand Up @@ -598,7 +604,7 @@ def from_engine_args(
usage_context=usage_context,
stat_loggers=stat_loggers,
disable_log_stats=engine_args.disable_log_stats,
disable_log_requests=engine_args.disable_log_requests,
enable_log_requests=engine_args.enable_log_requests,
)

@property
Expand Down
Loading