[Misc] Add packages for benchmark as extra dependency #19089


Merged · 4 commits · Jun 4, 2025

2 changes: 2 additions & 0 deletions docs/cli/README.md
@@ -77,6 +77,8 @@ vllm complete --quick "The future of AI is"

Run benchmark tests for latency, online serving throughput, and offline inference throughput.

+To use benchmark commands, please install vLLM with the extra dependencies: `pip install vllm[bench]`.
+
Available Commands:

```bash
...
```
1 change: 1 addition & 0 deletions setup.py
@@ -687,6 +687,7 @@ def _read_requirements(filename: str) -> list[str]:
    ext_modules=ext_modules,
    install_requires=get_requirements(),
    extras_require={
+        "bench": ["pandas", "datasets"],
        "tensorizer": ["tensorizer>=2.9.0"],
        "fastsafetensors": ["fastsafetensors >= 0.1.10"],
        "runai": ["runai-model-streamer", "runai-model-streamer-s3", "boto3"],
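The `extras_require` entry above is what makes `pip install vllm[bench]` pull in `pandas` and `datasets`. A quick way to check whether those optional packages are present in an environment (a hypothetical helper, not part of this PR):

```python
import importlib.util

# Hypothetical helper: report whether the packages added by the "bench"
# extra are importable in the current environment.
def bench_deps_installed() -> bool:
    return all(
        importlib.util.find_spec(name) is not None
        for name in ("pandas", "datasets"))

print("bench extras installed:", bench_deps_installed())
```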
39 changes: 17 additions & 22 deletions vllm/benchmarks/datasets.py
@@ -23,7 +23,6 @@
from typing import Any, Callable, Optional, Union

import numpy as np
-import pandas as pd
from PIL import Image
from transformers import PreTrainedTokenizerBase

@@ -32,6 +31,23 @@
from vllm.multimodal import MultiModalDataDict
from vllm.multimodal.image import convert_image_mode
from vllm.transformers_utils.tokenizer import AnyTokenizer, get_lora_tokenizer
+from vllm.utils import PlaceholderModule
+
+try:
+    from datasets import load_dataset
+except ImportError:
+    datasets = PlaceholderModule("datasets")
+    load_dataset = datasets.placeholder_attr("load_dataset")
+
+try:
+    import pandas as pd
+except ImportError:
+    pd = PlaceholderModule("pandas")
+
+try:
+    import librosa
+except ImportError:
+    librosa = PlaceholderModule("librosa")

logger = logging.getLogger(__name__)
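
`PlaceholderModule` is what lets this module import cleanly when the optional packages are missing: the `ImportError` is deferred until the dependency is actually used. A minimal sketch of the idea (illustrative only; vLLM's real implementation lives in `vllm.utils` and differs in detail):

```python
class PlaceholderSketch:
    """Stand-in for a missing optional module; raises only on use."""

    def __init__(self, name: str):
        self._name = name

    def placeholder_attr(self, attr: str) -> "PlaceholderSketch":
        # Defer the error for one specific attribute (e.g. load_dataset).
        return PlaceholderSketch(f"{self._name}.{attr}")

    def __getattr__(self, attr: str):
        raise ImportError(
            f"'{self._name}' is not installed; run `pip install vllm[bench]` "
            f"to use the benchmark features.")

    def __call__(self, *args, **kwargs):
        raise ImportError(f"'{self._name}' is not installed.")
```

With this pattern, `import vllm.benchmarks.datasets` succeeds without pandas installed, but the first `pd.read_csv(...)` call fails with an actionable message.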

@@ -635,13 +651,6 @@ def load_data(self, ):
        if self.dataset_path is None:
            raise ValueError("dataset_path must be provided for loading data.")

-        try:
-            import pandas as pd
-        except ImportError as e:
-            raise ImportError(
-                "Pandas is required for BurstGPTDataset. Please install it "
-                "using `pip install pandas`.") from e
-
        df = pd.read_csv(self.dataset_path)
        # Filter to keep only GPT-4 rows.
        gpt4_df = df[df["Model"] == "GPT-4"]
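
The GPT-4 filter above is a standard pandas boolean mask. A self-contained illustration with synthetic data (the token column is invented for this example; only `"Model"` comes from the diff):

```python
import pandas as pd

# Tiny synthetic stand-in for the BurstGPT trace CSV.
df = pd.DataFrame({
    "Model": ["GPT-4", "ChatGPT", "GPT-4"],
    "Request tokens": [120, 80, 300],
})
gpt4_df = df[df["Model"] == "GPT-4"]  # keep only GPT-4 rows
print(len(gpt4_df))  # -> 2
```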
@@ -716,13 +725,6 @@ def __init__(

    def load_data(self) -> None:
        """Load data from HuggingFace datasets."""
-        try:
-            from datasets import load_dataset
-        except ImportError as e:
-            raise ImportError(
-                "Hugging Face datasets library is required for this dataset. "
-                "Please install it using `pip install datasets`.") from e
-
        self.data = load_dataset(
            self.dataset_path,
            name=self.dataset_subset,
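
For reference, `datasets.load_dataset` takes a dataset path plus an optional config `name`, matching the (truncated) call above. A standalone example, with an illustrative public dataset:

```python
from datasets import load_dataset

# "openai/gsm8k" with config "main" is only an example dataset; streaming
# avoids downloading everything up front.
data = load_dataset("openai/gsm8k", name="main", split="train", streaming=True)
print(next(iter(data)))
```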
@@ -1134,13 +1136,6 @@ def sample(
        output_len: Optional[int] = None,
        **kwargs,
    ) -> list:
-        try:
-            import librosa
-        except ImportError as e:
-            raise ImportError(
-                "librosa is required for ASRDataset. Please install it "
-                "using `pip install librosa`.") from e
-
        output_len = (output_len
                      if output_len is not None else self.DEFAULT_OUTPUT_LEN)
        prompt = ASRDataset.TRANSCRIPTION_PREAMBLE
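
`librosa` is the audio-decoding dependency `ASRDataset` relies on later in `sample`. A minimal standalone use (the file path and target sample rate are placeholders):

```python
import librosa

# Decode an audio file and resample to 16 kHz, the rate most ASR models
# expect; "sample.wav" is a placeholder path.
waveform, sample_rate = librosa.load("sample.wav", sr=16000)
print(waveform.shape, sample_rate)
```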