Skip to content

Commit 8065a7e

Browse files
authored
[Frontend] Add FlexibleArgumentParser to support both underscore and dash in names (vllm-project#5718)
1 parent 3f3b6b2 commit 8065a7e

22 files changed: +72 -45 lines changed

benchmarks/benchmark_latency.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313
from vllm.engine.arg_utils import EngineArgs
1414
from vllm.inputs import PromptStrictInputs
1515
from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS
16+
from vllm.utils import FlexibleArgumentParser
1617

1718

1819
def main(args: argparse.Namespace):
@@ -120,7 +121,7 @@ def run_to_completion(profile_dir: Optional[str] = None):
120121

121122

122123
if __name__ == '__main__':
123-
parser = argparse.ArgumentParser(
124+
parser = FlexibleArgumentParser(
124125
description='Benchmark the latency of processing a single batch of '
125126
'requests till completion.')
126127
parser.add_argument('--model', type=str, default='facebook/opt-125m')

benchmarks/benchmark_prefix_caching.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
import argparse
21
import time
32

43
from vllm import LLM, SamplingParams
4+
from vllm.utils import FlexibleArgumentParser
55

66
PROMPT = "You are a helpful assistant in recognizes the content of tables in markdown format. Here is a table as fellows. You need to answer my question about the table.\n# Table\n|Opening|Opening|Sl. No.|Film|Cast|Director|Music Director|Notes|\n|----|----|----|----|----|----|----|----|\n|J A N|9|1|Agni Pushpam|Jayabharathi, Kamalahasan|Jeassy|M. K. Arjunan||\n|J A N|16|2|Priyamvada|Mohan Sharma, Lakshmi, KPAC Lalitha|K. S. Sethumadhavan|V. Dakshinamoorthy||\n|J A N|23|3|Yakshagaanam|Madhu, Sheela|Sheela|M. S. Viswanathan||\n|J A N|30|4|Paalkkadal|Sheela, Sharada|T. K. Prasad|A. T. Ummer||\n|F E B|5|5|Amma|Madhu, Srividya|M. Krishnan Nair|M. K. Arjunan||\n|F E B|13|6|Appooppan|Thikkurissi Sukumaran Nair, Kamal Haasan|P. Bhaskaran|M. S. Baburaj||\n|F E B|20|7|Srishti|Chowalloor Krishnankutty, Ravi Alummoodu|K. T. Muhammad|M. S. Baburaj||\n|F E B|20|8|Vanadevatha|Prem Nazir, Madhubala|Yusufali Kechery|G. Devarajan||\n|F E B|27|9|Samasya|Madhu, Kamalahaasan|K. Thankappan|Shyam||\n|F E B|27|10|Yudhabhoomi|K. P. Ummer, Vidhubala|Crossbelt Mani|R. K. Shekhar||\n|M A R|5|11|Seemantha Puthran|Prem Nazir, Jayabharathi|A. B. Raj|M. K. Arjunan||\n|M A R|12|12|Swapnadanam|Rani Chandra, Dr. Mohandas|K. G. George|Bhaskar Chandavarkar||\n|M A R|19|13|Thulavarsham|Prem Nazir, sreedevi, Sudheer|N. Sankaran Nair|V. Dakshinamoorthy||\n|M A R|20|14|Aruthu|Kaviyoor Ponnamma, Kamalahasan|Ravi|G. Devarajan||\n|M A R|26|15|Swimming Pool|Kamal Haasan, M. G. Soman|J. Sasikumar|M. K. Arjunan||\n\n# Question\nWhat' s the content in the (1,1) cells\n" # noqa: E501
77

@@ -44,7 +44,7 @@ def main(args):
4444

4545

4646
if __name__ == "__main__":
47-
parser = argparse.ArgumentParser(
47+
parser = FlexibleArgumentParser(
4848
description='Benchmark the performance with or without automatic '
4949
'prefix caching.')
5050
parser.add_argument('--model',

benchmarks/benchmark_serving.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,11 @@
4444
except ImportError:
4545
from backend_request_func import get_tokenizer
4646

47+
try:
48+
from vllm.utils import FlexibleArgumentParser
49+
except ImportError:
50+
from argparse import ArgumentParser as FlexibleArgumentParser
51+
4752

4853
@dataclass
4954
class BenchmarkMetrics:
@@ -511,7 +516,7 @@ def main(args: argparse.Namespace):
511516

512517

513518
if __name__ == "__main__":
514-
parser = argparse.ArgumentParser(
519+
parser = FlexibleArgumentParser(
515520
description="Benchmark the online serving throughput.")
516521
parser.add_argument(
517522
"--backend",

benchmarks/benchmark_throughput.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
from vllm.engine.arg_utils import EngineArgs
1414
from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS
15+
from vllm.utils import FlexibleArgumentParser
1516

1617

1718
def sample_requests(
@@ -261,7 +262,7 @@ def main(args: argparse.Namespace):
261262

262263

263264
if __name__ == "__main__":
264-
parser = argparse.ArgumentParser(description="Benchmark the throughput.")
265+
parser = FlexibleArgumentParser(description="Benchmark the throughput.")
265266
parser.add_argument("--backend",
266267
type=str,
267268
choices=["vllm", "hf", "mii"],

benchmarks/cutlass_benchmarks/w8a8_benchmarks.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from weight_shapes import WEIGHT_SHAPES
1212

1313
from vllm import _custom_ops as ops
14+
from vllm.utils import FlexibleArgumentParser
1415

1516
DEFAULT_MODELS = list(WEIGHT_SHAPES.keys())[1:]
1617
DEFAULT_BATCH_SIZES = [1, 16, 32, 64, 128, 256, 512]
@@ -293,7 +294,7 @@ def to_torch_dtype(dt):
293294
return torch.float8_e4m3fn
294295
raise ValueError("unsupported dtype")
295296

296-
parser = argparse.ArgumentParser(
297+
parser = FlexibleArgumentParser(
297298
description="""
298299
Benchmark Cutlass GEMM.
299300

benchmarks/kernels/benchmark_aqlm.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import argparse
21
import os
32
import sys
43
from typing import Optional
@@ -10,6 +9,7 @@
109
from vllm.model_executor.layers.quantization.aqlm import (
1110
dequantize_weight, generic_dequantize_gemm, get_int_dtype,
1211
optimized_dequantize_gemm)
12+
from vllm.utils import FlexibleArgumentParser
1313

1414
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
1515

@@ -137,7 +137,7 @@ def dequant_test(k: int, parts: torch.Tensor, nbooks: int, bits: int) -> None:
137137

138138
def main():
139139

140-
parser = argparse.ArgumentParser(description="Benchmark aqlm performance.")
140+
parser = FlexibleArgumentParser(description="Benchmark aqlm performance.")
141141

142142
# Add arguments
143143
parser.add_argument("--nbooks",

benchmarks/kernels/benchmark_marlin.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import argparse
21
from typing import List
32

43
import torch
@@ -16,6 +15,7 @@
1615
MarlinWorkspace, marlin_24_quantize, marlin_quantize)
1716
from vllm.model_executor.layers.quantization.utils.quant_utils import (
1817
gptq_pack, quantize_weights, sort_weights)
18+
from vllm.utils import FlexibleArgumentParser
1919

2020
DEFAULT_MODELS = ["meta-llama/Llama-2-7b-hf/TP1"]
2121
DEFAULT_BATCH_SIZES = [1, 16, 32, 64, 128, 256, 512]
@@ -211,7 +211,7 @@ def main(args):
211211
# python benchmark_marlin.py --batch-sizes 1 16 32 --limit-k 4096 --limit-n 4096 --limit-group-size 128 --limit-num-bits 4 --limit-act-order 0 --limit-k-full 1 # noqa E501
212212
#
213213
if __name__ == "__main__":
214-
parser = argparse.ArgumentParser(
214+
parser = FlexibleArgumentParser(
215215
description="Benchmark Marlin across specified models/shapes/batches")
216216
parser.add_argument(
217217
"--models",

benchmarks/kernels/benchmark_moe.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from transformers import AutoConfig
1111

1212
from vllm.model_executor.layers.fused_moe.fused_moe import *
13+
from vllm.utils import FlexibleArgumentParser
1314

1415

1516
class BenchmarkConfig(TypedDict):
@@ -315,7 +316,7 @@ def _distribute(method: str, inputs: List[Any]) -> List[Any]:
315316

316317

317318
if __name__ == "__main__":
318-
parser = argparse.ArgumentParser()
319+
parser = FlexibleArgumentParser()
319320
parser.add_argument("--model",
320321
type=str,
321322
default="mistralai/Mixtral-8x7B-Instruct-v0.1")

benchmarks/kernels/benchmark_paged_attention.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
1-
import argparse
21
import random
32
import time
43
from typing import List, Optional
54

65
import torch
76

87
from vllm import _custom_ops as ops
9-
from vllm.utils import STR_DTYPE_TO_TORCH_DTYPE, create_kv_caches_with_random
8+
from vllm.utils import (STR_DTYPE_TO_TORCH_DTYPE, FlexibleArgumentParser,
9+
create_kv_caches_with_random)
1010

1111
NUM_BLOCKS = 1024
1212
PARTITION_SIZE = 512
@@ -161,7 +161,7 @@ def run_cuda_benchmark(num_iters: int, profile: bool = False) -> float:
161161

162162

163163
if __name__ == '__main__':
164-
parser = argparse.ArgumentParser(
164+
parser = FlexibleArgumentParser(
165165
description="Benchmark the paged attention kernel.")
166166
parser.add_argument("--version",
167167
type=str,

benchmarks/kernels/benchmark_rope.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import argparse
21
from itertools import accumulate
32
from typing import List, Optional
43

@@ -7,6 +6,7 @@
76

87
from vllm.model_executor.layers.rotary_embedding import (RotaryEmbedding,
98
get_rope)
9+
from vllm.utils import FlexibleArgumentParser
1010

1111

1212
def benchmark_rope_kernels_multi_lora(
@@ -86,7 +86,7 @@ def benchmark_rope_kernels_multi_lora(
8686

8787

8888
if __name__ == '__main__':
89-
parser = argparse.ArgumentParser(
89+
parser = FlexibleArgumentParser(
9090
description="Benchmark the rotary embedding kernels.")
9191
parser.add_argument("--is-neox-style", type=bool, default=True)
9292
parser.add_argument("--batch-size", type=int, default=16)

0 commit comments

Comments
 (0)