From 437912ebf69584f7a2c629d8b5e8cdbb4977f8e0 Mon Sep 17 00:00:00 2001 From: Robert Shaw Date: Mon, 10 Jun 2024 00:17:24 +0000 Subject: [PATCH] update internal method in benchmark throughput too --- neuralmagic/benchmarks/scripts/benchmark_throughput.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/neuralmagic/benchmarks/scripts/benchmark_throughput.py b/neuralmagic/benchmarks/scripts/benchmark_throughput.py index 0607067e3817d..f49de1ec27a3f 100644 --- a/neuralmagic/benchmarks/scripts/benchmark_throughput.py +++ b/neuralmagic/benchmarks/scripts/benchmark_throughput.py @@ -10,10 +10,12 @@ import time from datetime import datetime from pathlib import Path -from typing import List, Optional, Tuple +from typing import List, Optional, Tuple, cast from transformers import AutoTokenizer +from vllm.inputs import PromptStrictInputs + from .common import (generate_synthetic_requests, num_available_gpus, print_request_outputs, warmup_vllm_engine) from .datasets_registry import DatasetArgs, get_dataset @@ -77,8 +79,7 @@ def run_vllm(requests: List[Tuple[str, int, int]], ) # FIXME(woosuk): Do not use internal method. llm._add_request( - prompt=prompt, - prompt_token_ids=None, + inputs=cast(PromptStrictInputs, prompt), params=sampling_params, )