forked from mesolitica/vllm-whisper
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[FIX] Fix styles in automatic prefix caching & add an automatic prefix caching benchmark (vllm-project#3158)
- Loading branch information
1 parent
ad2542d
commit 70d09b0
Showing
4 changed files
with
69 additions
and
18 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
import argparse | ||
import time | ||
|
||
from vllm import LLM | ||
from vllm import SamplingParams | ||
|
||
# Fixed benchmark prompt: an instruction, a markdown table, and one question.
# Every request in the benchmark uses this exact text. The wording is kept
# verbatim (including its typos) so that timings stay comparable.
PROMPT = (
    "You are a helpful assistant in recognizes the content of tables in "
    "markdown format. Here is a table as fellows. You need to answer my "
    "question about the table.\n"
    "# Table\n"
    "|Opening|Opening|Sl. No.|Film|Cast|Director|Music Director|Notes|\n"
    "|----|----|----|----|----|----|----|----|\n"
    "|J A N|9|1|Agni Pushpam|Jayabharathi, Kamalahasan|Jeassy|M. K. Arjunan||\n"
    "|J A N|16|2|Priyamvada|Mohan Sharma, Lakshmi, KPAC Lalitha|K. S. Sethumadhavan|V. Dakshinamoorthy||\n"
    "|J A N|23|3|Yakshagaanam|Madhu, Sheela|Sheela|M. S. Viswanathan||\n"
    "|J A N|30|4|Paalkkadal|Sheela, Sharada|T. K. Prasad|A. T. Ummer||\n"
    "|F E B|5|5|Amma|Madhu, Srividya|M. Krishnan Nair|M. K. Arjunan||\n"
    "|F E B|13|6|Appooppan|Thikkurissi Sukumaran Nair, Kamal Haasan|P. Bhaskaran|M. S. Baburaj||\n"
    "|F E B|20|7|Srishti|Chowalloor Krishnankutty, Ravi Alummoodu|K. T. Muhammad|M. S. Baburaj||\n"
    "|F E B|20|8|Vanadevatha|Prem Nazir, Madhubala|Yusufali Kechery|G. Devarajan||\n"
    "|F E B|27|9|Samasya|Madhu, Kamalahaasan|K. Thankappan|Shyam||\n"
    "|F E B|27|10|Yudhabhoomi|K. P. Ummer, Vidhubala|Crossbelt Mani|R. K. Shekhar||\n"
    "|M A R|5|11|Seemantha Puthran|Prem Nazir, Jayabharathi|A. B. Raj|M. K. Arjunan||\n"
    "|M A R|12|12|Swapnadanam|Rani Chandra, Dr. Mohandas|K. G. George|Bhaskar Chandavarkar||\n"
    "|M A R|19|13|Thulavarsham|Prem Nazir, sreedevi, Sudheer|N. Sankaran Nair|V. Dakshinamoorthy||\n"
    "|M A R|20|14|Aruthu|Kaviyoor Ponnamma, Kamalahasan|Ravi|G. Devarajan||\n"
    "|M A R|26|15|Swimming Pool|Kamal Haasan, M. G. Soman|J. Sasikumar|M. K. Arjunan||\n"
    "\n"
    "# Question\n"
    "What' s the content in the (1,1) cells\n"
)
|
||
|
||
def test_prefix(llm=None, sampling_params=None, prompts=None, prefix_len=None): | ||
start_time = time.time() | ||
# whether use Prefix | ||
if prefix_len != None: | ||
# start inference | ||
llm.generate(prompts, | ||
sampling_params=sampling_params, | ||
prefix_pos=prefix_len) | ||
else: | ||
llm.generate(prompts, sampling_params=sampling_params) | ||
|
||
end_time = time.time() | ||
print(f"cost time {end_time - start_time}") | ||
|
||
|
||
def main(args):
    """Build the engine and time a warm-up pass followed by the full batch.

    Args:
        args: Parsed command-line namespace; only ``enable_prefix_caching``
            is read, and it is forwarded to the ``LLM`` constructor.

    The batch is 100 copies of ``PROMPT``. The warm-up pass sends only the
    first of them; the second pass sends all of them. Each pass is timed and
    reported by ``test_prefix``.
    """
    engine = LLM(
        model="baichuan-inc/Baichuan2-13B-Chat",
        tokenizer_mode='auto',
        trust_remote_code=True,
        enforce_eager=True,
        enable_prefix_caching=args.enable_prefix_caching,
    )

    batch_size = 100
    batch = [PROMPT for _ in range(batch_size)]
    params = SamplingParams(temperature=0, max_tokens=100)

    # Each stage prints its banner and then times one generate call.
    stages = (
        ("------warm up------", batch[:1]),
        ("------start generating------", batch),
    )
    for banner, stage_prompts in stages:
        print(banner)
        test_prefix(
            llm=engine,
            prompts=stage_prompts,
            sampling_params=params,
        )
|
||
|
||
if __name__ == "__main__":
    # Script entry point: a single optional flag toggles prefix caching.
    description = ('Benchmark the performance with or without automatic '
                   'prefix caching.')
    cli = argparse.ArgumentParser(description=description)
    cli.add_argument(
        '--enable-prefix-caching',
        action='store_true',
        help='enable prefix caching',
    )
    args = cli.parse_args()
    main(args)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters