From 0b981a97ece025f4e8c5392eefb12165110160c0 Mon Sep 17 00:00:00 2001 From: Pramod Kumar <144990617+pramodkumar-habanalabs@users.noreply.github.com> Date: Thu, 17 Oct 2024 19:27:37 +0530 Subject: [PATCH] Add flag to run inference with partial dataset (#1420) --- examples/text-generation/run_generation.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/examples/text-generation/run_generation.py b/examples/text-generation/run_generation.py index deb932a49..a8d56ff1c 100755 --- a/examples/text-generation/run_generation.py +++ b/examples/text-generation/run_generation.py @@ -320,6 +320,11 @@ def setup_parser(parser): action="store_true", help="Whether to enable inputs_embeds or not.", ) + parser.add_argument( + "--run_partial_dataset", + action="store_true", + help="Run the inference with dataset for specified --n_iterations(default:5)", + ) args = parser.parse_args() @@ -698,6 +703,8 @@ def generate_dataset(batch): f"Output: {tokenizer.batch_decode(outputs, skip_special_tokens=True)[:args.batch_size*args.num_return_sequences]}" ) print(separator) + if args.run_partial_dataset and args.n_iterations == i + 1: + break t_end = time.time() throughput = total_new_tokens_generated / duration