From 0b981a97ece025f4e8c5392eefb12165110160c0 Mon Sep 17 00:00:00 2001
From: Pramod Kumar <144990617+pramodkumar-habanalabs@users.noreply.github.com>
Date: Thu, 17 Oct 2024 19:27:37 +0530
Subject: [PATCH] Add flag to run inference with partial dataset (#1420)

---
 examples/text-generation/run_generation.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/examples/text-generation/run_generation.py b/examples/text-generation/run_generation.py
index deb932a49..a8d56ff1c 100755
--- a/examples/text-generation/run_generation.py
+++ b/examples/text-generation/run_generation.py
@@ -320,6 +320,11 @@ def setup_parser(parser):
         action="store_true",
         help="Whether to enable inputs_embeds or not.",
     )
+    parser.add_argument(
+        "--run_partial_dataset",
+        action="store_true",
+        help="Run the inference with dataset for specified --n_iterations(default:5)",
+    )
 
     args = parser.parse_args()
 
@@ -698,6 +703,8 @@ def generate_dataset(batch):
                 f"Output: {tokenizer.batch_decode(outputs, skip_special_tokens=True)[:args.batch_size*args.num_return_sequences]}"
             )
             print(separator)
+            if args.run_partial_dataset and args.n_iterations == i + 1:
+                break
         t_end = time.time()
 
         throughput = total_new_tokens_generated / duration