From b89416e22ddc912afce077932798cc20dd311dec Mon Sep 17 00:00:00 2001
From: mcalman <68564154+mcalman@users.noreply.github.com>
Date: Fri, 28 Jun 2024 22:34:42 -0400
Subject: [PATCH] [Bugfix] fix missing last itl in openai completions
 benchmark (#5926)

---
 benchmarks/backend_request_func.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/benchmarks/backend_request_func.py b/benchmarks/backend_request_func.py
index 4350b96b04a6a..5b5067090426e 100644
--- a/benchmarks/backend_request_func.py
+++ b/benchmarks/backend_request_func.py
@@ -265,6 +265,9 @@ async def async_request_openai_completions(
                         else:
                             data = json.loads(chunk)
 
+                            # NOTE: Some completion API might have a last
+                            # usage summary response without a token so we
+                            # want to check a token was generated
                             if data["choices"][0]["text"]:
                                 timestamp = time.perf_counter()
                                 # First token
@@ -273,12 +276,8 @@ async def async_request_openai_completions(
                                     output.ttft = ttft
 
                                 # Decoding phase
-                                # NOTE: Some completion API might have a last
-                                # usage summary response without a token so we
-                                # do not want to include as inter-token-latency
-                                elif data.get("usage", None) is None:
-                                    output.itl.append(timestamp -
-                                                      most_recent_timestamp)
+                                output.itl.append(timestamp -
+                                                  most_recent_timestamp)
 
                                 most_recent_timestamp = timestamp
                                 generated_text += data["choices"][0]["text"]
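
For context, here is a minimal, self-contained sketch (not part of the patch
or of vLLM) of the timing logic as it stands after this change. The
`measure_stream` helper and the canned `stream` payloads are hypothetical
stand-ins for the live HTTP stream inside
`async_request_openai_completions`; the final token chunk deliberately
carries a usage summary, the case the old
`elif data.get("usage", None) is None` guard skipped, which dropped the
last inter-token latency.

import json
import time


def measure_stream(chunks):
    """Replay pre-decoded SSE payloads; collect TTFT and inter-token latencies."""
    ttft = 0.0
    itl = []  # inter-token latencies
    generated_text = ""
    st = time.perf_counter()
    most_recent_timestamp = st

    for chunk in chunks:
        time.sleep(0.01)  # stand-in for network/decode delay between chunks
        if chunk == "[DONE]":
            continue
        data = json.loads(chunk)

        # Mirrors the patched guard: only chunks that actually carry a token
        # count, so a trailing usage-only chunk (empty text) is ignored, while
        # a token chunk that happens to include usage is still timed.
        if data["choices"][0]["text"]:
            timestamp = time.perf_counter()
            if ttft == 0.0:  # first token
                ttft = timestamp - st
            # As in the patched code, the append is unconditional here, so the
            # first entry equals the TTFT interval (most_recent_timestamp
            # starts at st).
            itl.append(timestamp - most_recent_timestamp)
            most_recent_timestamp = timestamp
            generated_text += data["choices"][0]["text"]

    return ttft, itl, generated_text


# Hypothetical stream: two plain token chunks, then a final token chunk that
# also carries a usage summary, then the [DONE] sentinel.
stream = [json.dumps({"choices": [{"text": t}]}) for t in ["Hello", ","]]
stream.append(json.dumps({"choices": [{"text": " world"}],
                          "usage": {"completion_tokens": 3}}))
stream.append("[DONE]")

ttft, itl, text = measure_stream(stream)
print(f"ttft={ttft:.4f}s  itl={['%.4f' % x for x in itl]}  text={text!r}")
# Under the old guard, the " world" chunk's interval would be missing from
# itl because its usage field is not None; here all three intervals appear.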