From b89416e22ddc912afce077932798cc20dd311dec Mon Sep 17 00:00:00 2001
From: mcalman <68564154+mcalman@users.noreply.github.com>
Date: Fri, 28 Jun 2024 22:34:42 -0400
Subject: [PATCH] [Bugfix] fix missing last itl in openai completions
 benchmark (#5926)

---
 benchmarks/backend_request_func.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/benchmarks/backend_request_func.py b/benchmarks/backend_request_func.py
index 4350b96b04a6a..5b5067090426e 100644
--- a/benchmarks/backend_request_func.py
+++ b/benchmarks/backend_request_func.py
@@ -265,6 +265,9 @@ async def async_request_openai_completions(
                         else:
                             data = json.loads(chunk)
 
+                            # NOTE: Some completion API might have a last
+                            # usage summary response without a token so we
+                            # want to check a token was generated
                             if data["choices"][0]["text"]:
                                 timestamp = time.perf_counter()
                                 # First token
@@ -273,12 +276,8 @@ async def async_request_openai_completions(
                                     output.ttft = ttft
 
                                 # Decoding phase
-                                # NOTE: Some completion API might have a last
-                                # usage summary response without a token so we
-                                # do not want to include as inter-token-latency
-                                elif data.get("usage", None) is None:
-                                    output.itl.append(timestamp -
-                                                      most_recent_timestamp)
+                                output.itl.append(timestamp -
+                                                  most_recent_timestamp)
 
                                 most_recent_timestamp = timestamp
                                 generated_text += data["choices"][0]["text"]
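
For context, here is a minimal, self-contained sketch (not part of the patch
or of vLLM) of the timing logic as it stands after this change. The
`measure_stream` helper and the canned `stream` payloads are hypothetical
stand-ins for the live HTTP stream inside
`async_request_openai_completions`; the final token chunk deliberately
carries a usage summary, the case the old
`elif data.get("usage", None) is None` guard skipped, which dropped the
last inter-token latency.

import json
import time


def measure_stream(chunks):
    """Replay pre-decoded SSE payloads; collect TTFT and inter-token latencies."""
    ttft = 0.0
    itl = []  # inter-token latencies
    generated_text = ""
    st = time.perf_counter()
    most_recent_timestamp = st

    for chunk in chunks:
        time.sleep(0.01)  # stand-in for network/decode delay between chunks
        if chunk == "[DONE]":
            continue
        data = json.loads(chunk)

        # Mirrors the patched guard: only chunks that actually carry a token
        # count, so a trailing usage-only chunk (empty text) is ignored, while
        # a token chunk that happens to include usage is still timed.
        if data["choices"][0]["text"]:
            timestamp = time.perf_counter()
            if ttft == 0.0:  # first token
                ttft = timestamp - st
            # As in the patched code, the append is unconditional here, so the
            # first entry equals the TTFT interval (most_recent_timestamp
            # starts at st).
            itl.append(timestamp - most_recent_timestamp)
            most_recent_timestamp = timestamp
            generated_text += data["choices"][0]["text"]

    return ttft, itl, generated_text


# Hypothetical stream: two plain token chunks, then a final token chunk that
# also carries a usage summary, then the [DONE] sentinel.
stream = [json.dumps({"choices": [{"text": t}]}) for t in ["Hello", ","]]
stream.append(json.dumps({"choices": [{"text": " world"}],
                          "usage": {"completion_tokens": 3}}))
stream.append("[DONE]")

ttft, itl, text = measure_stream(stream)
print(f"ttft={ttft:.4f}s  itl={['%.4f' % x for x in itl]}  text={text!r}")
# Under the old guard, the " world" chunk's interval would be missing from
# itl because its usage field is not None; here all three intervals appear.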