@@ -112,7 +112,6 @@ def __init__(self, backend: Backend):
112
112
async def resolve (
113
113
self ,
114
114
request : GenerationRequest ,
115
- start_time : float ,
116
115
timeout_time : float ,
117
116
) -> ResponseSummary :
118
117
"""
@@ -121,7 +120,6 @@ async def resolve(
121
120
and handles any errors that may occur during the process.
122
121
123
122
:param request: The request to resolve.
124
- :param start_time: The time to start the request.
125
123
:param timeout_time: The time to wait for a response before timing out.
126
124
If timeout_time is math.inf, the request will not timeout.
127
125
:return: A ResponseSummary object containing the response from the backend.
@@ -140,10 +138,6 @@ async def _runner():
140
138
nonlocal response
141
139
response = resp
142
140
143
- if (wait_time := start_time - time .time ()) > 0 :
144
- await asyncio .sleep (wait_time )
145
-
146
- start_time = time .time ()
147
141
await asyncio .wait_for (
148
142
_runner (),
149
143
timeout = timeout_time - time .time () if timeout_time < math .inf else None ,
@@ -164,7 +158,7 @@ async def _runner():
164
158
except Exception as exc : # noqa: BLE001
165
159
error = str (exc )
166
160
167
- return self ._handle_response (request , response , error , start_time )
161
+ return self ._handle_response (request , response , error )
168
162
169
163
def _create_request_func_kwargs (
170
164
self ,
@@ -208,7 +202,6 @@ def _handle_response(
208
202
request : GenerationRequest ,
209
203
response : Any ,
210
204
error : Optional [str ],
211
- start_time : float ,
212
205
) -> ResponseSummary :
213
206
if response is None or not isinstance (
214
207
response , (ResponseSummary , StreamingTextResponse )
@@ -228,8 +221,8 @@ def _handle_response(
228
221
headers = {},
229
222
payload = {},
230
223
),
231
- start_time = start_time ,
232
- end_time = time . time () ,
224
+ start_time = None ,
225
+ end_time = None ,
233
226
request_id = request .request_id ,
234
227
error = error or "Unknown error" ,
235
228
)
@@ -243,7 +236,7 @@ def _handle_response(
243
236
payload = {},
244
237
),
245
238
start_time = response .start_time ,
246
- end_time = time . time () ,
239
+ end_time = None ,
247
240
request_prompt_tokens = request .stats .get ("prompt_tokens" , None ),
248
241
request_output_tokens = None ,
249
242
response_prompt_tokens = None ,
0 commit comments