@@ -247,7 +247,15 @@ async def chat_completion_stream_generator(
                             model=model_name)
                         if (request.stream_options
                                 and request.stream_options.include_usage):
-                            chunk.usage = None
+                            if (request.stream_options.continuous_usage_stats):
+                                prompt_tokens = len(res.prompt_token_ids)
+                                usage = UsageInfo(prompt_tokens=prompt_tokens,
+                                                  completion_tokens=0,
+                                                  total_tokens=prompt_tokens)
+                                chunk.usage = usage
+                            else:
+                                chunk.usage = None
+
                         data = chunk.model_dump_json(exclude_unset=True)
                         yield f"data: {data}\n\n"

@@ -277,7 +285,18 @@ async def chat_completion_stream_generator(
                                     model=model_name)
                                 if (request.stream_options and
                                         request.stream_options.include_usage):
-                                    chunk.usage = None
+                                    if (request.stream_options.
+                                            continuous_usage_stats):
+                                        prompt_tokens = len(
+                                            res.prompt_token_ids)
+                                        usage = UsageInfo(
+                                            prompt_tokens=prompt_tokens,
+                                            completion_tokens=0,
+                                            total_tokens=prompt_tokens)
+                                        chunk.usage = usage
+                                    else:
+                                        chunk.usage = None
+
                                 data = chunk.model_dump_json(
                                     exclude_unset=True)
                                 yield f"data: {data}\n\n"
@@ -336,7 +355,19 @@ async def chat_completion_stream_generator(
                             model=model_name)
                         if (request.stream_options
                                 and request.stream_options.include_usage):
-                            chunk.usage = None
+                            if (request.stream_options.continuous_usage_stats):
+                                prompt_tokens = len(res.prompt_token_ids)
+                                completion_tokens = len(output.token_ids)
+                                usage = UsageInfo(
+                                    prompt_tokens=prompt_tokens,
+                                    completion_tokens=completion_tokens,
+                                    total_tokens=prompt_tokens +
+                                    completion_tokens,
+                                )
+                                chunk.usage = usage
+                            else:
+                                chunk.usage = None
+
                         data = chunk.model_dump_json(exclude_unset=True)
                         yield f"data: {data}\n\n"
                     else:
@@ -356,7 +387,18 @@ async def chat_completion_stream_generator(
                             model=model_name)
                         if (request.stream_options
                                 and request.stream_options.include_usage):
-                            chunk.usage = None
+                            if (request.stream_options.continuous_usage_stats):
+                                prompt_tokens = len(res.prompt_token_ids)
+                                completion_tokens = len(output.token_ids)
+                                usage = UsageInfo(
+                                    prompt_tokens=prompt_tokens,
+                                    completion_tokens=completion_tokens,
+                                    total_tokens=prompt_tokens +
+                                    completion_tokens,
+                                )
+                                chunk.usage = usage
+                            else:
+                                chunk.usage = None
                         data = chunk.model_dump_json(exclude_unset=True)
                         yield f"data: {data}\n\n"
                         finish_reason_sent[i] = True
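
Taken together, these hunks make the streaming generator attach a UsageInfo (prompt, completion, and total token counts) to every streamed chunk when the client sets stream_options.continuous_usage_stats, instead of leaving usage as None until the final usage chunk. A minimal client sketch against the OpenAI-compatible endpoint this generator serves, assuming a local server at localhost:8000 and a placeholder model name (both are assumptions, not part of this change):

import json

import requests

# Assumed deployment details: adjust the URL and model name for your server.
response = requests.post(
    "http://localhost:8000/v1/chat/completions",
    json={
        "model": "my-model",  # placeholder model name
        "messages": [{"role": "user", "content": "Hello!"}],
        "stream": True,
        "stream_options": {
            "include_usage": True,
            "continuous_usage_stats": True,
        },
    },
    stream=True,
)

for line in response.iter_lines(decode_unicode=True):
    # The generator yields server-sent events of the form "data: {...}".
    if not line or not line.startswith("data: "):
        continue
    payload = line[len("data: "):]
    if payload == "[DONE]":
        break
    chunk = json.loads(payload)
    # With continuous_usage_stats enabled, each chunk carries usage counts
    # (prompt_tokens, completion_tokens, total_tokens) rather than null.
    print(chunk.get("usage"))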