@@ -32,7 +32,7 @@ async def init_app_states(state: State, node_chat_http_server):
3232
3333
3434async def v1_chat_completions (request_data : Dict , request_id : str , received_ts : int ):
35- return app .state .http_server .chat_completion (request_data , request_id , received_ts )
35+ return await app .state .http_server .chat_completion (request_data , request_id , received_ts )
3636
3737
3838async def get_cluster_status ():
@@ -158,7 +158,7 @@ def build_lattica(self):
158158 self .lattica .close ()
159159 return False
160160
161- def chat_completion (self , request_data , request_id : str , received_ts : int ):
161+ async def chat_completion (self , request_data , request_id : str , received_ts : int ):
162162 if self .scheduler_addr is not None : # central scheduler mode
163163 try :
164164 if self .scheduler_stub is None :
@@ -192,10 +192,10 @@ async def stream_generator():
192192 return resp
193193 else :
194194 response = stub .chat_completion (request_data )
195- response = next ( response ).decode ()
195+ content = ( await anext ( iterate_in_threadpool ( response )) ).decode ()
196196 logger .debug (f"Non-stream response completed for { request_id } " )
197197 # content is the decoded JSON string; parse it into a Python object before returning
198- return JSONResponse (content = json .loads (response ))
198+ return JSONResponse (content = json .loads (content ))
199199 except Exception as e :
200200 logger .exception (f"Error in _forward_request: { e } " )
201201 return JSONResponse (
0 commit comments