
Commit 4da21ce

fix: don't call exit stack close in stream iterator as it will be called by finally from on_complete anyway
1 parent ab0b783 commit 4da21ce

1 file changed: +2 additions, -4 deletions

llama_cpp/server/app.py

@@ -159,7 +159,7 @@ async def get_event_publisher(
     request: Request,
     inner_send_chan: MemoryObjectSendStream[typing.Any],
     iterator: Iterator[typing.Any],
-    on_complete: typing.Optional[typing.Callable[[], None]] = None,
+    on_complete: typing.Optional[typing.Callable[[], typing.Awaitable[None]]] = None,
 ):
     server_settings = next(get_server_settings())
     interrupt_requests = (
@@ -182,7 +182,7 @@ async def get_event_publisher(
                 raise e
         finally:
             if on_complete:
-                on_complete()
+                await on_complete()


 def _logit_bias_tokens_to_input_ids(
@@ -326,7 +326,6 @@ async def create_completion(
         def iterator() -> Iterator[llama_cpp.CreateCompletionStreamResponse]:
             yield first_response
             yield from iterator_or_completion
-            exit_stack.aclose()

         send_chan, recv_chan = anyio.create_memory_object_stream(10)
         return EventSourceResponse(
@@ -518,7 +517,6 @@ async def create_chat_completion(
         def iterator() -> Iterator[llama_cpp.ChatCompletionChunk]:
             yield first_response
             yield from iterator_or_completion
-            exit_stack.aclose()

         send_chan, recv_chan = anyio.create_memory_object_stream(10)
         return EventSourceResponse(
