From 9ae9d800ea120aef35d0819e63abe02973eaac32 Mon Sep 17 00:00:00 2001
From: Graeme Power
Date: Tue, 15 Oct 2024 16:49:08 +0100
Subject: [PATCH] fix: don't call exit stack close in stream iterator as it
 will be called by finally from on_complete anyway

---
 llama_cpp/server/app.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/llama_cpp/server/app.py b/llama_cpp/server/app.py
index ffbf71491..bd2f9d8ed 100644
--- a/llama_cpp/server/app.py
+++ b/llama_cpp/server/app.py
@@ -159,7 +159,7 @@ async def get_event_publisher(
     request: Request,
     inner_send_chan: MemoryObjectSendStream[typing.Any],
     iterator: Iterator[typing.Any],
-    on_complete: typing.Optional[typing.Callable[[], None]] = None,
+    on_complete: typing.Optional[typing.Callable[[], typing.Awaitable[None]]] = None,
 ):
     server_settings = next(get_server_settings())
     interrupt_requests = (
@@ -182,7 +182,7 @@ async def get_event_publisher(
         raise e
     finally:
         if on_complete:
-            on_complete()
+            await on_complete()
 
 
 def _logit_bias_tokens_to_input_ids(
@@ -326,7 +326,6 @@ async def create_completion(
         def iterator() -> Iterator[llama_cpp.CreateCompletionStreamResponse]:
             yield first_response
             yield from iterator_or_completion
-            exit_stack.aclose()
 
         send_chan, recv_chan = anyio.create_memory_object_stream(10)
         return EventSourceResponse(
@@ -518,7 +517,6 @@ async def create_chat_completion(
         def iterator() -> Iterator[llama_cpp.ChatCompletionChunk]:
             yield first_response
             yield from iterator_or_completion
-            exit_stack.aclose()
 
         send_chan, recv_chan = anyio.create_memory_object_stream(10)
         return EventSourceResponse(
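
Below is a minimal, self-contained sketch (not the library's code) of the cleanup
pattern this patch settles on: the request handler owns an AsyncExitStack, the
synchronous stream iterator does no cleanup of its own, and the event publisher
awaits an async on_complete callback in its finally block so resources are
released exactly once, even if the client disconnects mid-stream. The names
fake_resource and handler are illustrative stand-ins, not identifiers from the
actual server.

import asyncio
import contextlib
import typing


@contextlib.asynccontextmanager
async def fake_resource():
    # Stand-in for the server's real per-request resources;
    # purely illustrative.
    print("acquire")
    try:
        yield
    finally:
        print("release")


async def get_event_publisher(
    iterator: typing.Iterator[str],
    on_complete: typing.Optional[typing.Callable[[], typing.Awaitable[None]]] = None,
):
    try:
        for chunk in iterator:
            print("send:", chunk)
    finally:
        # Runs whether the stream finishes or errors out, so cleanup
        # happens exactly once and only here.
        if on_complete:
            await on_complete()


async def handler():
    exit_stack = contextlib.AsyncExitStack()
    await exit_stack.enter_async_context(fake_resource())

    def iterator() -> typing.Iterator[str]:
        yield "first"
        yield "second"
        # No exit_stack.aclose() here: a sync generator cannot await it,
        # so the call would only create a coroutine object that is
        # discarded without ever running the cleanup.

    # exit_stack.aclose is a zero-argument callable returning an
    # awaitable, which is exactly what the new on_complete signature
    # (typing.Callable[[], typing.Awaitable[None]]) describes.
    await get_event_publisher(iterator(), on_complete=exit_stack.aclose)


asyncio.run(handler())

This also shows why the signature change accompanies the removal: once the
iterator no longer closes the stack, the finally block is the only cleanup
path, and it must await the coroutine rather than call it and drop the result.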