@@ -326,7 +326,7 @@ async def create_completion(
326326 def iterator () -> Iterator [llama_cpp .CreateCompletionStreamResponse ]:
327327 yield first_response
328328 yield from iterator_or_completion
329- exit_stack .close ()
329+ # Deliberately no exit_stack.aclose() call here: this is a sync generator and cannot await it, so a bare call would only create a never-awaited coroutine (RuntimeWarning, nothing closed). Cleanup is left to on_complete=exit_stack.aclose below. NOTE(review): confirm the event publisher awaits on_complete.
330330
331331 send_chan , recv_chan = anyio .create_memory_object_stream (10 )
332332 return EventSourceResponse (
@@ -336,12 +336,13 @@ def iterator() -> Iterator[llama_cpp.CreateCompletionStreamResponse]:
336336 request = request ,
337337 inner_send_chan = send_chan ,
338338 iterator = iterator (),
339- on_complete = exit_stack .close ,
339+ on_complete = exit_stack .aclose ,
340340 ),
341341 sep = "\n " ,
342342 ping_message_factory = _ping_message_factory ,
343343 )
344344 else :
345+ await exit_stack .aclose ()
345346 return iterator_or_completion
346347
347348
@@ -517,7 +518,7 @@ async def create_chat_completion(
517518 def iterator () -> Iterator [llama_cpp .ChatCompletionChunk ]:
518519 yield first_response
519520 yield from iterator_or_completion
520- exit_stack .close ()
521+ # Deliberately no exit_stack.aclose() call here: this is a sync generator and cannot await it, so a bare call would only create a never-awaited coroutine (RuntimeWarning, nothing closed). Cleanup is left to on_complete=exit_stack.aclose below. NOTE(review): confirm the event publisher awaits on_complete.
521522
522523 send_chan , recv_chan = anyio .create_memory_object_stream (10 )
523524 return EventSourceResponse (
@@ -527,13 +528,13 @@ def iterator() -> Iterator[llama_cpp.ChatCompletionChunk]:
527528 request = request ,
528529 inner_send_chan = send_chan ,
529530 iterator = iterator (),
530- on_complete = exit_stack .close ,
531+ on_complete = exit_stack .aclose ,
531532 ),
532533 sep = "\n " ,
533534 ping_message_factory = _ping_message_factory ,
534535 )
535536 else :
536- exit_stack .close ()
537+ await exit_stack .aclose ()
537538 return iterator_or_completion
538539
539540
0 commit comments