From 555a7c8436214c959550c1ae43e6d774c269df8b Mon Sep 17 00:00:00 2001 From: Eduard van Valkenburg Date: Fri, 8 Mar 2024 22:10:43 +0100 Subject: [PATCH] Python: fix for streaming openai responses, and first parts of fixes for Chat With Your Data (#5387) ### Motivation and Context ### Description ### Contribution Checklist - [ ] The code builds clean without any errors or warnings - [ ] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [ ] All unit tests pass, and I have added new tests where possible - [ ] I didn't break anyone :smile: --- .../azure_chat_gpt_api.py | 33 ++-- .../azure_chat_gpt_with_data_api.py | 58 +++--- ...chat_gpt_with_data_api_function_calling.py | 5 +- .../chat_gpt_api_function_calling.py | 40 ++-- .../ai/chat_completion_client_base.py | 6 +- .../services/gp_chat_completion.py | 4 +- .../ollama/services/ollama_chat_completion.py | 18 +- .../azure_streaming_chat_message_content.py | 21 +- .../ai/open_ai/contents/function_call.py | 8 +- .../contents/open_ai_chat_message_content.py | 9 +- .../open_ai_streaming_chat_message_content.py | 63 +++++- .../ai/open_ai/contents/tool_calls.py | 2 - .../services/open_ai_chat_completion_base.py | 186 +++++++++--------- .../ai/open_ai/services/open_ai_handler.py | 1 + .../services/open_ai_text_embedding_base.py | 14 +- .../ai/open_ai/services/tool_call_behavior.py | 1 + .../streaming_chat_message_content.py | 32 +++ .../exceptions/template_engine_exceptions.py | 5 + .../functions/kernel_arguments.py | 2 +- .../functions/kernel_function.py | 6 +- .../functions/kernel_function_from_prompt.py | 4 +- python/semantic_kernel/kernel.py | 10 +- .../template_engine/blocks/var_block.py | 11 +- python/tests/conftest.py | 6 + .../test_azure_oai_chat_service.py | 16 +- .../connectors/open_ai/contents/conftest.py | 14 ++ .../open_ai/contents/test_function_call.py | 104 ++++++++-- .../open_ai/contents/test_tool_call.py | 38 ++++ .../services/test_azure_chat_completion.py | 88 ++++----- .../test_open_ai_chat_completion_base.py | 67 ++++--- .../tests/unit/contents/test_chat_history.py | 5 - 31 files changed, 523 insertions(+), 354 deletions(-) create mode 100644 python/tests/unit/connectors/open_ai/contents/conftest.py create mode 100644 python/tests/unit/connectors/open_ai/contents/test_tool_call.py diff --git a/python/samples/kernel-syntax-examples/azure_chat_gpt_api.py b/python/samples/kernel-syntax-examples/azure_chat_gpt_api.py index a785cebbb1aa..d224e9dafdcd 100644 --- a/python/samples/kernel-syntax-examples/azure_chat_gpt_api.py +++ b/python/samples/kernel-syntax-examples/azure_chat_gpt_api.py @@ -3,17 +3,12 @@ import asyncio import logging -from dotenv import load_dotenv - import semantic_kernel as sk import semantic_kernel.connectors.ai.open_ai as sk_oai from semantic_kernel.contents.chat_history import ChatHistory -from semantic_kernel.prompt_template.input_variable import InputVariable from semantic_kernel.utils.settings import azure_openai_settings_from_dot_env_as_dict -logging.basicConfig(level=logging.INFO) - -load_dotenv() +logging.basicConfig(level=logging.WARNING) system_message = """ You are a chat bot. 
Your name is Mosscap and @@ -45,31 +40,25 @@ ## The second method is useful when you are using a single service, and you want to have type checking on the request settings or when you are using multiple instances of the same type of service, for instance gpt-35-turbo and gpt-4, both in openai and both for chat. # noqa: E501 E266 ## 3. create the request settings from the kernel based on the registered service class: # noqa: E266 -req_settings = kernel.get_service(service_id).get_prompt_execution_settings_class()(service_id=service_id) +req_settings = kernel.get_prompt_execution_settings_from_service_id(service_id=service_id) req_settings.max_tokens = 2000 req_settings.temperature = 0.7 req_settings.top_p = 0.8 +req_settings.auto_invoke_kernel_functions = True ## The third method is the most specific as the returned request settings class is the one that is registered for the service and has some fields already filled in, like the service_id and ai_model_id. # noqa: E501 E266 -prompt_template_config = sk.PromptTemplateConfig( - template=system_message - + """ Summarize the on-going chat history: {{$chat_history}} and respond to this statement: {{$request}}""", - name="chat", - input_variables=[ - InputVariable(name="request", description="The user input", is_required=True), - InputVariable(name="chat_history", description="The history of the conversation", is_required=True), - ], - execution_settings=req_settings, + +chat_function = kernel.create_function_from_prompt( + prompt=system_message + """{{$chat_history}}{{$user_input}}""", + function_name="chat", + plugin_name="chat", + prompt_execution_settings=req_settings, ) history = ChatHistory() history.add_user_message("Hi there, who are you?") history.add_assistant_message("I am Mosscap, a chat bot. 
I'm trying to figure out what people need.") -chat_function = kernel.create_function_from_prompt( - function_name="chat", plugin_name="chat", prompt_template_config=prompt_template_config -) - async def chat() -> bool: try: @@ -89,7 +78,7 @@ async def chat() -> bool: if stream: answer = kernel.invoke_stream( chat_function, - request=user_input, + user_input=user_input, chat_history=history, ) print("Mosscap:> ", end="") @@ -99,7 +88,7 @@ async def chat() -> bool: return True answer = await kernel.invoke( chat_function, - request=user_input, + user_input=user_input, chat_history=history, ) print(f"Mosscap:> {answer}") diff --git a/python/samples/kernel-syntax-examples/azure_chat_gpt_with_data_api.py b/python/samples/kernel-syntax-examples/azure_chat_gpt_with_data_api.py index 864eb5ff21cc..22b0d09d4047 100644 --- a/python/samples/kernel-syntax-examples/azure_chat_gpt_with_data_api.py +++ b/python/samples/kernel-syntax-examples/azure_chat_gpt_with_data_api.py @@ -4,9 +4,9 @@ import semantic_kernel as sk import semantic_kernel.connectors.ai.open_ai as sk_oai -from semantic_kernel.connectors.ai.open_ai.contents.azure_streaming_chat_message_content import ( - AzureStreamingChatMessageContent, -) +from semantic_kernel.connectors.ai.open_ai.contents.azure_chat_message_content import AzureChatMessageContent +from semantic_kernel.connectors.ai.open_ai.contents.function_call import FunctionCall +from semantic_kernel.connectors.ai.open_ai.contents.tool_calls import ToolCall from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.azure_chat_prompt_execution_settings import ( AzureAISearchDataSources, AzureChatPromptExecutionSettings, @@ -14,14 +14,19 @@ ExtraBody, ) from semantic_kernel.contents.chat_history import ChatHistory +from semantic_kernel.contents.chat_role import ChatRole from semantic_kernel.functions.kernel_arguments import KernelArguments from semantic_kernel.prompt_template.input_variable import InputVariable from semantic_kernel.prompt_template.prompt_template_config import PromptTemplateConfig +from semantic_kernel.utils.settings import ( + azure_aisearch_settings_from_dot_env_as_dict, + azure_openai_settings_from_dot_env_as_dict, +) kernel = sk.Kernel() # Load Azure OpenAI Settings -deployment, api_key, endpoint = sk.azure_openai_settings_from_dot_env() +aoai_settings = azure_openai_settings_from_dot_env_as_dict() # For example, AI Search index may contain the following document: @@ -29,7 +34,7 @@ # Bonded by their love for the natural world and shared curiosity, they uncovered a # groundbreaking phenomenon in glaciology that could potentially reshape our understanding of climate change. -azure_ai_search_settings = sk.azure_aisearch_settings_from_dot_env_as_dict() +azure_ai_search_settings = azure_aisearch_settings_from_dot_env_as_dict() # Our example index has fields "source_title", "source_text", "source_url", and "source_file". # Add fields mapping to the settings to indicate which fields to use for the title, content, URL, and file path. @@ -49,35 +54,28 @@ # When using data, set use_extensions=True and use the 2023-12-01-preview API version. 
chat_service = sk_oai.AzureChatCompletion( service_id="chat-gpt", - deployment_name=deployment, - api_key=api_key, - endpoint=endpoint, - api_version="2023-12-01-preview", use_extensions=True, + **aoai_settings, ) kernel.add_service(chat_service) prompt_template_config = PromptTemplateConfig( - template="{{$user_input}}", + template="{{$chat_history}}{{$user_input}}", name="chat", template_format="semantic-kernel", input_variables=[ + InputVariable(name="chat_history", description="The chat history", is_required=True), InputVariable(name="request", description="The user input", is_required=True), ], execution_settings={"default": req_settings}, ) - -chat = ChatHistory() - -chat.add_user_message("Hi there, who are you?") -chat.add_assistant_message("I am an AI assistant here to answer your questions.") - -arguments = KernelArguments() - chat_function = kernel.create_function_from_prompt( plugin_name="ChatBot", function_name="Chat", prompt_template_config=prompt_template_config ) +chat_history = ChatHistory() +chat_history.add_system_message("I am an AI assistant here to answer your questions.") + async def chat() -> bool: try: @@ -96,20 +94,34 @@ async def chat() -> bool: # Non streaming # answer = await kernel.run(chat_function, input_vars=context_vars) # print(f"Assistant:> {answer}") - arguments = KernelArguments(user_input=user_input, execution_settings=req_settings) + arguments = KernelArguments(chat_history=chat_history, user_input=user_input, execution_settings=req_settings) full_message = None print("Assistant:> ", end="") async for message in kernel.invoke_stream(chat_function, arguments=arguments): print(str(message[0]), end="") full_message = message[0] if not full_message else full_message + message[0] - chat.add_assistant_message(str(full_message)) print("\n") # The tool message containing cited sources is available in the context - if isinstance(full_message, AzureStreamingChatMessageContent): - chat.add_function_response_message(name="tool", content=full_message.tool_message) - print(f"Tool:> {full_message.tool_message}") + if full_message: + chat_history.add_user_message(user_input) + if hasattr(full_message, "tool_message"): + chat_history.add_message( + AzureChatMessageContent( + role="assistant", + tool_calls=[ + ToolCall( + id="chat_with_your_data", + function=FunctionCall(name="chat_with_your_data", arguments=""), + ) + ], + ) + ) + chat_history.add_tool_message(full_message.tool_message, {"tool_call_id": "chat_with_your_data"}) + if full_message.role is None: + full_message.role = ChatRole.ASSISTANT + chat_history.add_message(full_message) return True diff --git a/python/samples/kernel-syntax-examples/azure_chat_gpt_with_data_api_function_calling.py b/python/samples/kernel-syntax-examples/azure_chat_gpt_with_data_api_function_calling.py index 28e9cb8ed78f..3d333cbb4664 100644 --- a/python/samples/kernel-syntax-examples/azure_chat_gpt_with_data_api_function_calling.py +++ b/python/samples/kernel-syntax-examples/azure_chat_gpt_with_data_api_function_calling.py @@ -68,11 +68,11 @@ # the format for that is 'PluginName-FunctionName', (i.e. 'math-Add'). # if the model or api version do not support this you will get an error. 
prompt_template_config = PromptTemplateConfig( - template="{{$user_input}}", + template="{{$chat_history}}{{$user_input}}", name="chat", template_format="semantic-kernel", input_variables=[ - InputVariable(name="history", description="The history of the conversation", is_required=True), + InputVariable(name="chat_history", description="The history of the conversation", is_required=True), InputVariable(name="user_input", description="The user input", is_required=True), ], ) @@ -110,6 +110,7 @@ async def chat() -> bool: print("\n\nExiting chat...") return False + arguments["chat_history"] = history arguments["user_input"] = user_input answer = await kernel.invoke( functions=chat_function, diff --git a/python/samples/kernel-syntax-examples/chat_gpt_api_function_calling.py b/python/samples/kernel-syntax-examples/chat_gpt_api_function_calling.py index 3eed9abe14d7..4b9a38323a3b 100644 --- a/python/samples/kernel-syntax-examples/chat_gpt_api_function_calling.py +++ b/python/samples/kernel-syntax-examples/chat_gpt_api_function_calling.py @@ -14,13 +14,10 @@ from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.open_ai_prompt_execution_settings import ( OpenAIPromptExecutionSettings, ) -from semantic_kernel.connectors.ai.open_ai.utils import ( - get_tool_call_object, -) +from semantic_kernel.connectors.ai.open_ai.utils import get_tool_call_object from semantic_kernel.contents.chat_history import ChatHistory from semantic_kernel.core_plugins import MathPlugin, TimePlugin from semantic_kernel.functions.kernel_arguments import KernelArguments -from semantic_kernel.prompt_template.input_variable import InputVariable if TYPE_CHECKING: from semantic_kernel.functions.kernel_function import KernelFunction @@ -58,6 +55,11 @@ kernel.import_plugin_from_object(MathPlugin(), plugin_name="math") kernel.import_plugin_from_object(TimePlugin(), plugin_name="time") +chat_function = kernel.create_function_from_prompt( + prompt="{{$chat_history}}{{$user_input}}", + plugin_name="ChatBot", + function_name="Chat", +) # enabling or disabling function calling is done by setting the function_call parameter for the completion. # when the function_call parameter is set to "auto" the model will decide which function to use, if any. # if you only want to use a specific function, set the name of that function in this parameter, @@ -68,6 +70,7 @@ # If configured to be greater than one, this value will be overridden to 1. execution_settings = sk_oai.OpenAIChatPromptExecutionSettings( service_id="chat", + ai_model_id="gpt-3.5-turbo-1106", max_tokens=2000, temperature=0.7, top_p=0.8, @@ -77,30 +80,13 @@ max_auto_invoke_attempts=3, ) -prompt_template_config = sk.PromptTemplateConfig( - template="{{$user_input}}", - name="chat", - template_format="semantic-kernel", - input_variables=[ - InputVariable(name="user_input", description="The user input", is_required=True), - InputVariable(name="chat_history", description="The history of the conversation", is_required=True), - ], - execution_settings={"chat": execution_settings}, -) - history = ChatHistory() history.add_system_message(system_message) history.add_user_message("Hi there, who are you?") history.add_assistant_message("I am Mosscap, a chat bot. 
I'm trying to figure out what people need.") -arguments = KernelArguments() - -chat_function = kernel.create_function_from_prompt( - prompt_template_config=prompt_template_config, - plugin_name="ChatBot", - function_name="Chat", -) +arguments = KernelArguments(settings=execution_settings) def print_tool_calls(message: Union[OpenAIChatMessageContent, OpenAIStreamingChatMessageContent]) -> None: @@ -138,7 +124,7 @@ async def handle_streaming( print("Mosscap:> ", end="") streamed_chunks: List[OpenAIStreamingChatMessageContent] = [] - tool_call_ids_by_index: Dict[int, Any] = {} + tool_call_ids_by_index: Dict[str, Any] = {} async for message in response: if not execution_settings.auto_invoke_kernel_functions and isinstance( @@ -147,11 +133,11 @@ async def handle_streaming( streamed_chunks.append(message[0]) if message[0].tool_calls is not None: for tc in message[0].tool_calls: - if tc.index not in tool_call_ids_by_index: - tool_call_ids_by_index[tc.index] = tc + if tc.id not in tool_call_ids_by_index: + tool_call_ids_by_index[tc.id] = tc else: for tc in message[0].tool_calls: - tool_call_ids_by_index[tc.index] += tc + tool_call_ids_by_index[tc.id] += tc else: print(str(message[0]), end="") @@ -178,7 +164,7 @@ async def chat() -> bool: print("\n\nExiting chat...") return False - stream = False + stream = True if stream: await handle_streaming(kernel, chat_function, user_input, history, execution_settings) else: diff --git a/python/semantic_kernel/connectors/ai/chat_completion_client_base.py b/python/semantic_kernel/connectors/ai/chat_completion_client_base.py index cb291d2dd3f4..95c5f89f53fd 100644 --- a/python/semantic_kernel/connectors/ai/chat_completion_client_base.py +++ b/python/semantic_kernel/connectors/ai/chat_completion_client_base.py @@ -22,7 +22,7 @@ async def complete_chat( self, chat_history: "ChatHistory", settings: "PromptExecutionSettings", - **kwargs: Dict[str, Any], + **kwargs: Any, ) -> List["ChatMessageContent"]: """ This is the method that is called from the kernel to get a response from a chat-optimized LLM. @@ -43,7 +43,7 @@ async def complete_chat_stream( self, chat_history: "ChatHistory", settings: "PromptExecutionSettings", - **kwargs: Dict[str, Any], + **kwargs: Any, ) -> AsyncIterable[List["StreamingChatMessageContent"]]: """ This is the method that is called from the kernel to get a stream response from a chat-optimized LLM. @@ -82,5 +82,5 @@ def _prepare_chat_history_for_request( def _chat_message_content_to_dict(self, message: ChatMessageContent) -> Dict[str, Optional[str]]: """can be overridden to customize the serialization of the chat message content""" - msg = message.model_dump(exclude_none=True, include=["role", "content"]) + msg = message.model_dump(include=["role", "content"]) return msg diff --git a/python/semantic_kernel/connectors/ai/google_palm/services/gp_chat_completion.py b/python/semantic_kernel/connectors/ai/google_palm/services/gp_chat_completion.py index 16b4d93d2780..8a6e80bda325 100644 --- a/python/semantic_kernel/connectors/ai/google_palm/services/gp_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/google_palm/services/gp_chat_completion.py @@ -63,7 +63,7 @@ async def complete_chat( self, chat_history: ChatHistory, settings: GooglePalmPromptExecutionSettings, - **kwargs: Dict[str, Any], + **kwargs: Any, ) -> List[ChatMessageContent]: """ This is the method that is called from the kernel to get a response from a chat-optimized LLM. 
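# A minimal sketch of the merge-by-id pattern this patch introduces for streamed tool
# calls (replacing the old merge-by-index), assuming the ToolCall/FunctionCall classes
# shown in this change set; the id "call_1", the name "math-Add" and the argument
# fragments below are illustrative values only, not taken from the PR.
from typing import Dict

from semantic_kernel.connectors.ai.open_ai.contents.function_call import FunctionCall
from semantic_kernel.connectors.ai.open_ai.contents.tool_calls import ToolCall

# Two chunks of one streamed tool call: the first carries the id and function name,
# the second carries only the next slice of the JSON arguments.
chunks = [
    ToolCall(id="call_1", function=FunctionCall(name="math-Add", arguments='{"input": 3,')),
    ToolCall(id="call_1", function=FunctionCall(arguments=' "amount": 4}')),
]

tool_calls_by_id: Dict[str, ToolCall] = {}
for tc in chunks:
    if tc.id not in tool_calls_by_id:
        tool_calls_by_id[tc.id] = tc
    else:
        # ToolCall.__add__ keeps the existing id/type and concatenates argument fragments.
        tool_calls_by_id[tc.id] += tc

print(tool_calls_by_id["call_1"].function.arguments)  # {"input": 3, "amount": 4}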
@@ -114,7 +114,7 @@ async def complete_chat_stream( self, messages: List[Tuple[str, str]], settings: GooglePalmPromptExecutionSettings, - **kwargs: Dict[str, Any], + **kwargs: Any, ): raise NotImplementedError("Google Palm API does not currently support streaming") diff --git a/python/semantic_kernel/connectors/ai/ollama/services/ollama_chat_completion.py b/python/semantic_kernel/connectors/ai/ollama/services/ollama_chat_completion.py index f4bee9777a54..530cfc9c5223 100644 --- a/python/semantic_kernel/connectors/ai/ollama/services/ollama_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/ollama/services/ollama_chat_completion.py @@ -2,21 +2,15 @@ import json import logging -from typing import Any, AsyncIterable, Dict, List, Optional +from typing import Any, AsyncIterable, List, Optional import aiohttp from pydantic import HttpUrl -from semantic_kernel.connectors.ai.chat_completion_client_base import ( - ChatCompletionClientBase, -) -from semantic_kernel.connectors.ai.ollama.ollama_prompt_execution_settings import ( - OllamaChatPromptExecutionSettings, -) +from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase +from semantic_kernel.connectors.ai.ollama.ollama_prompt_execution_settings import OllamaChatPromptExecutionSettings from semantic_kernel.connectors.ai.ollama.utils import AsyncSession -from semantic_kernel.connectors.ai.text_completion_client_base import ( - TextCompletionClientBase, -) +from semantic_kernel.connectors.ai.text_completion_client_base import TextCompletionClientBase from semantic_kernel.contents.chat_history import ChatHistory from semantic_kernel.contents.chat_message_content import ChatMessageContent from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent @@ -45,7 +39,7 @@ async def complete_chat( self, chat_history: ChatHistory, settings: OllamaChatPromptExecutionSettings, - **kwargs: Dict[str, Any], + **kwargs: Any, ) -> List[ChatMessageContent]: """ This is the method that is called from the kernel to get a response from a chat-optimized LLM. @@ -78,7 +72,7 @@ async def complete_chat_stream( self, chat_history: ChatHistory, settings: OllamaChatPromptExecutionSettings, - **kwargs: Dict[str, Any], + **kwargs: Any, ) -> AsyncIterable[List[StreamingChatMessageContent]]: """ Streams a text completion using a Ollama model. diff --git a/python/semantic_kernel/connectors/ai/open_ai/contents/azure_streaming_chat_message_content.py b/python/semantic_kernel/connectors/ai/open_ai/contents/azure_streaming_chat_message_content.py index 756bd96d92f9..568c5733295d 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/contents/azure_streaming_chat_message_content.py +++ b/python/semantic_kernel/connectors/ai/open_ai/contents/azure_streaming_chat_message_content.py @@ -1,5 +1,4 @@ # Copyright (c) Microsoft. All rights reserved. 
-from copy import copy from typing import Optional from semantic_kernel.connectors.ai.open_ai.contents.open_ai_streaming_chat_message_content import ( @@ -52,18 +51,19 @@ def __add__(self, other: "AzureStreamingChatMessageContent") -> "AzureStreamingC if self.role and other.role and self.role != other.role: raise ContentAdditionException("Cannot add StreamingChatMessageContent with different role") fc = (self.function_call + other.function_call) if self.function_call else other.function_call + tc = {} if self.tool_calls: + tc = {t.id: t for t in self.tool_calls} + last_tc_id = list(tc.keys())[-1] if other.tool_calls: - tc = copy(self.tool_calls) for new_tool in other.tool_calls: - if new_tool.index >= len(self.tool_calls): - tc.append(new_tool) + if new_tool.id is None or new_tool.id == last_tc_id: + tc[last_tc_id] += new_tool else: - tc[new_tool.index] += new_tool - else: - tc = copy(self.tool_calls) - else: - tc = copy(other.tool_calls) + tc[new_tool.id] = new_tool + elif other.tool_calls: + tc = {t.id: t for t in other.tool_calls} + tc_list = list(tc.values()) return AzureStreamingChatMessageContent( choice_index=self.choice_index, @@ -75,6 +75,7 @@ def __add__(self, other: "AzureStreamingChatMessageContent") -> "AzureStreamingC encoding=self.encoding, finish_reason=self.finish_reason or other.finish_reason, function_call=fc, - tool_calls=tc, + tool_calls=tc_list, + tool_call_id=self.tool_call_id or other.tool_call_id, tool_message=(self.tool_message or "") + (other.tool_message or ""), ) diff --git a/python/semantic_kernel/connectors/ai/open_ai/contents/function_call.py b/python/semantic_kernel/connectors/ai/open_ai/contents/function_call.py index 7a64ac3808b4..97b2eb1faa9c 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/contents/function_call.py +++ b/python/semantic_kernel/connectors/ai/open_ai/contents/function_call.py @@ -16,18 +16,12 @@ class FunctionCall(KernelBaseModel): name: Optional[str] = None arguments: Optional[str] = None - # TODO: check if needed - id: Optional[str] = None def __add__(self, other: Optional["FunctionCall"]) -> "FunctionCall": """Add two function calls together, combines the arguments, ignores the name.""" if not other: return self - return FunctionCall( - name=self.name or other.name, - arguments=(self.arguments or "") + (other.arguments or ""), - id=self.id or other.id, - ) + return FunctionCall(name=self.name or other.name, arguments=(self.arguments or "") + (other.arguments or "")) def parse_arguments(self) -> Optional[Dict[str, Any]]: """Parse the arguments into a dictionary.""" diff --git a/python/semantic_kernel/connectors/ai/open_ai/contents/open_ai_chat_message_content.py b/python/semantic_kernel/connectors/ai/open_ai/contents/open_ai_chat_message_content.py index 6924669c5e77..6558f1fdf855 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/contents/open_ai_chat_message_content.py +++ b/python/semantic_kernel/connectors/ai/open_ai/contents/open_ai_chat_message_content.py @@ -1,5 +1,4 @@ # Copyright (c) Microsoft. All rights reserved. 
-import json from typing import List, Optional from xml.etree.ElementTree import Element @@ -34,6 +33,7 @@ class OpenAIChatMessageContent(ChatMessageContent): inner_content: Optional[ChatCompletion] = None function_call: Optional[FunctionCall] = None tool_calls: Optional[List[ToolCall]] = None + tool_call_id: Optional[str] = None @staticmethod def ToolIdProperty(): @@ -49,11 +49,12 @@ def to_prompt(self, root_key: str) -> str: root = Element(root_key) root.set("role", self.role.value) - root.set("metadata", json.dumps(self.metadata)) if self.function_call: root.set("function_call", self.function_call.model_dump_json(exclude_none=True)) if self.tool_calls: root.set("tool_calls", "|".join([call.model_dump_json(exclude_none=True) for call in self.tool_calls])) + if self.tool_call_id: + root.set("tool_call_id", self.tool_call_id) root.text = self.content or "" return ElementTree.tostring(root, encoding=self.encoding or "unicode", short_empty_elements=False) @@ -68,10 +69,10 @@ def from_element(cls, element: Element) -> "ChatMessageContent": ChatMessageContent - The new instance of ChatMessageContent. """ args = {"role": element.get("role", ChatRole.USER.value), "content": element.text} - if metadata := element.get("metadata"): - args["metadata"] = json.loads(metadata) if function_call := element.get("function_call"): args["function_call"] = FunctionCall.model_validate_json(function_call) if tool_calls := element.get("tool_calls"): args["tool_calls"] = [ToolCall.model_validate_json(call) for call in tool_calls.split("|")] + if tool_call_id := element.get("tool_call_id"): + args["tool_call_id"] = tool_call_id return cls(**args) diff --git a/python/semantic_kernel/connectors/ai/open_ai/contents/open_ai_streaming_chat_message_content.py b/python/semantic_kernel/connectors/ai/open_ai/contents/open_ai_streaming_chat_message_content.py index bd9bc3465b6b..672743fb85e7 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/contents/open_ai_streaming_chat_message_content.py +++ b/python/semantic_kernel/connectors/ai/open_ai/contents/open_ai_streaming_chat_message_content.py @@ -1,12 +1,14 @@ # Copyright (c) Microsoft. All rights reserved. 
-from copy import copy from typing import List, Optional +from xml.etree.ElementTree import Element +from defusedxml import ElementTree from openai.types.chat.chat_completion_chunk import ChatCompletionChunk from semantic_kernel.connectors.ai.open_ai.contents.function_call import FunctionCall from semantic_kernel.connectors.ai.open_ai.contents.tool_calls import ToolCall from semantic_kernel.contents import StreamingChatMessageContent +from semantic_kernel.contents.chat_role import ChatRole from semantic_kernel.exceptions import ContentAdditionException @@ -38,6 +40,7 @@ class OpenAIStreamingChatMessageContent(StreamingChatMessageContent): inner_content: ChatCompletionChunk function_call: Optional[FunctionCall] = None tool_calls: Optional[List[ToolCall]] = None + tool_call_id: Optional[str] = None def __add__(self, other: "OpenAIStreamingChatMessageContent") -> "OpenAIStreamingChatMessageContent": """When combining two OpenAIStreamingChatMessageContent instances, @@ -55,18 +58,19 @@ def __add__(self, other: "OpenAIStreamingChatMessageContent") -> "OpenAIStreamin if self.role and other.role and self.role != other.role: raise ContentAdditionException("Cannot add StreamingChatMessageContent with different role") fc = (self.function_call + other.function_call) if self.function_call else other.function_call + tc = {} if self.tool_calls: + tc = {t.id: t for t in self.tool_calls} + last_tc_id = list(tc.keys())[-1] if other.tool_calls: - tc = copy(self.tool_calls) for new_tool in other.tool_calls: - if new_tool.index >= len(self.tool_calls): - tc.append(new_tool) + if new_tool.id is None or new_tool.id == last_tc_id: + tc[last_tc_id] += new_tool else: - tc[new_tool.index] += new_tool - else: - tc = copy(self.tool_calls) - else: - tc = copy(other.tool_calls) + tc[new_tool.id] = new_tool + elif other.tool_calls: + tc = {t.id: t for t in other.tool_calls} + tc_list = list(tc.values()) return OpenAIStreamingChatMessageContent( choice_index=self.choice_index, @@ -78,5 +82,44 @@ def __add__(self, other: "OpenAIStreamingChatMessageContent") -> "OpenAIStreamin encoding=self.encoding, finish_reason=self.finish_reason or other.finish_reason, function_call=fc, - tool_calls=tc, + tool_calls=tc_list, + tool_call_id=self.tool_call_id or other.tool_call_id, ) + + def to_prompt(self, root_key: str) -> str: + """Convert the OpenAIChatMessageContent to a prompt. + + Returns: + str - The prompt from the ChatMessageContent. + """ + + root = Element(root_key) + if self.role: + root.set("role", self.role.value) + if self.function_call: + root.set("function_call", self.function_call.model_dump_json(exclude_none=True)) + if self.tool_calls: + root.set("tool_calls", "|".join([call.model_dump_json(exclude_none=True) for call in self.tool_calls])) + if self.tool_call_id: + root.set("tool_call_id", self.tool_call_id) + root.text = self.content or "" + return ElementTree.tostring(root, encoding=self.encoding or "unicode", short_empty_elements=False) + + @classmethod + def from_element(cls, element: Element) -> "StreamingChatMessageContent": + """Create a new instance of OpenAIChatMessageContent from a prompt. + + Args: + prompt: str - The prompt to create the ChatMessageContent from. + + Returns: + ChatMessageContent - The new instance of ChatMessageContent. 
+ """ + args = {"role": element.get("role", ChatRole.USER.value), "content": element.text} + if function_call := element.get("function_call"): + args["function_call"] = FunctionCall.model_validate_json(function_call) + if tool_calls := element.get("tool_calls"): + args["tool_calls"] = [ToolCall.model_validate_json(call) for call in tool_calls.split("|")] + if tool_call_id := element.get("tool_call_id"): + args["tool_call_id"] = tool_call_id + return cls(**args) diff --git a/python/semantic_kernel/connectors/ai/open_ai/contents/tool_calls.py b/python/semantic_kernel/connectors/ai/open_ai/contents/tool_calls.py index 456f0e6c0c08..8b3d86eb58a7 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/contents/tool_calls.py +++ b/python/semantic_kernel/connectors/ai/open_ai/contents/tool_calls.py @@ -8,7 +8,6 @@ class ToolCall(KernelBaseModel): """Class to hold a tool call response.""" - index: Optional[int] = None id: Optional[str] = None type: Optional[Literal["function"]] = "function" function: Optional[FunctionCall] = None @@ -18,7 +17,6 @@ def __add__(self, other: Optional["ToolCall"]) -> "ToolCall": if not other: return self return ToolCall( - index=self.index or other.index, id=self.id or other.id, type=self.type or other.type, function=self.function + other.function if self.function else other.function, diff --git a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py index 7c1f1024883c..726396d1a2bf 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py +++ b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py @@ -35,6 +35,10 @@ class OpenAIChatCompletionBase(OpenAIHandler, ChatCompletionClientBase): """OpenAI Chat completion class.""" + # region Overriding base class methods + # most of the methods are overridden from the ChatCompletionClientBase class, otherwise it is mentioned + + # override from AIServiceClientBase def get_prompt_execution_settings_class(self) -> "PromptExecutionSettings": """Create a request settings object.""" return OpenAIChatPromptExecutionSettings @@ -47,7 +51,7 @@ async def complete_chat( self, chat_history: ChatHistory, settings: OpenAIPromptExecutionSettings, - **kwargs: Dict[str, Any], + **kwargs: Any, ) -> List[OpenAIChatMessageContent]: """Executes a chat completion request and returns the result. @@ -60,13 +64,15 @@ async def complete_chat( Returns: List[OpenAIChatMessageContent | AzureChatMessageContent] -- The completion result(s). 
""" - auto_invoke_kernel_functions, max_auto_invoke_attempts = self._get_auto_invoke_execution_settings(settings) - kernel = self._validate_kernel_for_tool_calling(**kwargs) + tool_call_behavior = self._get_tool_call_behavior(settings) + kernel = kwargs.get("kernel", None) + if tool_call_behavior.auto_invoke_kernel_functions and kernel is None: + raise ServiceInvalidExecutionSettingsError("The kernel argument is required for OpenAI tool calling.") - for _ in range(max_auto_invoke_attempts): + for _ in range(tool_call_behavior.max_auto_invoke_attempts): settings = self._prepare_settings(settings, chat_history, stream_request=False) completions = await self._send_chat_request(settings) - if self._should_return_completions_response(completions, auto_invoke_kernel_functions): + if self._should_return_completions_response(completions=completions, tool_call_behavior=tool_call_behavior): return completions await self._process_chat_response_with_tool_call(completions, chat_history, kernel) @@ -74,7 +80,7 @@ async def complete_chat_stream( self, chat_history: ChatHistory, settings: OpenAIPromptExecutionSettings, - **kwargs: Dict[str, Any], + **kwargs: Any, ) -> AsyncIterable[List[OpenAIStreamingChatMessageContent]]: """Executes a streaming chat completion request and returns the result. @@ -88,56 +94,38 @@ async def complete_chat_stream( List[OpenAIStreamingChatMessageContent | AzureStreamingChatMessageContent] -- A stream of OpenAIStreamingChatMessages or AzureStreamingChatMessageContent when using Azure. """ - auto_invoke_kernel_functions, max_auto_invoke_attempts = self._get_auto_invoke_execution_settings(settings) - kernel = self._validate_kernel_for_tool_calling(**kwargs) - tool_call_behavior = None - if auto_invoke_kernel_functions: - # Only configure the tool_call_behavior if auto_invoking_functions is true - tool_call_behavior = ToolCallBehavior(auto_invoke_kernel_functions=auto_invoke_kernel_functions) - - attempts = 0 - continue_loop = True + tool_call_behavior = self._get_tool_call_behavior(settings) + kernel = kwargs.pop("kernel", None) + if tool_call_behavior.auto_invoke_kernel_functions and kernel is None: + raise ServiceInvalidExecutionSettingsError("The kernel argument is required for OpenAI tool calling.") - while attempts < max_auto_invoke_attempts and continue_loop: + for _ in range(tool_call_behavior.max_auto_invoke_attempts): settings = self._prepare_settings(settings, chat_history, stream_request=True) response = await self._send_chat_stream_request(settings) - async for content in self._process_chat_stream_response(response, chat_history, kernel, tool_call_behavior): + finish_reason = None + async for content, finish_reason in self._process_chat_stream_response( + response=response, chat_history=chat_history, kernel=kernel, tool_call_behavior=tool_call_behavior + ): yield content - if tool_call_behavior and not tool_call_behavior.auto_invoke_kernel_functions: - continue_loop = False - break - attempts += 1 - - def _validate_kernel_for_tool_calling(self, **kwargs: Dict[str, Any]) -> "Kernel": - """Validate that the arguments contains the kernel, which is used for function calling, if applicable.""" - kernel = kwargs.pop("kernel", None) - if kernel is None: - raise ServiceInvalidExecutionSettingsError("The kernel argument is required for OpenAI tool calling.") - return kernel + if finish_reason != FinishReason.TOOL_CALLS: + break - def _prepare_settings( - self, - settings: OpenAIChatPromptExecutionSettings, - chat_history: ChatHistory, - stream_request: bool = False, - ) -> 
OpenAIChatPromptExecutionSettings: - """Prepare the promp execution settings for the chat request.""" - settings.messages = self._prepare_chat_history_for_request(chat_history) - settings.stream = stream_request - if not settings.ai_model_id: - settings.ai_model_id = self.ai_model_id + def _chat_message_content_to_dict(self, message: ChatMessageContent) -> Dict[str, Optional[str]]: + msg = super()._chat_message_content_to_dict(message) + if message.role == "assistant": + if tool_calls := getattr(message, "tool_calls", None): + msg["tool_calls"] = [tool_call.model_dump() for tool_call in tool_calls] + if function_call := getattr(message, "function_call", None): + msg["function_call"] = function_call.model_dump_json() + if message.role == "tool": + if tool_call_id := getattr(message, "tool_call_id", None): + msg["tool_call_id"] = tool_call_id + if message.metadata and "function" in message.metadata: + msg["name"] = message.metadata["function_name"] + return msg - # If auto_invoke_kernel_functions is True and num_of_responses > 1 provide a warning - # that the num_of_responses will be configured to one. - if settings.auto_invoke_kernel_functions and settings.number_of_responses > 1: - logger.warning( - ( - "Auto invoking functions does not support more than one num_of_response. " - "The num_of_responses setting is configured as 1." - ) - ) - settings.number_of_responses = 1 - return settings + # endregion + # region internal handlers async def _send_chat_request(self, settings: OpenAIChatPromptExecutionSettings) -> List[OpenAIChatMessageContent]: """Send the chat request""" @@ -146,7 +134,6 @@ async def _send_chat_request(self, settings: OpenAIChatPromptExecutionSettings) completions = [ self._create_chat_message_content(response, choice, response_metadata) for choice in response.choices ] - return completions async def _send_chat_stream_request(self, settings: OpenAIChatPromptExecutionSettings) -> AsyncStream: @@ -172,9 +159,9 @@ async def _process_chat_stream_response( self, response: AsyncStream, chat_history: ChatHistory, - kernel: "Kernel", - tool_call_behavior: Optional[ToolCallBehavior] = None, - ) -> AsyncIterable[List[OpenAIStreamingChatMessageContent]]: + tool_call_behavior: ToolCallBehavior, + kernel: Optional["Kernel"] = None, + ) -> AsyncIterable[Tuple[List[OpenAIStreamingChatMessageContent], Optional[FinishReason]]]: """Process the chat stream response and handle tool calls if applicable.""" full_content = None async for chunk in response: @@ -185,22 +172,25 @@ async def _process_chat_stream_response( contents = [ self._create_streaming_chat_message_content(chunk, choice, chunk_metadata) for choice in chunk.choices ] - if not tool_call_behavior or not tool_call_behavior.auto_invoke_kernel_functions: - yield contents + if not tool_call_behavior.auto_invoke_kernel_functions: + yield contents, None + continue finish_reason = getattr(contents[0], "finish_reason", None) full_content = contents[0] if full_content is None else full_content + contents[0] if not contents[0].tool_calls or finish_reason not in (FinishReason.STOP, FinishReason.TOOL_CALLS, None): - yield contents + yield contents, finish_reason if finish_reason == FinishReason.STOP: - if tool_call_behavior: - tool_call_behavior.auto_invoke_kernel_functions = False + tool_call_behavior.auto_invoke_kernel_functions = False break if finish_reason == FinishReason.TOOL_CALLS: chat_history.add_message(message=full_content) await self._process_tool_calls(full_content, kernel, chat_history) break + # endregion + # region content 
creation + def _create_chat_message_content( self, response: ChatCompletion, choice: Choice, response_metadata: Dict[str, Any] ) -> OpenAIChatMessageContent: @@ -271,10 +261,9 @@ def _get_tool_calls_from_chat_choice(self, choice: Union[Choice, ChunkChoice]) - return None return [ ToolCall( - index=getattr(tool, "index", None), id=tool.id, type=tool.type, - function=FunctionCall(name=tool.function.name, arguments=tool.function.arguments, id=tool.id), + function=FunctionCall(name=tool.function.name, arguments=tool.function.arguments), ) for tool in content.tool_calls ] @@ -289,20 +278,51 @@ def _get_function_call_from_chat_choice(self, choice: Union[Choice, ChunkChoice] return None return FunctionCall(name=content.function_call.name, arguments=content.function_call.arguments) - def _get_auto_invoke_execution_settings( - self, execution_settings: OpenAIPromptExecutionSettings - ) -> Tuple[bool, int]: - """Gets the auto invoke and max iterations settings.""" + def _get_tool_call_behavior(self, execution_settings: OpenAIPromptExecutionSettings) -> ToolCallBehavior: + """Gets the auto invoke and max iterations settings through ToolCallBehavior.""" + auto_invoke_kernel_functions = False + max_auto_invoke_attempts = 1 if isinstance(execution_settings, OpenAIChatPromptExecutionSettings): - auto_invoke_kernel_functions = execution_settings.auto_invoke_kernel_functions - max_auto_invoke_attempts = ( - execution_settings.max_auto_invoke_attempts if auto_invoke_kernel_functions else 1 + if execution_settings.auto_invoke_kernel_functions is not None: + auto_invoke_kernel_functions = execution_settings.auto_invoke_kernel_functions + if auto_invoke_kernel_functions and execution_settings.max_auto_invoke_attempts is not None: + max_auto_invoke_attempts = ( + execution_settings.max_auto_invoke_attempts if auto_invoke_kernel_functions else 1 + ) + + return ToolCallBehavior( + auto_invoke_kernel_functions=auto_invoke_kernel_functions, max_auto_invoke_attempts=max_auto_invoke_attempts + ) + + # endregion + # region request preparation + + def _prepare_settings( + self, + settings: OpenAIChatPromptExecutionSettings, + chat_history: ChatHistory, + stream_request: bool = False, + ) -> OpenAIChatPromptExecutionSettings: + """Prepare the promp execution settings for the chat request.""" + settings.messages = self._prepare_chat_history_for_request(chat_history) + settings.stream = stream_request + if not settings.ai_model_id: + settings.ai_model_id = self.ai_model_id + + # If auto_invoke_kernel_functions is True and num_of_responses > 1 provide a warning + # that the num_of_responses will be configured to one. + if settings.auto_invoke_kernel_functions and settings.number_of_responses > 1: + logger.warning( + ( + "Auto invoking functions does not support more than one num_of_response. " + "The num_of_responses setting is configured as 1." 
+ ) ) - else: - auto_invoke_kernel_functions = False - max_auto_invoke_attempts = 1 + settings.number_of_responses = 1 + return settings - return auto_invoke_kernel_functions, max_auto_invoke_attempts + # endregion + # region tool calling async def _process_tool_calls( self, @@ -322,32 +342,22 @@ async def _process_tool_calls( msg = OpenAIChatMessageContent( role=ChatRole.TOOL, content=str(func_result), - metadata={"tool_call_id": tool_call.id, "function_name": tool_call.function.name}, + tool_call_id=tool_call.id, + metadata={"function_name": tool_call.function.name}, ) chat_history.add_message(message=msg) def _should_return_completions_response( self, completions: Union[List[OpenAIChatMessageContent], List[OpenAIStreamingChatMessageContent]], - auto_invoke_kernel_functions: bool, + tool_call_behavior: ToolCallBehavior, ) -> bool: """Determines if the completions should be returned.""" return ( - not auto_invoke_kernel_functions + not tool_call_behavior.auto_invoke_kernel_functions or any(not isinstance(completion, OpenAIChatMessageContent) for completion in completions) or any(not hasattr(completion, "tool_calls") or not completion.tool_calls for completion in completions) ) - def _chat_message_content_to_dict(self, message: ChatMessageContent) -> Dict[str, Optional[str]]: - msg = super()._chat_message_content_to_dict(message) - if message.role == "assistant": - if tool_calls := getattr(message, "tool_calls", None): - msg["tool_calls"] = tool_calls - if function_call := getattr(message, "function_call", None): - msg["function_call"] = function_call - if message.role == "tool": - if message.metadata and "tool_call_id" in message.metadata: - msg["tool_call_id"] = message.metadata["tool_call_id"] - if message.metadata and "function" in message.metadata: - msg["name"] = message.metadata["function_name"] - return msg + +# endregion diff --git a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_handler.py b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_handler.py index 74cdbb74c285..fbaacf3716f4 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_handler.py +++ b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_handler.py @@ -51,6 +51,7 @@ async def _send_request( Returns: ChatCompletion, Completion, AsyncStream[Completion | ChatCompletionChunk] -- The completion response. """ + try: if self.ai_model_type == OpenAIModelTypes.CHAT: response = await self.client.chat.completions.create(**request_settings.prepare_settings_dict()) diff --git a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_text_embedding_base.py b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_text_embedding_base.py index 531cf51c1994..9d023e68201c 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_text_embedding_base.py +++ b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_text_embedding_base.py @@ -1,25 +1,19 @@ # Copyright (c) Microsoft. All rights reserved. 
-from typing import Any, Dict, List, Optional +from typing import Any, List, Optional from numpy import array, ndarray -from semantic_kernel.connectors.ai.embeddings.embedding_generator_base import ( - EmbeddingGeneratorBase, -) +from semantic_kernel.connectors.ai.embeddings.embedding_generator_base import EmbeddingGeneratorBase from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.open_ai_prompt_execution_settings import ( OpenAIEmbeddingPromptExecutionSettings, ) -from semantic_kernel.connectors.ai.open_ai.services.open_ai_handler import ( - OpenAIHandler, -) +from semantic_kernel.connectors.ai.open_ai.services.open_ai_handler import OpenAIHandler from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings class OpenAITextEmbeddingBase(OpenAIHandler, EmbeddingGeneratorBase): - async def generate_embeddings( - self, texts: List[str], batch_size: Optional[int] = None, **kwargs: Dict[str, Any] - ) -> ndarray: + async def generate_embeddings(self, texts: List[str], batch_size: Optional[int] = None, **kwargs: Any) -> ndarray: """Generates embeddings for the given texts. Arguments: diff --git a/python/semantic_kernel/connectors/ai/open_ai/services/tool_call_behavior.py b/python/semantic_kernel/connectors/ai/open_ai/services/tool_call_behavior.py index 9daa4711b21b..da012a7b74e8 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/services/tool_call_behavior.py +++ b/python/semantic_kernel/connectors/ai/open_ai/services/tool_call_behavior.py @@ -14,3 +14,4 @@ class ToolCallBehavior(KernelBaseModel): """ auto_invoke_kernel_functions: bool = False + max_auto_invoke_attempts: int = 1 diff --git a/python/semantic_kernel/contents/streaming_chat_message_content.py b/python/semantic_kernel/contents/streaming_chat_message_content.py index 3936930ff763..534c056ad6c5 100644 --- a/python/semantic_kernel/contents/streaming_chat_message_content.py +++ b/python/semantic_kernel/contents/streaming_chat_message_content.py @@ -1,6 +1,10 @@ # Copyright (c) Microsoft. All rights reserved. +import json from typing import Optional +from xml.etree.ElementTree import Element + +from defusedxml import ElementTree from semantic_kernel.contents.chat_role import ChatRole from semantic_kernel.contents.finish_reason import FinishReason @@ -68,3 +72,31 @@ def __add__(self, other: "StreamingChatMessageContent") -> "StreamingChatMessage encoding=self.encoding, finish_reason=self.finish_reason or other.finish_reason, ) + + def to_prompt(self, root_key: str) -> str: + """Convert the ChatMessageContent to a prompt. + + Returns: + str - The prompt from the ChatMessageContent. + """ + + root = Element(root_key) + root.set("role", self.role.value) + root.set("metadata", json.dumps(self.metadata)) + root.text = self.content or "" + return ElementTree.tostring(root, encoding=self.encoding or "unicode", short_empty_elements=False) + + @classmethod + def from_element(cls, element: Element) -> "StreamingChatMessageContent": + """Create a new instance of ChatMessageContent from a prompt. + + Args: + prompt: str - The prompt to create the ChatMessageContent from. + + Returns: + ChatMessageContent - The new instance of ChatMessageContent. 
+ """ + args = {"role": element.get("role", ChatRole.USER.value), "content": element.text} + if metadata := element.get("metadata"): + args["metadata"] = json.loads(metadata) + return cls(**args) diff --git a/python/semantic_kernel/exceptions/template_engine_exceptions.py b/python/semantic_kernel/exceptions/template_engine_exceptions.py index bed9df8a5ce6..30802177606d 100644 --- a/python/semantic_kernel/exceptions/template_engine_exceptions.py +++ b/python/semantic_kernel/exceptions/template_engine_exceptions.py @@ -25,6 +25,10 @@ def __init__(self, content: str) -> None: ) +class VarBlockRenderError(BlockRenderException): + pass + + class ValBlockSyntaxError(BlockSyntaxError): def __init__(self, content: str) -> None: super().__init__( @@ -80,6 +84,7 @@ class TemplateRenderException(BlockRenderException): "BlockSyntaxError", "BlockRenderException", "VarBlockSyntaxError", + "VarBlockRenderError", "ValBlockSyntaxError", "NamedArgBlockSyntaxError", "FunctionIdBlockSyntaxError", diff --git a/python/semantic_kernel/functions/kernel_arguments.py b/python/semantic_kernel/functions/kernel_arguments.py index 34d9aed52778..42a79b2a504e 100644 --- a/python/semantic_kernel/functions/kernel_arguments.py +++ b/python/semantic_kernel/functions/kernel_arguments.py @@ -10,7 +10,7 @@ class KernelArguments(dict): def __init__( self, settings: Optional[Union["PromptExecutionSettings", List["PromptExecutionSettings"]]] = None, - **kwargs: Dict[str, Any], + **kwargs: Any, ): """Initializes a new instance of the KernelArguments class, this is a dict-like class with the additional field for the execution_settings. diff --git a/python/semantic_kernel/functions/kernel_function.py b/python/semantic_kernel/functions/kernel_function.py index 2839265e88c8..c18d94ff56e5 100644 --- a/python/semantic_kernel/functions/kernel_function.py +++ b/python/semantic_kernel/functions/kernel_function.py @@ -122,7 +122,7 @@ async def __call__( self, kernel: "Kernel", arguments: Optional[KernelArguments] = None, - **kwargs: Dict[str, Any], + **kwargs: Any, ) -> "FunctionResult": """Invoke the function with the given arguments. @@ -149,7 +149,7 @@ async def invoke( self, kernel: "Kernel", arguments: Optional[KernelArguments] = None, - **kwargs: Dict[str, Any], + **kwargs: Any, ) -> "FunctionResult": """Invoke the function with the given arguments. @@ -184,7 +184,7 @@ async def invoke_stream( self, kernel: "Kernel", arguments: Optional[KernelArguments] = None, - **kwargs: Dict[str, Any], + **kwargs: Any, ) -> AsyncIterable[Union[FunctionResult, List[Union[StreamingKernelContent, Any]]]]: """ Invoke a stream async function with the given arguments. 
diff --git a/python/semantic_kernel/functions/kernel_function_from_prompt.py b/python/semantic_kernel/functions/kernel_function_from_prompt.py index 418db2669f0b..b82cb7ec8123 100644 --- a/python/semantic_kernel/functions/kernel_function_from_prompt.py +++ b/python/semantic_kernel/functions/kernel_function_from_prompt.py @@ -5,9 +5,7 @@ from pydantic import Field, ValidationError, model_validator -from semantic_kernel.connectors.ai.chat_completion_client_base import ( - ChatCompletionClientBase, -) +from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.open_ai_prompt_execution_settings import ( OpenAIChatPromptExecutionSettings, ) diff --git a/python/semantic_kernel/kernel.py b/python/semantic_kernel/kernel.py index 00b85c909585..e4e17e295d15 100644 --- a/python/semantic_kernel/kernel.py +++ b/python/semantic_kernel/kernel.py @@ -143,7 +143,7 @@ async def invoke_stream( function_name: Optional[str] = None, plugin_name: Optional[str] = None, return_function_results: Optional[bool] = False, - **kwargs: Dict[str, Any], + **kwargs: Any, ) -> AsyncIterable[Union[List["StreamingKernelContent"], List[FunctionResult]]]: """Execute one or more stream functions. @@ -211,7 +211,7 @@ async def invoke_stream( async for stream_message in stream_function.invoke_stream(self, arguments): if isinstance(stream_message, FunctionResult): - exception = stream_message.metadata.get("exception", None) + exception = stream_message.metadata.get("error", None) if exception: break function_result.append(stream_message) @@ -267,7 +267,7 @@ async def invoke( arguments: Optional[KernelArguments] = None, function_name: Optional[str] = None, plugin_name: Optional[str] = None, - **kwargs: Dict[str, Any], + **kwargs: Any, ) -> Optional[Union[FunctionResult, List[FunctionResult]]]: """Execute one or more functions. @@ -371,7 +371,7 @@ async def invoke_prompt( prompt: str, arguments: Optional[KernelArguments] = None, template_format: Optional[str] = None, - **kwargs: Dict[str, Any], + **kwargs: Any, ) -> Optional[Union[FunctionResult, List[FunctionResult]]]: """ Invoke a function from the provided prompt @@ -728,7 +728,7 @@ def get_service( ValueError: If no service is found that matches the type. 
""" - if not service_id: + if not service_id or service_id == "default": if not type: if default_service := self.services.get("default"): return default_service diff --git a/python/semantic_kernel/template_engine/blocks/var_block.py b/python/semantic_kernel/template_engine/blocks/var_block.py index bb6c127c319b..2f05def84960 100644 --- a/python/semantic_kernel/template_engine/blocks/var_block.py +++ b/python/semantic_kernel/template_engine/blocks/var_block.py @@ -6,7 +6,7 @@ from pydantic import model_validator -from semantic_kernel.exceptions import VarBlockSyntaxError +from semantic_kernel.exceptions import VarBlockRenderError, VarBlockSyntaxError from semantic_kernel.template_engine.blocks.block import Block from semantic_kernel.template_engine.blocks.block_types import BlockTypes from semantic_kernel.template_engine.blocks.symbols import Symbols @@ -73,5 +73,10 @@ def render(self, _: "Kernel", arguments: Optional["KernelArguments"] = None) -> value = arguments.get(self.name, None) if value is None: logger.warning(f"Variable `{Symbols.VAR_PREFIX}: {self.name}` not found in the KernelArguments") - - return str(value) if value else "" + return "" + try: + return str(value) + except Exception as e: + raise VarBlockRenderError( + f"Block {self.name} failed to be parsed to a string, type is {type(value)}" + ) from e diff --git a/python/tests/conftest.py b/python/tests/conftest.py index c1b1222fbd19..cc5f01d9631a 100644 --- a/python/tests/conftest.py +++ b/python/tests/conftest.py @@ -9,6 +9,7 @@ import pytest +from semantic_kernel.contents.chat_history import ChatHistory from semantic_kernel.contents.streaming_text_content import StreamingTextContent from semantic_kernel.events.function_invoked_event_args import FunctionInvokedEventArgs from semantic_kernel.events.function_invoking_event_args import FunctionInvokingEventArgs @@ -120,6 +121,11 @@ def create_mock_function(name: str, value: str = "test") -> KernelFunction: return create_mock_function +@pytest.fixture(scope="function") +def chat_history(): + return ChatHistory() + + @pytest.fixture(autouse=True) def enable_debug_mode(): """Set `autouse=True` to enable easy debugging for tests. 
diff --git a/python/tests/integration/completions/test_azure_oai_chat_service.py b/python/tests/integration/completions/test_azure_oai_chat_service.py index 71dd26067061..192f7b4e9b24 100644 --- a/python/tests/integration/completions/test_azure_oai_chat_service.py +++ b/python/tests/integration/completions/test_azure_oai_chat_service.py @@ -209,28 +209,22 @@ async def test_azure_oai_chat_service_with_tool_call_streaming(setup_tldr_functi kernel.import_plugin_from_object(MathPlugin(), plugin_name="math") + # Create the prompt function + chat_func = kernel.create_function_from_prompt(prompt="{{$input}}", function_name="chat", plugin_name="chat") execution_settings = sk_oai.AzureChatPromptExecutionSettings( service_id="chat_completion", max_tokens=2000, temperature=0.7, top_p=0.8, tool_choice="auto", - tools=get_tool_call_object(kernel, {"exclude_plugin": ["ChatBot"]}), + tools=get_tool_call_object(kernel, {"exclude_plugin": ["chat"]}), auto_invoke_kernel_functions=True, max_auto_invoke_attempts=3, ) - - prompt_template_config = PromptTemplateConfig( - template="{{$input}}", description="Do math.", execution_settings=execution_settings - ) - - # Create the prompt function - tldr_function = kernel.create_function_from_prompt( - function_name="math_fun", plugin_name="math_int_test", prompt_template_config=prompt_template_config - ) + arguments = KernelArguments(input="what is 1+1?", settings=execution_settings) result = None - async for message in kernel.invoke_stream(tldr_function, input="what is 1+1?"): + async for message in kernel.invoke_stream(chat_func, arguments=arguments): result = message[0] if not result else result + message[0] output = str(result) diff --git a/python/tests/unit/connectors/open_ai/contents/conftest.py b/python/tests/unit/connectors/open_ai/contents/conftest.py new file mode 100644 index 000000000000..68e063c493f1 --- /dev/null +++ b/python/tests/unit/connectors/open_ai/contents/conftest.py @@ -0,0 +1,14 @@ +from pytest import fixture + +from semantic_kernel.connectors.ai.open_ai.contents.function_call import FunctionCall +from semantic_kernel.connectors.ai.open_ai.contents.tool_calls import ToolCall + + +@fixture(scope="module") +def function_call(): + return FunctionCall(name="Test-Function", arguments='{"input": "world"}') + + +@fixture(scope="module") +def tool_call(function_call: FunctionCall): + return ToolCall(id="1234", function=function_call) diff --git a/python/tests/unit/connectors/open_ai/contents/test_function_call.py b/python/tests/unit/connectors/open_ai/contents/test_function_call.py index 5cb1b85e476e..52ad15e6c1d8 100644 --- a/python/tests/unit/connectors/open_ai/contents/test_function_call.py +++ b/python/tests/unit/connectors/open_ai/contents/test_function_call.py @@ -1,28 +1,98 @@ import pytest from semantic_kernel.connectors.ai.open_ai.contents.function_call import FunctionCall +from semantic_kernel.exceptions.content_exceptions import ( + FunctionCallInvalidArgumentsException, + FunctionCallInvalidNameException, +) from semantic_kernel.functions.kernel_arguments import KernelArguments -def test_function_call(): - # Test initialization with default values - fc = FunctionCall(name="Test-Function", arguments="""{"input": "world"}""", id="1234") - assert fc.name == "Test-Function" - assert fc.arguments == """{"input": "world"}""" - assert fc.id == "1234" +def test_function_call(function_call: FunctionCall): + assert function_call.name == "Test-Function" + assert function_call.arguments == """{"input": "world"}""" -@pytest.mark.asyncio -async def 
test_function_call_to_kernel_arguments(): +def test_add(function_call: FunctionCall): + # Test adding two function calls + fc2 = FunctionCall(name="Test-Function", arguments="""{"input2": "world2"}""") + fc3 = function_call + fc2 + assert fc3.name == "Test-Function" + assert fc3.arguments == """{"input": "world"}{"input2": "world2"}""" + + +def test_add_none(function_call: FunctionCall): + # Test adding two function calls with one being None + fc2 = None + fc3 = function_call + fc2 + assert fc3.name == "Test-Function" + assert fc3.arguments == """{"input": "world"}""" + + +def test_parse_arguments(function_call: FunctionCall): + # Test parsing arguments to dictionary + assert function_call.parse_arguments() == {"input": "world"} + + +def test_parse_arguments_none(): + # Test parsing arguments to dictionary + fc = FunctionCall(name="Test-Function") + assert fc.parse_arguments() is None + + +def test_parse_arguments_fail(): + # Test parsing arguments to dictionary + fc = FunctionCall(name="Test-Function", arguments="""{"input": "world}""") + with pytest.raises(FunctionCallInvalidArgumentsException): + fc.parse_arguments() + + +def test_to_kernel_arguments(function_call: FunctionCall): # Test parsing arguments to variables arguments = KernelArguments() - func_call = FunctionCall( - name="Test-Function", - arguments="""{"input": "world", "input2": "world2"}""", - id="1234", - ) - assert isinstance(func_call.to_kernel_arguments(), KernelArguments) - - arguments.update(func_call.to_kernel_arguments()) + assert isinstance(function_call.to_kernel_arguments(), KernelArguments) + arguments.update(function_call.to_kernel_arguments()) assert arguments["input"] == "world" - assert arguments["input2"] == "world2" + + +def test_to_kernel_arguments_none(): + # Test parsing arguments to variables + fc = FunctionCall(name="Test-Function") + assert fc.to_kernel_arguments() == KernelArguments() + + +def test_split_name(function_call: FunctionCall): + # Test splitting the name into plugin and function name + assert function_call.split_name() == ["Test", "Function"] + + +def test_split_name_name_only(): + # Test splitting the name into plugin and function name + fc = FunctionCall(name="Function") + assert fc.split_name() == ["", "Function"] + + +def test_split_name_dict(function_call: FunctionCall): + # Test splitting the name into plugin and function name + assert function_call.split_name_dict() == {"plugin_name": "Test", "function_name": "Function"} + + +def test_split_name_none(): + fc = FunctionCall(id="1234") + with pytest.raises(FunctionCallInvalidNameException): + fc.split_name() + + +def test_fc_dump(function_call: FunctionCall): + # Test dumping the function call to dictionary + dumped = function_call.model_dump() + assert dumped == { + "name": "Test-Function", + "arguments": '{"input": "world"}', + } + + +def test_fc_dump_json(function_call: FunctionCall): + # Test dumping the function call to dictionary + dumped = function_call.model_dump_json() + assert dumped == """{"name":"Test-Function","arguments":"{\\"input\\": \\"world\\"}"}""" diff --git a/python/tests/unit/connectors/open_ai/contents/test_tool_call.py b/python/tests/unit/connectors/open_ai/contents/test_tool_call.py new file mode 100644 index 000000000000..02e722ce2dbe --- /dev/null +++ b/python/tests/unit/connectors/open_ai/contents/test_tool_call.py @@ -0,0 +1,38 @@ +# Copyright (c) Microsoft. All rights reserved. 
+ + +from semantic_kernel.connectors.ai.open_ai.contents.function_call import FunctionCall +from semantic_kernel.connectors.ai.open_ai.contents.tool_calls import ToolCall + + +def test_tool_call(tool_call: ToolCall): + assert tool_call.id == "1234" + assert tool_call.type == "function" + assert tool_call.function is not None + + +def test_add(tool_call: ToolCall): + # Test adding two tool calls + tool_call2 = ToolCall(id="5678", function=FunctionCall(name="Test-Function", arguments="""{"input2": "world2"}""")) + tool_call3 = tool_call + tool_call2 + assert tool_call3.id == "1234" + assert tool_call3.type == "function" + assert tool_call3.function.name == "Test-Function" + assert tool_call3.function.arguments == """{"input": "world"}{"input2": "world2"}""" + + +def test_add_none(tool_call: ToolCall): + # Test adding two tool calls with one being None + tool_call2 = None + tool_call3 = tool_call + tool_call2 + assert tool_call3.id == "1234" + assert tool_call3.type == "function" + assert tool_call3.function.name == "Test-Function" + assert tool_call3.function.arguments == """{"input": "world"}""" + + +def test_dump_json(tool_call: ToolCall): + assert ( + tool_call.model_dump_json() + == """{"id":"1234","type":"function","function":{"name":"Test-Function","arguments":"{\\"input\\": \\"world\\"}"}}""" # noqa: E501 + ) diff --git a/python/tests/unit/connectors/open_ai/services/test_azure_chat_completion.py b/python/tests/unit/connectors/open_ai/services/test_azure_chat_completion.py index 8f4bf277a97f..f6437bc7d511 100644 --- a/python/tests/unit/connectors/open_ai/services/test_azure_chat_completion.py +++ b/python/tests/unit/connectors/open_ai/services/test_azure_chat_completion.py @@ -151,15 +151,15 @@ def test_azure_chat_completion_init_with_base_url() -> None: @pytest.mark.asyncio @patch.object(AsyncChatCompletions, "create", new_callable=AsyncMock) -async def test_azure_chat_completion_call_with_parameters(mock_create) -> None: +async def test_azure_chat_completion_call_with_parameters( + mock_create, kernel: Kernel, chat_history: ChatHistory +) -> None: deployment_name = "test_deployment" endpoint = "https://test-endpoint.com" api_key = "test_api_key" api_version = "2023-03-15-preview" - messages = ChatHistory() - messages.add_user_message("hello world") + chat_history.add_user_message("hello world") complete_prompt_execution_settings = AzureChatPromptExecutionSettings(service_id="test_service_id") - kernel = Kernel() azure_chat_completion = AzureChatCompletion( deployment_name=deployment_name, @@ -168,7 +168,7 @@ async def test_azure_chat_completion_call_with_parameters(mock_create) -> None: api_key=api_key, ) await azure_chat_completion.complete_chat( - chat_history=messages, settings=complete_prompt_execution_settings, kernel=kernel + chat_history=chat_history, settings=complete_prompt_execution_settings, kernel=kernel ) mock_create.assert_awaited_once_with( model=deployment_name, @@ -180,24 +180,22 @@ async def test_azure_chat_completion_call_with_parameters(mock_create) -> None: stream=False, temperature=complete_prompt_execution_settings.temperature, top_p=complete_prompt_execution_settings.top_p, - messages=azure_chat_completion._prepare_chat_history_for_request(messages), + messages=azure_chat_completion._prepare_chat_history_for_request(chat_history), ) @pytest.mark.asyncio @patch.object(AsyncChatCompletions, "create", new_callable=AsyncMock) async def test_azure_chat_completion_call_with_parameters_and_Logit_Bias_Defined( - mock_create, + mock_create, kernel: Kernel, 
chat_history: ChatHistory ) -> None: deployment_name = "test_deployment" endpoint = "https://test-endpoint.com" api_key = "test_api_key" api_version = "2023-03-15-preview" - kernel = Kernel() prompt = "hello world" - messages = ChatHistory() - messages.add_user_message(prompt) + chat_history.add_user_message(prompt) complete_prompt_execution_settings = AzureChatPromptExecutionSettings() token_bias = {"1": -100} @@ -211,12 +209,12 @@ async def test_azure_chat_completion_call_with_parameters_and_Logit_Bias_Defined ) await azure_chat_completion.complete_chat( - chat_history=messages, settings=complete_prompt_execution_settings, kernel=kernel + chat_history=chat_history, settings=complete_prompt_execution_settings, kernel=kernel ) mock_create.assert_awaited_once_with( model=deployment_name, - messages=azure_chat_completion._prepare_chat_history_for_request(messages), + messages=azure_chat_completion._prepare_chat_history_for_request(chat_history), temperature=complete_prompt_execution_settings.temperature, top_p=complete_prompt_execution_settings.top_p, n=complete_prompt_execution_settings.number_of_responses, @@ -304,20 +302,18 @@ def test_azure_chat_completion_serialize() -> None: @pytest.mark.asyncio @patch.object(AsyncChatCompletions, "create", new_callable=AsyncMock) async def test_azure_chat_completion_with_data_call_with_parameters( - mock_create, + mock_create, kernel: Kernel, chat_history: ChatHistory ) -> None: deployment_name = "test_deployment" endpoint = "https://test-endpoint.com" api_key = "test_api_key" api_version = "2023-03-15-preview" prompt = "hello world" - messages_in = ChatHistory() + messages_in = chat_history messages_in.add_user_message(prompt) messages_out = ChatHistory() messages_out.add_user_message(prompt) - kernel = Kernel() - expected_data_settings = { "dataSources": [ { @@ -363,17 +359,14 @@ async def test_azure_chat_completion_with_data_call_with_parameters( @pytest.mark.asyncio @patch.object(AsyncChatCompletions, "create", new_callable=AsyncMock) async def test_azure_chat_completion_call_with_data_parameters_and_function_calling( - mock_create, + mock_create, kernel: Kernel, chat_history: ChatHistory ) -> None: deployment_name = "test_deployment" endpoint = "https://test-endpoint.com" api_key = "test_api_key" api_version = "2023-03-15-preview" prompt = "hello world" - messages = ChatHistory() - messages.add_user_message(prompt) - - kernel = Kernel() + chat_history.add_user_message(prompt) ai_source = AzureAISearchDataSources(indexName="test-index", endpoint="test-endpoint", key="test-key") extra = ExtraBody(data_sources=[AzureDataSources(type="AzureCognitiveSearch", parameters=ai_source)]) @@ -394,7 +387,7 @@ async def test_azure_chat_completion_call_with_data_parameters_and_function_call ) await azure_chat_completion.complete_chat( - chat_history=messages, + chat_history=chat_history, settings=complete_prompt_execution_settings, kernel=kernel, ) @@ -403,7 +396,7 @@ async def test_azure_chat_completion_call_with_data_parameters_and_function_call mock_create.assert_awaited_once_with( model=deployment_name, - messages=azure_chat_completion._prepare_chat_history_for_request(messages), + messages=azure_chat_completion._prepare_chat_history_for_request(chat_history), temperature=complete_prompt_execution_settings.temperature, top_p=complete_prompt_execution_settings.top_p, n=complete_prompt_execution_settings.number_of_responses, @@ -421,18 +414,15 @@ async def test_azure_chat_completion_call_with_data_parameters_and_function_call @pytest.mark.asyncio 
@patch.object(AsyncChatCompletions, "create", new_callable=AsyncMock) async def test_azure_chat_completion_call_with_data_with_parameters_and_Stop_Defined( - mock_create, + mock_create, kernel: Kernel, chat_history: ChatHistory ) -> None: deployment_name = "test_deployment" endpoint = "https://test-endpoint.com" api_key = "test_api_key" api_version = "2023-03-15-preview" - messages = ChatHistory() - messages.add_user_message("hello world") + chat_history.add_user_message("hello world") complete_prompt_execution_settings = AzureChatPromptExecutionSettings() - kernel = Kernel() - stop = ["!"] complete_prompt_execution_settings.stop = stop @@ -449,13 +439,13 @@ async def test_azure_chat_completion_call_with_data_with_parameters_and_Stop_Def use_extensions=True, ) - await azure_chat_completion.complete_chat(messages, complete_prompt_execution_settings, kernel=kernel) + await azure_chat_completion.complete_chat(chat_history, complete_prompt_execution_settings, kernel=kernel) expected_data_settings = extra.model_dump(exclude_none=True, by_alias=True) mock_create.assert_awaited_once_with( model=deployment_name, - messages=azure_chat_completion._prepare_chat_history_for_request(messages), + messages=azure_chat_completion._prepare_chat_history_for_request(chat_history), temperature=complete_prompt_execution_settings.temperature, top_p=complete_prompt_execution_settings.top_p, n=complete_prompt_execution_settings.number_of_responses, @@ -485,19 +475,16 @@ async def test_azure_chat_completion_call_with_data_with_parameters_and_Stop_Def @pytest.mark.asyncio @patch.object(AsyncChatCompletions, "create") async def test_azure_chat_completion_content_filtering_raises_correct_exception( - mock_create, + mock_create, kernel: Kernel, chat_history: ChatHistory ) -> None: deployment_name = "test_deployment" endpoint = "https://test-endpoint.com" api_key = "test_api_key" api_version = "2023-03-15-preview" prompt = "some prompt that would trigger the content filtering" - messages = ChatHistory() - messages.add_user_message(prompt) + chat_history.add_user_message(prompt) complete_prompt_execution_settings = AzureChatPromptExecutionSettings() - kernel = Kernel() - mock_create.side_effect = openai.BadRequestError( CONTENT_FILTERED_ERROR_FULL_MESSAGE, response=Response(400, request=Request("POST", endpoint)), @@ -527,7 +514,7 @@ async def test_azure_chat_completion_content_filtering_raises_correct_exception( ) with pytest.raises(ContentFilterAIException, match="service encountered a content error") as exc_info: - await azure_chat_completion.complete_chat(messages, complete_prompt_execution_settings, kernel=kernel) + await azure_chat_completion.complete_chat(chat_history, complete_prompt_execution_settings, kernel=kernel) content_filter_exc = exc_info.value assert content_filter_exc.param == "prompt" @@ -538,19 +525,16 @@ async def test_azure_chat_completion_content_filtering_raises_correct_exception( @pytest.mark.asyncio @patch.object(AsyncChatCompletions, "create") async def test_azure_chat_completion_content_filtering_without_response_code_raises_with_default_code( - mock_create, + mock_create, kernel: Kernel, chat_history: ChatHistory ) -> None: deployment_name = "test_deployment" endpoint = "https://test-endpoint.com" api_key = "test_api_key" api_version = "2023-03-15-preview" prompt = "some prompt that would trigger the content filtering" - messages = ChatHistory() - messages.add_user_message(prompt) + chat_history.add_user_message(prompt) complete_prompt_execution_settings = AzureChatPromptExecutionSettings() 
- kernel = Kernel() - mock_create.side_effect = openai.BadRequestError( CONTENT_FILTERED_ERROR_FULL_MESSAGE, response=Response(400, request=Request("POST", endpoint)), @@ -579,25 +563,22 @@ async def test_azure_chat_completion_content_filtering_without_response_code_rai ) with pytest.raises(ContentFilterAIException, match="service encountered a content error"): - await azure_chat_completion.complete_chat(messages, complete_prompt_execution_settings, kernel=kernel) + await azure_chat_completion.complete_chat(chat_history, complete_prompt_execution_settings, kernel=kernel) @pytest.mark.asyncio @patch.object(AsyncChatCompletions, "create") async def test_azure_chat_completion_bad_request_non_content_filter( - mock_create, + mock_create, kernel: Kernel, chat_history: ChatHistory ) -> None: deployment_name = "test_deployment" endpoint = "https://test-endpoint.com" api_key = "test_api_key" api_version = "2023-03-15-preview" prompt = "some prompt that would trigger the content filtering" - messages = ChatHistory() - messages.add_user_message(prompt) + chat_history.add_user_message(prompt) complete_prompt_execution_settings = AzureChatPromptExecutionSettings() - kernel = Kernel() - mock_create.side_effect = openai.BadRequestError( "The request was bad.", response=Response(400, request=Request("POST", endpoint)), body={} ) @@ -610,22 +591,19 @@ async def test_azure_chat_completion_bad_request_non_content_filter( ) with pytest.raises(ServiceResponseException, match="service failed to complete the prompt"): - await azure_chat_completion.complete_chat(messages, complete_prompt_execution_settings, kernel=kernel) + await azure_chat_completion.complete_chat(chat_history, complete_prompt_execution_settings, kernel=kernel) @pytest.mark.asyncio @patch.object(AsyncChatCompletions, "create") -async def test_azure_chat_completion_no_kernel_provided_throws_error( - mock_create, -) -> None: +async def test_azure_chat_completion_no_kernel_provided_throws_error(mock_create, chat_history: ChatHistory) -> None: deployment_name = "test_deployment" endpoint = "https://test-endpoint.com" api_key = "test_api_key" api_version = "2023-03-15-preview" prompt = "some prompt that would trigger the content filtering" - messages = ChatHistory() - messages.add_user_message(prompt) - complete_prompt_execution_settings = AzureChatPromptExecutionSettings() + chat_history.add_user_message(prompt) + complete_prompt_execution_settings = AzureChatPromptExecutionSettings(auto_invoke_kernel_functions=True) mock_create.side_effect = openai.BadRequestError( "The request was bad.", response=Response(400, request=Request("POST", endpoint)), body={} @@ -641,4 +619,4 @@ async def test_azure_chat_completion_no_kernel_provided_throws_error( with pytest.raises( ServiceInvalidExecutionSettingsError, match="The kernel argument is required for OpenAI tool calling" ): - await azure_chat_completion.complete_chat(messages, complete_prompt_execution_settings) + await azure_chat_completion.complete_chat(chat_history, complete_prompt_execution_settings) diff --git a/python/tests/unit/connectors/open_ai/services/test_open_ai_chat_completion_base.py b/python/tests/unit/connectors/open_ai/services/test_open_ai_chat_completion_base.py index 06ba6117f5a6..423b6b2d490a 100644 --- a/python/tests/unit/connectors/open_ai/services/test_open_ai_chat_completion_base.py +++ b/python/tests/unit/connectors/open_ai/services/test_open_ai_chat_completion_base.py @@ -11,28 +11,26 @@ OpenAIStreamingChatMessageContent, ) from 
semantic_kernel.connectors.ai.open_ai.services.open_ai_chat_completion import OpenAIChatCompletionBase +from semantic_kernel.connectors.ai.open_ai.services.tool_call_behavior import ToolCallBehavior from semantic_kernel.contents.chat_history import ChatHistory from semantic_kernel.kernel import Kernel async def mock_async_process_chat_stream_response(arg1, response, tool_call_behavior, chat_history, kernel): mock_content = MagicMock(spec=OpenAIStreamingChatMessageContent) - yield [mock_content] + yield [mock_content], None @pytest.mark.asyncio -async def test_complete_chat_stream(): +async def test_complete_chat_stream(kernel: Kernel): chat_history = MagicMock() settings = MagicMock() mock_response = MagicMock() with patch( - "semantic_kernel.connectors.ai.open_ai.services.open_ai_chat_completion_base.OpenAIChatCompletionBase._get_auto_invoke_execution_settings", - return_value=(True, 3), + "semantic_kernel.connectors.ai.open_ai.services.open_ai_chat_completion_base.OpenAIChatCompletionBase._get_tool_call_behavior", + return_value=ToolCallBehavior(auto_invoke_kernel_functions=True, max_auto_invoke_attempts=3), ) as settings_mock, patch( - "semantic_kernel.connectors.ai.open_ai.services.open_ai_chat_completion_base.OpenAIChatCompletionBase._validate_kernel_for_tool_calling", - return_value=MagicMock(), - ) as validate_kernel_mock, patch( "semantic_kernel.connectors.ai.open_ai.services.open_ai_chat_completion_base.OpenAIChatCompletionBase._prepare_settings", return_value=settings, ) as prepare_settings_mock, patch( @@ -42,8 +40,6 @@ async def test_complete_chat_stream(): "semantic_kernel.connectors.ai.open_ai.services.open_ai_chat_completion_base.OpenAIChatCompletionBase._process_chat_stream_response", new_callable=lambda: mock_async_process_chat_stream_response, ): - kernel = Kernel() - chat_completion_base = OpenAIChatCompletionBase( ai_model_id="test_model_id", service_id="test", client=MagicMock(spec=AsyncOpenAI) ) @@ -52,25 +48,21 @@ async def test_complete_chat_stream(): assert content is not None settings_mock.assert_called_once_with(settings) - validate_kernel_mock.assert_called_once_with(kernel=kernel) prepare_settings_mock.assert_called_with(settings, chat_history, stream_request=True) mock_send_chat_stream_request.assert_called_with(settings) @pytest.mark.parametrize("tool_call", [False, True]) @pytest.mark.asyncio -async def test_complete_chat(tool_call): +async def test_complete_chat(tool_call, kernel: Kernel): chat_history = MagicMock() settings = MagicMock() mock_message_content = MagicMock(spec=List[OpenAIChatMessageContent]) with patch( - "semantic_kernel.connectors.ai.open_ai.services.open_ai_chat_completion_base.OpenAIChatCompletionBase._get_auto_invoke_execution_settings", - return_value=(True, 3), + "semantic_kernel.connectors.ai.open_ai.services.open_ai_chat_completion_base.OpenAIChatCompletionBase._get_tool_call_behavior", + return_value=ToolCallBehavior(auto_invoke_kernel_functions=True, max_auto_invoke_attempts=3), ) as settings_mock, patch( - "semantic_kernel.connectors.ai.open_ai.services.open_ai_chat_completion_base.OpenAIChatCompletionBase._validate_kernel_for_tool_calling", - return_value=MagicMock(), - ) as validate_kernel_mock, patch( "semantic_kernel.connectors.ai.open_ai.services.open_ai_chat_completion_base.OpenAIChatCompletionBase._prepare_settings", return_value=settings, ) as prepare_settings_mock, patch( @@ -82,8 +74,6 @@ async def test_complete_chat(tool_call): ), patch( 
"semantic_kernel.connectors.ai.open_ai.services.open_ai_chat_completion_base.OpenAIChatCompletionBase._process_chat_response_with_tool_call", ) as mock_process_chat_response_with_tool_call: - kernel = Kernel() - chat_completion_base = OpenAIChatCompletionBase( ai_model_id="test_model_id", service_id="test", client=MagicMock(spec=AsyncOpenAI) ) @@ -96,7 +86,6 @@ async def test_complete_chat(tool_call): assert result is not None settings_mock.assert_called_once_with(settings) - validate_kernel_mock.assert_called_once_with(kernel=kernel) prepare_settings_mock.assert_called_with(settings, chat_history, stream_request=False) mock_send_chat_request.assert_called_with(settings) if tool_call: @@ -142,28 +131,48 @@ async def test_process_tool_calls(): @pytest.mark.parametrize( - "completions,auto_invoke_kernel_functions,expected_result", + "completions,tool_call_behavior,expected_result", [ # Case 1: Empty completions, auto_invoke_kernel_functions=False - ([], False, True), + ([], ToolCallBehavior(auto_invoke_kernel_functions=False), True), # Case 2: Completions with OpenAIChatMessageContent, auto_invoke_kernel_functions=True - ([MagicMock(spec=OpenAIChatMessageContent)], True, True), + ([MagicMock(spec=OpenAIChatMessageContent)], ToolCallBehavior(auto_invoke_kernel_functions=True), True), # Case 3: Completions with OpenAIChatMessageContent, no tool_calls, auto_invoke_kernel_functions=True - ([MagicMock(spec=OpenAIChatMessageContent, tool_calls=[])], True, True), + ( + [MagicMock(spec=OpenAIChatMessageContent, tool_calls=[])], + ToolCallBehavior(auto_invoke_kernel_functions=True), + True, + ), # Case 4: Completions with OpenAIStreamingChatMessageContent, auto_invoke_kernel_functions=True - ([MagicMock(spec=OpenAIStreamingChatMessageContent)], True, True), + ( + [MagicMock(spec=OpenAIStreamingChatMessageContent)], + ToolCallBehavior(auto_invoke_kernel_functions=True), + True, + ), # Case 5: Completions with OpenAIStreamingChatMessageContent, auto_invoke_kernel_functions=False - ([MagicMock(spec=OpenAIStreamingChatMessageContent)], False, True), + ( + [MagicMock(spec=OpenAIStreamingChatMessageContent)], + ToolCallBehavior(auto_invoke_kernel_functions=False), + True, + ), # Case 6: Completions with both types, auto_invoke_kernel_functions=True - ([MagicMock(spec=OpenAIChatMessageContent), MagicMock(spec=OpenAIStreamingChatMessageContent)], True, True), + ( + [MagicMock(spec=OpenAIChatMessageContent), MagicMock(spec=OpenAIStreamingChatMessageContent)], + ToolCallBehavior(auto_invoke_kernel_functions=True), + True, + ), # Case 7: Completions with OpenAIChatMessageContent with tool_calls, auto_invoke_kernel_functions=True - ([MagicMock(spec=OpenAIChatMessageContent, tool_calls=[{}])], True, False), + ( + [MagicMock(spec=OpenAIChatMessageContent, tool_calls=[{}])], + ToolCallBehavior(auto_invoke_kernel_functions=True), + False, + ), ], ) @pytest.mark.asyncio -async def test_should_return_completions_response(completions, auto_invoke_kernel_functions, expected_result): +async def test_should_return_completions_response(completions, tool_call_behavior, expected_result): chat_completion_base = OpenAIChatCompletionBase( ai_model_id="test_model_id", service_id="test", client=MagicMock(spec=AsyncOpenAI) ) - result = chat_completion_base._should_return_completions_response(completions, auto_invoke_kernel_functions) + result = chat_completion_base._should_return_completions_response(completions, tool_call_behavior) assert result == expected_result diff --git a/python/tests/unit/contents/test_chat_history.py 
b/python/tests/unit/contents/test_chat_history.py index 81856c3aa1b2..bf0c1293e335 100644 --- a/python/tests/unit/contents/test_chat_history.py +++ b/python/tests/unit/contents/test_chat_history.py @@ -15,11 +15,6 @@ from semantic_kernel.prompt_template.prompt_template_config import PromptTemplateConfig -@pytest.fixture(scope="function") -def chat_history(): - return ChatHistory() - - def test_init_with_system_message_only(): system_msg = "test message" chat_history = ChatHistory(system_message=system_msg)
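
The last hunk removes the file-local chat_history fixture from test_chat_history.py because an equivalent function-scoped fixture is added to python/tests/conftest.py earlier in this patch, so any unit test can request a fresh ChatHistory simply by naming the parameter. Below is a minimal sketch of how a test picks up the shared fixture, assuming the conftest.py addition shown above; the example test name and its assertion are illustrative only and are not part of the patch.

import pytest

from semantic_kernel.contents.chat_history import ChatHistory


@pytest.fixture(scope="function")
def chat_history():
    # Mirrors the fixture added to python/tests/conftest.py in this PR:
    # each test function receives a fresh, empty ChatHistory instance.
    return ChatHistory()


def test_chat_history_fixture_example(chat_history: ChatHistory):
    # Hypothetical test, for illustration only: pytest injects the fixture
    # by matching the argument name against the fixture name.
    chat_history.add_user_message("hello world")
    assert len(chat_history.messages) == 1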