From 555a7c8436214c959550c1ae43e6d774c269df8b Mon Sep 17 00:00:00 2001 From: Eduard van Valkenburg Date: Fri, 8 Mar 2024 22:10:43 +0100 Subject: [PATCH] Python: fix for streaming openai responses, and first parts of fixes for Chat With Your Data (#5387) ### Motivation and Context ### Description ### Contribution Checklist - [ ] The code builds clean without any errors or warnings - [ ] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [ ] All unit tests pass, and I have added new tests where possible - [ ] I didn't break anyone :smile: --- .../azure_chat_gpt_api.py | 33 ++-- .../azure_chat_gpt_with_data_api.py | 58 +++--- ...chat_gpt_with_data_api_function_calling.py | 5 +- .../chat_gpt_api_function_calling.py | 40 ++-- .../ai/chat_completion_client_base.py | 6 +- .../services/gp_chat_completion.py | 4 +- .../ollama/services/ollama_chat_completion.py | 18 +- .../azure_streaming_chat_message_content.py | 21 +- .../ai/open_ai/contents/function_call.py | 8 +- .../contents/open_ai_chat_message_content.py | 9 +- .../open_ai_streaming_chat_message_content.py | 63 +++++- .../ai/open_ai/contents/tool_calls.py | 2 - .../services/open_ai_chat_completion_base.py | 186 +++++++++--------- .../ai/open_ai/services/open_ai_handler.py | 1 + .../services/open_ai_text_embedding_base.py | 14 +- .../ai/open_ai/services/tool_call_behavior.py | 1 + .../streaming_chat_message_content.py | 32 +++ .../exceptions/template_engine_exceptions.py | 5 + .../functions/kernel_arguments.py | 2 +- .../functions/kernel_function.py | 6 +- .../functions/kernel_function_from_prompt.py | 4 +- python/semantic_kernel/kernel.py | 10 +- .../template_engine/blocks/var_block.py | 11 +- python/tests/conftest.py | 6 + .../test_azure_oai_chat_service.py | 16 +- .../connectors/open_ai/contents/conftest.py | 14 ++ .../open_ai/contents/test_function_call.py | 104 ++++++++-- .../open_ai/contents/test_tool_call.py | 38 ++++ .../services/test_azure_chat_completion.py | 88 ++++----- .../test_open_ai_chat_completion_base.py | 67 ++++--- .../tests/unit/contents/test_chat_history.py | 5 - 31 files changed, 523 insertions(+), 354 deletions(-) create mode 100644 python/tests/unit/connectors/open_ai/contents/conftest.py create mode 100644 python/tests/unit/connectors/open_ai/contents/test_tool_call.py diff --git a/python/samples/kernel-syntax-examples/azure_chat_gpt_api.py b/python/samples/kernel-syntax-examples/azure_chat_gpt_api.py index a785cebbb1aa..d224e9dafdcd 100644 --- a/python/samples/kernel-syntax-examples/azure_chat_gpt_api.py +++ b/python/samples/kernel-syntax-examples/azure_chat_gpt_api.py @@ -3,17 +3,12 @@ import asyncio import logging -from dotenv import load_dotenv - import semantic_kernel as sk import semantic_kernel.connectors.ai.open_ai as sk_oai from semantic_kernel.contents.chat_history import ChatHistory -from semantic_kernel.prompt_template.input_variable import InputVariable from semantic_kernel.utils.settings import azure_openai_settings_from_dot_env_as_dict -logging.basicConfig(level=logging.INFO) - -load_dotenv() +logging.basicConfig(level=logging.WARNING) system_message = """ You are a chat bot. 
Your name is Mosscap and @@ -45,31 +40,25 @@ ## The second method is useful when you are using a single service, and you want to have type checking on the request settings or when you are using multiple instances of the same type of service, for instance gpt-35-turbo and gpt-4, both in openai and both for chat. # noqa: E501 E266 ## 3. create the request settings from the kernel based on the registered service class: # noqa: E266 -req_settings = kernel.get_service(service_id).get_prompt_execution_settings_class()(service_id=service_id) +req_settings = kernel.get_prompt_execution_settings_from_service_id(service_id=service_id) req_settings.max_tokens = 2000 req_settings.temperature = 0.7 req_settings.top_p = 0.8 +req_settings.auto_invoke_kernel_functions = True ## The third method is the most specific as the returned request settings class is the one that is registered for the service and has some fields already filled in, like the service_id and ai_model_id. # noqa: E501 E266 -prompt_template_config = sk.PromptTemplateConfig( - template=system_message - + """ Summarize the on-going chat history: {{$chat_history}} and respond to this statement: {{$request}}""", - name="chat", - input_variables=[ - InputVariable(name="request", description="The user input", is_required=True), - InputVariable(name="chat_history", description="The history of the conversation", is_required=True), - ], - execution_settings=req_settings, + +chat_function = kernel.create_function_from_prompt( + prompt=system_message + """{{$chat_history}}{{$user_input}}""", + function_name="chat", + plugin_name="chat", + prompt_execution_settings=req_settings, ) history = ChatHistory() history.add_user_message("Hi there, who are you?") history.add_assistant_message("I am Mosscap, a chat bot. 
I'm trying to figure out what people need.") -chat_function = kernel.create_function_from_prompt( - function_name="chat", plugin_name="chat", prompt_template_config=prompt_template_config -) - async def chat() -> bool: try: @@ -89,7 +78,7 @@ async def chat() -> bool: if stream: answer = kernel.invoke_stream( chat_function, - request=user_input, + user_input=user_input, chat_history=history, ) print("Mosscap:> ", end="") @@ -99,7 +88,7 @@ async def chat() -> bool: return True answer = await kernel.invoke( chat_function, - request=user_input, + user_input=user_input, chat_history=history, ) print(f"Mosscap:> {answer}") diff --git a/python/samples/kernel-syntax-examples/azure_chat_gpt_with_data_api.py b/python/samples/kernel-syntax-examples/azure_chat_gpt_with_data_api.py index 864eb5ff21cc..22b0d09d4047 100644 --- a/python/samples/kernel-syntax-examples/azure_chat_gpt_with_data_api.py +++ b/python/samples/kernel-syntax-examples/azure_chat_gpt_with_data_api.py @@ -4,9 +4,9 @@ import semantic_kernel as sk import semantic_kernel.connectors.ai.open_ai as sk_oai -from semantic_kernel.connectors.ai.open_ai.contents.azure_streaming_chat_message_content import ( - AzureStreamingChatMessageContent, -) +from semantic_kernel.connectors.ai.open_ai.contents.azure_chat_message_content import AzureChatMessageContent +from semantic_kernel.connectors.ai.open_ai.contents.function_call import FunctionCall +from semantic_kernel.connectors.ai.open_ai.contents.tool_calls import ToolCall from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.azure_chat_prompt_execution_settings import ( AzureAISearchDataSources, AzureChatPromptExecutionSettings, @@ -14,14 +14,19 @@ ExtraBody, ) from semantic_kernel.contents.chat_history import ChatHistory +from semantic_kernel.contents.chat_role import ChatRole from semantic_kernel.functions.kernel_arguments import KernelArguments from semantic_kernel.prompt_template.input_variable import InputVariable from semantic_kernel.prompt_template.prompt_template_config import PromptTemplateConfig +from semantic_kernel.utils.settings import ( + azure_aisearch_settings_from_dot_env_as_dict, + azure_openai_settings_from_dot_env_as_dict, +) kernel = sk.Kernel() # Load Azure OpenAI Settings -deployment, api_key, endpoint = sk.azure_openai_settings_from_dot_env() +aoai_settings = azure_openai_settings_from_dot_env_as_dict() # For example, AI Search index may contain the following document: @@ -29,7 +34,7 @@ # Bonded by their love for the natural world and shared curiosity, they uncovered a # groundbreaking phenomenon in glaciology that could potentially reshape our understanding of climate change. -azure_ai_search_settings = sk.azure_aisearch_settings_from_dot_env_as_dict() +azure_ai_search_settings = azure_aisearch_settings_from_dot_env_as_dict() # Our example index has fields "source_title", "source_text", "source_url", and "source_file". # Add fields mapping to the settings to indicate which fields to use for the title, content, URL, and file path. @@ -49,35 +54,28 @@ # When using data, set use_extensions=True and use the 2023-12-01-preview API version. 
chat_service = sk_oai.AzureChatCompletion( service_id="chat-gpt", - deployment_name=deployment, - api_key=api_key, - endpoint=endpoint, - api_version="2023-12-01-preview", use_extensions=True, + **aoai_settings, ) kernel.add_service(chat_service) prompt_template_config = PromptTemplateConfig( - template="{{$user_input}}", + template="{{$chat_history}}{{$user_input}}", name="chat", template_format="semantic-kernel", input_variables=[ + InputVariable(name="chat_history", description="The chat history", is_required=True), InputVariable(name="request", description="The user input", is_required=True), ], execution_settings={"default": req_settings}, ) - -chat = ChatHistory() - -chat.add_user_message("Hi there, who are you?") -chat.add_assistant_message("I am an AI assistant here to answer your questions.") - -arguments = KernelArguments() - chat_function = kernel.create_function_from_prompt( plugin_name="ChatBot", function_name="Chat", prompt_template_config=prompt_template_config ) +chat_history = ChatHistory() +chat_history.add_system_message("I am an AI assistant here to answer your questions.") + async def chat() -> bool: try: @@ -96,20 +94,34 @@ async def chat() -> bool: # Non streaming # answer = await kernel.run(chat_function, input_vars=context_vars) # print(f"Assistant:> {answer}") - arguments = KernelArguments(user_input=user_input, execution_settings=req_settings) + arguments = KernelArguments(chat_history=chat_history, user_input=user_input, execution_settings=req_settings) full_message = None print("Assistant:> ", end="") async for message in kernel.invoke_stream(chat_function, arguments=arguments): print(str(message[0]), end="") full_message = message[0] if not full_message else full_message + message[0] - chat.add_assistant_message(str(full_message)) print("\n") # The tool message containing cited sources is available in the context - if isinstance(full_message, AzureStreamingChatMessageContent): - chat.add_function_response_message(name="tool", content=full_message.tool_message) - print(f"Tool:> {full_message.tool_message}") + if full_message: + chat_history.add_user_message(user_input) + if hasattr(full_message, "tool_message"): + chat_history.add_message( + AzureChatMessageContent( + role="assistant", + tool_calls=[ + ToolCall( + id="chat_with_your_data", + function=FunctionCall(name="chat_with_your_data", arguments=""), + ) + ], + ) + ) + chat_history.add_tool_message(full_message.tool_message, {"tool_call_id": "chat_with_your_data"}) + if full_message.role is None: + full_message.role = ChatRole.ASSISTANT + chat_history.add_message(full_message) return True diff --git a/python/samples/kernel-syntax-examples/azure_chat_gpt_with_data_api_function_calling.py b/python/samples/kernel-syntax-examples/azure_chat_gpt_with_data_api_function_calling.py index 28e9cb8ed78f..3d333cbb4664 100644 --- a/python/samples/kernel-syntax-examples/azure_chat_gpt_with_data_api_function_calling.py +++ b/python/samples/kernel-syntax-examples/azure_chat_gpt_with_data_api_function_calling.py @@ -68,11 +68,11 @@ # the format for that is 'PluginName-FunctionName', (i.e. 'math-Add'). # if the model or api version do not support this you will get an error. 
prompt_template_config = PromptTemplateConfig( - template="{{$user_input}}", + template="{{$chat_history}}{{$user_input}}", name="chat", template_format="semantic-kernel", input_variables=[ - InputVariable(name="history", description="The history of the conversation", is_required=True), + InputVariable(name="chat_history", description="The history of the conversation", is_required=True), InputVariable(name="user_input", description="The user input", is_required=True), ], ) @@ -110,6 +110,7 @@ async def chat() -> bool: print("\n\nExiting chat...") return False + arguments["chat_history"] = history arguments["user_input"] = user_input answer = await kernel.invoke( functions=chat_function, diff --git a/python/samples/kernel-syntax-examples/chat_gpt_api_function_calling.py b/python/samples/kernel-syntax-examples/chat_gpt_api_function_calling.py index 3eed9abe14d7..4b9a38323a3b 100644 --- a/python/samples/kernel-syntax-examples/chat_gpt_api_function_calling.py +++ b/python/samples/kernel-syntax-examples/chat_gpt_api_function_calling.py @@ -14,13 +14,10 @@ from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.open_ai_prompt_execution_settings import ( OpenAIPromptExecutionSettings, ) -from semantic_kernel.connectors.ai.open_ai.utils import ( - get_tool_call_object, -) +from semantic_kernel.connectors.ai.open_ai.utils import get_tool_call_object from semantic_kernel.contents.chat_history import ChatHistory from semantic_kernel.core_plugins import MathPlugin, TimePlugin from semantic_kernel.functions.kernel_arguments import KernelArguments -from semantic_kernel.prompt_template.input_variable import InputVariable if TYPE_CHECKING: from semantic_kernel.functions.kernel_function import KernelFunction @@ -58,6 +55,11 @@ kernel.import_plugin_from_object(MathPlugin(), plugin_name="math") kernel.import_plugin_from_object(TimePlugin(), plugin_name="time") +chat_function = kernel.create_function_from_prompt( + prompt="{{$chat_history}}{{$user_input}}", + plugin_name="ChatBot", + function_name="Chat", +) # enabling or disabling function calling is done by setting the function_call parameter for the completion. # when the function_call parameter is set to "auto" the model will decide which function to use, if any. # if you only want to use a specific function, set the name of that function in this parameter, @@ -68,6 +70,7 @@ # If configured to be greater than one, this value will be overridden to 1. execution_settings = sk_oai.OpenAIChatPromptExecutionSettings( service_id="chat", + ai_model_id="gpt-3.5-turbo-1106", max_tokens=2000, temperature=0.7, top_p=0.8, @@ -77,30 +80,13 @@ max_auto_invoke_attempts=3, ) -prompt_template_config = sk.PromptTemplateConfig( - template="{{$user_input}}", - name="chat", - template_format="semantic-kernel", - input_variables=[ - InputVariable(name="user_input", description="The user input", is_required=True), - InputVariable(name="chat_history", description="The history of the conversation", is_required=True), - ], - execution_settings={"chat": execution_settings}, -) - history = ChatHistory() history.add_system_message(system_message) history.add_user_message("Hi there, who are you?") history.add_assistant_message("I am Mosscap, a chat bot. 
I'm trying to figure out what people need.") -arguments = KernelArguments() - -chat_function = kernel.create_function_from_prompt( - prompt_template_config=prompt_template_config, - plugin_name="ChatBot", - function_name="Chat", -) +arguments = KernelArguments(settings=execution_settings) def print_tool_calls(message: Union[OpenAIChatMessageContent, OpenAIStreamingChatMessageContent]) -> None: @@ -138,7 +124,7 @@ async def handle_streaming( print("Mosscap:> ", end="") streamed_chunks: List[OpenAIStreamingChatMessageContent] = [] - tool_call_ids_by_index: Dict[int, Any] = {} + tool_call_ids_by_index: Dict[str, Any] = {} async for message in response: if not execution_settings.auto_invoke_kernel_functions and isinstance( @@ -147,11 +133,11 @@ async def handle_streaming( streamed_chunks.append(message[0]) if message[0].tool_calls is not None: for tc in message[0].tool_calls: - if tc.index not in tool_call_ids_by_index: - tool_call_ids_by_index[tc.index] = tc + if tc.id not in tool_call_ids_by_index: + tool_call_ids_by_index[tc.id] = tc else: for tc in message[0].tool_calls: - tool_call_ids_by_index[tc.index] += tc + tool_call_ids_by_index[tc.id] += tc else: print(str(message[0]), end="") @@ -178,7 +164,7 @@ async def chat() -> bool: print("\n\nExiting chat...") return False - stream = False + stream = True if stream: await handle_streaming(kernel, chat_function, user_input, history, execution_settings) else: diff --git a/python/semantic_kernel/connectors/ai/chat_completion_client_base.py b/python/semantic_kernel/connectors/ai/chat_completion_client_base.py index cb291d2dd3f4..95c5f89f53fd 100644 --- a/python/semantic_kernel/connectors/ai/chat_completion_client_base.py +++ b/python/semantic_kernel/connectors/ai/chat_completion_client_base.py @@ -22,7 +22,7 @@ async def complete_chat( self, chat_history: "ChatHistory", settings: "PromptExecutionSettings", - **kwargs: Dict[str, Any], + **kwargs: Any, ) -> List["ChatMessageContent"]: """ This is the method that is called from the kernel to get a response from a chat-optimized LLM. @@ -43,7 +43,7 @@ async def complete_chat_stream( self, chat_history: "ChatHistory", settings: "PromptExecutionSettings", - **kwargs: Dict[str, Any], + **kwargs: Any, ) -> AsyncIterable[List["StreamingChatMessageContent"]]: """ This is the method that is called from the kernel to get a stream response from a chat-optimized LLM. @@ -82,5 +82,5 @@ def _prepare_chat_history_for_request( def _chat_message_content_to_dict(self, message: ChatMessageContent) -> Dict[str, Optional[str]]: """can be overridden to customize the serialization of the chat message content""" - msg = message.model_dump(exclude_none=True, include=["role", "content"]) + msg = message.model_dump(include=["role", "content"]) return msg diff --git a/python/semantic_kernel/connectors/ai/google_palm/services/gp_chat_completion.py b/python/semantic_kernel/connectors/ai/google_palm/services/gp_chat_completion.py index 16b4d93d2780..8a6e80bda325 100644 --- a/python/semantic_kernel/connectors/ai/google_palm/services/gp_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/google_palm/services/gp_chat_completion.py @@ -63,7 +63,7 @@ async def complete_chat( self, chat_history: ChatHistory, settings: GooglePalmPromptExecutionSettings, - **kwargs: Dict[str, Any], + **kwargs: Any, ) -> List[ChatMessageContent]: """ This is the method that is called from the kernel to get a response from a chat-optimized LLM. 
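# A minimal sketch of the merge-by-id pattern this patch introduces for streamed tool
# calls (replacing the old merge-by-index), assuming the ToolCall/FunctionCall classes
# shown in this change set; the id "call_1", the name "math-Add" and the argument
# fragments below are illustrative values only, not taken from the PR.
from typing import Dict

from semantic_kernel.connectors.ai.open_ai.contents.function_call import FunctionCall
from semantic_kernel.connectors.ai.open_ai.contents.tool_calls import ToolCall

# Two chunks of one streamed tool call: the first carries the id and function name,
# the second carries only the next slice of the JSON arguments.
chunks = [
    ToolCall(id="call_1", function=FunctionCall(name="math-Add", arguments='{"input": 3,')),
    ToolCall(id="call_1", function=FunctionCall(arguments=' "amount": 4}')),
]

tool_calls_by_id: Dict[str, ToolCall] = {}
for tc in chunks:
    if tc.id not in tool_calls_by_id:
        tool_calls_by_id[tc.id] = tc
    else:
        # ToolCall.__add__ keeps the existing id/type and concatenates argument fragments.
        tool_calls_by_id[tc.id] += tc

print(tool_calls_by_id["call_1"].function.arguments)  # {"input": 3, "amount": 4}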
@@ -114,7 +114,7 @@ async def complete_chat_stream( self, messages: List[Tuple[str, str]], settings: GooglePalmPromptExecutionSettings, - **kwargs: Dict[str, Any], + **kwargs: Any, ): raise NotImplementedError("Google Palm API does not currently support streaming") diff --git a/python/semantic_kernel/connectors/ai/ollama/services/ollama_chat_completion.py b/python/semantic_kernel/connectors/ai/ollama/services/ollama_chat_completion.py index f4bee9777a54..530cfc9c5223 100644 --- a/python/semantic_kernel/connectors/ai/ollama/services/ollama_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/ollama/services/ollama_chat_completion.py @@ -2,21 +2,15 @@ import json import logging -from typing import Any, AsyncIterable, Dict, List, Optional +from typing import Any, AsyncIterable, List, Optional import aiohttp from pydantic import HttpUrl -from semantic_kernel.connectors.ai.chat_completion_client_base import ( - ChatCompletionClientBase, -) -from semantic_kernel.connectors.ai.ollama.ollama_prompt_execution_settings import ( - OllamaChatPromptExecutionSettings, -) +from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase +from semantic_kernel.connectors.ai.ollama.ollama_prompt_execution_settings import OllamaChatPromptExecutionSettings from semantic_kernel.connectors.ai.ollama.utils import AsyncSession -from semantic_kernel.connectors.ai.text_completion_client_base import ( - TextCompletionClientBase, -) +from semantic_kernel.connectors.ai.text_completion_client_base import TextCompletionClientBase from semantic_kernel.contents.chat_history import ChatHistory from semantic_kernel.contents.chat_message_content import ChatMessageContent from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent @@ -45,7 +39,7 @@ async def complete_chat( self, chat_history: ChatHistory, settings: OllamaChatPromptExecutionSettings, - **kwargs: Dict[str, Any], + **kwargs: Any, ) -> List[ChatMessageContent]: """ This is the method that is called from the kernel to get a response from a chat-optimized LLM. @@ -78,7 +72,7 @@ async def complete_chat_stream( self, chat_history: ChatHistory, settings: OllamaChatPromptExecutionSettings, - **kwargs: Dict[str, Any], + **kwargs: Any, ) -> AsyncIterable[List[StreamingChatMessageContent]]: """ Streams a text completion using a Ollama model. diff --git a/python/semantic_kernel/connectors/ai/open_ai/contents/azure_streaming_chat_message_content.py b/python/semantic_kernel/connectors/ai/open_ai/contents/azure_streaming_chat_message_content.py index 756bd96d92f9..568c5733295d 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/contents/azure_streaming_chat_message_content.py +++ b/python/semantic_kernel/connectors/ai/open_ai/contents/azure_streaming_chat_message_content.py @@ -1,5 +1,4 @@ # Copyright (c) Microsoft. All rights reserved. 
-from copy import copy from typing import Optional from semantic_kernel.connectors.ai.open_ai.contents.open_ai_streaming_chat_message_content import ( @@ -52,18 +51,19 @@ def __add__(self, other: "AzureStreamingChatMessageContent") -> "AzureStreamingC if self.role and other.role and self.role != other.role: raise ContentAdditionException("Cannot add StreamingChatMessageContent with different role") fc = (self.function_call + other.function_call) if self.function_call else other.function_call + tc = {} if self.tool_calls: + tc = {t.id: t for t in self.tool_calls} + last_tc_id = list(tc.keys())[-1] if other.tool_calls: - tc = copy(self.tool_calls) for new_tool in other.tool_calls: - if new_tool.index >= len(self.tool_calls): - tc.append(new_tool) + if new_tool.id is None or new_tool.id == last_tc_id: + tc[last_tc_id] += new_tool else: - tc[new_tool.index] += new_tool - else: - tc = copy(self.tool_calls) - else: - tc = copy(other.tool_calls) + tc[new_tool.id] = new_tool + elif other.tool_calls: + tc = {t.id: t for t in other.tool_calls} + tc_list = list(tc.values()) return AzureStreamingChatMessageContent( choice_index=self.choice_index, @@ -75,6 +75,7 @@ def __add__(self, other: "AzureStreamingChatMessageContent") -> "AzureStreamingC encoding=self.encoding, finish_reason=self.finish_reason or other.finish_reason, function_call=fc, - tool_calls=tc, + tool_calls=tc_list, + tool_call_id=self.tool_call_id or other.tool_call_id, tool_message=(self.tool_message or "") + (other.tool_message or ""), ) diff --git a/python/semantic_kernel/connectors/ai/open_ai/contents/function_call.py b/python/semantic_kernel/connectors/ai/open_ai/contents/function_call.py index 7a64ac3808b4..97b2eb1faa9c 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/contents/function_call.py +++ b/python/semantic_kernel/connectors/ai/open_ai/contents/function_call.py @@ -16,18 +16,12 @@ class FunctionCall(KernelBaseModel): name: Optional[str] = None arguments: Optional[str] = None - # TODO: check if needed - id: Optional[str] = None def __add__(self, other: Optional["FunctionCall"]) -> "FunctionCall": """Add two function calls together, combines the arguments, ignores the name.""" if not other: return self - return FunctionCall( - name=self.name or other.name, - arguments=(self.arguments or "") + (other.arguments or ""), - id=self.id or other.id, - ) + return FunctionCall(name=self.name or other.name, arguments=(self.arguments or "") + (other.arguments or "")) def parse_arguments(self) -> Optional[Dict[str, Any]]: """Parse the arguments into a dictionary.""" diff --git a/python/semantic_kernel/connectors/ai/open_ai/contents/open_ai_chat_message_content.py b/python/semantic_kernel/connectors/ai/open_ai/contents/open_ai_chat_message_content.py index 6924669c5e77..6558f1fdf855 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/contents/open_ai_chat_message_content.py +++ b/python/semantic_kernel/connectors/ai/open_ai/contents/open_ai_chat_message_content.py @@ -1,5 +1,4 @@ # Copyright (c) Microsoft. All rights reserved. 
-import json from typing import List, Optional from xml.etree.ElementTree import Element @@ -34,6 +33,7 @@ class OpenAIChatMessageContent(ChatMessageContent): inner_content: Optional[ChatCompletion] = None function_call: Optional[FunctionCall] = None tool_calls: Optional[List[ToolCall]] = None + tool_call_id: Optional[str] = None @staticmethod def ToolIdProperty(): @@ -49,11 +49,12 @@ def to_prompt(self, root_key: str) -> str: root = Element(root_key) root.set("role", self.role.value) - root.set("metadata", json.dumps(self.metadata)) if self.function_call: root.set("function_call", self.function_call.model_dump_json(exclude_none=True)) if self.tool_calls: root.set("tool_calls", "|".join([call.model_dump_json(exclude_none=True) for call in self.tool_calls])) + if self.tool_call_id: + root.set("tool_call_id", self.tool_call_id) root.text = self.content or "" return ElementTree.tostring(root, encoding=self.encoding or "unicode", short_empty_elements=False) @@ -68,10 +69,10 @@ def from_element(cls, element: Element) -> "ChatMessageContent": ChatMessageContent - The new instance of ChatMessageContent. """ args = {"role": element.get("role", ChatRole.USER.value), "content": element.text} - if metadata := element.get("metadata"): - args["metadata"] = json.loads(metadata) if function_call := element.get("function_call"): args["function_call"] = FunctionCall.model_validate_json(function_call) if tool_calls := element.get("tool_calls"): args["tool_calls"] = [ToolCall.model_validate_json(call) for call in tool_calls.split("|")] + if tool_call_id := element.get("tool_call_id"): + args["tool_call_id"] = tool_call_id return cls(**args) diff --git a/python/semantic_kernel/connectors/ai/open_ai/contents/open_ai_streaming_chat_message_content.py b/python/semantic_kernel/connectors/ai/open_ai/contents/open_ai_streaming_chat_message_content.py index bd9bc3465b6b..672743fb85e7 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/contents/open_ai_streaming_chat_message_content.py +++ b/python/semantic_kernel/connectors/ai/open_ai/contents/open_ai_streaming_chat_message_content.py @@ -1,12 +1,14 @@ # Copyright (c) Microsoft. All rights reserved. 
-from copy import copy from typing import List, Optional +from xml.etree.ElementTree import Element +from defusedxml import ElementTree from openai.types.chat.chat_completion_chunk import ChatCompletionChunk from semantic_kernel.connectors.ai.open_ai.contents.function_call import FunctionCall from semantic_kernel.connectors.ai.open_ai.contents.tool_calls import ToolCall from semantic_kernel.contents import StreamingChatMessageContent +from semantic_kernel.contents.chat_role import ChatRole from semantic_kernel.exceptions import ContentAdditionException @@ -38,6 +40,7 @@ class OpenAIStreamingChatMessageContent(StreamingChatMessageContent): inner_content: ChatCompletionChunk function_call: Optional[FunctionCall] = None tool_calls: Optional[List[ToolCall]] = None + tool_call_id: Optional[str] = None def __add__(self, other: "OpenAIStreamingChatMessageContent") -> "OpenAIStreamingChatMessageContent": """When combining two OpenAIStreamingChatMessageContent instances, @@ -55,18 +58,19 @@ def __add__(self, other: "OpenAIStreamingChatMessageContent") -> "OpenAIStreamin if self.role and other.role and self.role != other.role: raise ContentAdditionException("Cannot add StreamingChatMessageContent with different role") fc = (self.function_call + other.function_call) if self.function_call else other.function_call + tc = {} if self.tool_calls: + tc = {t.id: t for t in self.tool_calls} + last_tc_id = list(tc.keys())[-1] if other.tool_calls: - tc = copy(self.tool_calls) for new_tool in other.tool_calls: - if new_tool.index >= len(self.tool_calls): - tc.append(new_tool) + if new_tool.id is None or new_tool.id == last_tc_id: + tc[last_tc_id] += new_tool else: - tc[new_tool.index] += new_tool - else: - tc = copy(self.tool_calls) - else: - tc = copy(other.tool_calls) + tc[new_tool.id] = new_tool + elif other.tool_calls: + tc = {t.id: t for t in other.tool_calls} + tc_list = list(tc.values()) return OpenAIStreamingChatMessageContent( choice_index=self.choice_index, @@ -78,5 +82,44 @@ def __add__(self, other: "OpenAIStreamingChatMessageContent") -> "OpenAIStreamin encoding=self.encoding, finish_reason=self.finish_reason or other.finish_reason, function_call=fc, - tool_calls=tc, + tool_calls=tc_list, + tool_call_id=self.tool_call_id or other.tool_call_id, ) + + def to_prompt(self, root_key: str) -> str: + """Convert the OpenAIChatMessageContent to a prompt. + + Returns: + str - The prompt from the ChatMessageContent. + """ + + root = Element(root_key) + if self.role: + root.set("role", self.role.value) + if self.function_call: + root.set("function_call", self.function_call.model_dump_json(exclude_none=True)) + if self.tool_calls: + root.set("tool_calls", "|".join([call.model_dump_json(exclude_none=True) for call in self.tool_calls])) + if self.tool_call_id: + root.set("tool_call_id", self.tool_call_id) + root.text = self.content or "" + return ElementTree.tostring(root, encoding=self.encoding or "unicode", short_empty_elements=False) + + @classmethod + def from_element(cls, element: Element) -> "StreamingChatMessageContent": + """Create a new instance of OpenAIChatMessageContent from a prompt. + + Args: + prompt: str - The prompt to create the ChatMessageContent from. + + Returns: + ChatMessageContent - The new instance of ChatMessageContent. 
+ """ + args = {"role": element.get("role", ChatRole.USER.value), "content": element.text} + if function_call := element.get("function_call"): + args["function_call"] = FunctionCall.model_validate_json(function_call) + if tool_calls := element.get("tool_calls"): + args["tool_calls"] = [ToolCall.model_validate_json(call) for call in tool_calls.split("|")] + if tool_call_id := element.get("tool_call_id"): + args["tool_call_id"] = tool_call_id + return cls(**args) diff --git a/python/semantic_kernel/connectors/ai/open_ai/contents/tool_calls.py b/python/semantic_kernel/connectors/ai/open_ai/contents/tool_calls.py index 456f0e6c0c08..8b3d86eb58a7 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/contents/tool_calls.py +++ b/python/semantic_kernel/connectors/ai/open_ai/contents/tool_calls.py @@ -8,7 +8,6 @@ class ToolCall(KernelBaseModel): """Class to hold a tool call response.""" - index: Optional[int] = None id: Optional[str] = None type: Optional[Literal["function"]] = "function" function: Optional[FunctionCall] = None @@ -18,7 +17,6 @@ def __add__(self, other: Optional["ToolCall"]) -> "ToolCall": if not other: return self return ToolCall( - index=self.index or other.index, id=self.id or other.id, type=self.type or other.type, function=self.function + other.function if self.function else other.function, diff --git a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py index 7c1f1024883c..726396d1a2bf 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py +++ b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py @@ -35,6 +35,10 @@ class OpenAIChatCompletionBase(OpenAIHandler, ChatCompletionClientBase): """OpenAI Chat completion class.""" + # region Overriding base class methods + # most of the methods are overridden from the ChatCompletionClientBase class, otherwise it is mentioned + + # override from AIServiceClientBase def get_prompt_execution_settings_class(self) -> "PromptExecutionSettings": """Create a request settings object.""" return OpenAIChatPromptExecutionSettings @@ -47,7 +51,7 @@ async def complete_chat( self, chat_history: ChatHistory, settings: OpenAIPromptExecutionSettings, - **kwargs: Dict[str, Any], + **kwargs: Any, ) -> List[OpenAIChatMessageContent]: """Executes a chat completion request and returns the result. @@ -60,13 +64,15 @@ async def complete_chat( Returns: List[OpenAIChatMessageContent | AzureChatMessageContent] -- The completion result(s). 
""" - auto_invoke_kernel_functions, max_auto_invoke_attempts = self._get_auto_invoke_execution_settings(settings) - kernel = self._validate_kernel_for_tool_calling(**kwargs) + tool_call_behavior = self._get_tool_call_behavior(settings) + kernel = kwargs.get("kernel", None) + if tool_call_behavior.auto_invoke_kernel_functions and kernel is None: + raise ServiceInvalidExecutionSettingsError("The kernel argument is required for OpenAI tool calling.") - for _ in range(max_auto_invoke_attempts): + for _ in range(tool_call_behavior.max_auto_invoke_attempts): settings = self._prepare_settings(settings, chat_history, stream_request=False) completions = await self._send_chat_request(settings) - if self._should_return_completions_response(completions, auto_invoke_kernel_functions): + if self._should_return_completions_response(completions=completions, tool_call_behavior=tool_call_behavior): return completions await self._process_chat_response_with_tool_call(completions, chat_history, kernel) @@ -74,7 +80,7 @@ async def complete_chat_stream( self, chat_history: ChatHistory, settings: OpenAIPromptExecutionSettings, - **kwargs: Dict[str, Any], + **kwargs: Any, ) -> AsyncIterable[List[OpenAIStreamingChatMessageContent]]: """Executes a streaming chat completion request and returns the result. @@ -88,56 +94,38 @@ async def complete_chat_stream( List[OpenAIStreamingChatMessageContent | AzureStreamingChatMessageContent] -- A stream of OpenAIStreamingChatMessages or AzureStreamingChatMessageContent when using Azure. """ - auto_invoke_kernel_functions, max_auto_invoke_attempts = self._get_auto_invoke_execution_settings(settings) - kernel = self._validate_kernel_for_tool_calling(**kwargs) - tool_call_behavior = None - if auto_invoke_kernel_functions: - # Only configure the tool_call_behavior if auto_invoking_functions is true - tool_call_behavior = ToolCallBehavior(auto_invoke_kernel_functions=auto_invoke_kernel_functions) - - attempts = 0 - continue_loop = True + tool_call_behavior = self._get_tool_call_behavior(settings) + kernel = kwargs.pop("kernel", None) + if tool_call_behavior.auto_invoke_kernel_functions and kernel is None: + raise ServiceInvalidExecutionSettingsError("The kernel argument is required for OpenAI tool calling.") - while attempts < max_auto_invoke_attempts and continue_loop: + for _ in range(tool_call_behavior.max_auto_invoke_attempts): settings = self._prepare_settings(settings, chat_history, stream_request=True) response = await self._send_chat_stream_request(settings) - async for content in self._process_chat_stream_response(response, chat_history, kernel, tool_call_behavior): + finish_reason = None + async for content, finish_reason in self._process_chat_stream_response( + response=response, chat_history=chat_history, kernel=kernel, tool_call_behavior=tool_call_behavior + ): yield content - if tool_call_behavior and not tool_call_behavior.auto_invoke_kernel_functions: - continue_loop = False - break - attempts += 1 - - def _validate_kernel_for_tool_calling(self, **kwargs: Dict[str, Any]) -> "Kernel": - """Validate that the arguments contains the kernel, which is used for function calling, if applicable.""" - kernel = kwargs.pop("kernel", None) - if kernel is None: - raise ServiceInvalidExecutionSettingsError("The kernel argument is required for OpenAI tool calling.") - return kernel + if finish_reason != FinishReason.TOOL_CALLS: + break - def _prepare_settings( - self, - settings: OpenAIChatPromptExecutionSettings, - chat_history: ChatHistory, - stream_request: bool = False, - ) -> 
OpenAIChatPromptExecutionSettings: - """Prepare the promp execution settings for the chat request.""" - settings.messages = self._prepare_chat_history_for_request(chat_history) - settings.stream = stream_request - if not settings.ai_model_id: - settings.ai_model_id = self.ai_model_id + def _chat_message_content_to_dict(self, message: ChatMessageContent) -> Dict[str, Optional[str]]: + msg = super()._chat_message_content_to_dict(message) + if message.role == "assistant": + if tool_calls := getattr(message, "tool_calls", None): + msg["tool_calls"] = [tool_call.model_dump() for tool_call in tool_calls] + if function_call := getattr(message, "function_call", None): + msg["function_call"] = function_call.model_dump_json() + if message.role == "tool": + if tool_call_id := getattr(message, "tool_call_id", None): + msg["tool_call_id"] = tool_call_id + if message.metadata and "function" in message.metadata: + msg["name"] = message.metadata["function_name"] + return msg - # If auto_invoke_kernel_functions is True and num_of_responses > 1 provide a warning - # that the num_of_responses will be configured to one. - if settings.auto_invoke_kernel_functions and settings.number_of_responses > 1: - logger.warning( - ( - "Auto invoking functions does not support more than one num_of_response. " - "The num_of_responses setting is configured as 1." - ) - ) - settings.number_of_responses = 1 - return settings + # endregion + # region internal handlers async def _send_chat_request(self, settings: OpenAIChatPromptExecutionSettings) -> List[OpenAIChatMessageContent]: """Send the chat request""" @@ -146,7 +134,6 @@ async def _send_chat_request(self, settings: OpenAIChatPromptExecutionSettings) completions = [ self._create_chat_message_content(response, choice, response_metadata) for choice in response.choices ] - return completions async def _send_chat_stream_request(self, settings: OpenAIChatPromptExecutionSettings) -> AsyncStream: @@ -172,9 +159,9 @@ async def _process_chat_stream_response( self, response: AsyncStream, chat_history: ChatHistory, - kernel: "Kernel", - tool_call_behavior: Optional[ToolCallBehavior] = None, - ) -> AsyncIterable[List[OpenAIStreamingChatMessageContent]]: + tool_call_behavior: ToolCallBehavior, + kernel: Optional["Kernel"] = None, + ) -> AsyncIterable[Tuple[List[OpenAIStreamingChatMessageContent], Optional[FinishReason]]]: """Process the chat stream response and handle tool calls if applicable.""" full_content = None async for chunk in response: @@ -185,22 +172,25 @@ async def _process_chat_stream_response( contents = [ self._create_streaming_chat_message_content(chunk, choice, chunk_metadata) for choice in chunk.choices ] - if not tool_call_behavior or not tool_call_behavior.auto_invoke_kernel_functions: - yield contents + if not tool_call_behavior.auto_invoke_kernel_functions: + yield contents, None + continue finish_reason = getattr(contents[0], "finish_reason", None) full_content = contents[0] if full_content is None else full_content + contents[0] if not contents[0].tool_calls or finish_reason not in (FinishReason.STOP, FinishReason.TOOL_CALLS, None): - yield contents + yield contents, finish_reason if finish_reason == FinishReason.STOP: - if tool_call_behavior: - tool_call_behavior.auto_invoke_kernel_functions = False + tool_call_behavior.auto_invoke_kernel_functions = False break if finish_reason == FinishReason.TOOL_CALLS: chat_history.add_message(message=full_content) await self._process_tool_calls(full_content, kernel, chat_history) break + # endregion + # region content 
creation + def _create_chat_message_content( self, response: ChatCompletion, choice: Choice, response_metadata: Dict[str, Any] ) -> OpenAIChatMessageContent: @@ -271,10 +261,9 @@ def _get_tool_calls_from_chat_choice(self, choice: Union[Choice, ChunkChoice]) - return None return [ ToolCall( - index=getattr(tool, "index", None), id=tool.id, type=tool.type, - function=FunctionCall(name=tool.function.name, arguments=tool.function.arguments, id=tool.id), + function=FunctionCall(name=tool.function.name, arguments=tool.function.arguments), ) for tool in content.tool_calls ] @@ -289,20 +278,51 @@ def _get_function_call_from_chat_choice(self, choice: Union[Choice, ChunkChoice] return None return FunctionCall(name=content.function_call.name, arguments=content.function_call.arguments) - def _get_auto_invoke_execution_settings( - self, execution_settings: OpenAIPromptExecutionSettings - ) -> Tuple[bool, int]: - """Gets the auto invoke and max iterations settings.""" + def _get_tool_call_behavior(self, execution_settings: OpenAIPromptExecutionSettings) -> ToolCallBehavior: + """Gets the auto invoke and max iterations settings through ToolCallBehavior.""" + auto_invoke_kernel_functions = False + max_auto_invoke_attempts = 1 if isinstance(execution_settings, OpenAIChatPromptExecutionSettings): - auto_invoke_kernel_functions = execution_settings.auto_invoke_kernel_functions - max_auto_invoke_attempts = ( - execution_settings.max_auto_invoke_attempts if auto_invoke_kernel_functions else 1 + if execution_settings.auto_invoke_kernel_functions is not None: + auto_invoke_kernel_functions = execution_settings.auto_invoke_kernel_functions + if auto_invoke_kernel_functions and execution_settings.max_auto_invoke_attempts is not None: + max_auto_invoke_attempts = ( + execution_settings.max_auto_invoke_attempts if auto_invoke_kernel_functions else 1 + ) + + return ToolCallBehavior( + auto_invoke_kernel_functions=auto_invoke_kernel_functions, max_auto_invoke_attempts=max_auto_invoke_attempts + ) + + # endregion + # region request preparation + + def _prepare_settings( + self, + settings: OpenAIChatPromptExecutionSettings, + chat_history: ChatHistory, + stream_request: bool = False, + ) -> OpenAIChatPromptExecutionSettings: + """Prepare the promp execution settings for the chat request.""" + settings.messages = self._prepare_chat_history_for_request(chat_history) + settings.stream = stream_request + if not settings.ai_model_id: + settings.ai_model_id = self.ai_model_id + + # If auto_invoke_kernel_functions is True and num_of_responses > 1 provide a warning + # that the num_of_responses will be configured to one. + if settings.auto_invoke_kernel_functions and settings.number_of_responses > 1: + logger.warning( + ( + "Auto invoking functions does not support more than one num_of_response. " + "The num_of_responses setting is configured as 1." 
+ ) ) - else: - auto_invoke_kernel_functions = False - max_auto_invoke_attempts = 1 + settings.number_of_responses = 1 + return settings - return auto_invoke_kernel_functions, max_auto_invoke_attempts + # endregion + # region tool calling async def _process_tool_calls( self, @@ -322,32 +342,22 @@ async def _process_tool_calls( msg = OpenAIChatMessageContent( role=ChatRole.TOOL, content=str(func_result), - metadata={"tool_call_id": tool_call.id, "function_name": tool_call.function.name}, + tool_call_id=tool_call.id, + metadata={"function_name": tool_call.function.name}, ) chat_history.add_message(message=msg) def _should_return_completions_response( self, completions: Union[List[OpenAIChatMessageContent], List[OpenAIStreamingChatMessageContent]], - auto_invoke_kernel_functions: bool, + tool_call_behavior: ToolCallBehavior, ) -> bool: """Determines if the completions should be returned.""" return ( - not auto_invoke_kernel_functions + not tool_call_behavior.auto_invoke_kernel_functions or any(not isinstance(completion, OpenAIChatMessageContent) for completion in completions) or any(not hasattr(completion, "tool_calls") or not completion.tool_calls for completion in completions) ) - def _chat_message_content_to_dict(self, message: ChatMessageContent) -> Dict[str, Optional[str]]: - msg = super()._chat_message_content_to_dict(message) - if message.role == "assistant": - if tool_calls := getattr(message, "tool_calls", None): - msg["tool_calls"] = tool_calls - if function_call := getattr(message, "function_call", None): - msg["function_call"] = function_call - if message.role == "tool": - if message.metadata and "tool_call_id" in message.metadata: - msg["tool_call_id"] = message.metadata["tool_call_id"] - if message.metadata and "function" in message.metadata: - msg["name"] = message.metadata["function_name"] - return msg + +# endregion diff --git a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_handler.py b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_handler.py index 74cdbb74c285..fbaacf3716f4 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_handler.py +++ b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_handler.py @@ -51,6 +51,7 @@ async def _send_request( Returns: ChatCompletion, Completion, AsyncStream[Completion | ChatCompletionChunk] -- The completion response. """ + try: if self.ai_model_type == OpenAIModelTypes.CHAT: response = await self.client.chat.completions.create(**request_settings.prepare_settings_dict()) diff --git a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_text_embedding_base.py b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_text_embedding_base.py index 531cf51c1994..9d023e68201c 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_text_embedding_base.py +++ b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_text_embedding_base.py @@ -1,25 +1,19 @@ # Copyright (c) Microsoft. All rights reserved. 
-from typing import Any, Dict, List, Optional +from typing import Any, List, Optional from numpy import array, ndarray -from semantic_kernel.connectors.ai.embeddings.embedding_generator_base import ( - EmbeddingGeneratorBase, -) +from semantic_kernel.connectors.ai.embeddings.embedding_generator_base import EmbeddingGeneratorBase from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.open_ai_prompt_execution_settings import ( OpenAIEmbeddingPromptExecutionSettings, ) -from semantic_kernel.connectors.ai.open_ai.services.open_ai_handler import ( - OpenAIHandler, -) +from semantic_kernel.connectors.ai.open_ai.services.open_ai_handler import OpenAIHandler from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings class OpenAITextEmbeddingBase(OpenAIHandler, EmbeddingGeneratorBase): - async def generate_embeddings( - self, texts: List[str], batch_size: Optional[int] = None, **kwargs: Dict[str, Any] - ) -> ndarray: + async def generate_embeddings(self, texts: List[str], batch_size: Optional[int] = None, **kwargs: Any) -> ndarray: """Generates embeddings for the given texts. Arguments: diff --git a/python/semantic_kernel/connectors/ai/open_ai/services/tool_call_behavior.py b/python/semantic_kernel/connectors/ai/open_ai/services/tool_call_behavior.py index 9daa4711b21b..da012a7b74e8 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/services/tool_call_behavior.py +++ b/python/semantic_kernel/connectors/ai/open_ai/services/tool_call_behavior.py @@ -14,3 +14,4 @@ class ToolCallBehavior(KernelBaseModel): """ auto_invoke_kernel_functions: bool = False + max_auto_invoke_attempts: int = 1 diff --git a/python/semantic_kernel/contents/streaming_chat_message_content.py b/python/semantic_kernel/contents/streaming_chat_message_content.py index 3936930ff763..534c056ad6c5 100644 --- a/python/semantic_kernel/contents/streaming_chat_message_content.py +++ b/python/semantic_kernel/contents/streaming_chat_message_content.py @@ -1,6 +1,10 @@ # Copyright (c) Microsoft. All rights reserved. +import json from typing import Optional +from xml.etree.ElementTree import Element + +from defusedxml import ElementTree from semantic_kernel.contents.chat_role import ChatRole from semantic_kernel.contents.finish_reason import FinishReason @@ -68,3 +72,31 @@ def __add__(self, other: "StreamingChatMessageContent") -> "StreamingChatMessage encoding=self.encoding, finish_reason=self.finish_reason or other.finish_reason, ) + + def to_prompt(self, root_key: str) -> str: + """Convert the ChatMessageContent to a prompt. + + Returns: + str - The prompt from the ChatMessageContent. + """ + + root = Element(root_key) + root.set("role", self.role.value) + root.set("metadata", json.dumps(self.metadata)) + root.text = self.content or "" + return ElementTree.tostring(root, encoding=self.encoding or "unicode", short_empty_elements=False) + + @classmethod + def from_element(cls, element: Element) -> "StreamingChatMessageContent": + """Create a new instance of ChatMessageContent from a prompt. + + Args: + prompt: str - The prompt to create the ChatMessageContent from. + + Returns: + ChatMessageContent - The new instance of ChatMessageContent. 
+ """ + args = {"role": element.get("role", ChatRole.USER.value), "content": element.text} + if metadata := element.get("metadata"): + args["metadata"] = json.loads(metadata) + return cls(**args) diff --git a/python/semantic_kernel/exceptions/template_engine_exceptions.py b/python/semantic_kernel/exceptions/template_engine_exceptions.py index bed9df8a5ce6..30802177606d 100644 --- a/python/semantic_kernel/exceptions/template_engine_exceptions.py +++ b/python/semantic_kernel/exceptions/template_engine_exceptions.py @@ -25,6 +25,10 @@ def __init__(self, content: str) -> None: ) +class VarBlockRenderError(BlockRenderException): + pass + + class ValBlockSyntaxError(BlockSyntaxError): def __init__(self, content: str) -> None: super().__init__( @@ -80,6 +84,7 @@ class TemplateRenderException(BlockRenderException): "BlockSyntaxError", "BlockRenderException", "VarBlockSyntaxError", + "VarBlockRenderError", "ValBlockSyntaxError", "NamedArgBlockSyntaxError", "FunctionIdBlockSyntaxError", diff --git a/python/semantic_kernel/functions/kernel_arguments.py b/python/semantic_kernel/functions/kernel_arguments.py index 34d9aed52778..42a79b2a504e 100644 --- a/python/semantic_kernel/functions/kernel_arguments.py +++ b/python/semantic_kernel/functions/kernel_arguments.py @@ -10,7 +10,7 @@ class KernelArguments(dict): def __init__( self, settings: Optional[Union["PromptExecutionSettings", List["PromptExecutionSettings"]]] = None, - **kwargs: Dict[str, Any], + **kwargs: Any, ): """Initializes a new instance of the KernelArguments class, this is a dict-like class with the additional field for the execution_settings. diff --git a/python/semantic_kernel/functions/kernel_function.py b/python/semantic_kernel/functions/kernel_function.py index 2839265e88c8..c18d94ff56e5 100644 --- a/python/semantic_kernel/functions/kernel_function.py +++ b/python/semantic_kernel/functions/kernel_function.py @@ -122,7 +122,7 @@ async def __call__( self, kernel: "Kernel", arguments: Optional[KernelArguments] = None, - **kwargs: Dict[str, Any], + **kwargs: Any, ) -> "FunctionResult": """Invoke the function with the given arguments. @@ -149,7 +149,7 @@ async def invoke( self, kernel: "Kernel", arguments: Optional[KernelArguments] = None, - **kwargs: Dict[str, Any], + **kwargs: Any, ) -> "FunctionResult": """Invoke the function with the given arguments. @@ -184,7 +184,7 @@ async def invoke_stream( self, kernel: "Kernel", arguments: Optional[KernelArguments] = None, - **kwargs: Dict[str, Any], + **kwargs: Any, ) -> AsyncIterable[Union[FunctionResult, List[Union[StreamingKernelContent, Any]]]]: """ Invoke a stream async function with the given arguments. 
diff --git a/python/semantic_kernel/functions/kernel_function_from_prompt.py b/python/semantic_kernel/functions/kernel_function_from_prompt.py index 418db2669f0b..b82cb7ec8123 100644 --- a/python/semantic_kernel/functions/kernel_function_from_prompt.py +++ b/python/semantic_kernel/functions/kernel_function_from_prompt.py @@ -5,9 +5,7 @@ from pydantic import Field, ValidationError, model_validator -from semantic_kernel.connectors.ai.chat_completion_client_base import ( - ChatCompletionClientBase, -) +from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.open_ai_prompt_execution_settings import ( OpenAIChatPromptExecutionSettings, ) diff --git a/python/semantic_kernel/kernel.py b/python/semantic_kernel/kernel.py index 00b85c909585..e4e17e295d15 100644 --- a/python/semantic_kernel/kernel.py +++ b/python/semantic_kernel/kernel.py @@ -143,7 +143,7 @@ async def invoke_stream( function_name: Optional[str] = None, plugin_name: Optional[str] = None, return_function_results: Optional[bool] = False, - **kwargs: Dict[str, Any], + **kwargs: Any, ) -> AsyncIterable[Union[List["StreamingKernelContent"], List[FunctionResult]]]: """Execute one or more stream functions. @@ -211,7 +211,7 @@ async def invoke_stream( async for stream_message in stream_function.invoke_stream(self, arguments): if isinstance(stream_message, FunctionResult): - exception = stream_message.metadata.get("exception", None) + exception = stream_message.metadata.get("error", None) if exception: break function_result.append(stream_message) @@ -267,7 +267,7 @@ async def invoke( arguments: Optional[KernelArguments] = None, function_name: Optional[str] = None, plugin_name: Optional[str] = None, - **kwargs: Dict[str, Any], + **kwargs: Any, ) -> Optional[Union[FunctionResult, List[FunctionResult]]]: """Execute one or more functions. @@ -371,7 +371,7 @@ async def invoke_prompt( prompt: str, arguments: Optional[KernelArguments] = None, template_format: Optional[str] = None, - **kwargs: Dict[str, Any], + **kwargs: Any, ) -> Optional[Union[FunctionResult, List[FunctionResult]]]: """ Invoke a function from the provided prompt @@ -728,7 +728,7 @@ def get_service( ValueError: If no service is found that matches the type. 
""" - if not service_id: + if not service_id or service_id == "default": if not type: if default_service := self.services.get("default"): return default_service diff --git a/python/semantic_kernel/template_engine/blocks/var_block.py b/python/semantic_kernel/template_engine/blocks/var_block.py index bb6c127c319b..2f05def84960 100644 --- a/python/semantic_kernel/template_engine/blocks/var_block.py +++ b/python/semantic_kernel/template_engine/blocks/var_block.py @@ -6,7 +6,7 @@ from pydantic import model_validator -from semantic_kernel.exceptions import VarBlockSyntaxError +from semantic_kernel.exceptions import VarBlockRenderError, VarBlockSyntaxError from semantic_kernel.template_engine.blocks.block import Block from semantic_kernel.template_engine.blocks.block_types import BlockTypes from semantic_kernel.template_engine.blocks.symbols import Symbols @@ -73,5 +73,10 @@ def render(self, _: "Kernel", arguments: Optional["KernelArguments"] = None) -> value = arguments.get(self.name, None) if value is None: logger.warning(f"Variable `{Symbols.VAR_PREFIX}: {self.name}` not found in the KernelArguments") - - return str(value) if value else "" + return "" + try: + return str(value) + except Exception as e: + raise VarBlockRenderError( + f"Block {self.name} failed to be parsed to a string, type is {type(value)}" + ) from e diff --git a/python/tests/conftest.py b/python/tests/conftest.py index c1b1222fbd19..cc5f01d9631a 100644 --- a/python/tests/conftest.py +++ b/python/tests/conftest.py @@ -9,6 +9,7 @@ import pytest +from semantic_kernel.contents.chat_history import ChatHistory from semantic_kernel.contents.streaming_text_content import StreamingTextContent from semantic_kernel.events.function_invoked_event_args import FunctionInvokedEventArgs from semantic_kernel.events.function_invoking_event_args import FunctionInvokingEventArgs @@ -120,6 +121,11 @@ def create_mock_function(name: str, value: str = "test") -> KernelFunction: return create_mock_function +@pytest.fixture(scope="function") +def chat_history(): + return ChatHistory() + + @pytest.fixture(autouse=True) def enable_debug_mode(): """Set `autouse=True` to enable easy debugging for tests. 
diff --git a/python/tests/integration/completions/test_azure_oai_chat_service.py b/python/tests/integration/completions/test_azure_oai_chat_service.py index 71dd26067061..192f7b4e9b24 100644 --- a/python/tests/integration/completions/test_azure_oai_chat_service.py +++ b/python/tests/integration/completions/test_azure_oai_chat_service.py @@ -209,28 +209,22 @@ async def test_azure_oai_chat_service_with_tool_call_streaming(setup_tldr_functi kernel.import_plugin_from_object(MathPlugin(), plugin_name="math") + # Create the prompt function + chat_func = kernel.create_function_from_prompt(prompt="{{$input}}", function_name="chat", plugin_name="chat") execution_settings = sk_oai.AzureChatPromptExecutionSettings( service_id="chat_completion", max_tokens=2000, temperature=0.7, top_p=0.8, tool_choice="auto", - tools=get_tool_call_object(kernel, {"exclude_plugin": ["ChatBot"]}), + tools=get_tool_call_object(kernel, {"exclude_plugin": ["chat"]}), auto_invoke_kernel_functions=True, max_auto_invoke_attempts=3, ) - - prompt_template_config = PromptTemplateConfig( - template="{{$input}}", description="Do math.", execution_settings=execution_settings - ) - - # Create the prompt function - tldr_function = kernel.create_function_from_prompt( - function_name="math_fun", plugin_name="math_int_test", prompt_template_config=prompt_template_config - ) + arguments = KernelArguments(input="what is 1+1?", settings=execution_settings) result = None - async for message in kernel.invoke_stream(tldr_function, input="what is 1+1?"): + async for message in kernel.invoke_stream(chat_func, arguments=arguments): result = message[0] if not result else result + message[0] output = str(result) diff --git a/python/tests/unit/connectors/open_ai/contents/conftest.py b/python/tests/unit/connectors/open_ai/contents/conftest.py new file mode 100644 index 000000000000..68e063c493f1 --- /dev/null +++ b/python/tests/unit/connectors/open_ai/contents/conftest.py @@ -0,0 +1,14 @@ +from pytest import fixture + +from semantic_kernel.connectors.ai.open_ai.contents.function_call import FunctionCall +from semantic_kernel.connectors.ai.open_ai.contents.tool_calls import ToolCall + + +@fixture(scope="module") +def function_call(): + return FunctionCall(name="Test-Function", arguments='{"input": "world"}') + + +@fixture(scope="module") +def tool_call(function_call: FunctionCall): + return ToolCall(id="1234", function=function_call) diff --git a/python/tests/unit/connectors/open_ai/contents/test_function_call.py b/python/tests/unit/connectors/open_ai/contents/test_function_call.py index 5cb1b85e476e..52ad15e6c1d8 100644 --- a/python/tests/unit/connectors/open_ai/contents/test_function_call.py +++ b/python/tests/unit/connectors/open_ai/contents/test_function_call.py @@ -1,28 +1,98 @@ import pytest from semantic_kernel.connectors.ai.open_ai.contents.function_call import FunctionCall +from semantic_kernel.exceptions.content_exceptions import ( + FunctionCallInvalidArgumentsException, + FunctionCallInvalidNameException, +) from semantic_kernel.functions.kernel_arguments import KernelArguments -def test_function_call(): - # Test initialization with default values - fc = FunctionCall(name="Test-Function", arguments="""{"input": "world"}""", id="1234") - assert fc.name == "Test-Function" - assert fc.arguments == """{"input": "world"}""" - assert fc.id == "1234" +def test_function_call(function_call: FunctionCall): + assert function_call.name == "Test-Function" + assert function_call.arguments == """{"input": "world"}""" -@pytest.mark.asyncio -async def 
test_function_call_to_kernel_arguments(): +def test_add(function_call: FunctionCall): + # Test adding two function calls + fc2 = FunctionCall(name="Test-Function", arguments="""{"input2": "world2"}""") + fc3 = function_call + fc2 + assert fc3.name == "Test-Function" + assert fc3.arguments == """{"input": "world"}{"input2": "world2"}""" + + +def test_add_none(function_call: FunctionCall): + # Test adding two function calls with one being None + fc2 = None + fc3 = function_call + fc2 + assert fc3.name == "Test-Function" + assert fc3.arguments == """{"input": "world"}""" + + +def test_parse_arguments(function_call: FunctionCall): + # Test parsing arguments to dictionary + assert function_call.parse_arguments() == {"input": "world"} + + +def test_parse_arguments_none(): + # Test parsing arguments to dictionary + fc = FunctionCall(name="Test-Function") + assert fc.parse_arguments() is None + + +def test_parse_arguments_fail(): + # Test parsing arguments to dictionary + fc = FunctionCall(name="Test-Function", arguments="""{"input": "world}""") + with pytest.raises(FunctionCallInvalidArgumentsException): + fc.parse_arguments() + + +def test_to_kernel_arguments(function_call: FunctionCall): # Test parsing arguments to variables arguments = KernelArguments() - func_call = FunctionCall( - name="Test-Function", - arguments="""{"input": "world", "input2": "world2"}""", - id="1234", - ) - assert isinstance(func_call.to_kernel_arguments(), KernelArguments) - - arguments.update(func_call.to_kernel_arguments()) + assert isinstance(function_call.to_kernel_arguments(), KernelArguments) + arguments.update(function_call.to_kernel_arguments()) assert arguments["input"] == "world" - assert arguments["input2"] == "world2" + + +def test_to_kernel_arguments_none(): + # Test parsing arguments to variables + fc = FunctionCall(name="Test-Function") + assert fc.to_kernel_arguments() == KernelArguments() + + +def test_split_name(function_call: FunctionCall): + # Test splitting the name into plugin and function name + assert function_call.split_name() == ["Test", "Function"] + + +def test_split_name_name_only(): + # Test splitting the name into plugin and function name + fc = FunctionCall(name="Function") + assert fc.split_name() == ["", "Function"] + + +def test_split_name_dict(function_call: FunctionCall): + # Test splitting the name into plugin and function name + assert function_call.split_name_dict() == {"plugin_name": "Test", "function_name": "Function"} + + +def test_split_name_none(): + fc = FunctionCall(id="1234") + with pytest.raises(FunctionCallInvalidNameException): + fc.split_name() + + +def test_fc_dump(function_call: FunctionCall): + # Test dumping the function call to dictionary + dumped = function_call.model_dump() + assert dumped == { + "name": "Test-Function", + "arguments": '{"input": "world"}', + } + + +def test_fc_dump_json(function_call: FunctionCall): + # Test dumping the function call to dictionary + dumped = function_call.model_dump_json() + assert dumped == """{"name":"Test-Function","arguments":"{\\"input\\": \\"world\\"}"}""" diff --git a/python/tests/unit/connectors/open_ai/contents/test_tool_call.py b/python/tests/unit/connectors/open_ai/contents/test_tool_call.py new file mode 100644 index 000000000000..02e722ce2dbe --- /dev/null +++ b/python/tests/unit/connectors/open_ai/contents/test_tool_call.py @@ -0,0 +1,38 @@ +# Copyright (c) Microsoft. All rights reserved. 
+ + +from semantic_kernel.connectors.ai.open_ai.contents.function_call import FunctionCall +from semantic_kernel.connectors.ai.open_ai.contents.tool_calls import ToolCall + + +def test_tool_call(tool_call: ToolCall): + assert tool_call.id == "1234" + assert tool_call.type == "function" + assert tool_call.function is not None + + +def test_add(tool_call: ToolCall): + # Test adding two tool calls + tool_call2 = ToolCall(id="5678", function=FunctionCall(name="Test-Function", arguments="""{"input2": "world2"}""")) + tool_call3 = tool_call + tool_call2 + assert tool_call3.id == "1234" + assert tool_call3.type == "function" + assert tool_call3.function.name == "Test-Function" + assert tool_call3.function.arguments == """{"input": "world"}{"input2": "world2"}""" + + +def test_add_none(tool_call: ToolCall): + # Test adding two tool calls with one being None + tool_call2 = None + tool_call3 = tool_call + tool_call2 + assert tool_call3.id == "1234" + assert tool_call3.type == "function" + assert tool_call3.function.name == "Test-Function" + assert tool_call3.function.arguments == """{"input": "world"}""" + + +def test_dump_json(tool_call: ToolCall): + assert ( + tool_call.model_dump_json() + == """{"id":"1234","type":"function","function":{"name":"Test-Function","arguments":"{\\"input\\": \\"world\\"}"}}""" # noqa: E501 + ) diff --git a/python/tests/unit/connectors/open_ai/services/test_azure_chat_completion.py b/python/tests/unit/connectors/open_ai/services/test_azure_chat_completion.py index 8f4bf277a97f..f6437bc7d511 100644 --- a/python/tests/unit/connectors/open_ai/services/test_azure_chat_completion.py +++ b/python/tests/unit/connectors/open_ai/services/test_azure_chat_completion.py @@ -151,15 +151,15 @@ def test_azure_chat_completion_init_with_base_url() -> None: @pytest.mark.asyncio @patch.object(AsyncChatCompletions, "create", new_callable=AsyncMock) -async def test_azure_chat_completion_call_with_parameters(mock_create) -> None: +async def test_azure_chat_completion_call_with_parameters( + mock_create, kernel: Kernel, chat_history: ChatHistory +) -> None: deployment_name = "test_deployment" endpoint = "https://test-endpoint.com" api_key = "test_api_key" api_version = "2023-03-15-preview" - messages = ChatHistory() - messages.add_user_message("hello world") + chat_history.add_user_message("hello world") complete_prompt_execution_settings = AzureChatPromptExecutionSettings(service_id="test_service_id") - kernel = Kernel() azure_chat_completion = AzureChatCompletion( deployment_name=deployment_name, @@ -168,7 +168,7 @@ async def test_azure_chat_completion_call_with_parameters(mock_create) -> None: api_key=api_key, ) await azure_chat_completion.complete_chat( - chat_history=messages, settings=complete_prompt_execution_settings, kernel=kernel + chat_history=chat_history, settings=complete_prompt_execution_settings, kernel=kernel ) mock_create.assert_awaited_once_with( model=deployment_name, @@ -180,24 +180,22 @@ async def test_azure_chat_completion_call_with_parameters(mock_create) -> None: stream=False, temperature=complete_prompt_execution_settings.temperature, top_p=complete_prompt_execution_settings.top_p, - messages=azure_chat_completion._prepare_chat_history_for_request(messages), + messages=azure_chat_completion._prepare_chat_history_for_request(chat_history), ) @pytest.mark.asyncio @patch.object(AsyncChatCompletions, "create", new_callable=AsyncMock) async def test_azure_chat_completion_call_with_parameters_and_Logit_Bias_Defined( - mock_create, + mock_create, kernel: Kernel, 
chat_history: ChatHistory ) -> None: deployment_name = "test_deployment" endpoint = "https://test-endpoint.com" api_key = "test_api_key" api_version = "2023-03-15-preview" - kernel = Kernel() prompt = "hello world" - messages = ChatHistory() - messages.add_user_message(prompt) + chat_history.add_user_message(prompt) complete_prompt_execution_settings = AzureChatPromptExecutionSettings() token_bias = {"1": -100} @@ -211,12 +209,12 @@ async def test_azure_chat_completion_call_with_parameters_and_Logit_Bias_Defined ) await azure_chat_completion.complete_chat( - chat_history=messages, settings=complete_prompt_execution_settings, kernel=kernel + chat_history=chat_history, settings=complete_prompt_execution_settings, kernel=kernel ) mock_create.assert_awaited_once_with( model=deployment_name, - messages=azure_chat_completion._prepare_chat_history_for_request(messages), + messages=azure_chat_completion._prepare_chat_history_for_request(chat_history), temperature=complete_prompt_execution_settings.temperature, top_p=complete_prompt_execution_settings.top_p, n=complete_prompt_execution_settings.number_of_responses, @@ -304,20 +302,18 @@ def test_azure_chat_completion_serialize() -> None: @pytest.mark.asyncio @patch.object(AsyncChatCompletions, "create", new_callable=AsyncMock) async def test_azure_chat_completion_with_data_call_with_parameters( - mock_create, + mock_create, kernel: Kernel, chat_history: ChatHistory ) -> None: deployment_name = "test_deployment" endpoint = "https://test-endpoint.com" api_key = "test_api_key" api_version = "2023-03-15-preview" prompt = "hello world" - messages_in = ChatHistory() + messages_in = chat_history messages_in.add_user_message(prompt) messages_out = ChatHistory() messages_out.add_user_message(prompt) - kernel = Kernel() - expected_data_settings = { "dataSources": [ { @@ -363,17 +359,14 @@ async def test_azure_chat_completion_with_data_call_with_parameters( @pytest.mark.asyncio @patch.object(AsyncChatCompletions, "create", new_callable=AsyncMock) async def test_azure_chat_completion_call_with_data_parameters_and_function_calling( - mock_create, + mock_create, kernel: Kernel, chat_history: ChatHistory ) -> None: deployment_name = "test_deployment" endpoint = "https://test-endpoint.com" api_key = "test_api_key" api_version = "2023-03-15-preview" prompt = "hello world" - messages = ChatHistory() - messages.add_user_message(prompt) - - kernel = Kernel() + chat_history.add_user_message(prompt) ai_source = AzureAISearchDataSources(indexName="test-index", endpoint="test-endpoint", key="test-key") extra = ExtraBody(data_sources=[AzureDataSources(type="AzureCognitiveSearch", parameters=ai_source)]) @@ -394,7 +387,7 @@ async def test_azure_chat_completion_call_with_data_parameters_and_function_call ) await azure_chat_completion.complete_chat( - chat_history=messages, + chat_history=chat_history, settings=complete_prompt_execution_settings, kernel=kernel, ) @@ -403,7 +396,7 @@ async def test_azure_chat_completion_call_with_data_parameters_and_function_call mock_create.assert_awaited_once_with( model=deployment_name, - messages=azure_chat_completion._prepare_chat_history_for_request(messages), + messages=azure_chat_completion._prepare_chat_history_for_request(chat_history), temperature=complete_prompt_execution_settings.temperature, top_p=complete_prompt_execution_settings.top_p, n=complete_prompt_execution_settings.number_of_responses, @@ -421,18 +414,15 @@ async def test_azure_chat_completion_call_with_data_parameters_and_function_call @pytest.mark.asyncio 
@patch.object(AsyncChatCompletions, "create", new_callable=AsyncMock) async def test_azure_chat_completion_call_with_data_with_parameters_and_Stop_Defined( - mock_create, + mock_create, kernel: Kernel, chat_history: ChatHistory ) -> None: deployment_name = "test_deployment" endpoint = "https://test-endpoint.com" api_key = "test_api_key" api_version = "2023-03-15-preview" - messages = ChatHistory() - messages.add_user_message("hello world") + chat_history.add_user_message("hello world") complete_prompt_execution_settings = AzureChatPromptExecutionSettings() - kernel = Kernel() - stop = ["!"] complete_prompt_execution_settings.stop = stop @@ -449,13 +439,13 @@ async def test_azure_chat_completion_call_with_data_with_parameters_and_Stop_Def use_extensions=True, ) - await azure_chat_completion.complete_chat(messages, complete_prompt_execution_settings, kernel=kernel) + await azure_chat_completion.complete_chat(chat_history, complete_prompt_execution_settings, kernel=kernel) expected_data_settings = extra.model_dump(exclude_none=True, by_alias=True) mock_create.assert_awaited_once_with( model=deployment_name, - messages=azure_chat_completion._prepare_chat_history_for_request(messages), + messages=azure_chat_completion._prepare_chat_history_for_request(chat_history), temperature=complete_prompt_execution_settings.temperature, top_p=complete_prompt_execution_settings.top_p, n=complete_prompt_execution_settings.number_of_responses, @@ -485,19 +475,16 @@ async def test_azure_chat_completion_call_with_data_with_parameters_and_Stop_Def @pytest.mark.asyncio @patch.object(AsyncChatCompletions, "create") async def test_azure_chat_completion_content_filtering_raises_correct_exception( - mock_create, + mock_create, kernel: Kernel, chat_history: ChatHistory ) -> None: deployment_name = "test_deployment" endpoint = "https://test-endpoint.com" api_key = "test_api_key" api_version = "2023-03-15-preview" prompt = "some prompt that would trigger the content filtering" - messages = ChatHistory() - messages.add_user_message(prompt) + chat_history.add_user_message(prompt) complete_prompt_execution_settings = AzureChatPromptExecutionSettings() - kernel = Kernel() - mock_create.side_effect = openai.BadRequestError( CONTENT_FILTERED_ERROR_FULL_MESSAGE, response=Response(400, request=Request("POST", endpoint)), @@ -527,7 +514,7 @@ async def test_azure_chat_completion_content_filtering_raises_correct_exception( ) with pytest.raises(ContentFilterAIException, match="service encountered a content error") as exc_info: - await azure_chat_completion.complete_chat(messages, complete_prompt_execution_settings, kernel=kernel) + await azure_chat_completion.complete_chat(chat_history, complete_prompt_execution_settings, kernel=kernel) content_filter_exc = exc_info.value assert content_filter_exc.param == "prompt" @@ -538,19 +525,16 @@ async def test_azure_chat_completion_content_filtering_raises_correct_exception( @pytest.mark.asyncio @patch.object(AsyncChatCompletions, "create") async def test_azure_chat_completion_content_filtering_without_response_code_raises_with_default_code( - mock_create, + mock_create, kernel: Kernel, chat_history: ChatHistory ) -> None: deployment_name = "test_deployment" endpoint = "https://test-endpoint.com" api_key = "test_api_key" api_version = "2023-03-15-preview" prompt = "some prompt that would trigger the content filtering" - messages = ChatHistory() - messages.add_user_message(prompt) + chat_history.add_user_message(prompt) complete_prompt_execution_settings = AzureChatPromptExecutionSettings() 
- kernel = Kernel() - mock_create.side_effect = openai.BadRequestError( CONTENT_FILTERED_ERROR_FULL_MESSAGE, response=Response(400, request=Request("POST", endpoint)), @@ -579,25 +563,22 @@ async def test_azure_chat_completion_content_filtering_without_response_code_rai ) with pytest.raises(ContentFilterAIException, match="service encountered a content error"): - await azure_chat_completion.complete_chat(messages, complete_prompt_execution_settings, kernel=kernel) + await azure_chat_completion.complete_chat(chat_history, complete_prompt_execution_settings, kernel=kernel) @pytest.mark.asyncio @patch.object(AsyncChatCompletions, "create") async def test_azure_chat_completion_bad_request_non_content_filter( - mock_create, + mock_create, kernel: Kernel, chat_history: ChatHistory ) -> None: deployment_name = "test_deployment" endpoint = "https://test-endpoint.com" api_key = "test_api_key" api_version = "2023-03-15-preview" prompt = "some prompt that would trigger the content filtering" - messages = ChatHistory() - messages.add_user_message(prompt) + chat_history.add_user_message(prompt) complete_prompt_execution_settings = AzureChatPromptExecutionSettings() - kernel = Kernel() - mock_create.side_effect = openai.BadRequestError( "The request was bad.", response=Response(400, request=Request("POST", endpoint)), body={} ) @@ -610,22 +591,19 @@ async def test_azure_chat_completion_bad_request_non_content_filter( ) with pytest.raises(ServiceResponseException, match="service failed to complete the prompt"): - await azure_chat_completion.complete_chat(messages, complete_prompt_execution_settings, kernel=kernel) + await azure_chat_completion.complete_chat(chat_history, complete_prompt_execution_settings, kernel=kernel) @pytest.mark.asyncio @patch.object(AsyncChatCompletions, "create") -async def test_azure_chat_completion_no_kernel_provided_throws_error( - mock_create, -) -> None: +async def test_azure_chat_completion_no_kernel_provided_throws_error(mock_create, chat_history: ChatHistory) -> None: deployment_name = "test_deployment" endpoint = "https://test-endpoint.com" api_key = "test_api_key" api_version = "2023-03-15-preview" prompt = "some prompt that would trigger the content filtering" - messages = ChatHistory() - messages.add_user_message(prompt) - complete_prompt_execution_settings = AzureChatPromptExecutionSettings() + chat_history.add_user_message(prompt) + complete_prompt_execution_settings = AzureChatPromptExecutionSettings(auto_invoke_kernel_functions=True) mock_create.side_effect = openai.BadRequestError( "The request was bad.", response=Response(400, request=Request("POST", endpoint)), body={} @@ -641,4 +619,4 @@ async def test_azure_chat_completion_no_kernel_provided_throws_error( with pytest.raises( ServiceInvalidExecutionSettingsError, match="The kernel argument is required for OpenAI tool calling" ): - await azure_chat_completion.complete_chat(messages, complete_prompt_execution_settings) + await azure_chat_completion.complete_chat(chat_history, complete_prompt_execution_settings) diff --git a/python/tests/unit/connectors/open_ai/services/test_open_ai_chat_completion_base.py b/python/tests/unit/connectors/open_ai/services/test_open_ai_chat_completion_base.py index 06ba6117f5a6..423b6b2d490a 100644 --- a/python/tests/unit/connectors/open_ai/services/test_open_ai_chat_completion_base.py +++ b/python/tests/unit/connectors/open_ai/services/test_open_ai_chat_completion_base.py @@ -11,28 +11,26 @@ OpenAIStreamingChatMessageContent, ) from 
semantic_kernel.connectors.ai.open_ai.services.open_ai_chat_completion import OpenAIChatCompletionBase +from semantic_kernel.connectors.ai.open_ai.services.tool_call_behavior import ToolCallBehavior from semantic_kernel.contents.chat_history import ChatHistory from semantic_kernel.kernel import Kernel async def mock_async_process_chat_stream_response(arg1, response, tool_call_behavior, chat_history, kernel): mock_content = MagicMock(spec=OpenAIStreamingChatMessageContent) - yield [mock_content] + yield [mock_content], None @pytest.mark.asyncio -async def test_complete_chat_stream(): +async def test_complete_chat_stream(kernel: Kernel): chat_history = MagicMock() settings = MagicMock() mock_response = MagicMock() with patch( - "semantic_kernel.connectors.ai.open_ai.services.open_ai_chat_completion_base.OpenAIChatCompletionBase._get_auto_invoke_execution_settings", - return_value=(True, 3), + "semantic_kernel.connectors.ai.open_ai.services.open_ai_chat_completion_base.OpenAIChatCompletionBase._get_tool_call_behavior", + return_value=ToolCallBehavior(auto_invoke_kernel_functions=True, max_auto_invoke_attempts=3), ) as settings_mock, patch( - "semantic_kernel.connectors.ai.open_ai.services.open_ai_chat_completion_base.OpenAIChatCompletionBase._validate_kernel_for_tool_calling", - return_value=MagicMock(), - ) as validate_kernel_mock, patch( "semantic_kernel.connectors.ai.open_ai.services.open_ai_chat_completion_base.OpenAIChatCompletionBase._prepare_settings", return_value=settings, ) as prepare_settings_mock, patch( @@ -42,8 +40,6 @@ async def test_complete_chat_stream(): "semantic_kernel.connectors.ai.open_ai.services.open_ai_chat_completion_base.OpenAIChatCompletionBase._process_chat_stream_response", new_callable=lambda: mock_async_process_chat_stream_response, ): - kernel = Kernel() - chat_completion_base = OpenAIChatCompletionBase( ai_model_id="test_model_id", service_id="test", client=MagicMock(spec=AsyncOpenAI) ) @@ -52,25 +48,21 @@ async def test_complete_chat_stream(): assert content is not None settings_mock.assert_called_once_with(settings) - validate_kernel_mock.assert_called_once_with(kernel=kernel) prepare_settings_mock.assert_called_with(settings, chat_history, stream_request=True) mock_send_chat_stream_request.assert_called_with(settings) @pytest.mark.parametrize("tool_call", [False, True]) @pytest.mark.asyncio -async def test_complete_chat(tool_call): +async def test_complete_chat(tool_call, kernel: Kernel): chat_history = MagicMock() settings = MagicMock() mock_message_content = MagicMock(spec=List[OpenAIChatMessageContent]) with patch( - "semantic_kernel.connectors.ai.open_ai.services.open_ai_chat_completion_base.OpenAIChatCompletionBase._get_auto_invoke_execution_settings", - return_value=(True, 3), + "semantic_kernel.connectors.ai.open_ai.services.open_ai_chat_completion_base.OpenAIChatCompletionBase._get_tool_call_behavior", + return_value=ToolCallBehavior(auto_invoke_kernel_functions=True, max_auto_invoke_attempts=3), ) as settings_mock, patch( - "semantic_kernel.connectors.ai.open_ai.services.open_ai_chat_completion_base.OpenAIChatCompletionBase._validate_kernel_for_tool_calling", - return_value=MagicMock(), - ) as validate_kernel_mock, patch( "semantic_kernel.connectors.ai.open_ai.services.open_ai_chat_completion_base.OpenAIChatCompletionBase._prepare_settings", return_value=settings, ) as prepare_settings_mock, patch( @@ -82,8 +74,6 @@ async def test_complete_chat(tool_call): ), patch( 
"semantic_kernel.connectors.ai.open_ai.services.open_ai_chat_completion_base.OpenAIChatCompletionBase._process_chat_response_with_tool_call", ) as mock_process_chat_response_with_tool_call: - kernel = Kernel() - chat_completion_base = OpenAIChatCompletionBase( ai_model_id="test_model_id", service_id="test", client=MagicMock(spec=AsyncOpenAI) ) @@ -96,7 +86,6 @@ async def test_complete_chat(tool_call): assert result is not None settings_mock.assert_called_once_with(settings) - validate_kernel_mock.assert_called_once_with(kernel=kernel) prepare_settings_mock.assert_called_with(settings, chat_history, stream_request=False) mock_send_chat_request.assert_called_with(settings) if tool_call: @@ -142,28 +131,48 @@ async def test_process_tool_calls(): @pytest.mark.parametrize( - "completions,auto_invoke_kernel_functions,expected_result", + "completions,tool_call_behavior,expected_result", [ # Case 1: Empty completions, auto_invoke_kernel_functions=False - ([], False, True), + ([], ToolCallBehavior(auto_invoke_kernel_functions=False), True), # Case 2: Completions with OpenAIChatMessageContent, auto_invoke_kernel_functions=True - ([MagicMock(spec=OpenAIChatMessageContent)], True, True), + ([MagicMock(spec=OpenAIChatMessageContent)], ToolCallBehavior(auto_invoke_kernel_functions=True), True), # Case 3: Completions with OpenAIChatMessageContent, no tool_calls, auto_invoke_kernel_functions=True - ([MagicMock(spec=OpenAIChatMessageContent, tool_calls=[])], True, True), + ( + [MagicMock(spec=OpenAIChatMessageContent, tool_calls=[])], + ToolCallBehavior(auto_invoke_kernel_functions=True), + True, + ), # Case 4: Completions with OpenAIStreamingChatMessageContent, auto_invoke_kernel_functions=True - ([MagicMock(spec=OpenAIStreamingChatMessageContent)], True, True), + ( + [MagicMock(spec=OpenAIStreamingChatMessageContent)], + ToolCallBehavior(auto_invoke_kernel_functions=True), + True, + ), # Case 5: Completions with OpenAIStreamingChatMessageContent, auto_invoke_kernel_functions=False - ([MagicMock(spec=OpenAIStreamingChatMessageContent)], False, True), + ( + [MagicMock(spec=OpenAIStreamingChatMessageContent)], + ToolCallBehavior(auto_invoke_kernel_functions=False), + True, + ), # Case 6: Completions with both types, auto_invoke_kernel_functions=True - ([MagicMock(spec=OpenAIChatMessageContent), MagicMock(spec=OpenAIStreamingChatMessageContent)], True, True), + ( + [MagicMock(spec=OpenAIChatMessageContent), MagicMock(spec=OpenAIStreamingChatMessageContent)], + ToolCallBehavior(auto_invoke_kernel_functions=True), + True, + ), # Case 7: Completions with OpenAIChatMessageContent with tool_calls, auto_invoke_kernel_functions=True - ([MagicMock(spec=OpenAIChatMessageContent, tool_calls=[{}])], True, False), + ( + [MagicMock(spec=OpenAIChatMessageContent, tool_calls=[{}])], + ToolCallBehavior(auto_invoke_kernel_functions=True), + False, + ), ], ) @pytest.mark.asyncio -async def test_should_return_completions_response(completions, auto_invoke_kernel_functions, expected_result): +async def test_should_return_completions_response(completions, tool_call_behavior, expected_result): chat_completion_base = OpenAIChatCompletionBase( ai_model_id="test_model_id", service_id="test", client=MagicMock(spec=AsyncOpenAI) ) - result = chat_completion_base._should_return_completions_response(completions, auto_invoke_kernel_functions) + result = chat_completion_base._should_return_completions_response(completions, tool_call_behavior) assert result == expected_result diff --git a/python/tests/unit/contents/test_chat_history.py 
b/python/tests/unit/contents/test_chat_history.py index 81856c3aa1b2..bf0c1293e335 100644 --- a/python/tests/unit/contents/test_chat_history.py +++ b/python/tests/unit/contents/test_chat_history.py @@ -15,11 +15,6 @@ from semantic_kernel.prompt_template.prompt_template_config import PromptTemplateConfig -@pytest.fixture(scope="function") -def chat_history(): - return ChatHistory() - - def test_init_with_system_message_only(): system_msg = "test message" chat_history = ChatHistory(system_message=system_msg)
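
The last hunk removes the file-local chat_history fixture from test_chat_history.py because an equivalent function-scoped fixture is added to python/tests/conftest.py earlier in this patch, so any unit test can request a fresh ChatHistory simply by naming the parameter. Below is a minimal sketch of how a test picks up the shared fixture, assuming the conftest.py addition shown above; the example test name and its assertion are illustrative only and are not part of the patch.

import pytest

from semantic_kernel.contents.chat_history import ChatHistory


@pytest.fixture(scope="function")
def chat_history():
    # Mirrors the fixture added to python/tests/conftest.py in this PR:
    # each test function receives a fresh, empty ChatHistory instance.
    return ChatHistory()


def test_chat_history_fixture_example(chat_history: ChatHistory):
    # Hypothetical test, for illustration only: pytest injects the fixture
    # by matching the argument name against the fixture name.
    chat_history.add_user_message("hello world")
    assert len(chat_history.messages) == 1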