Python: fix for streaming openai responses, and first parts of fixes for Chat With Your Data (#5387)

### Motivation and Context

<!-- Thank you for your contribution to the semantic-kernel repo!
Please help reviewers and future users by providing the following
information:
  1. Why is this change required?
  2. What problem does it solve?
  3. What scenario does it contribute to?
  4. If it fixes an open issue, please link to the issue here.
-->

### Description

<!-- Describe your changes, the overall approach, and the underlying design.
These notes will help reviewers understand how your code works. Thanks! -->

### Contribution Checklist

<!-- Before submitting this PR, please make sure: -->

- [ ] The code builds clean without any errors or warnings
- [ ] The PR follows the [SK Contribution
Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md)
and the [pre-submission formatting
script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts)
raises no violations
- [ ] All unit tests pass, and I have added new tests where possible
- [ ] I didn't break anyone 😄
eavanvalkenburg authored Mar 8, 2024
1 parent bf5d21c commit 555a7c8
Showing 31 changed files with 523 additions and 354 deletions.
33 changes: 11 additions & 22 deletions python/samples/kernel-syntax-examples/azure_chat_gpt_api.py
@@ -3,17 +3,12 @@
import asyncio
import logging

from dotenv import load_dotenv

import semantic_kernel as sk
import semantic_kernel.connectors.ai.open_ai as sk_oai
from semantic_kernel.contents.chat_history import ChatHistory
from semantic_kernel.prompt_template.input_variable import InputVariable
from semantic_kernel.utils.settings import azure_openai_settings_from_dot_env_as_dict

logging.basicConfig(level=logging.INFO)

load_dotenv()
logging.basicConfig(level=logging.WARNING)

system_message = """
You are a chat bot. Your name is Mosscap and
@@ -45,31 +40,25 @@

## The second method is useful when you are using a single service, and you want to have type checking on the request settings or when you are using multiple instances of the same type of service, for instance gpt-35-turbo and gpt-4, both in openai and both for chat. # noqa: E501 E266
## 3. create the request settings from the kernel based on the registered service class: # noqa: E266
req_settings = kernel.get_service(service_id).get_prompt_execution_settings_class()(service_id=service_id)
req_settings = kernel.get_prompt_execution_settings_from_service_id(service_id=service_id)
req_settings.max_tokens = 2000
req_settings.temperature = 0.7
req_settings.top_p = 0.8
req_settings.auto_invoke_kernel_functions = True
## The third method is the most specific as the returned request settings class is the one that is registered for the service and has some fields already filled in, like the service_id and ai_model_id. # noqa: E501 E266

prompt_template_config = sk.PromptTemplateConfig(
template=system_message
+ """ Summarize the on-going chat history: {{$chat_history}} and respond to this statement: {{$request}}""",
name="chat",
input_variables=[
InputVariable(name="request", description="The user input", is_required=True),
InputVariable(name="chat_history", description="The history of the conversation", is_required=True),
],
execution_settings=req_settings,

chat_function = kernel.create_function_from_prompt(
prompt=system_message + """{{$chat_history}}{{$user_input}}""",
function_name="chat",
plugin_name="chat",
prompt_execution_settings=req_settings,
)

history = ChatHistory()
history.add_user_message("Hi there, who are you?")
history.add_assistant_message("I am Mosscap, a chat bot. I'm trying to figure out what people need.")

chat_function = kernel.create_function_from_prompt(
function_name="chat", plugin_name="chat", prompt_template_config=prompt_template_config
)


async def chat() -> bool:
try:
@@ -89,7 +78,7 @@ async def chat() -> bool:
if stream:
answer = kernel.invoke_stream(
chat_function,
request=user_input,
user_input=user_input,
chat_history=history,
)
print("Mosscap:> ", end="")
@@ -99,7 +88,7 @@ async def chat() -> bool:
return True
answer = await kernel.invoke(
chat_function,
request=user_input,
user_input=user_input,
chat_history=history,
)
print(f"Mosscap:> {answer}")
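To make the new call shape concrete, here is a minimal, hedged sketch (not part of the diff) of how the reworked sample is driven end to end: the chat function is created straight from a prompt that embeds `{{$chat_history}}` and `{{$user_input}}`, request settings are pulled from the kernel by service id, and the function is invoked with `user_input`/`chat_history` arguments instead of `request`. The kernel and service setup and the `service_id` value are assumptions, since that part of the file is not shown in the hunk above.

```python
# Hedged sketch of the updated azure_chat_gpt_api.py call pattern.
# Assumes a .env file with Azure OpenAI settings; service_id is illustrative.
import asyncio

import semantic_kernel as sk
import semantic_kernel.connectors.ai.open_ai as sk_oai
from semantic_kernel.contents.chat_history import ChatHistory
from semantic_kernel.utils.settings import azure_openai_settings_from_dot_env_as_dict

kernel = sk.Kernel()
service_id = "chat-gpt"  # illustrative
kernel.add_service(
    sk_oai.AzureChatCompletion(service_id=service_id, **azure_openai_settings_from_dot_env_as_dict())
)

# Request settings come from the kernel based on the registered service class.
req_settings = kernel.get_prompt_execution_settings_from_service_id(service_id=service_id)
req_settings.max_tokens = 2000
req_settings.temperature = 0.7

chat_function = kernel.create_function_from_prompt(
    prompt="You are a chat bot named Mosscap.{{$chat_history}}{{$user_input}}",
    function_name="chat",
    plugin_name="chat",
    prompt_execution_settings=req_settings,
)

history = ChatHistory()
history.add_user_message("Hi there, who are you?")


async def main() -> None:
    # Arguments are now passed as user_input and chat_history (not "request").
    answer = await kernel.invoke(chat_function, user_input="What can you do?", chat_history=history)
    print(f"Mosscap:> {answer}")


asyncio.run(main())
```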
Next changed file:
@@ -4,32 +4,37 @@

import semantic_kernel as sk
import semantic_kernel.connectors.ai.open_ai as sk_oai
from semantic_kernel.connectors.ai.open_ai.contents.azure_streaming_chat_message_content import (
AzureStreamingChatMessageContent,
)
from semantic_kernel.connectors.ai.open_ai.contents.azure_chat_message_content import AzureChatMessageContent
from semantic_kernel.connectors.ai.open_ai.contents.function_call import FunctionCall
from semantic_kernel.connectors.ai.open_ai.contents.tool_calls import ToolCall
from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.azure_chat_prompt_execution_settings import (
AzureAISearchDataSources,
AzureChatPromptExecutionSettings,
AzureDataSources,
ExtraBody,
)
from semantic_kernel.contents.chat_history import ChatHistory
from semantic_kernel.contents.chat_role import ChatRole
from semantic_kernel.functions.kernel_arguments import KernelArguments
from semantic_kernel.prompt_template.input_variable import InputVariable
from semantic_kernel.prompt_template.prompt_template_config import PromptTemplateConfig
from semantic_kernel.utils.settings import (
azure_aisearch_settings_from_dot_env_as_dict,
azure_openai_settings_from_dot_env_as_dict,
)

kernel = sk.Kernel()

# Load Azure OpenAI Settings
deployment, api_key, endpoint = sk.azure_openai_settings_from_dot_env()
aoai_settings = azure_openai_settings_from_dot_env_as_dict()

# For example, AI Search index may contain the following document:

# Emily and David, two passionate scientists, met during a research expedition to Antarctica.
# Bonded by their love for the natural world and shared curiosity, they uncovered a
# groundbreaking phenomenon in glaciology that could potentially reshape our understanding of climate change.

azure_ai_search_settings = sk.azure_aisearch_settings_from_dot_env_as_dict()
azure_ai_search_settings = azure_aisearch_settings_from_dot_env_as_dict()

# Our example index has fields "source_title", "source_text", "source_url", and "source_file".
# Add fields mapping to the settings to indicate which fields to use for the title, content, URL, and file path.
@@ -49,35 +54,28 @@
# When using data, set use_extensions=True and use the 2023-12-01-preview API version.
chat_service = sk_oai.AzureChatCompletion(
service_id="chat-gpt",
deployment_name=deployment,
api_key=api_key,
endpoint=endpoint,
api_version="2023-12-01-preview",
use_extensions=True,
**aoai_settings,
)
kernel.add_service(chat_service)

prompt_template_config = PromptTemplateConfig(
template="{{$user_input}}",
template="{{$chat_history}}{{$user_input}}",
name="chat",
template_format="semantic-kernel",
input_variables=[
InputVariable(name="chat_history", description="The chat history", is_required=True),
InputVariable(name="request", description="The user input", is_required=True),
],
execution_settings={"default": req_settings},
)

chat = ChatHistory()

chat.add_user_message("Hi there, who are you?")
chat.add_assistant_message("I am an AI assistant here to answer your questions.")

arguments = KernelArguments()

chat_function = kernel.create_function_from_prompt(
plugin_name="ChatBot", function_name="Chat", prompt_template_config=prompt_template_config
)

chat_history = ChatHistory()
chat_history.add_system_message("I am an AI assistant here to answer your questions.")


async def chat() -> bool:
try:
@@ -96,20 +94,34 @@ async def chat() -> bool:
# Non streaming
# answer = await kernel.run(chat_function, input_vars=context_vars)
# print(f"Assistant:> {answer}")
arguments = KernelArguments(user_input=user_input, execution_settings=req_settings)
arguments = KernelArguments(chat_history=chat_history, user_input=user_input, execution_settings=req_settings)

full_message = None
print("Assistant:> ", end="")
async for message in kernel.invoke_stream(chat_function, arguments=arguments):
print(str(message[0]), end="")
full_message = message[0] if not full_message else full_message + message[0]
chat.add_assistant_message(str(full_message))
print("\n")

# The tool message containing cited sources is available in the context
if isinstance(full_message, AzureStreamingChatMessageContent):
chat.add_function_response_message(name="tool", content=full_message.tool_message)
print(f"Tool:> {full_message.tool_message}")
if full_message:
chat_history.add_user_message(user_input)
if hasattr(full_message, "tool_message"):
chat_history.add_message(
AzureChatMessageContent(
role="assistant",
tool_calls=[
ToolCall(
id="chat_with_your_data",
function=FunctionCall(name="chat_with_your_data", arguments=""),
)
],
)
)
chat_history.add_tool_message(full_message.tool_message, {"tool_call_id": "chat_with_your_data"})
if full_message.role is None:
full_message.role = ChatRole.ASSISTANT
chat_history.add_message(full_message)
return True


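Restated outside the diff, the new history bookkeeping after a streamed answer looks roughly like the helper below. `full_message` is assumed to be the aggregated streaming content, and `chat_with_your_data` is the placeholder tool-call id the sample itself uses.

```python
# Hedged sketch of the post-streaming bookkeeping shown above. Assumes `full_message`
# is the aggregated streamed content and may carry a `tool_message` with cited sources.
from semantic_kernel.connectors.ai.open_ai.contents.azure_chat_message_content import AzureChatMessageContent
from semantic_kernel.connectors.ai.open_ai.contents.function_call import FunctionCall
from semantic_kernel.connectors.ai.open_ai.contents.tool_calls import ToolCall
from semantic_kernel.contents.chat_history import ChatHistory
from semantic_kernel.contents.chat_role import ChatRole


def record_turn(chat_history: ChatHistory, user_input: str, full_message) -> None:
    """Record a user turn plus the assistant answer, including any cited-sources tool message."""
    chat_history.add_user_message(user_input)
    if hasattr(full_message, "tool_message") and full_message.tool_message:
        # A synthetic assistant tool call is recorded first so the tool message that follows
        # has a matching tool_call_id when the history is sent back to the service.
        chat_history.add_message(
            AzureChatMessageContent(
                role="assistant",
                tool_calls=[
                    ToolCall(
                        id="chat_with_your_data",
                        function=FunctionCall(name="chat_with_your_data", arguments=""),
                    )
                ],
            )
        )
        chat_history.add_tool_message(full_message.tool_message, {"tool_call_id": "chat_with_your_data"})
    if full_message.role is None:
        full_message.role = ChatRole.ASSISTANT
    chat_history.add_message(full_message)
```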
Next changed file:
@@ -68,11 +68,11 @@
# the format for that is 'PluginName-FunctionName', (i.e. 'math-Add').
# if the model or api version do not support this you will get an error.
prompt_template_config = PromptTemplateConfig(
template="{{$user_input}}",
template="{{$chat_history}}{{$user_input}}",
name="chat",
template_format="semantic-kernel",
input_variables=[
InputVariable(name="history", description="The history of the conversation", is_required=True),
InputVariable(name="chat_history", description="The history of the conversation", is_required=True),
InputVariable(name="user_input", description="The user input", is_required=True),
],
)
@@ -110,6 +110,7 @@ async def chat() -> bool:
print("\n\nExiting chat...")
return False

arguments["chat_history"] = history
arguments["user_input"] = user_input
answer = await kernel.invoke(
functions=chat_function,
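Because the prompt template now references `{{$chat_history}}`, the history must be supplied explicitly on every invocation. A hedged sketch of that call shape, assuming `kernel`, `chat_function`, and `history` are configured as in the sample:

```python
# Hedged sketch of the call shape used above; `kernel`, `chat_function`, and `history`
# are assumed to be configured as in the sample.
from semantic_kernel.functions.kernel_arguments import KernelArguments


async def ask(kernel, chat_function, history, user_input: str) -> str:
    arguments = KernelArguments()
    arguments["chat_history"] = history   # fills the {{$chat_history}} variable
    arguments["user_input"] = user_input  # fills the {{$user_input}} variable
    answer = await kernel.invoke(functions=chat_function, arguments=arguments)
    history.add_user_message(user_input)
    history.add_assistant_message(str(answer))
    return str(answer)
```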
Next changed file:
@@ -14,13 +14,10 @@
from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.open_ai_prompt_execution_settings import (
OpenAIPromptExecutionSettings,
)
from semantic_kernel.connectors.ai.open_ai.utils import (
get_tool_call_object,
)
from semantic_kernel.connectors.ai.open_ai.utils import get_tool_call_object
from semantic_kernel.contents.chat_history import ChatHistory
from semantic_kernel.core_plugins import MathPlugin, TimePlugin
from semantic_kernel.functions.kernel_arguments import KernelArguments
from semantic_kernel.prompt_template.input_variable import InputVariable

if TYPE_CHECKING:
from semantic_kernel.functions.kernel_function import KernelFunction
@@ -58,6 +55,11 @@
kernel.import_plugin_from_object(MathPlugin(), plugin_name="math")
kernel.import_plugin_from_object(TimePlugin(), plugin_name="time")

chat_function = kernel.create_function_from_prompt(
prompt="{{$chat_history}}{{$user_input}}",
plugin_name="ChatBot",
function_name="Chat",
)
# enabling or disabling function calling is done by setting the function_call parameter for the completion.
# when the function_call parameter is set to "auto" the model will decide which function to use, if any.
# if you only want to use a specific function, set the name of that function in this parameter,
@@ -68,6 +70,7 @@
# If configured to be greater than one, this value will be overridden to 1.
execution_settings = sk_oai.OpenAIChatPromptExecutionSettings(
service_id="chat",
ai_model_id="gpt-3.5-turbo-1106",
max_tokens=2000,
temperature=0.7,
top_p=0.8,
@@ -77,30 +80,13 @@
max_auto_invoke_attempts=3,
)

prompt_template_config = sk.PromptTemplateConfig(
template="{{$user_input}}",
name="chat",
template_format="semantic-kernel",
input_variables=[
InputVariable(name="user_input", description="The user input", is_required=True),
InputVariable(name="chat_history", description="The history of the conversation", is_required=True),
],
execution_settings={"chat": execution_settings},
)

history = ChatHistory()

history.add_system_message(system_message)
history.add_user_message("Hi there, who are you?")
history.add_assistant_message("I am Mosscap, a chat bot. I'm trying to figure out what people need.")

arguments = KernelArguments()

chat_function = kernel.create_function_from_prompt(
prompt_template_config=prompt_template_config,
plugin_name="ChatBot",
function_name="Chat",
)
arguments = KernelArguments(settings=execution_settings)


def print_tool_calls(message: Union[OpenAIChatMessageContent, OpenAIStreamingChatMessageContent]) -> None:
@@ -138,7 +124,7 @@ async def handle_streaming(

print("Mosscap:> ", end="")
streamed_chunks: List[OpenAIStreamingChatMessageContent] = []
tool_call_ids_by_index: Dict[int, Any] = {}
tool_call_ids_by_index: Dict[str, Any] = {}

async for message in response:
if not execution_settings.auto_invoke_kernel_functions and isinstance(
@@ -147,11 +133,11 @@
streamed_chunks.append(message[0])
if message[0].tool_calls is not None:
for tc in message[0].tool_calls:
if tc.index not in tool_call_ids_by_index:
tool_call_ids_by_index[tc.index] = tc
if tc.id not in tool_call_ids_by_index:
tool_call_ids_by_index[tc.id] = tc
else:
for tc in message[0].tool_calls:
tool_call_ids_by_index[tc.index] += tc
tool_call_ids_by_index[tc.id] += tc
else:
print(str(message[0]), end="")

@@ -178,7 +164,7 @@ async def chat() -> bool:
print("\n\nExiting chat...")
return False

stream = False
stream = True
if stream:
await handle_streaming(kernel, chat_function, user_input, history, execution_settings)
else:
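The streaming handler now keys partial tool calls by `tc.id` rather than by chunk index. A simplified, hedged sketch of that aggregation (the printing and execution-settings checks from the sample are omitted):

```python
# Hedged sketch of aggregating streamed tool-call chunks by id, simplified from the
# handle_streaming helper above; `response` is the async stream returned by invoke_stream.
from typing import Any, Dict


async def collect_tool_calls(response) -> Dict[str, Any]:
    """Aggregate partial tool calls from a streamed chat response, keyed by tool-call id."""
    tool_calls_by_id: Dict[str, Any] = {}
    async for message in response:
        chunk = message[0]
        tool_calls = getattr(chunk, "tool_calls", None)
        if not tool_calls:
            continue
        for tc in tool_calls:
            if tc.id not in tool_calls_by_id:
                tool_calls_by_id[tc.id] = tc
            else:
                # Later chunks of the same tool call are merged via "+=".
                tool_calls_by_id[tc.id] += tc
    return tool_calls_by_id
```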
Next changed file:
@@ -22,7 +22,7 @@ async def complete_chat(
self,
chat_history: "ChatHistory",
settings: "PromptExecutionSettings",
**kwargs: Dict[str, Any],
**kwargs: Any,
) -> List["ChatMessageContent"]:
"""
This is the method that is called from the kernel to get a response from a chat-optimized LLM.
@@ -43,7 +43,7 @@ async def complete_chat_stream(
self,
chat_history: "ChatHistory",
settings: "PromptExecutionSettings",
**kwargs: Dict[str, Any],
**kwargs: Any,
) -> AsyncIterable[List["StreamingChatMessageContent"]]:
"""
This is the method that is called from the kernel to get a stream response from a chat-optimized LLM.
@@ -82,5 +82,5 @@ def _prepare_chat_history_for_request(

def _chat_message_content_to_dict(self, message: ChatMessageContent) -> Dict[str, Optional[str]]:
"""can be overridden to customize the serialization of the chat message content"""
msg = message.model_dump(exclude_none=True, include=["role", "content"])
msg = message.model_dump(include=["role", "content"])
return msg
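The serialization helper no longer passes `exclude_none=True`, so fields that are `None` (for example, `content` on a message that only carries tool calls) stay in the dict sent to the service. A small illustration of the difference; the `ChatMessageContent` import path is assumed, and the exact dump output may vary:

```python
# Illustration of dropping exclude_none in _chat_message_content_to_dict.
# Assumption: ChatMessageContent lives at semantic_kernel.contents.chat_message_content
# and accepts role/content keyword arguments.
from semantic_kernel.contents.chat_message_content import ChatMessageContent

msg = ChatMessageContent(role="assistant", content=None)

with_none = msg.model_dump(include=["role", "content"])                        # keeps 'content': None
without_none = msg.model_dump(exclude_none=True, include=["role", "content"])  # drops 'content'

assert "content" in with_none
assert "content" not in without_none
```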
Next changed file:
@@ -63,7 +63,7 @@ async def complete_chat(
self,
chat_history: ChatHistory,
settings: GooglePalmPromptExecutionSettings,
**kwargs: Dict[str, Any],
**kwargs: Any,
) -> List[ChatMessageContent]:
"""
This is the method that is called from the kernel to get a response from a chat-optimized LLM.
@@ -114,7 +114,7 @@ async def complete_chat_stream(
self,
messages: List[Tuple[str, str]],
settings: GooglePalmPromptExecutionSettings,
**kwargs: Dict[str, Any],
**kwargs: Any,
):
raise NotImplementedError("Google Palm API does not currently support streaming")

(Diffs for the remaining changed files are not shown here.)
