From 469092f35cb0d70062b63822d43ffc315a6fbf43 Mon Sep 17 00:00:00 2001
From: Evan Mattson <35585003+moonbox3@users.noreply.github.com>
Date: Tue, 9 Jul 2024 14:32:06 -0400
Subject: [PATCH] Python: Enable mypy for the HuggingFace connectors. Increase
 unit test code coverage. (#7176)

### Motivation and Context

We have mypy enabled on parts of the code base, but not all. The goal is to
enable it across the entire SK Python codebase. As part of this, we've broken
the work up into sections. Additionally, we're working to improve the unit
test code coverage for these sections of code.

### Description

This PR:
- turns on mypy for the HuggingFace connectors
- adds more unit test coverage for the text completion and text embedding connectors to achieve >95% code coverage
- closes #7133

### Contribution Checklist

- [X] The code builds clean without any errors or warnings
- [X] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations
- [X] All unit tests pass, and I have added new tests where possible
- [X] I didn't break anyone :smile:
---
 python/mypy.ini                                |   4 -
 .../services/hf_text_completion.py             |  60 ++++---
 .../services/hf_text_embedding.py              |  10 +-
 .../hugging_face/test_hf_text_completions.py   | 153 +++++++++++++++++-
 .../hugging_face/test_hf_text_embedding.py     |  66 ++++++++
 5 files changed, 258 insertions(+), 35 deletions(-)
 create mode 100644 python/tests/unit/connectors/hugging_face/test_hf_text_embedding.py

diff --git a/python/mypy.ini b/python/mypy.ini
index 9505beba81df..30d9947c2100 100644
--- a/python/mypy.ini
+++ b/python/mypy.ini
@@ -21,10 +21,6 @@ ignore_errors = true
 ignore_errors = true
 # TODO (eavanvalkenburg): remove this: https://github.com/microsoft/semantic-kernel/issues/7132
 
-[mypy-semantic_kernel.connectors.ai.hugging_face.*]
-ignore_errors = true
-# TODO (eavanvalkenburg): remove this: https://github.com/microsoft/semantic-kernel/issues/7133
-
 [mypy-semantic_kernel.connectors.ai.ollama.*]
 ignore_errors = true
 # TODO (eavanvalkenburg): remove this: https://github.com/microsoft/semantic-kernel/issues/7134
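With the `[mypy-semantic_kernel.connectors.ai.hugging_face.*]` override removed, mypy checks this package like the rest of the code base. The signature changes in the connector below follow from one rule in particular: an override may widen, but must not narrow, a parameter type. A minimal sketch of that rule (illustrative names, not SK code):

```python
# Minimal sketch of the rule mypy now enforces here: an override may
# widen, but not narrow, a parameter type (Liskov substitution).
class Settings: ...
class HFSettings(Settings): ...


class Base:
    def run(self, settings: Settings) -> None: ...


class Bad(Base):
    # mypy: Argument 1 of "run" is incompatible with supertype "Base"
    def run(self, settings: HFSettings) -> None: ...


class Good(Base):
    def run(self, settings: Settings) -> None:
        # Accept the base type, then narrow at runtime, as the connector
        # below does with isinstance() plus an assert.
        if not isinstance(settings, HFSettings):
            raise TypeError("expected HFSettings")
```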
diff --git a/python/semantic_kernel/connectors/ai/hugging_face/services/hf_text_completion.py b/python/semantic_kernel/connectors/ai/hugging_face/services/hf_text_completion.py
index 05465ef607a6..61dd1554ec9d 100644
--- a/python/semantic_kernel/connectors/ai/hugging_face/services/hf_text_completion.py
+++ b/python/semantic_kernel/connectors/ai/hugging_face/services/hf_text_completion.py
@@ -1,22 +1,26 @@
 # Copyright (c) Microsoft. All rights reserved.
 
 import logging
+import sys
 from collections.abc import AsyncGenerator
 from threading import Thread
-from typing import TYPE_CHECKING, Any, Literal
+from typing import Any, Literal
+
+if sys.version_info >= (3, 12):
+    from typing import override  # pragma: no cover
+else:
+    from typing_extensions import override  # pragma: no cover
 
 import torch
 from transformers import AutoTokenizer, TextIteratorStreamer, pipeline
 
 from semantic_kernel.connectors.ai.hugging_face.hf_prompt_execution_settings import HuggingFacePromptExecutionSettings
+from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
 from semantic_kernel.connectors.ai.text_completion_client_base import TextCompletionClientBase
 from semantic_kernel.contents.streaming_text_content import StreamingTextContent
 from semantic_kernel.contents.text_content import TextContent
 from semantic_kernel.exceptions import ServiceInvalidExecutionSettingsError, ServiceResponseException
 
-if TYPE_CHECKING:
-    from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
-
 logger: logging.Logger = logging.getLogger(__name__)
@@ -29,7 +33,7 @@ def __init__(
         self,
         ai_model_id: str,
         task: str | None = "text2text-generation",
-        device: int | None = -1,
+        device: int = -1,
         service_id: str | None = None,
         model_kwargs: dict[str, Any] | None = None,
         pipeline_kwargs: dict[str, Any] | None = None,
@@ -39,22 +43,21 @@ def __init__(
         Args:
             ai_model_id (str): Hugging Face model card string, see
                 https://huggingface.co/models
-            device (Optional[int]): Device to run the model on, defaults to CPU, 0+ for GPU,
-                -- None if using device_map instead. (If both device and device_map
-                are specified, device overrides device_map. If unintended,
-                it can lead to unexpected behavior.)
-            service_id (Optional[str]): Service ID for the AI service.
-            task (Optional[str]): Model completion task type, options are:
+            device (int): Device to run the model on: -1 for CPU (the default),
+                0+ for the index of a GPU. (If both device and device_map
+                are specified, device overrides device_map. If unintended,
+                it can lead to unexpected behavior.) (optional)
+            service_id (str): Service ID for the AI service. (optional)
+            task (str): Model completion task type, options are:
                 - summarization: takes a long text and returns a shorter summary.
                 - text-generation: takes incomplete text and returns a set of completion candidates.
                 - text2text-generation (default): takes an input prompt and returns a completion.
-                text2text-generation is the default as it behaves more like GPT-3+.
-            log : Logger instance. (Deprecated)
-            model_kwargs (Optional[Dict[str, Any]]): Additional dictionary of keyword arguments
-                passed along to the model's `from_pretrained(..., **model_kwargs)` function.
-            pipeline_kwargs (Optional[Dict[str, Any]]): Additional keyword arguments passed along
+                text2text-generation is the default as it behaves more like GPT-3+. (optional)
+            model_kwargs (dict[str, Any]): Additional dictionary of keyword arguments
+                passed along to the model's `from_pretrained(..., **model_kwargs)` function. (optional)
+            pipeline_kwargs (dict[str, Any]): Additional keyword arguments passed along
                 to the specific pipeline init (see the documentation for the corresponding pipeline class
-                for possible values).
+                for possible values). (optional)
 
         Note that this model will be downloaded from the Hugging Face model hub.
         """
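The version-gated import above (mirrored in `hf_text_embedding.py` below) picks up `typing.override` on Python 3.12+ and falls back to `typing_extensions` otherwise; the `# pragma: no cover` markers keep whichever branch the test interpreter skips from counting against coverage. A small self-contained sketch of how the decorator is then used:

```python
# Hedged sketch of the version-gated override import this PR adopts.
import sys

if sys.version_info >= (3, 12):
    from typing import override  # stdlib on 3.12+
else:
    from typing_extensions import override  # backport on older interpreters


class Base:
    def name(self) -> str:
        return "base"


class Child(Base):
    @override  # type checkers flag this if Base.name is ever removed/renamed
    def name(self) -> str:
        return "child"
```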
""" @@ -65,18 +68,19 @@ def __init__( model_kwargs=model_kwargs, **pipeline_kwargs or {}, ) + resolved_device = f"cuda:{device}" if device >= 0 and torch.cuda.is_available() else "cpu" super().__init__( service_id=service_id, ai_model_id=ai_model_id, task=task, - device=(f"cuda:{device}" if device >= 0 and torch.cuda.is_available() else "cpu"), + device=resolved_device, generator=generator, ) async def get_text_contents( self, prompt: str, - settings: HuggingFacePromptExecutionSettings, + settings: PromptExecutionSettings, ) -> list[TextContent]: """This is the method that is called from the kernel to get a response from a text-optimized LLM. @@ -87,10 +91,14 @@ async def get_text_contents( Returns: List[TextContent]: A list of TextContent objects representing the response(s) from the LLM. """ + if not isinstance(settings, HuggingFacePromptExecutionSettings): + settings = self.get_prompt_execution_settings_from_settings(settings) + assert isinstance(settings, HuggingFacePromptExecutionSettings) # nosec + try: results = self.generator(prompt, **settings.prepare_settings_dict()) except Exception as e: - raise ServiceResponseException("Hugging Face completion failed", e) from e + raise ServiceResponseException("Hugging Face completion failed") from e if isinstance(results, list): return [self._create_text_content(results, result) for result in results] return [self._create_text_content(results, results)] @@ -105,7 +113,7 @@ def _create_text_content(self, response: Any, candidate: dict[str, str]) -> Text async def get_streaming_text_contents( self, prompt: str, - settings: HuggingFacePromptExecutionSettings, + settings: PromptExecutionSettings, ) -> AsyncGenerator[list[StreamingTextContent], Any]: """Streams a text completion using a Hugging Face model. @@ -118,6 +126,10 @@ async def get_streaming_text_contents( Yields: List[StreamingTextContent]: List of StreamingTextContent objects. """ + if not isinstance(settings, HuggingFacePromptExecutionSettings): + settings = self.get_prompt_execution_settings_from_settings(settings) + assert isinstance(settings, HuggingFacePromptExecutionSettings) # nosec + if settings.num_return_sequences > 1: raise ServiceInvalidExecutionSettingsError( "HuggingFace TextIteratorStreamer does not stream multiple responses in a parseable format. 
@@ -139,10 +151,10 @@ async def get_streaming_text_contents(
             ]
             thread.join()
-
         except Exception as e:
-            raise ServiceResponseException("Hugging Face completion failed", e) from e
+            raise ServiceResponseException("Hugging Face completion failed") from e
 
-    def get_prompt_execution_settings_class(self) -> "PromptExecutionSettings":
+    @override
+    def get_prompt_execution_settings_class(self) -> type["PromptExecutionSettings"]:
         """Create a request settings object."""
         return HuggingFacePromptExecutionSettings

diff --git a/python/semantic_kernel/connectors/ai/hugging_face/services/hf_text_embedding.py b/python/semantic_kernel/connectors/ai/hugging_face/services/hf_text_embedding.py
index fd54c14d7e4f..057ec5be46dd 100644
--- a/python/semantic_kernel/connectors/ai/hugging_face/services/hf_text_embedding.py
+++ b/python/semantic_kernel/connectors/ai/hugging_face/services/hf_text_embedding.py
@@ -5,9 +5,9 @@
 from typing import Any
 
 if sys.version_info >= (3, 12):
-    from typing import override
+    from typing import override  # pragma: no cover
 else:
-    from typing_extensions import override
+    from typing_extensions import override  # pragma: no cover
 
 import sentence_transformers
 import torch
@@ -28,7 +28,7 @@ class HuggingFaceTextEmbedding(EmbeddingGeneratorBase):
     def __init__(
         self,
         ai_model_id: str,
-        device: int | None = -1,
+        device: int = -1,
         service_id: str | None = None,
     ) -> None:
         """Initializes a new instance of the HuggingFaceTextEmbedding class.
 
@@ -36,8 +36,8 @@ def __init__(
         Args:
             ai_model_id (str): Hugging Face model card string, see
                 https://huggingface.co/sentence-transformers
-            device (Optional[int]): Device to run the model on, -1 for CPU, 0+ for GPU.
-            service_id (Optional[str]): Service ID for the model.
+            device (int): Device to run the model on, -1 for CPU, 0+ for GPU. (optional)
+            service_id (str): Service ID for the model. (optional)
 
         Note that this model will be downloaded from the Hugging Face model hub.
         """
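Both services map the integer `device` argument to a torch device string the same way; the completion service now factors it into a `resolved_device` local before calling `super().__init__`, which also gives mypy a single well-typed expression. The mapping as a standalone sketch (`resolve_device` is an illustrative name, not an SK function):

```python
# Hedged sketch of the device mapping both HF services use: a non-negative
# index selects that CUDA device when one is available, otherwise CPU.
import torch


def resolve_device(device: int = -1) -> str:
    return f"cuda:{device}" if device >= 0 and torch.cuda.is_available() else "cpu"


print(resolve_device(-1))  # always "cpu"
print(resolve_device(0))   # "cuda:0" on a CUDA machine, else "cpu"
```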
diff --git a/python/tests/unit/connectors/hugging_face/test_hf_text_completions.py b/python/tests/unit/connectors/hugging_face/test_hf_text_completions.py
index 4dd4959d0755..96099d8cf5b8 100644
--- a/python/tests/unit/connectors/hugging_face/test_hf_text_completions.py
+++ b/python/tests/unit/connectors/hugging_face/test_hf_text_completions.py
@@ -1,11 +1,14 @@
 # Copyright (c) Microsoft. All rights reserved.
 
-from unittest.mock import Mock, patch
+from threading import Thread
+from unittest.mock import MagicMock, Mock, patch
 
 import pytest
+from transformers import TextIteratorStreamer
 
 from semantic_kernel.connectors.ai.hugging_face.services.hf_text_completion import HuggingFaceTextCompletion
 from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
+from semantic_kernel.exceptions import KernelInvokeException, ServiceResponseException
 from semantic_kernel.functions.kernel_arguments import KernelArguments
 from semantic_kernel.kernel import Kernel
 from semantic_kernel.prompt_template.prompt_template_config import PromptTemplateConfig
@@ -46,8 +49,9 @@ async def test_text_completion(model_name, task, input_str):
     # Configure LLM service
     with patch("semantic_kernel.connectors.ai.hugging_face.services.hf_text_completion.pipeline") as patched_pipeline:
         patched_pipeline.return_value = mock_pipeline
+        service = HuggingFaceTextCompletion(service_id=model_name, ai_model_id=model_name, task=task)
         kernel.add_service(
-            service=HuggingFaceTextCompletion(service_id=model_name, ai_model_id=model_name, task=task),
+            service=service,
         )
 
         exec_settings = PromptExecutionSettings(service_id=model_name, extension_data={"max_new_tokens": 25})
@@ -68,3 +72,148 @@ async def test_text_completion(model_name, task, input_str):
         await kernel.invoke(function_name="TestFunction", plugin_name="TestPlugin", arguments=arguments)
 
     assert mock_pipeline.call_args.args[0] == input_str
+
+
+@pytest.mark.asyncio
+async def test_text_completion_throws():
+    kernel = Kernel()
+
+    model_name = "patrickvonplaten/t5-tiny-random"
+    task = "text2text-generation"
+    input_str = "translate English to Dutch: Hello, how are you?"
+
+    with patch("semantic_kernel.connectors.ai.hugging_face.services.hf_text_completion.pipeline") as patched_pipeline:
+        mock_generator = Mock()
+        mock_generator.side_effect = Exception("Test exception")
+        patched_pipeline.return_value = mock_generator
+        service = HuggingFaceTextCompletion(service_id=model_name, ai_model_id=model_name, task=task)
+        kernel.add_service(service=service)
+
+        exec_settings = PromptExecutionSettings(service_id=model_name, extension_data={"max_new_tokens": 25})
+
+        prompt = "{{$input}}"
+        prompt_template_config = PromptTemplateConfig(template=prompt, execution_settings=exec_settings)
+
+        kernel.add_function(
+            prompt_template_config=prompt_template_config,
+            function_name="TestFunction",
+            plugin_name="TestPlugin",
+            prompt_execution_settings=exec_settings,
+        )
+
+        arguments = KernelArguments(input=input_str)
+
+        with pytest.raises(
+            KernelInvokeException, match="Error occurred while invoking function: 'TestPlugin-TestFunction'"
+        ):
+            await kernel.invoke(function_name="TestFunction", plugin_name="TestPlugin", arguments=arguments)
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    ("model_name", "task", "input_str"),
+    [
+        (
+            "patrickvonplaten/t5-tiny-random",
+            "text2text-generation",
+            "translate English to Dutch: Hello, how are you?",
+        ),
+        ("HuggingFaceM4/tiny-random-LlamaForCausalLM", "text-generation", "Hello, I like sleeping and "),
+    ],
+    ids=["text2text-generation", "text-generation"],
+)
+async def test_text_completion_streaming(model_name, task, input_str):
+    ret = {"summary_text": "test"} if task == "summarization" else {"generated_text": "test"}
+    mock_pipeline = Mock(return_value=ret)
+
+    mock_streamer = MagicMock(spec=TextIteratorStreamer)
+    mock_streamer.__iter__.return_value = iter(["mocked_text"])
+
+    with (
+        patch(
"semantic_kernel.connectors.ai.hugging_face.services.hf_text_completion.pipeline", + return_value=mock_pipeline, + ), + patch( + "semantic_kernel.connectors.ai.hugging_face.services.hf_text_completion.Thread", + side_effect=Mock(spec=Thread), + ), + patch( + "semantic_kernel.connectors.ai.hugging_face.services.hf_text_completion.TextIteratorStreamer", + return_value=mock_streamer, + ) as mock_stream, + ): + mock_stream.return_value = mock_streamer + service = HuggingFaceTextCompletion(service_id=model_name, ai_model_id=model_name, task=task) + prompt = "test prompt" + exec_settings = PromptExecutionSettings(service_id=model_name, extension_data={"max_new_tokens": 25}) + + result = [] + async for content in service.get_streaming_text_contents(prompt, exec_settings): + result.append(content) + + assert len(result) == 1 + assert result[0][0].inner_content == "mocked_text" + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + ("model_name", "task", "input_str"), + [ + ( + "patrickvonplaten/t5-tiny-random", + "text2text-generation", + "translate English to Dutch: Hello, how are you?", + ), + ("HuggingFaceM4/tiny-random-LlamaForCausalLM", "text-generation", "Hello, I like sleeping and "), + ], + ids=["text2text-generation", "text-generation"], +) +async def test_text_completion_streaming_throws(model_name, task, input_str): + ret = {"summary_text": "test"} if task == "summarization" else {"generated_text": "test"} + mock_pipeline = Mock(return_value=ret) + + mock_streamer = MagicMock(spec=TextIteratorStreamer) + mock_streamer.__iter__.return_value = Exception() + + with ( + patch( + "semantic_kernel.connectors.ai.hugging_face.services.hf_text_completion.pipeline", + return_value=mock_pipeline, + ), + patch( + "semantic_kernel.connectors.ai.hugging_face.services.hf_text_completion.Thread", + side_effect=Exception(), + ), + patch( + "semantic_kernel.connectors.ai.hugging_face.services.hf_text_completion.TextIteratorStreamer", + return_value=mock_streamer, + ) as mock_stream, + ): + mock_stream.return_value = mock_streamer + service = HuggingFaceTextCompletion(service_id=model_name, ai_model_id=model_name, task=task) + prompt = "test prompt" + exec_settings = PromptExecutionSettings(service_id=model_name, extension_data={"max_new_tokens": 25}) + + with pytest.raises(ServiceResponseException, match=("Hugging Face completion failed")): + async for _ in service.get_streaming_text_contents(prompt, exec_settings): + pass + + +def test_hugging_face_text_completion_init(): + with ( + patch("semantic_kernel.connectors.ai.hugging_face.services.hf_text_completion.pipeline") as patched_pipeline, + patch( + "semantic_kernel.connectors.ai.hugging_face.services.hf_text_completion.torch.cuda.is_available" + ) as mock_torch_cuda_is_available, + ): + patched_pipeline.return_value = patched_pipeline + mock_torch_cuda_is_available.return_value = False + + ai_model_id = "test-model" + task = "summarization" + device = -1 + + service = HuggingFaceTextCompletion(service_id="test", ai_model_id=ai_model_id, task=task, device=device) + + assert service is not None diff --git a/python/tests/unit/connectors/hugging_face/test_hf_text_embedding.py b/python/tests/unit/connectors/hugging_face/test_hf_text_embedding.py new file mode 100644 index 000000000000..ea4c4b6f7a7a --- /dev/null +++ b/python/tests/unit/connectors/hugging_face/test_hf_text_embedding.py @@ -0,0 +1,66 @@ +# Copyright (c) Microsoft. All rights reserved. 
diff --git a/python/tests/unit/connectors/hugging_face/test_hf_text_embedding.py b/python/tests/unit/connectors/hugging_face/test_hf_text_embedding.py
new file mode 100644
index 000000000000..ea4c4b6f7a7a
--- /dev/null
+++ b/python/tests/unit/connectors/hugging_face/test_hf_text_embedding.py
@@ -0,0 +1,66 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+from unittest.mock import patch
+
+import pytest
+from numpy import array, ndarray
+
+from semantic_kernel.connectors.ai.hugging_face.services.hf_text_embedding import (
+    HuggingFaceTextEmbedding,
+)
+from semantic_kernel.exceptions import ServiceResponseException
+
+
+def test_huggingface_text_embedding_initialization():
+    model_name = "sentence-transformers/all-MiniLM-L6-v2"
+    device = -1
+
+    with patch(
+        "semantic_kernel.connectors.ai.hugging_face.services.hf_text_embedding.sentence_transformers.SentenceTransformer"
+    ) as mock_transformer:
+        mock_instance = mock_transformer.return_value
+        service = HuggingFaceTextEmbedding(service_id="test", ai_model_id=model_name, device=device)
+
+        assert service.ai_model_id == model_name
+        assert service.device == "cpu"
+        assert service.generator == mock_instance
+        mock_transformer.assert_called_once_with(model_name_or_path=model_name, device="cpu")
+
+
+@pytest.mark.asyncio
+async def test_generate_embeddings_success():
+    model_name = "sentence-transformers/all-MiniLM-L6-v2"
+    device = -1
+    texts = ["Hello world!", "How are you?"]
+    mock_embeddings = array([[0.1, 0.2], [0.3, 0.4]])
+
+    with patch(
+        "semantic_kernel.connectors.ai.hugging_face.services.hf_text_embedding.sentence_transformers.SentenceTransformer"
+    ) as mock_transformer:
+        mock_instance = mock_transformer.return_value
+        mock_instance.encode.return_value = mock_embeddings
+
+        service = HuggingFaceTextEmbedding(service_id="test", ai_model_id=model_name, device=device)
+        embeddings = await service.generate_embeddings(texts)
+
+        assert isinstance(embeddings, ndarray)
+        assert embeddings.shape == (2, 2)
+        assert (embeddings == mock_embeddings).all()
+
+
+@pytest.mark.asyncio
+async def test_generate_embeddings_throws():
+    model_name = "sentence-transformers/all-MiniLM-L6-v2"
+    device = -1
+    texts = ["Hello world!", "How are you?"]
+
+    with patch(
+        "semantic_kernel.connectors.ai.hugging_face.services.hf_text_embedding.sentence_transformers.SentenceTransformer"
+    ) as mock_transformer:
+        mock_instance = mock_transformer.return_value
+        mock_instance.encode.side_effect = Exception("Test exception")
+
+        service = HuggingFaceTextEmbedding(service_id="test", ai_model_id=model_name, device=device)
+
+        with pytest.raises(ServiceResponseException, match="Hugging Face embeddings failed"):
+            await service.generate_embeddings(texts)
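The new embedding tests mock `SentenceTransformer` entirely, so for completeness, a hedged usage sketch of the service they cover (this downloads the model, so it is suitable for local experimentation rather than unit testing; the 384-dimension output is a property of all-MiniLM-L6-v2):

```python
# Usage sketch for HuggingFaceTextEmbedding outside the mocked tests.
import asyncio

from semantic_kernel.connectors.ai.hugging_face.services.hf_text_embedding import HuggingFaceTextEmbedding


async def main() -> None:
    service = HuggingFaceTextEmbedding(
        service_id="embed",
        ai_model_id="sentence-transformers/all-MiniLM-L6-v2",
    )
    embeddings = await service.generate_embeddings(["Hello world!", "How are you?"])
    print(embeddings.shape)  # (2, 384) for all-MiniLM-L6-v2


asyncio.run(main())
```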