From 469092f35cb0d70062b63822d43ffc315a6fbf43 Mon Sep 17 00:00:00 2001
From: Evan Mattson <35585003+moonbox3@users.noreply.github.com>
Date: Tue, 9 Jul 2024 14:32:06 -0400
Subject: [PATCH] Python: Enable mypy for the HuggingFace connectors. Increase
 unit test code coverage. (#7176)

### Motivation and Context

We have mypy enabled on parts of the code base, but not all. The goal is to
enable it across the entire SK Python codebase. As part of this, we've broken
the work up into sections. Additionally, we're working to improve the unit
test code coverage for these sections of code.

### Description

This PR:
- turns on mypy for the HuggingFace connectors
- adds more unit test coverage for the text completion and text embedding connectors to achieve >95% code coverage
- closes #7133

### Contribution Checklist

- [X] The code builds clean without any errors or warnings
- [X] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations
- [X] All unit tests pass, and I have added new tests where possible
- [X] I didn't break anyone :smile:
---
 python/mypy.ini                                |   4 -
 .../services/hf_text_completion.py             |  60 ++++---
 .../services/hf_text_embedding.py              |  10 +-
 .../hugging_face/test_hf_text_completions.py   | 153 +++++++++++++++++-
 .../hugging_face/test_hf_text_embedding.py     |  66 ++++++++
 5 files changed, 258 insertions(+), 35 deletions(-)
 create mode 100644 python/tests/unit/connectors/hugging_face/test_hf_text_embedding.py

diff --git a/python/mypy.ini b/python/mypy.ini
index 9505beba81df..30d9947c2100 100644
--- a/python/mypy.ini
+++ b/python/mypy.ini
@@ -21,10 +21,6 @@ ignore_errors = true
 ignore_errors = true
 # TODO (eavanvalkenburg): remove this: https://github.com/microsoft/semantic-kernel/issues/7132
 
-[mypy-semantic_kernel.connectors.ai.hugging_face.*]
-ignore_errors = true
-# TODO (eavanvalkenburg): remove this: https://github.com/microsoft/semantic-kernel/issues/7133
-
 [mypy-semantic_kernel.connectors.ai.ollama.*]
 ignore_errors = true
 # TODO (eavanvalkenburg): remove this: https://github.com/microsoft/semantic-kernel/issues/7134
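With the `[mypy-semantic_kernel.connectors.ai.hugging_face.*]` override removed, mypy checks this package like the rest of the code base. The signature changes in the connector below follow from one rule in particular: an override may widen, but must not narrow, a parameter type. A minimal sketch of that rule (illustrative names, not SK code):

```python
# Minimal sketch of the rule mypy now enforces here: an override may
# widen, but not narrow, a parameter type (Liskov substitution).
class Settings: ...
class HFSettings(Settings): ...


class Base:
    def run(self, settings: Settings) -> None: ...


class Bad(Base):
    # mypy: Argument 1 of "run" is incompatible with supertype "Base"
    def run(self, settings: HFSettings) -> None: ...


class Good(Base):
    def run(self, settings: Settings) -> None:
        # Accept the base type, then narrow at runtime, as the connector
        # below does with isinstance() plus an assert.
        if not isinstance(settings, HFSettings):
            raise TypeError("expected HFSettings")
```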
diff --git a/python/semantic_kernel/connectors/ai/hugging_face/services/hf_text_completion.py b/python/semantic_kernel/connectors/ai/hugging_face/services/hf_text_completion.py
index 05465ef607a6..61dd1554ec9d 100644
--- a/python/semantic_kernel/connectors/ai/hugging_face/services/hf_text_completion.py
+++ b/python/semantic_kernel/connectors/ai/hugging_face/services/hf_text_completion.py
@@ -1,22 +1,26 @@
 # Copyright (c) Microsoft. All rights reserved.
 
 import logging
+import sys
 from collections.abc import AsyncGenerator
 from threading import Thread
-from typing import TYPE_CHECKING, Any, Literal
+from typing import Any, Literal
+
+if sys.version_info >= (3, 12):
+    from typing import override  # pragma: no cover
+else:
+    from typing_extensions import override  # pragma: no cover
 
 import torch
 from transformers import AutoTokenizer, TextIteratorStreamer, pipeline
 
 from semantic_kernel.connectors.ai.hugging_face.hf_prompt_execution_settings import HuggingFacePromptExecutionSettings
+from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
 from semantic_kernel.connectors.ai.text_completion_client_base import TextCompletionClientBase
 from semantic_kernel.contents.streaming_text_content import StreamingTextContent
 from semantic_kernel.contents.text_content import TextContent
 from semantic_kernel.exceptions import ServiceInvalidExecutionSettingsError, ServiceResponseException
 
-if TYPE_CHECKING:
-    from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
-
 logger: logging.Logger = logging.getLogger(__name__)
@@ -29,7 +33,7 @@ def __init__(
         self,
         ai_model_id: str,
         task: str | None = "text2text-generation",
-        device: int | None = -1,
+        device: int = -1,
         service_id: str | None = None,
         model_kwargs: dict[str, Any] | None = None,
         pipeline_kwargs: dict[str, Any] | None = None,
@@ -39,22 +43,21 @@ def __init__(
         Args:
             ai_model_id (str): Hugging Face model card string, see
                 https://huggingface.co/models
-            device (Optional[int]): Device to run the model on, defaults to CPU, 0+ for GPU,
-                -- None if using device_map instead. (If both device and device_map
-                are specified, device overrides device_map. If unintended,
-                it can lead to unexpected behavior.)
-            service_id (Optional[str]): Service ID for the AI service.
-            task (Optional[str]): Model completion task type, options are:
+            device (int): Device to run the model on: -1 for CPU (the default),
+                0+ for the index of a GPU. (If both device and device_map
+                are specified, device overrides device_map. If unintended,
+                it can lead to unexpected behavior.) (optional)
+            service_id (str): Service ID for the AI service. (optional)
+            task (str): Model completion task type, options are:
                 - summarization: takes a long text and returns a shorter summary.
                 - text-generation: takes incomplete text and returns a set of completion candidates.
                 - text2text-generation (default): takes an input prompt and returns a completion.
-                text2text-generation is the default as it behaves more like GPT-3+.
-            log : Logger instance. (Deprecated)
-            model_kwargs (Optional[Dict[str, Any]]): Additional dictionary of keyword arguments
-                passed along to the model's `from_pretrained(..., **model_kwargs)` function.
-            pipeline_kwargs (Optional[Dict[str, Any]]): Additional keyword arguments passed along
+                text2text-generation is the default as it behaves more like GPT-3+. (optional)
+            model_kwargs (dict[str, Any]): Additional dictionary of keyword arguments
+                passed along to the model's `from_pretrained(..., **model_kwargs)` function. (optional)
+            pipeline_kwargs (dict[str, Any]): Additional keyword arguments passed along
                 to the specific pipeline init (see the documentation for the corresponding pipeline class
-                for possible values).
+                for possible values). (optional)
 
         Note that this model will be downloaded from the Hugging Face model hub.
         """
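The version-gated import above (mirrored in `hf_text_embedding.py` below) picks up `typing.override` on Python 3.12+ and falls back to `typing_extensions` otherwise; the `# pragma: no cover` markers keep whichever branch the test interpreter skips from counting against coverage. A small self-contained sketch of how the decorator is then used:

```python
# Hedged sketch of the version-gated override import this PR adopts.
import sys

if sys.version_info >= (3, 12):
    from typing import override  # stdlib on 3.12+
else:
    from typing_extensions import override  # backport on older interpreters


class Base:
    def name(self) -> str:
        return "base"


class Child(Base):
    @override  # type checkers flag this if Base.name is ever removed/renamed
    def name(self) -> str:
        return "child"
```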
""" @@ -65,18 +68,19 @@ def __init__( model_kwargs=model_kwargs, **pipeline_kwargs or {}, ) + resolved_device = f"cuda:{device}" if device >= 0 and torch.cuda.is_available() else "cpu" super().__init__( service_id=service_id, ai_model_id=ai_model_id, task=task, - device=(f"cuda:{device}" if device >= 0 and torch.cuda.is_available() else "cpu"), + device=resolved_device, generator=generator, ) async def get_text_contents( self, prompt: str, - settings: HuggingFacePromptExecutionSettings, + settings: PromptExecutionSettings, ) -> list[TextContent]: """This is the method that is called from the kernel to get a response from a text-optimized LLM. @@ -87,10 +91,14 @@ async def get_text_contents( Returns: List[TextContent]: A list of TextContent objects representing the response(s) from the LLM. """ + if not isinstance(settings, HuggingFacePromptExecutionSettings): + settings = self.get_prompt_execution_settings_from_settings(settings) + assert isinstance(settings, HuggingFacePromptExecutionSettings) # nosec + try: results = self.generator(prompt, **settings.prepare_settings_dict()) except Exception as e: - raise ServiceResponseException("Hugging Face completion failed", e) from e + raise ServiceResponseException("Hugging Face completion failed") from e if isinstance(results, list): return [self._create_text_content(results, result) for result in results] return [self._create_text_content(results, results)] @@ -105,7 +113,7 @@ def _create_text_content(self, response: Any, candidate: dict[str, str]) -> Text async def get_streaming_text_contents( self, prompt: str, - settings: HuggingFacePromptExecutionSettings, + settings: PromptExecutionSettings, ) -> AsyncGenerator[list[StreamingTextContent], Any]: """Streams a text completion using a Hugging Face model. @@ -118,6 +126,10 @@ async def get_streaming_text_contents( Yields: List[StreamingTextContent]: List of StreamingTextContent objects. """ + if not isinstance(settings, HuggingFacePromptExecutionSettings): + settings = self.get_prompt_execution_settings_from_settings(settings) + assert isinstance(settings, HuggingFacePromptExecutionSettings) # nosec + if settings.num_return_sequences > 1: raise ServiceInvalidExecutionSettingsError( "HuggingFace TextIteratorStreamer does not stream multiple responses in a parseable format. 
@@ -139,10 +151,10 @@ async def get_streaming_text_contents(
             ]
             thread.join()
-
         except Exception as e:
-            raise ServiceResponseException("Hugging Face completion failed", e) from e
+            raise ServiceResponseException("Hugging Face completion failed") from e
 
-    def get_prompt_execution_settings_class(self) -> "PromptExecutionSettings":
+    @override
+    def get_prompt_execution_settings_class(self) -> type["PromptExecutionSettings"]:
         """Create a request settings object."""
         return HuggingFacePromptExecutionSettings

diff --git a/python/semantic_kernel/connectors/ai/hugging_face/services/hf_text_embedding.py b/python/semantic_kernel/connectors/ai/hugging_face/services/hf_text_embedding.py
index fd54c14d7e4f..057ec5be46dd 100644
--- a/python/semantic_kernel/connectors/ai/hugging_face/services/hf_text_embedding.py
+++ b/python/semantic_kernel/connectors/ai/hugging_face/services/hf_text_embedding.py
@@ -5,9 +5,9 @@
 from typing import Any
 
 if sys.version_info >= (3, 12):
-    from typing import override
+    from typing import override  # pragma: no cover
 else:
-    from typing_extensions import override
+    from typing_extensions import override  # pragma: no cover
 
 import sentence_transformers
 import torch
@@ -28,7 +28,7 @@ class HuggingFaceTextEmbedding(EmbeddingGeneratorBase):
     def __init__(
         self,
         ai_model_id: str,
-        device: int | None = -1,
+        device: int = -1,
         service_id: str | None = None,
     ) -> None:
         """Initializes a new instance of the HuggingFaceTextEmbedding class.
 
@@ -36,8 +36,8 @@ def __init__(
         Args:
             ai_model_id (str): Hugging Face model card string, see
                 https://huggingface.co/sentence-transformers
-            device (Optional[int]): Device to run the model on, -1 for CPU, 0+ for GPU.
-            service_id (Optional[str]): Service ID for the model.
+            device (int): Device to run the model on, -1 for CPU, 0+ for GPU. (optional)
+            service_id (str): Service ID for the model. (optional)
 
         Note that this model will be downloaded from the Hugging Face model hub.
         """
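Both services map the integer `device` argument to a torch device string the same way; the completion service now factors it into a `resolved_device` local before calling `super().__init__`, which also gives mypy a single well-typed expression. The mapping as a standalone sketch (`resolve_device` is an illustrative name, not an SK function):

```python
# Hedged sketch of the device mapping both HF services use: a non-negative
# index selects that CUDA device when one is available, otherwise CPU.
import torch


def resolve_device(device: int = -1) -> str:
    return f"cuda:{device}" if device >= 0 and torch.cuda.is_available() else "cpu"


print(resolve_device(-1))  # always "cpu"
print(resolve_device(0))   # "cuda:0" on a CUDA machine, else "cpu"
```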
diff --git a/python/tests/unit/connectors/hugging_face/test_hf_text_completions.py b/python/tests/unit/connectors/hugging_face/test_hf_text_completions.py
index 4dd4959d0755..96099d8cf5b8 100644
--- a/python/tests/unit/connectors/hugging_face/test_hf_text_completions.py
+++ b/python/tests/unit/connectors/hugging_face/test_hf_text_completions.py
@@ -1,11 +1,14 @@
 # Copyright (c) Microsoft. All rights reserved.
 
-from unittest.mock import Mock, patch
+from threading import Thread
+from unittest.mock import MagicMock, Mock, patch
 
 import pytest
+from transformers import TextIteratorStreamer
 
 from semantic_kernel.connectors.ai.hugging_face.services.hf_text_completion import HuggingFaceTextCompletion
 from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
+from semantic_kernel.exceptions import KernelInvokeException, ServiceResponseException
 from semantic_kernel.functions.kernel_arguments import KernelArguments
 from semantic_kernel.kernel import Kernel
 from semantic_kernel.prompt_template.prompt_template_config import PromptTemplateConfig
@@ -46,8 +49,9 @@ async def test_text_completion(model_name, task, input_str):
     # Configure LLM service
     with patch("semantic_kernel.connectors.ai.hugging_face.services.hf_text_completion.pipeline") as patched_pipeline:
         patched_pipeline.return_value = mock_pipeline
+        service = HuggingFaceTextCompletion(service_id=model_name, ai_model_id=model_name, task=task)
         kernel.add_service(
-            service=HuggingFaceTextCompletion(service_id=model_name, ai_model_id=model_name, task=task),
+            service=service,
         )
 
         exec_settings = PromptExecutionSettings(service_id=model_name, extension_data={"max_new_tokens": 25})
@@ -68,3 +72,148 @@ async def test_text_completion(model_name, task, input_str):
         await kernel.invoke(function_name="TestFunction", plugin_name="TestPlugin", arguments=arguments)
 
     assert mock_pipeline.call_args.args[0] == input_str
+
+
+@pytest.mark.asyncio
+async def test_text_completion_throws():
+    kernel = Kernel()
+
+    model_name = "patrickvonplaten/t5-tiny-random"
+    task = "text2text-generation"
+    input_str = "translate English to Dutch: Hello, how are you?"
+
+    with patch("semantic_kernel.connectors.ai.hugging_face.services.hf_text_completion.pipeline") as patched_pipeline:
+        mock_generator = Mock()
+        mock_generator.side_effect = Exception("Test exception")
+        patched_pipeline.return_value = mock_generator
+        service = HuggingFaceTextCompletion(service_id=model_name, ai_model_id=model_name, task=task)
+        kernel.add_service(service=service)
+
+        exec_settings = PromptExecutionSettings(service_id=model_name, extension_data={"max_new_tokens": 25})
+
+        prompt = "{{$input}}"
+        prompt_template_config = PromptTemplateConfig(template=prompt, execution_settings=exec_settings)
+
+        kernel.add_function(
+            prompt_template_config=prompt_template_config,
+            function_name="TestFunction",
+            plugin_name="TestPlugin",
+            prompt_execution_settings=exec_settings,
+        )
+
+        arguments = KernelArguments(input=input_str)
+
+        with pytest.raises(
+            KernelInvokeException, match="Error occurred while invoking function: 'TestPlugin-TestFunction'"
+        ):
+            await kernel.invoke(function_name="TestFunction", plugin_name="TestPlugin", arguments=arguments)
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    ("model_name", "task", "input_str"),
+    [
+        (
+            "patrickvonplaten/t5-tiny-random",
+            "text2text-generation",
+            "translate English to Dutch: Hello, how are you?",
+        ),
+        ("HuggingFaceM4/tiny-random-LlamaForCausalLM", "text-generation", "Hello, I like sleeping and "),
+    ],
+    ids=["text2text-generation", "text-generation"],
+)
+async def test_text_completion_streaming(model_name, task, input_str):
+    ret = {"summary_text": "test"} if task == "summarization" else {"generated_text": "test"}
+    mock_pipeline = Mock(return_value=ret)
+
+    mock_streamer = MagicMock(spec=TextIteratorStreamer)
+    mock_streamer.__iter__.return_value = iter(["mocked_text"])
+
+    with (
+        patch(
"semantic_kernel.connectors.ai.hugging_face.services.hf_text_completion.pipeline", + return_value=mock_pipeline, + ), + patch( + "semantic_kernel.connectors.ai.hugging_face.services.hf_text_completion.Thread", + side_effect=Mock(spec=Thread), + ), + patch( + "semantic_kernel.connectors.ai.hugging_face.services.hf_text_completion.TextIteratorStreamer", + return_value=mock_streamer, + ) as mock_stream, + ): + mock_stream.return_value = mock_streamer + service = HuggingFaceTextCompletion(service_id=model_name, ai_model_id=model_name, task=task) + prompt = "test prompt" + exec_settings = PromptExecutionSettings(service_id=model_name, extension_data={"max_new_tokens": 25}) + + result = [] + async for content in service.get_streaming_text_contents(prompt, exec_settings): + result.append(content) + + assert len(result) == 1 + assert result[0][0].inner_content == "mocked_text" + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + ("model_name", "task", "input_str"), + [ + ( + "patrickvonplaten/t5-tiny-random", + "text2text-generation", + "translate English to Dutch: Hello, how are you?", + ), + ("HuggingFaceM4/tiny-random-LlamaForCausalLM", "text-generation", "Hello, I like sleeping and "), + ], + ids=["text2text-generation", "text-generation"], +) +async def test_text_completion_streaming_throws(model_name, task, input_str): + ret = {"summary_text": "test"} if task == "summarization" else {"generated_text": "test"} + mock_pipeline = Mock(return_value=ret) + + mock_streamer = MagicMock(spec=TextIteratorStreamer) + mock_streamer.__iter__.return_value = Exception() + + with ( + patch( + "semantic_kernel.connectors.ai.hugging_face.services.hf_text_completion.pipeline", + return_value=mock_pipeline, + ), + patch( + "semantic_kernel.connectors.ai.hugging_face.services.hf_text_completion.Thread", + side_effect=Exception(), + ), + patch( + "semantic_kernel.connectors.ai.hugging_face.services.hf_text_completion.TextIteratorStreamer", + return_value=mock_streamer, + ) as mock_stream, + ): + mock_stream.return_value = mock_streamer + service = HuggingFaceTextCompletion(service_id=model_name, ai_model_id=model_name, task=task) + prompt = "test prompt" + exec_settings = PromptExecutionSettings(service_id=model_name, extension_data={"max_new_tokens": 25}) + + with pytest.raises(ServiceResponseException, match=("Hugging Face completion failed")): + async for _ in service.get_streaming_text_contents(prompt, exec_settings): + pass + + +def test_hugging_face_text_completion_init(): + with ( + patch("semantic_kernel.connectors.ai.hugging_face.services.hf_text_completion.pipeline") as patched_pipeline, + patch( + "semantic_kernel.connectors.ai.hugging_face.services.hf_text_completion.torch.cuda.is_available" + ) as mock_torch_cuda_is_available, + ): + patched_pipeline.return_value = patched_pipeline + mock_torch_cuda_is_available.return_value = False + + ai_model_id = "test-model" + task = "summarization" + device = -1 + + service = HuggingFaceTextCompletion(service_id="test", ai_model_id=ai_model_id, task=task, device=device) + + assert service is not None diff --git a/python/tests/unit/connectors/hugging_face/test_hf_text_embedding.py b/python/tests/unit/connectors/hugging_face/test_hf_text_embedding.py new file mode 100644 index 000000000000..ea4c4b6f7a7a --- /dev/null +++ b/python/tests/unit/connectors/hugging_face/test_hf_text_embedding.py @@ -0,0 +1,66 @@ +# Copyright (c) Microsoft. All rights reserved. 
diff --git a/python/tests/unit/connectors/hugging_face/test_hf_text_embedding.py b/python/tests/unit/connectors/hugging_face/test_hf_text_embedding.py
new file mode 100644
index 000000000000..ea4c4b6f7a7a
--- /dev/null
+++ b/python/tests/unit/connectors/hugging_face/test_hf_text_embedding.py
@@ -0,0 +1,66 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+from unittest.mock import patch
+
+import pytest
+from numpy import array, ndarray
+
+from semantic_kernel.connectors.ai.hugging_face.services.hf_text_embedding import (
+    HuggingFaceTextEmbedding,
+)
+from semantic_kernel.exceptions import ServiceResponseException
+
+
+def test_huggingface_text_embedding_initialization():
+    model_name = "sentence-transformers/all-MiniLM-L6-v2"
+    device = -1
+
+    with patch(
+        "semantic_kernel.connectors.ai.hugging_face.services.hf_text_embedding.sentence_transformers.SentenceTransformer"
+    ) as mock_transformer:
+        mock_instance = mock_transformer.return_value
+        service = HuggingFaceTextEmbedding(service_id="test", ai_model_id=model_name, device=device)
+
+        assert service.ai_model_id == model_name
+        assert service.device == "cpu"
+        assert service.generator == mock_instance
+        mock_transformer.assert_called_once_with(model_name_or_path=model_name, device="cpu")
+
+
+@pytest.mark.asyncio
+async def test_generate_embeddings_success():
+    model_name = "sentence-transformers/all-MiniLM-L6-v2"
+    device = -1
+    texts = ["Hello world!", "How are you?"]
+    mock_embeddings = array([[0.1, 0.2], [0.3, 0.4]])
+
+    with patch(
+        "semantic_kernel.connectors.ai.hugging_face.services.hf_text_embedding.sentence_transformers.SentenceTransformer"
+    ) as mock_transformer:
+        mock_instance = mock_transformer.return_value
+        mock_instance.encode.return_value = mock_embeddings
+
+        service = HuggingFaceTextEmbedding(service_id="test", ai_model_id=model_name, device=device)
+        embeddings = await service.generate_embeddings(texts)
+
+        assert isinstance(embeddings, ndarray)
+        assert embeddings.shape == (2, 2)
+        assert (embeddings == mock_embeddings).all()
+
+
+@pytest.mark.asyncio
+async def test_generate_embeddings_throws():
+    model_name = "sentence-transformers/all-MiniLM-L6-v2"
+    device = -1
+    texts = ["Hello world!", "How are you?"]
+
+    with patch(
+        "semantic_kernel.connectors.ai.hugging_face.services.hf_text_embedding.sentence_transformers.SentenceTransformer"
+    ) as mock_transformer:
+        mock_instance = mock_transformer.return_value
+        mock_instance.encode.side_effect = Exception("Test exception")
+
+        service = HuggingFaceTextEmbedding(service_id="test", ai_model_id=model_name, device=device)
+
+        with pytest.raises(ServiceResponseException, match="Hugging Face embeddings failed"):
+            await service.generate_embeddings(texts)
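The new embedding tests mock `SentenceTransformer` entirely, so for completeness, a hedged usage sketch of the service they cover (this downloads the model, so it is suitable for local experimentation rather than unit testing; the 384-dimension output is a property of all-MiniLM-L6-v2):

```python
# Usage sketch for HuggingFaceTextEmbedding outside the mocked tests.
import asyncio

from semantic_kernel.connectors.ai.hugging_face.services.hf_text_embedding import HuggingFaceTextEmbedding


async def main() -> None:
    service = HuggingFaceTextEmbedding(
        service_id="embed",
        ai_model_id="sentence-transformers/all-MiniLM-L6-v2",
    )
    embeddings = await service.generate_embeddings(["Hello world!", "How are you?"])
    print(embeddings.shape)  # (2, 384) for all-MiniLM-L6-v2


asyncio.run(main())
```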