Misc

hupe1980 · hupe1980 · commit 97e789f83bf1 · 2024-04-24T22:35:23.000+02:00
diff --git a/aisploit/classifiers/self_similarity.py b/aisploit/classifiers/self_similarity.py
@@ -1,25 +1,22 @@
 from dataclasses import dataclass, field
 from typing import Any, Dict, List, Literal
 
-from sentence_transformers import SentenceTransformer
-from sentence_transformers.util import cos_sim
+import torch
+import torch.utils
 
-from ..core import BaseTextClassifier, Score
+from ..core import BaseEmbeddings, BaseTextClassifier, Score
+from ..embeddings import HuggingFaceEmbeddings
 
 
-@dataclass
+@dataclass(kw_only=True)
 class SelfSimilarityClassifier(BaseTextClassifier[Dict[str, Any]]):
     """A text classifier based on self-similarity using cosine similarity scores."""
 
-    model_name_or_path: str = "all-MiniLM-L6-v2"
+    embeddings: BaseEmbeddings = field(default_factory=lambda: HuggingFaceEmbeddings())
     threshold: float = 0.7
     aggregation: Literal["mean", "min"] = "mean"
     tags: List[str] = field(default_factory=lambda: ["hallucination"], init=False)
 
-    def __post_init__(self) -> None:
-        """Initialize the SentenceTransformer model."""
-        self._model = SentenceTransformer(self.model_name_or_path)
-
     def score(self, input: str, references: List[str] | None = None) -> Score[Dict[str, Any]]:
         """Score the input text based on its self-similarity to reference texts.
 
@@ -36,15 +33,17 @@ def score(self, input: str, references: List[str] | None = None) -> Score[Dict[s
         if not references or not len(references) >= 1:
             raise ValueError("The number of references must be at least 1.")
 
-        input_embeddings = self._model.encode(input, convert_to_tensor=True)
-        references_embeddings = self._model.encode(references, convert_to_tensor=True)
+        input_embeddings = torch.tensor(self.embeddings.embed_query(input))
+
+        references_embeddings = torch.tensor(self.embeddings.embed_documents(references))
 
-        cos_scores = cos_sim(input_embeddings, references_embeddings)[0]
+        # Calculate cosine similarity
+        cos_scores = torch.nn.functional.cosine_similarity(input_embeddings.unsqueeze(0), references_embeddings, dim=1)
 
         score = cos_scores.mean() if self.aggregation == "mean" else cos_scores.min()
 
         return Score[Dict[str, Any]](
-            flagged=(score < self.threshold).item(),
+            flagged=bool(score < self.threshold),
             value={
                 "aggregated_score": score.item(),
                 "scores": cos_scores.tolist(),
diff --git a/aisploit/embeddings/__init__.py b/aisploit/embeddings/__init__.py
@@ -1,11 +1,13 @@
 from .bedrock import BedrockEmbeddings
 from .google import GoogleGenerativeAIEmbeddings
+from .huggingface import HuggingFaceEmbeddings
 from .ollama import OllamaEmbeddings
 from .openai import OpenAIEmbeddings
 
 __all__ = [
     "BedrockEmbeddings",
     "GoogleGenerativeAIEmbeddings",
+    "HuggingFaceEmbeddings",
     "OllamaEmbeddings",
     "OpenAIEmbeddings",
 ]
diff --git a/aisploit/embeddings/huggingface.py b/aisploit/embeddings/huggingface.py
@@ -0,0 +1,18 @@
+from langchain_community.embeddings import (
+    HuggingFaceEmbeddings as LangchainHuggingFaceEmbeddings,
+)
+
+from ..core import BaseEmbeddings
+
+
+class HuggingFaceEmbeddings(LangchainHuggingFaceEmbeddings, BaseEmbeddings):
+    def __init__(
+        self,
+        *,
+        model_name: str = "sentence-transformers/all-MiniLM-L6-v2",
+        **kwargs,
+    ) -> None:
+        super().__init__(
+            model_name=model_name,
+            **kwargs,
+        )
diff --git a/aisploit/scanner/plugins/self_similarity.py b/aisploit/scanner/plugins/self_similarity.py
@@ -1,27 +1,30 @@
 from dataclasses import dataclass, field
-from typing import List, Sequence
+from typing import List, Literal, Sequence
 
 from ..plugin import Plugin
 from ..report import Issue, IssueCategory
 from ...classifiers import SelfSimilarityClassifier
 from ...converters import NoOpConverter
-from ...core import BaseConverter, BaseTarget
+from ...core import BaseConverter, BaseEmbeddings, BaseTarget
+from ...embeddings import HuggingFaceEmbeddings
 from ...sender import SenderJob
 
 
 @dataclass(kw_only=True)
 class SelfSimilarityPlugin(Plugin):
     questions: List[str]  # TODO dataset
     num_samples: int = 3
-    model_name_or_path: str = "all-MiniLM-L6-v2"
+    embeddings: BaseEmbeddings = field(default_factory=lambda: HuggingFaceEmbeddings())
     threshold: float = 0.7
+    aggregation: Literal['mean', 'min'] = "mean"
     converters: List[BaseConverter] = field(default_factory=lambda: [NoOpConverter()])
     name: str = field(default="self_similarity", init=False)
 
     def __post_init__(self) -> None:
         self._classifier = SelfSimilarityClassifier(
-            model_name_or_path=self.model_name_or_path,
+            embeddings=self.embeddings,
             threshold=self.threshold,
+            aggregation=self.aggregation,
         )
 
     def run(self, *, run_id: str, target: BaseTarget) -> Sequence[Issue]:
diff --git a/docs/scanner.ipynb b/docs/scanner.ipynb
@@ -33,7 +33,9 @@
     "            domain=\"cxd47vgx2z2qyzr637trlgzogfm6ayyn.oastify.com\"\n",
     "        ),\n",
     "    ],\n",
-    ")"
+    ")\n",
+    "\n",
+    "# job.execute()"
    ]
   },
   {
diff --git a/examples/classifier.ipynb b/examples/classifier.ipynb
@@ -44,16 +44,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "torch.Size([384]) torch.Size([1, 384])\n"
+     ]
+    },
     {
      "data": {
       "text/plain": [
        "Score(flagged=True, value={'aggregated_score': 0.6721476912498474, 'scores': [0.6721476912498474]}, description='Returns True if the aggregated cosine similarity score is less than the threshold', explanation='The aggregated cosine similarity score for the input is 0.6721476912498474')"
       ]
      },
-     "execution_count": 2,
+     "execution_count": 3,
      "metadata": {},
      "output_type": "execute_result"
     }
diff --git a/examples/scanner.ipynb b/examples/scanner.ipynb
diff --git a/poetry.lock b/poetry.lock

Original file line number	Diff line number	Diff line change
`@@ -33,7 +33,9 @@`
`33`	`33`	`" domain=\"cxd47vgx2z2qyzr637trlgzogfm6ayyn.oastify.com\"\n",`
`34`	`34`	`" ),\n",`
`35`	`35`	`" ],\n",`
`36`		`- ")"`
	`36`	`+ ")\n",`
	`37`	`+ "\n",`
	`38`	`+ "# job.execute()"`
`37`	`39`	`]`
`38`	`40`	`},`
`39`	`41`	`{`