From 3d92689d472c61b2615efbb1de6dca2fd0435618 Mon Sep 17 00:00:00 2001
From: Dillon Laird
Date: Sat, 8 Jun 2024 15:58:30 -0700
Subject: [PATCH] Fix azure OpenAI Embeddings (#121)

* update azure openai llm/lmm

* added azure openai sim

* added azure to docs

* added azure to docs

* added import
---
 README.md                      | 16 ++++++-------
 docs/api/llm.md                |  2 ++
 docs/api/lmm.md                |  2 ++
 docs/index.md                  | 16 +++++++------
 vision_agent/llm/llm.py        |  2 +-
 vision_agent/lmm/lmm.py        |  9 ++++++--
 vision_agent/utils/__init__.py |  2 +-
 vision_agent/utils/sim.py      | 42 +++++++++++++++++++++++++++++++---
 8 files changed, 69 insertions(+), 22 deletions(-)

diff --git a/README.md b/README.md
index 0506b8cb..f9c66156 100644
--- a/README.md
+++ b/README.md
@@ -159,12 +159,12 @@ export AZURE_OPENAI_ENDPOINT="your-endpoint"
 You can then run Vision Agent using the Azure OpenAI models:
 
 ```python
->>> import vision_agent as va
->>> agent = va.agent.VisionAgent(
->>>     planner=va.llm.AzureOpenAILLM(),
->>>     coder=va.lmm.AzureOpenAILMM(),
->>>     tester=va.lmm.AzureOpenAILMM(),
->>>     debugger=va.lmm.AzureOpenAILMM(),
->>> )
+import vision_agent as va
+agent = va.agent.VisionAgent(
+    planner=va.llm.AzureOpenAILLM(),
+    coder=va.lmm.AzureOpenAILMM(),
+    tester=va.lmm.AzureOpenAILMM(),
+    debugger=va.lmm.AzureOpenAILMM(),
+    tool_recommender=va.utils.AzureSim(),
+)
 ```
-
diff --git a/docs/api/llm.md b/docs/api/llm.md
index 684d327e..ff9537e3 100644
--- a/docs/api/llm.md
+++ b/docs/api/llm.md
@@ -1 +1,3 @@
 ::: vision_agent.llm.OpenAILLM
+
+::: vision_agent.llm.AzureOpenAILLM
diff --git a/docs/api/lmm.md b/docs/api/lmm.md
index 8f6f8fe7..496b6959 100644
--- a/docs/api/lmm.md
+++ b/docs/api/lmm.md
@@ -1 +1,3 @@
 ::: vision_agent.lmm.OpenAILMM
+
+::: vision_agent.lmm.AzureOpenAILMM
diff --git a/docs/index.md b/docs/index.md
index 8c42a288..dc174285 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -149,10 +149,12 @@ export AZURE_OPENAI_ENDPOINT="your-endpoint"
 You can then run Vision Agent using the Azure OpenAI models:
 
 ```python
->>> import vision_agent as va
->>> agent = va.agent.VisionAgent(
->>>     planner=va.llm.AzureOpenAILLM(),
->>>     coder=va.lmm.AzureOpenAILMM(),
->>>     tester=va.lmm.AzureOpenAILMM(),
->>>     debugger=va.lmm.AzureOpenAILMM(),
->>> )
+import vision_agent as va
+agent = va.agent.VisionAgent(
+    planner=va.llm.AzureOpenAILLM(),
+    coder=va.lmm.AzureOpenAILMM(),
+    tester=va.lmm.AzureOpenAILMM(),
+    debugger=va.lmm.AzureOpenAILMM(),
+    tool_recommender=va.utils.AzureSim(),
+)
+```
diff --git a/vision_agent/llm/llm.py b/vision_agent/llm/llm.py
index 7904cea0..417fe6b8 100644
--- a/vision_agent/llm/llm.py
+++ b/vision_agent/llm/llm.py
@@ -148,7 +148,7 @@ def generate_image_qa_tool(self, question: str) -> Callable:
 class AzureOpenAILLM(OpenAILLM):
     def __init__(
         self,
-        model_name: str = "gpt-4-turbo-preview",
+        model_name: str = "gpt-4o",
         api_key: Optional[str] = None,
         api_version: str = "2024-02-01",
         azure_endpoint: Optional[str] = None,
diff --git a/vision_agent/lmm/lmm.py b/vision_agent/lmm/lmm.py
index a8fa8312..633192f4 100644
--- a/vision_agent/lmm/lmm.py
+++ b/vision_agent/lmm/lmm.py
@@ -286,11 +286,12 @@ def generate_image_qa_tool(self, question: str) -> Callable:
 class AzureOpenAILMM(OpenAILMM):
     def __init__(
         self,
-        model_name: str = "gpt-4-vision-preview",
+        model_name: str = "gpt-4o",
         api_key: Optional[str] = None,
         api_version: str = "2024-02-01",
         azure_endpoint: Optional[str] = None,
         max_tokens: int = 1024,
+        json_mode: bool = False,
         **kwargs: Any,
     ):
         if not api_key:
@@ -307,7 +308,11 @@ def __init__(
             api_key=api_key, api_version=api_version, azure_endpoint=azure_endpoint
         )
         self.model_name = model_name
-        self.max_tokens = max_tokens
+
+        if "max_tokens" not in kwargs:
+            kwargs["max_tokens"] = max_tokens
+        if json_mode:
+            kwargs["response_format"] = {"type": "json_object"}
         self.kwargs = kwargs
 
diff --git a/vision_agent/utils/__init__.py b/vision_agent/utils/__init__.py
index 18c2a53b..b440db51 100644
--- a/vision_agent/utils/__init__.py
+++ b/vision_agent/utils/__init__.py
@@ -6,5 +6,5 @@
     Logs,
     Result,
 )
-from .sim import Sim, load_sim, merge_sim
+from .sim import AzureSim, Sim, load_sim, merge_sim
 from .video import extract_frames_from_video
diff --git a/vision_agent/utils/sim.py b/vision_agent/utils/sim.py
index cdfab0d0..edd69d96 100644
--- a/vision_agent/utils/sim.py
+++ b/vision_agent/utils/sim.py
@@ -1,9 +1,10 @@
+import os
 from pathlib import Path
 from typing import Dict, List, Optional, Sequence, Union
 
 import numpy as np
 import pandas as pd
-from openai import Client
+from openai import AzureOpenAI, Client, OpenAI
 from scipy.spatial.distance import cosine  # type: ignore
 
@@ -33,9 +34,9 @@ def __init__(
         """
         self.df = df
         if not api_key:
-            self.client = Client()
+            self.client = OpenAI()
         else:
-            self.client = Client(api_key=api_key)
+            self.client = OpenAI(api_key=api_key)
         self.model = model
 
         if "embs" not in df.columns and sim_key is None:
@@ -78,6 +79,41 @@ def top_k(
         return res[[c for c in res.columns if c != "embs"]].to_dict(orient="records")
 
 
+class AzureSim(Sim):
+    def __init__(
+        self,
+        df: pd.DataFrame,
+        sim_key: Optional[str] = None,
+        api_key: Optional[str] = None,
+        api_version: str = "2024-02-01",
+        azure_endpoint: Optional[str] = None,
+        model: str = "text-embedding-3-small",
+    ) -> None:
+        if not api_key:
+            api_key = os.getenv("AZURE_OPENAI_API_KEY")
+        if not azure_endpoint:
+            azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
+
+        if not api_key:
+            raise ValueError("Azure OpenAI API key is required.")
+        if not azure_endpoint:
+            raise ValueError("Azure OpenAI endpoint is required.")
+
+        self.df = df
+        self.client = AzureOpenAI(
+            api_key=api_key, api_version=api_version, azure_endpoint=azure_endpoint
+        )
+
+        self.model = model
+        if "embs" not in df.columns and sim_key is None:
+            raise ValueError("key is required if no column 'embs' is present.")
+
+        if sim_key is not None:
+            self.df["embs"] = self.df[sim_key].apply(
+                lambda x: get_embedding(self.client, x, model=self.model)
+            )
+
+
 def merge_sim(sim1: Sim, sim2: Sim) -> Sim:
     return Sim(pd.concat([sim1.df, sim2.df], ignore_index=True))
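
Note: the snippet below is not part of the patch. It is a minimal sketch of how the new `AzureSim` class from `vision_agent/utils/sim.py` might be exercised on its own, assuming `AZURE_OPENAI_API_KEY` and `AZURE_OPENAI_ENDPOINT` are exported (the constructor falls back to `os.getenv` for both), that an Azure embedding deployment named `text-embedding-3-small` exists, and that the inherited `Sim.top_k` accepts a query string plus a result count `k`. The toy DataFrame and its `doc` column are hypothetical.

```python
import pandas as pd

from vision_agent.utils import AzureSim

# Hypothetical tool descriptions; sim_key="doc" tells AzureSim which column
# to embed with the Azure OpenAI embedding deployment.
df = pd.DataFrame(
    {
        "doc": [
            "detect objects in an image",
            "extract frames from a video",
        ]
    }
)

# Reads AZURE_OPENAI_API_KEY and AZURE_OPENAI_ENDPOINT from the environment
# and raises ValueError if either is missing.
sim = AzureSim(df, sim_key="doc")

# top_k is inherited from Sim: it embeds the query, ranks rows by cosine
# similarity, and returns the top matches with the "embs" column dropped.
print(sim.top_k("find cars in a photo", k=1))
```

Because `AzureSim` only overrides `__init__` to swap in an `AzureOpenAI` client, the embedding and ranking logic still comes from `Sim` unchanged.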