From 3d92689d472c61b2615efbb1de6dca2fd0435618 Mon Sep 17 00:00:00 2001
From: Dillon Laird
Date: Sat, 8 Jun 2024 15:58:30 -0700
Subject: [PATCH] Fix azure OpenAI Embeddings (#121)

* update azure openai llm/lmm

* added azure openai sim

* added azure to docs

* added azure to docs

* added import
---
 README.md                      | 16 ++++++-------
 docs/api/llm.md                |  2 ++
 docs/api/lmm.md                |  2 ++
 docs/index.md                  | 16 +++++++------
 vision_agent/llm/llm.py        |  2 +-
 vision_agent/lmm/lmm.py        |  9 ++++++--
 vision_agent/utils/__init__.py |  2 +-
 vision_agent/utils/sim.py      | 42 +++++++++++++++++++++++++++++++---
 8 files changed, 69 insertions(+), 22 deletions(-)

diff --git a/README.md b/README.md
index 0506b8cb..f9c66156 100644
--- a/README.md
+++ b/README.md
@@ -159,12 +159,12 @@ export AZURE_OPENAI_ENDPOINT="your-endpoint"
 You can then run Vision Agent using the Azure OpenAI models:
 
 ```python
->>> import vision_agent as va
->>> agent = va.agent.VisionAgent(
->>>     planner=va.llm.AzureOpenAILLM(),
->>>     coder=va.lmm.AzureOpenAILMM(),
->>>     tester=va.lmm.AzureOpenAILMM(),
->>>     debugger=va.lmm.AzureOpenAILMM(),
->>> )
+import vision_agent as va
+agent = va.agent.VisionAgent(
+    planner=va.llm.AzureOpenAILLM(),
+    coder=va.lmm.AzureOpenAILMM(),
+    tester=va.lmm.AzureOpenAILMM(),
+    debugger=va.lmm.AzureOpenAILMM(),
+    tool_recommender=va.utils.AzureSim(),
+)
 ```
-
diff --git a/docs/api/llm.md b/docs/api/llm.md
index 684d327e..ff9537e3 100644
--- a/docs/api/llm.md
+++ b/docs/api/llm.md
@@ -1 +1,3 @@
 ::: vision_agent.llm.OpenAILLM
+
+::: vision_agent.llm.AzureOpenAILLM
diff --git a/docs/api/lmm.md b/docs/api/lmm.md
index 8f6f8fe7..496b6959 100644
--- a/docs/api/lmm.md
+++ b/docs/api/lmm.md
@@ -1 +1,3 @@
 ::: vision_agent.lmm.OpenAILMM
+
+::: vision_agent.lmm.AzureOpenAILMM
diff --git a/docs/index.md b/docs/index.md
index 8c42a288..dc174285 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -149,10 +149,12 @@ export AZURE_OPENAI_ENDPOINT="your-endpoint"
 You can then run Vision Agent using the Azure OpenAI models:
 
 ```python
->>> import vision_agent as va
->>> agent = va.agent.VisionAgent(
->>>     planner=va.llm.AzureOpenAILLM(),
->>>     coder=va.lmm.AzureOpenAILMM(),
->>>     tester=va.lmm.AzureOpenAILMM(),
->>>     debugger=va.lmm.AzureOpenAILMM(),
->>> )
+import vision_agent as va
+agent = va.agent.VisionAgent(
+    planner=va.llm.AzureOpenAILLM(),
+    coder=va.lmm.AzureOpenAILMM(),
+    tester=va.lmm.AzureOpenAILMM(),
+    debugger=va.lmm.AzureOpenAILMM(),
+    tool_recommender=va.utils.AzureSim(),
+)
+```
diff --git a/vision_agent/llm/llm.py b/vision_agent/llm/llm.py
index 7904cea0..417fe6b8 100644
--- a/vision_agent/llm/llm.py
+++ b/vision_agent/llm/llm.py
@@ -148,7 +148,7 @@ def generate_image_qa_tool(self, question: str) -> Callable:
 class AzureOpenAILLM(OpenAILLM):
     def __init__(
         self,
-        model_name: str = "gpt-4-turbo-preview",
+        model_name: str = "gpt-4o",
         api_key: Optional[str] = None,
         api_version: str = "2024-02-01",
         azure_endpoint: Optional[str] = None,
diff --git a/vision_agent/lmm/lmm.py b/vision_agent/lmm/lmm.py
index a8fa8312..633192f4 100644
--- a/vision_agent/lmm/lmm.py
+++ b/vision_agent/lmm/lmm.py
@@ -286,11 +286,12 @@ def generate_image_qa_tool(self, question: str) -> Callable:
 class AzureOpenAILMM(OpenAILMM):
     def __init__(
         self,
-        model_name: str = "gpt-4-vision-preview",
+        model_name: str = "gpt-4o",
         api_key: Optional[str] = None,
         api_version: str = "2024-02-01",
         azure_endpoint: Optional[str] = None,
         max_tokens: int = 1024,
+        json_mode: bool = False,
         **kwargs: Any,
     ):
         if not api_key:
@@ -307,7 +308,11 @@ def __init__(
             api_key=api_key, api_version=api_version, azure_endpoint=azure_endpoint
         )
         self.model_name = model_name
-        self.max_tokens = max_tokens
+
+        if "max_tokens" not in kwargs:
+            kwargs["max_tokens"] = max_tokens
+        if json_mode:
+            kwargs["response_format"] = {"type": "json_object"}
         self.kwargs = kwargs
 
diff --git a/vision_agent/utils/__init__.py b/vision_agent/utils/__init__.py
index 18c2a53b..b440db51 100644
--- a/vision_agent/utils/__init__.py
+++ b/vision_agent/utils/__init__.py
@@ -6,5 +6,5 @@
     Logs,
     Result,
 )
-from .sim import Sim, load_sim, merge_sim
+from .sim import AzureSim, Sim, load_sim, merge_sim
 from .video import extract_frames_from_video
diff --git a/vision_agent/utils/sim.py b/vision_agent/utils/sim.py
index cdfab0d0..edd69d96 100644
--- a/vision_agent/utils/sim.py
+++ b/vision_agent/utils/sim.py
@@ -1,9 +1,10 @@
+import os
 from pathlib import Path
 from typing import Dict, List, Optional, Sequence, Union
 
 import numpy as np
 import pandas as pd
-from openai import Client
+from openai import AzureOpenAI, Client, OpenAI
 from scipy.spatial.distance import cosine  # type: ignore
 
@@ -33,9 +34,9 @@ def __init__(
         """
         self.df = df
         if not api_key:
-            self.client = Client()
+            self.client = OpenAI()
         else:
-            self.client = Client(api_key=api_key)
+            self.client = OpenAI(api_key=api_key)
         self.model = model
 
         if "embs" not in df.columns and sim_key is None:
@@ -78,6 +79,41 @@ def top_k(
         return res[[c for c in res.columns if c != "embs"]].to_dict(orient="records")
 
 
+class AzureSim(Sim):
+    def __init__(
+        self,
+        df: pd.DataFrame,
+        sim_key: Optional[str] = None,
+        api_key: Optional[str] = None,
+        api_version: str = "2024-02-01",
+        azure_endpoint: Optional[str] = None,
+        model: str = "text-embedding-3-small",
+    ) -> None:
+        if not api_key:
+            api_key = os.getenv("AZURE_OPENAI_API_KEY")
+        if not azure_endpoint:
+            azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
+
+        if not api_key:
+            raise ValueError("Azure OpenAI API key is required.")
+        if not azure_endpoint:
+            raise ValueError("Azure OpenAI endpoint is required.")
+
+        self.df = df
+        self.client = AzureOpenAI(
+            api_key=api_key, api_version=api_version, azure_endpoint=azure_endpoint
+        )
+
+        self.model = model
+        if "embs" not in df.columns and sim_key is None:
+            raise ValueError("key is required if no column 'embs' is present.")
+
+        if sim_key is not None:
+            self.df["embs"] = self.df[sim_key].apply(
+                lambda x: get_embedding(self.client, x, model=self.model)
+            )
+
+
 def merge_sim(sim1: Sim, sim2: Sim) -> Sim:
     return Sim(pd.concat([sim1.df, sim2.df], ignore_index=True))
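
Note: the snippet below is not part of the patch. It is a minimal sketch of how the new `AzureSim` class from `vision_agent/utils/sim.py` might be exercised on its own, assuming `AZURE_OPENAI_API_KEY` and `AZURE_OPENAI_ENDPOINT` are exported (the constructor falls back to `os.getenv` for both), that an Azure embedding deployment named `text-embedding-3-small` exists, and that the inherited `Sim.top_k` accepts a query string plus a result count `k`. The toy DataFrame and its `doc` column are hypothetical.

```python
import pandas as pd

from vision_agent.utils import AzureSim

# Hypothetical tool descriptions; sim_key="doc" tells AzureSim which column
# to embed with the Azure OpenAI embedding deployment.
df = pd.DataFrame(
    {
        "doc": [
            "detect objects in an image",
            "extract frames from a video",
        ]
    }
)

# Reads AZURE_OPENAI_API_KEY and AZURE_OPENAI_ENDPOINT from the environment
# and raises ValueError if either is missing.
sim = AzureSim(df, sim_key="doc")

# top_k is inherited from Sim: it embeds the query, ranks rows by cosine
# similarity, and returns the top matches with the "embs" column dropped.
print(sim.top_k("find cars in a photo", k=1))
```

Because `AzureSim` only overrides `__init__` to swap in an `AzureOpenAI` client, the embedding and ranking logic still comes from `Sim` unchanged.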