From 78971c4037e8cb9aa07b56168392b7a93c3c2913 Mon Sep 17 00:00:00 2001 From: Dillon Laird Date: Fri, 5 Apr 2024 16:29:36 -0700 Subject: [PATCH 1/6] added api_key in init arg --- vision_agent/llm/llm.py | 6 +++++- vision_agent/lmm/lmm.py | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/vision_agent/llm/llm.py b/vision_agent/llm/llm.py index e97bcdeb..d2eef9c6 100644 --- a/vision_agent/llm/llm.py +++ b/vision_agent/llm/llm.py @@ -33,11 +33,15 @@ class OpenAILLM(LLM): def __init__( self, model_name: str = "gpt-4-turbo-preview", + api_key: str = "", json_mode: bool = False, **kwargs: Any ): self.model_name = model_name - self.client = OpenAI() + if api_key: + self.client = OpenAI(api_key=api_key) + else: + self.client = OpenAI() self.kwargs = kwargs if json_mode: self.kwargs["response_format"] = {"type": "json_object"} diff --git a/vision_agent/lmm/lmm.py b/vision_agent/lmm/lmm.py index 7ae65eb2..0bff8e85 100644 --- a/vision_agent/lmm/lmm.py +++ b/vision_agent/lmm/lmm.py @@ -99,12 +99,16 @@ class OpenAILMM(LMM): def __init__( self, model_name: str = "gpt-4-vision-preview", + api_key: str = "", max_tokens: int = 1024, **kwargs: Any, ): self.model_name = model_name self.max_tokens = max_tokens - self.client = OpenAI() + if api_key: + self.client = OpenAI(api_key=api_key) + else: + self.client = OpenAI() self.kwargs = kwargs def __call__( From fe1ff8d051f42d5e2530bf905318efc22f9c33cb Mon Sep 17 00:00:00 2001 From: Dillon Laird Date: Fri, 5 Apr 2024 17:18:30 -0700 Subject: [PATCH 2/6] added azure openai --- vision_agent/llm/llm.py | 47 +++++++++++++++++++++++++++++++++++------ vision_agent/lmm/lmm.py | 44 ++++++++++++++++++++++++++++++++------ 2 files changed, 78 insertions(+), 13 deletions(-) diff --git a/vision_agent/llm/llm.py b/vision_agent/llm/llm.py index d2eef9c6..1ccc5809 100644 --- a/vision_agent/llm/llm.py +++ b/vision_agent/llm/llm.py @@ -1,8 +1,9 @@ import json +import os from abc import ABC, abstractmethod -from typing import Any, 
Callable, Dict, List, Mapping, Union, cast +from typing import Any, Callable, Dict, List, Mapping, Optional, Union, cast -from openai import OpenAI +from openai import AzureOpenAI, OpenAI from vision_agent.tools import ( CHOOSE_PARAMS, @@ -33,15 +34,18 @@ class OpenAILLM(LLM): def __init__( self, model_name: str = "gpt-4-turbo-preview", - api_key: str = "", + api_key: Optional[str] = None, json_mode: bool = False, **kwargs: Any ): + if not api_key: + api_key = os.getenv("OPENAI_API_KEY") + + if not api_key: + raise ValueError("OpenAI API key is required.") + + self.client = OpenAI(api_key=api_key) self.model_name = model_name - if api_key: - self.client = OpenAI(api_key=api_key) - else: - self.client = OpenAI() self.kwargs = kwargs if json_mode: self.kwargs["response_format"] = {"type": "json_object"} @@ -124,3 +128,32 @@ def generate_segmentor(self, question: str) -> Callable: ] return lambda x: GroundingSAM()(**{"prompt": params["prompt"], "image": x}) + + +class AzureOpenAILLM(OpenAILLM): + def __init__( + self, + model_name: str = "gpt-4-turbo-preview", + api_key: Optional[str] = None, + api_version: str = "2024-02-01", + azure_endpoint: Optional[str] = None, + json_mode: bool = False, + **kwargs: Any + ): + if not api_key: + api_key = os.getenv("AZURE_OPENAI_API_KEY") + if not azure_endpoint: + azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT") + + if not api_key: + raise ValueError("Azure OpenAI API key is required.") + if not azure_endpoint: + raise ValueError("Azure OpenAI endpoint is required.") + + self.client = AzureOpenAI( + api_key=api_key, api_version=api_version, azure_endpoint=azure_endpoint + ) + self.model_name = model_name + self.kwargs = kwargs + if json_mode: + self.kwargs["response_format"] = {"type": "json_object"} diff --git a/vision_agent/lmm/lmm.py b/vision_agent/lmm/lmm.py index 0bff8e85..3eee8766 100644 --- a/vision_agent/lmm/lmm.py +++ b/vision_agent/lmm/lmm.py @@ -1,12 +1,13 @@ import base64 import json import logging +import os from 
abc import ABC, abstractmethod from pathlib import Path from typing import Any, Callable, Dict, List, Optional, Union, cast import requests -from openai import OpenAI +from openai import AzureOpenAI, OpenAI from vision_agent.tools import ( CHOOSE_PARAMS, @@ -99,16 +100,19 @@ class OpenAILMM(LMM): def __init__( self, model_name: str = "gpt-4-vision-preview", - api_key: str = "", + api_key: Optional[str] = None, max_tokens: int = 1024, **kwargs: Any, ): + if not api_key: + api_key = os.getenv("OPENAI_API_KEY") + + if not api_key: + raise ValueError("OpenAI API key is required.") + + self.client = OpenAI(api_key=api_key) self.model_name = model_name self.max_tokens = max_tokens - if api_key: - self.client = OpenAI(api_key=api_key) - else: - self.client = OpenAI() self.kwargs = kwargs def __call__( @@ -252,6 +256,34 @@ def generate_segmentor(self, question: str) -> Callable: return lambda x: GroundingSAM()(**{"prompt": params["prompt"], "image": x}) +class AzureOpenAILMM(OpenAILMM): + def __init__( + self, + model_name: str = "gpt-4-vision-preview", + api_key: Optional[str] = None, + api_version: str = "2021-02-01", + azure_endpoint: Optional[str] = None, + max_tokens: int = 1024, + **kwargs: Any, + ): + if not api_key: + api_key = os.getenv("OPENAI_API_KEY") + if not azure_endpoint: + azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT") + + if not api_key: + raise ValueError("OpenAI API key is required.") + if not azure_endpoint: + raise ValueError("Azure OpenAI endpoint is required.") + + self.client = AzureOpenAI( + api_key=api_key, api_version=api_version, azure_endpoint=azure_endpoint + ) + self.model_name = model_name + self.max_tokens = max_tokens + self.kwargs = kwargs + + def get_lmm(name: str) -> LMM: if name == "openai": return OpenAILMM(name) From c18f74929b89564d43ef6059e0fb536a0de7335a Mon Sep 17 00:00:00 2001 From: Dillon Laird Date: Fri, 5 Apr 2024 17:24:05 -0700 Subject: [PATCH 3/6] added azure openai to modules --- vision_agent/llm/__init__.py | 2 +- 
vision_agent/lmm/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/vision_agent/llm/__init__.py b/vision_agent/llm/__init__.py index dd5f5c54..e482f69d 100644 --- a/vision_agent/llm/__init__.py +++ b/vision_agent/llm/__init__.py @@ -1 +1 @@ -from .llm import LLM, OpenAILLM +from .llm import LLM, AzureOpenAILLM, OpenAILLM diff --git a/vision_agent/lmm/__init__.py b/vision_agent/lmm/__init__.py index 26bc23c1..9c7ace7a 100644 --- a/vision_agent/lmm/__init__.py +++ b/vision_agent/lmm/__init__.py @@ -1 +1 @@ -from .lmm import LMM, LLaVALMM, OpenAILMM, get_lmm +from .lmm import LMM, AzureOpenAILMM, LLaVALMM, OpenAILMM, get_lmm From 6cdb50f02604fb2541089d67285a323e9da04b2e Mon Sep 17 00:00:00 2001 From: Dillon Laird Date: Fri, 5 Apr 2024 17:29:56 -0700 Subject: [PATCH 4/6] fix for passing tests --- vision_agent/llm/llm.py | 8 +++----- vision_agent/lmm/lmm.py | 7 +++---- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/vision_agent/llm/llm.py b/vision_agent/llm/llm.py index 1ccc5809..9022ef73 100644 --- a/vision_agent/llm/llm.py +++ b/vision_agent/llm/llm.py @@ -39,12 +39,10 @@ def __init__( **kwargs: Any ): if not api_key: - api_key = os.getenv("OPENAI_API_KEY") + self.client = OpenAI() + else: + self.client = OpenAI(api_key=api_key) - if not api_key: - raise ValueError("OpenAI API key is required.") - - self.client = OpenAI(api_key=api_key) self.model_name = model_name self.kwargs = kwargs if json_mode: diff --git a/vision_agent/lmm/lmm.py b/vision_agent/lmm/lmm.py index 3eee8766..99e5b01b 100644 --- a/vision_agent/lmm/lmm.py +++ b/vision_agent/lmm/lmm.py @@ -105,10 +105,9 @@ def __init__( **kwargs: Any, ): if not api_key: - api_key = os.getenv("OPENAI_API_KEY") - - if not api_key: - raise ValueError("OpenAI API key is required.") + self.client = OpenAI() + else: + self.client = OpenAI(api_key=api_key) self.client = OpenAI(api_key=api_key) self.model_name = model_name From 56b0ff92da25fb837ef77f31a49828e0d7599f88 Mon Sep 17 
00:00:00 2001 From: Dillon Laird Date: Fri, 5 Apr 2024 17:50:28 -0700 Subject: [PATCH 5/6] added azure openai to readme --- README.md | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 181be5ed..bff0a12a 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,8 @@ To get started, you can install the library using pip: pip install vision-agent ``` -Ensure you have an OpenAI API key and set it as an environment variable: +Ensure you have an OpenAI API key and set it as an environment variable (if you are +using Azure OpenAI, please see the additional setup section): ```bash export OPENAI_API_KEY="your-api-key" @@ -109,3 +110,23 @@ you. For example: It also has a basic set of calculate tools such as add, subtract, multiply and divide. + +### Additional Setup +If you want to use Azure OpenAI models, you can set the environment variables: + +```bash +export AZURE_OPENAI_API_KEY="your-api-key" +export AZURE_OPENAI_ENDPOINT="your-endpoint" +``` + +You can then run Vision Agent using the Azure OpenAI models: + +```python +>>> import vision_agent as va +>>> agent = va.agent.VisionAgent( +>>> task_model=va.llm.AzureOpenAILLM(), +>>> answer_model=va.lmm.AzureOpenAILMM(), +>>> reflection_model=va.lmm.AzureOpenAILMM(), +>>> ) +``` + From 7d4ec4a4da8b19df5f9f3d97645fc47d2d20bb92 Mon Sep 17 00:00:00 2001 From: Dillon Laird Date: Fri, 5 Apr 2024 17:53:58 -0700 Subject: [PATCH 6/6] fixed typo --- vision_agent/lmm/lmm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vision_agent/lmm/lmm.py b/vision_agent/lmm/lmm.py index 99e5b01b..0d63b158 100644 --- a/vision_agent/lmm/lmm.py +++ b/vision_agent/lmm/lmm.py @@ -260,13 +260,13 @@ def __init__( self, model_name: str = "gpt-4-vision-preview", api_key: Optional[str] = None, - api_version: str = "2021-02-01", + api_version: str = "2024-02-01", azure_endpoint: Optional[str] = None, max_tokens: int = 1024, **kwargs: Any, ): if not api_key: - api_key = 
os.getenv("OPENAI_API_KEY") + api_key = os.getenv("AZURE_OPENAI_API_KEY") if not azure_endpoint: azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")