From e50dacd74f438a81abda6f971a09829ef72b150b Mon Sep 17 00:00:00 2001
From: Dillon Laird
Date: Tue, 12 Mar 2024 17:50:30 -0700
Subject: [PATCH 1/5] fixed bad test case

---
 tests/test_llm.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_llm.py b/tests/test_llm.py
index 74453a4b..1dd39dcb 100644
--- a/tests/test_llm.py
+++ b/tests/test_llm.py
@@ -54,6 +54,6 @@ def test_generate_detector(openai_llm_mock):  # noqa: F811
 def test_generate_segmentor(openai_llm_mock):  # noqa: F811
     llm = OpenAILLM()
     prompt = "Can you generate a cat segmentor?"
-    segmentor = llm.generate_detector(prompt)
-    assert isinstance(segmentor, GroundingDINO)
+    segmentor = llm.generate_segmentor(prompt)
+    assert isinstance(segmentor, GroundingSAM)
     assert segmentor.prompt == "cat"

From 922be2b9efedc34738e19a9512c9ea6767118a51 Mon Sep 17 00:00:00 2001
From: Dillon Laird
Date: Tue, 12 Mar 2024 17:50:38 -0700
Subject: [PATCH 2/5] fixed typo

---
 vision_agent/tools/tools.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vision_agent/tools/tools.py b/vision_agent/tools/tools.py
index b539dfbb..de8960dd 100644
--- a/vision_agent/tools/tools.py
+++ b/vision_agent/tools/tools.py
@@ -55,7 +55,7 @@ class GroundingDINO(ImageTool):
         'Example 2: User Question: "Can you detect the person on the left?" {{"Parameters":{{"prompt": "person on the left"}}\n'
         'Exmaple 3: User Question: "Can you build me a tool that detects red shirts and green shirts?" {{"Parameters":{{"prompt": "red shirt. green shirt"}}}}\n'
         "The tool returns a list of dictionaries, each containing the following keys:\n"
-        " - 'lable': The label of the detected object.\n"
+        " - 'label': The label of the detected object.\n"
         " - 'score': The confidence score of the detection.\n"
         " - 'bbox': The bounding box of the detected object. The box coordinates are normalize to [0, 1]\n"
         "An example output would be: [{'label': ['car'], 'score': [0.99], 'bbox': [[0.1, 0.2, 0.3, 0.4]]}]\n"

From 52c9c270aaf64c0e387b0e7aee206d57fd86f653 Mon Sep 17 00:00:00 2001
From: Dillon Laird
Date: Tue, 12 Mar 2024 17:52:08 -0700
Subject: [PATCH 3/5] fix html error

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 85fb45d6..dd435503 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
 <div align="center">
- vision_agent

 # 🔍🤖 Vision Agent

From 3abfb50d0824f63af8c2b70dfbe41de705b76f60 Mon Sep 17 00:00:00 2001
From: Dillon Laird
Date: Tue, 12 Mar 2024 17:54:51 -0700
Subject: [PATCH 4/5] center title

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index dd435503..954ed93e 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,5 @@
 <div align="center">
 vision_agent
-

 # 🔍🤖 Vision Agent
@@ -8,6 +7,7 @@
 ![ci_status](https://github.com/landing-ai/vision-agent/actions/workflows/ci_cd.yml/badge.svg)
 [![PyPI version](https://badge.fury.io/py/vision-agent.svg)](https://badge.fury.io/py/vision-agent)
 ![version](https://img.shields.io/pypi/pyversions/vision-agent)
+

 Vision Agent is a library for that helps you to use multimodal models to organize and structure your image data. Check out our discord for roadmaps and updates!

From c870480c21bdcd0dfd767ca4e8c9f97e5ec2e535 Mon Sep 17 00:00:00 2001
From: Dillon Laird
Date: Tue, 12 Mar 2024 17:58:51 -0700
Subject: [PATCH 5/5] fix import

---
 tests/test_llm.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tests/test_llm.py b/tests/test_llm.py
index 1dd39dcb..a8070f30 100644
--- a/tests/test_llm.py
+++ b/tests/test_llm.py
@@ -1,8 +1,7 @@
 import pytest

 from vision_agent.llm.llm import OpenAILLM
-from vision_agent.tools import CLIP
-from vision_agent.tools.tools import GroundingDINO
+from vision_agent.tools import CLIP, GroundingDINO, GroundingSAM

 from .fixtures import openai_llm_mock  # noqa: F401
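
For reference, this is roughly how the segmentor test in tests/test_llm.py reads once patches 1 and 5 are both applied — a sketch reassembled from the hunks above, assuming the rest of the file (the detector and classifier tests and the mocked OpenAI fixture) is unchanged:

```python
# Sketch of tests/test_llm.py after patches 1/5 and 5/5; the remaining tests
# and the openai_llm_mock fixture (defined in .fixtures) are omitted here.
import pytest

from vision_agent.llm.llm import OpenAILLM
from vision_agent.tools import CLIP, GroundingDINO, GroundingSAM

from .fixtures import openai_llm_mock  # noqa: F401


def test_generate_segmentor(openai_llm_mock):  # noqa: F811
    llm = OpenAILLM()
    prompt = "Can you generate a cat segmentor?"
    # Patch 1 fixes this to call generate_segmentor (not generate_detector)
    # and to expect a GroundingSAM instance rather than GroundingDINO.
    segmentor = llm.generate_segmentor(prompt)
    assert isinstance(segmentor, GroundingSAM)
    assert segmentor.prompt == "cat"
```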