
Commit

Minor fixes (#15)
* fixed bad test case

* fixed typo

* fix html error

* center title

* fix import
dillonalaird authored Mar 13, 2024
1 parent 8ccada3 commit aec8903
Showing 3 changed files with 6 additions and 7 deletions.
README.md: 4 changes (2 additions & 2 deletions)
@@ -1,13 +1,13 @@
<div align="center">
-<img alt="vision_agent" height="200px" src="https://github.com/landing-ai/vision-agent/blob/main/assets/logo.jpg?raw=true"
-</div>
+<img alt="vision_agent" height="200px" src="https://github.com/landing-ai/vision-agent/blob/main/assets/logo.jpg?raw=true">

# 🔍🤖 Vision Agent

[![](https://dcbadge.vercel.app/api/server/wPdN8RCYew?compact=true&style=flat)](https://discord.gg/wPdN8RCYew)
![ci_status](https://github.com/landing-ai/vision-agent/actions/workflows/ci_cd.yml/badge.svg)
[![PyPI version](https://badge.fury.io/py/vision-agent.svg)](https://badge.fury.io/py/vision-agent)
![version](https://img.shields.io/pypi/pyversions/vision-agent)
+</div>


Vision Agent is a library that helps you use multimodal models to organize and structure your image data. Check out our discord for roadmaps and updates!
tests/test_llm.py: 7 changes (3 additions & 4 deletions)
@@ -1,8 +1,7 @@
import pytest

from vision_agent.llm.llm import OpenAILLM
-from vision_agent.tools import CLIP
-from vision_agent.tools.tools import GroundingDINO
+from vision_agent.tools import CLIP, GroundingDINO, GroundingSAM

from .fixtures import openai_llm_mock # noqa: F401

@@ -54,6 +53,6 @@ def test_generate_detector(openai_llm_mock): # noqa: F811
def test_generate_segmentor(openai_llm_mock): # noqa: F811
    llm = OpenAILLM()
    prompt = "Can you generate a cat segmentor?"
-    segmentor = llm.generate_detector(prompt)
-    assert isinstance(segmentor, GroundingDINO)
+    segmentor = llm.generate_segmentor(prompt)
+    assert isinstance(segmentor, GroundingSAM)
    assert segmentor.prompt == "cat"
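
For reference, the corrected test now calls `OpenAILLM.generate_segmentor` and expects a `GroundingSAM` instance rather than a `GroundingDINO` detector. Below is a minimal usage sketch based only on what the test asserts; it assumes OpenAI credentials are configured, which the test itself avoids by mocking the LLM via `openai_llm_mock`:

```python
# Minimal sketch mirroring the corrected test above. Assumes OpenAI
# credentials are available; the test mocks the LLM call instead.
from vision_agent.llm.llm import OpenAILLM
from vision_agent.tools import GroundingSAM

llm = OpenAILLM()
segmentor = llm.generate_segmentor("Can you generate a cat segmentor?")

assert isinstance(segmentor, GroundingSAM)  # a segmentation tool, not a detector
print(segmentor.prompt)  # expected to be "cat", per the test's assertion
```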
vision_agent/tools/tools.py: 2 changes (1 addition & 1 deletion)
@@ -55,7 +55,7 @@ class GroundingDINO(ImageTool):
'Example 2: User Question: "Can you detect the person on the left?" {{"Parameters":{{"prompt": "person on the left"}}\n'
'Exmaple 3: User Question: "Can you build me a tool that detects red shirts and green shirts?" {{"Parameters":{{"prompt": "red shirt. green shirt"}}}}\n'
"The tool returns a list of dictionaries, each containing the following keys:\n"
" - 'lable': The label of the detected object.\n"
" - 'label': The label of the detected object.\n"
" - 'score': The confidence score of the detection.\n"
" - 'bbox': The bounding box of the detected object. The box coordinates are normalize to [0, 1]\n"
"An example output would be: [{'label': ['car'], 'score': [0.99], 'bbox': [[0.1, 0.2, 0.3, 0.4]]}]\n"
