Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update generic OD Tool #192

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions tests/integ/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,10 @@ def test_object_detection():
img = ski.data.coins()
result = florencev2_object_detection(
image=img,
prompt="coin",
)
assert len(result) == 24
assert [res["label"] for res in result] == ["coin"] * 24
assert len(result) == 25
assert [res["label"] for res in result] == ["coin"] * 25


def test_template_match():
Expand Down
21 changes: 12 additions & 9 deletions vision_agent/tools/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -614,13 +614,17 @@ def florencev2_image_caption(image: np.ndarray, detail_caption: bool = True) ->
return answer["text"][0] # type: ignore


def florencev2_object_detection(image: np.ndarray) -> List[Dict[str, Any]]:
"""'florencev2_object_detection' is a tool that can detect common objects in an
image without any text prompt or thresholding. It returns a list of detected objects
as labels and their location as bounding boxes.
def florencev2_object_detection(
image: np.ndarray,
prompt: str,
) -> List[Dict[str, Any]]:
"""'florencev2_object_detection' is a tool that can detect objects given a text
prompt such as a phrase or class names separated by commas. It returns a list of
detected objects as labels and their location as bounding boxes with a score of 1.0.

Parameters:
image (np.ndarray): The image used to detect objects
prompt (str): Phrase or classes to detect objects in the image

Returns:
List[Dict[str, Any]]: A list of dictionaries containing the score, label, and
Expand All @@ -631,17 +635,17 @@ def florencev2_object_detection(image: np.ndarray) -> List[Dict[str, Any]]:

Example
-------
>>> florencev2_object_detection(image)
>>> florencev2_object_detection(image, 'person looking out at coyote')
[
{'score': 1.0, 'label': 'window', 'bbox': [0.1, 0.11, 0.35, 0.4]},
{'score': 1.0, 'label': 'car', 'bbox': [0.2, 0.21, 0.45, 0.5]},
{'score': 1.0, 'label': 'person', 'bbox': [0.34, 0.21, 0.85, 0.5]},
{'score': 1.0, 'label': 'person', 'bbox': [0.1, 0.11, 0.35, 0.4]},
{'score': 1.0, 'label': 'coyote', 'bbox': [0.34, 0.21, 0.85, 0.5]},
]
"""
image_size = image.shape[:2]
image_b64 = convert_to_b64(image)
data = {
"image": image_b64,
"prompt": prompt,
"tool": "object_detection",
"function_name": "florencev2_object_detection",
}
Expand Down Expand Up @@ -1253,7 +1257,6 @@ def overlay_heat_map(
loca_visual_prompt_counting,
florencev2_roberta_vqa,
florencev2_image_caption,
florencev2_object_detection,
detr_segmentation,
depth_anything_v2,
generate_soft_edge_image,
Expand Down
Loading