Skip to content

Commit

Permalink
fixed florence OD as phrase grounding
Browse files Browse the repository at this point in the history
  • Loading branch information
shankar-vision-eng committed Aug 27, 2024
1 parent da3eed1 commit 46343e0
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 10 deletions.
4 changes: 2 additions & 2 deletions tests/integ/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
detr_segmentation,
dpt_hybrid_midas,
florence2_image_caption,
florence2_object_detection,
florence2_phrase_grounding,
florence2_ocr,
florence2_roberta_vqa,
florence2_sam2_image,
Expand Down Expand Up @@ -65,7 +65,7 @@ def test_owl():

def test_object_detection():
img = ski.data.coins()
result = florence2_object_detection(
result = florence2_phrase_grounding(
image=img,
prompt="coin",
)
Expand Down
2 changes: 1 addition & 1 deletion vision_agent/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
dpt_hybrid_midas,
extract_frames,
florence2_image_caption,
florence2_object_detection,
florence2_phrase_grounding,
florence2_ocr,
florence2_roberta_vqa,
florence2_sam2_image,
Expand Down
14 changes: 7 additions & 7 deletions vision_agent/tools/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -760,10 +760,10 @@ def florence2_image_caption(image: np.ndarray, detail_caption: bool = True) -> s
return answer[task] # type: ignore


def florence2_object_detection(prompt: str, image: np.ndarray) -> List[Dict[str, Any]]:
"""'florencev2_object_detection' is a tool that can detect and count multiple
objects given a text prompt such as category names or referring expressions. You
can optionally separate the categories in the text with commas. It returns a list
def florence2_phrase_grounding(prompt: str, image: np.ndarray) -> List[Dict[str, Any]]:
"""'florence2_phrase_grounding' is a tool that can detect multiple
objects given a text prompt, which can be object names or a caption. You
can optionally separate the object names in the text with commas. It returns a list
of bounding boxes with normalized coordinates, label names and associated
probability scores of 1.0.
Expand All @@ -780,7 +780,7 @@ def florence2_object_detection(prompt: str, image: np.ndarray) -> List[Dict[str,
Example
-------
>>> florence2_object_detection('person looking at a coyote', image)
>>> florence2_phrase_grounding('person looking at a coyote', image)
[
{'score': 1.0, 'label': 'person', 'bbox': [0.1, 0.11, 0.35, 0.4]},
{'score': 1.0, 'label': 'coyote', 'bbox': [0.34, 0.21, 0.85, 0.5]},
Expand All @@ -792,7 +792,7 @@ def florence2_object_detection(prompt: str, image: np.ndarray) -> List[Dict[str,
"image": image_b64,
"task": "<CAPTION_TO_PHRASE_GROUNDING>",
"prompt": prompt,
"function_name": "florence2_object_detection",
"function_name": "florence2_phrase_grounding",
}

detections = send_inference_request(data, "florence2", v2=True)
Expand Down Expand Up @@ -1663,7 +1663,7 @@ def florencev2_fine_tuned_object_detection(
florence2_ocr,
florence2_sam2_image,
florence2_sam2_video,
florence2_object_detection,
florence2_phrase_grounding,
ixc25_image_vqa,
ixc25_video_vqa,
detr_segmentation,
Expand Down

0 comments on commit 46343e0

Please sign in to comment.