From 78671695ad2161e55c520999dd53cdbd9fb1c421 Mon Sep 17 00:00:00 2001 From: Dillon Laird Date: Tue, 20 Aug 2024 16:34:35 -0700 Subject: [PATCH] added florence2 od --- vision_agent/tools/tools.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/vision_agent/tools/tools.py b/vision_agent/tools/tools.py index 7025a823..a894800c 100644 --- a/vision_agent/tools/tools.py +++ b/vision_agent/tools/tools.py @@ -738,9 +738,10 @@ def florence2_image_caption(image: np.ndarray, detail_caption: bool = True) -> s def florence2_object_detection(prompt: str, image: np.ndarray) -> List[Dict[str, Any]]: - """'florence2_object_detection' is a tool that can detect objects given a text - prompt such as a phrase or class names separated by commas. It returns a list of - detected objects as labels and their location as bounding boxes with score of 1.0. + """'florence2_object_detection' is a tool that can detect and count multiple objects given a + text prompt such as category names or referring expressions. The categories in the text + prompt are separated by commas. It returns a list of bounding boxes with normalized + coordinates, label names and associated probability scores set to 1.0. Parameters: prompt (str): The prompt to ground to the image. @@ -1525,6 +1526,7 @@ def overlay_heat_map( florence2_ocr, florence2_sam2_image, florence2_sam2_video, + florence2_object_detection, ixc25_image_vqa, ixc25_video_vqa, detr_segmentation,