From e047bc2c8691832bcd62a99e370aeee45826c1c4 Mon Sep 17 00:00:00 2001 From: Dillon Laird Date: Thu, 15 Aug 2024 15:45:40 -0700 Subject: [PATCH] fixed ordering of args (#200) --- vision_agent/tools/tools.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/vision_agent/tools/tools.py b/vision_agent/tools/tools.py index f1ba5e89..7837e207 100644 --- a/vision_agent/tools/tools.py +++ b/vision_agent/tools/tools.py @@ -607,12 +607,13 @@ def florencev2_image_caption(image: np.ndarray, detail_caption: bool = True) -> return answer[task] # type: ignore -def florencev2_object_detection(image: np.ndarray, prompt: str) -> List[Dict[str, Any]]: +def florencev2_object_detection(prompt: str, image: np.ndarray) -> List[Dict[str, Any]]: """'florencev2_object_detection' is a tool that can detect objects given a text prompt such as a phrase or class names separated by commas. It returns a list of detected objects as labels and their location as bounding boxes with score of 1.0. Parameters: + prompt (str): The prompt to ground to the image. image (np.ndarray): The image to used to detect objects Returns: @@ -624,7 +625,7 @@ def florencev2_object_detection(image: np.ndarray, prompt: str) -> List[Dict[str Example ------- - >>> florencev2_object_detection(image, 'person looking at a coyote') + >>> florencev2_object_detection('person looking at a coyote', image) [ {'score': 1.0, 'label': 'person', 'bbox': [0.1, 0.11, 0.35, 0.4]}, {'score': 1.0, 'label': 'coyote', 'bbox': [0.34, 0.21, 0.85, 0.5},