From 59698dfc3ebc8e0e3b1910a48b8e66b954a60327 Mon Sep 17 00:00:00 2001 From: Mingrui Zhang Date: Tue, 3 Sep 2024 17:23:34 +0800 Subject: [PATCH 1/3] done --- vision_agent/tools/__init__.py | 1 + vision_agent/tools/tools.py | 19 +++++++++++-------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/vision_agent/tools/__init__.py b/vision_agent/tools/__init__.py index e82d7553..e10080d7 100644 --- a/vision_agent/tools/__init__.py +++ b/vision_agent/tools/__init__.py @@ -23,6 +23,7 @@ florence2_image_caption, florence2_ocr, florence2_phrase_grounding, + florence2_phrase_grounding_fine_tune, florence2_roberta_vqa, florence2_sam2_image, florence2_sam2_video, diff --git a/vision_agent/tools/tools.py b/vision_agent/tools/tools.py index 0695b547..e85c248e 100644 --- a/vision_agent/tools/tools.py +++ b/vision_agent/tools/tools.py @@ -1601,20 +1601,18 @@ def florencev2_fine_tuning(bboxes: List[Dict[str, Any]], task: str) -> UUID: # TODO: add this function to the imports so that is picked in the agent -def florencev2_fine_tuned_object_detection( - image: np.ndarray, prompt: str, model_id: UUID, task: str +def florence2_phrase_grounding_fine_tune( + prompt: str, image: np.ndarray, model_id: UUID ) -> List[Dict[str, Any]]: - """'florencev2_fine_tuned_object_detection' is a tool that uses a fine tuned model + """'florence2_phrase_grounding_fine_tune' is a tool that uses a fine tuned model to detect objects given a text prompt such as a phrase or class names separated by commas. It returns a list of detected objects as labels and their location as bounding boxes with score of 1.0. Parameters: - image (np.ndarray): The image to used to detect objects. prompt (str): The prompt to help find objects in the image. + image (np.ndarray): The image to used to detect objects. model_id (UUID): The fine-tuned model id. - task (PromptTask): The florencev2 fine-tuning task. The options are - CAPTION, CAPTION_TO_PHRASE_GROUNDING and OBJECT_DETECTION. 
Returns: List[Dict[str, Any]]: A list of dictionaries containing the score, label, and @@ -1626,8 +1624,8 @@ def florencev2_fine_tuned_object_detection( Example ------- >>> florencev2_fine_tuned_object_detection( - image, 'person looking at a coyote', + image, UUID("381cd5f9-5dc4-472d-9260-f3bb89d31f83") ) [ @@ -1705,12 +1703,17 @@ def florencev2_fine_tuned_object_detection( overlay_heat_map, ] +# non-implemented tools +OTHER_TOOLS = [ + florence2_phrase_grounding_fine_tune, +] + TOOLS = FUNCTION_TOOLS + UTIL_TOOLS TOOLS_DF = get_tools_df(TOOLS) # type: ignore TOOL_DESCRIPTIONS = get_tool_descriptions(TOOLS) # type: ignore TOOL_DOCSTRING = get_tool_documentation(TOOLS) # type: ignore -TOOLS_INFO = get_tools_info(TOOLS) # type: ignore +TOOLS_INFO = get_tools_info(TOOLS + OTHER_TOOLS) # type: ignore UTILITIES_DOCSTRING = get_tool_documentation( [ save_json, From b479ce34e91bd5594f8a3850ebb0f8ceb9785cd1 Mon Sep 17 00:00:00 2001 From: Mingrui Zhang Date: Tue, 3 Sep 2024 17:50:36 +0800 Subject: [PATCH 2/3] done --- vision_agent/tools/__init__.py | 2 ++ vision_agent/tools/tools.py | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/vision_agent/tools/__init__.py b/vision_agent/tools/__init__.py index e10080d7..29f684ac 100644 --- a/vision_agent/tools/__init__.py +++ b/vision_agent/tools/__init__.py @@ -61,6 +61,8 @@ def register_tool(imports: Optional[List] = None) -> Callable: def decorator(tool: Callable) -> Callable: import inspect + print("try") + from .tools import get_tool_descriptions, get_tools_df, get_tools_info global TOOLS, TOOLS_DF, TOOL_DESCRIPTIONS, TOOL_DOCSTRING, TOOLS_INFO diff --git a/vision_agent/tools/tools.py b/vision_agent/tools/tools.py index e85c248e..bab02c6c 100644 --- a/vision_agent/tools/tools.py +++ b/vision_agent/tools/tools.py @@ -1639,7 +1639,10 @@ def florence2_phrase_grounding_fine_tune( if status is not JobStatus.SUCCEEDED: raise FineTuneModelIsNotReady() - task = PromptTask[task] + task = PromptTask[ + 
"CAPTION_TO_PHRASE_GROUNDING" + ] # hardcoded for now + if task is PromptTask.OBJECT_DETECTION: prompt = "" From b93cd89d1c7447ba791917fe0d81bff1bcdb4d05 Mon Sep 17 00:00:00 2001 From: Mingrui Zhang Date: Tue, 3 Sep 2024 18:07:26 +0800 Subject: [PATCH 3/3] done --- vision_agent/tools/__init__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/vision_agent/tools/__init__.py b/vision_agent/tools/__init__.py index 29f684ac..e10080d7 100644 --- a/vision_agent/tools/__init__.py +++ b/vision_agent/tools/__init__.py @@ -61,8 +61,6 @@ def register_tool(imports: Optional[List] = None) -> Callable: def decorator(tool: Callable) -> Callable: import inspect - print("try") - from .tools import get_tool_descriptions, get_tools_df, get_tools_info global TOOLS, TOOLS_DF, TOOL_DESCRIPTIONS, TOOL_DOCSTRING, TOOLS_INFO