From 59698dfc3ebc8e0e3b1910a48b8e66b954a60327 Mon Sep 17 00:00:00 2001
From: Mingrui Zhang <mingruizhang50@gmail.com>
Date: Tue, 3 Sep 2024 17:23:34 +0800
Subject: [PATCH 1/3] Rename and export florence2_phrase_grounding_fine_tune

---
 vision_agent/tools/__init__.py |  1 +
 vision_agent/tools/tools.py    | 19 +++++++++++--------
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/vision_agent/tools/__init__.py b/vision_agent/tools/__init__.py
index e82d7553..e10080d7 100644
--- a/vision_agent/tools/__init__.py
+++ b/vision_agent/tools/__init__.py
@@ -23,6 +23,7 @@
     florence2_image_caption,
     florence2_ocr,
     florence2_phrase_grounding,
+    florence2_phrase_grounding_fine_tune,
     florence2_roberta_vqa,
     florence2_sam2_image,
     florence2_sam2_video,
diff --git a/vision_agent/tools/tools.py b/vision_agent/tools/tools.py
index 0695b547..e85c248e 100644
--- a/vision_agent/tools/tools.py
+++ b/vision_agent/tools/tools.py
@@ -1601,20 +1601,18 @@ def florencev2_fine_tuning(bboxes: List[Dict[str, Any]], task: str) -> UUID:
 
 
 # TODO: add this function to the imports so that is picked in the agent
-def florencev2_fine_tuned_object_detection(
-    image: np.ndarray, prompt: str, model_id: UUID, task: str
+def florence2_phrase_grounding_fine_tune(
+    prompt: str, image: np.ndarray, model_id: UUID
 ) -> List[Dict[str, Any]]:
-    """'florencev2_fine_tuned_object_detection' is a tool that uses a fine tuned model
+    """'florence2_phrase_grounding_fine_tune' is a tool that uses a fine tuned model
     to detect objects given a text prompt such as a phrase or class names separated by
     commas. It returns a list of detected objects as labels and their location as
     bounding boxes with score of 1.0.
 
     Parameters:
-        image (np.ndarray): The image to used to detect objects.
         prompt (str): The prompt to help find objects in the image.
+        image (np.ndarray): The image used to detect objects.
         model_id (UUID): The fine-tuned model id.
-        task (PromptTask): The florencev2 fine-tuning task. The options are
-            CAPTION, CAPTION_TO_PHRASE_GROUNDING and OBJECT_DETECTION.
 
     Returns:
         List[Dict[str, Any]]: A list of dictionaries containing the score, label, and
@@ -1626,8 +1624,8 @@ def florencev2_fine_tuned_object_detection(
     Example
     -------
         >>> florencev2_fine_tuned_object_detection(
-            image,
             'person looking at a coyote',
+            image,
             UUID("381cd5f9-5dc4-472d-9260-f3bb89d31f83")
         )
         [
@@ -1705,12 +1703,17 @@ def florencev2_fine_tuned_object_detection(
     overlay_heat_map,
 ]
 
+# Tools left out of TOOLS but still included in TOOLS_INFO
+OTHER_TOOLS = [
+    florence2_phrase_grounding_fine_tune,
+]
+
 TOOLS = FUNCTION_TOOLS + UTIL_TOOLS
 
 TOOLS_DF = get_tools_df(TOOLS)  # type: ignore
 TOOL_DESCRIPTIONS = get_tool_descriptions(TOOLS)  # type: ignore
 TOOL_DOCSTRING = get_tool_documentation(TOOLS)  # type: ignore
-TOOLS_INFO = get_tools_info(TOOLS)  # type: ignore
+TOOLS_INFO = get_tools_info(TOOLS + OTHER_TOOLS)  # type: ignore
 UTILITIES_DOCSTRING = get_tool_documentation(
     [
         save_json,

From b479ce34e91bd5594f8a3850ebb0f8ceb9785cd1 Mon Sep 17 00:00:00 2001
From: Mingrui Zhang <mingruizhang50@gmail.com>
Date: Tue, 3 Sep 2024 17:50:36 +0800
Subject: [PATCH 2/3] Hardcode CAPTION_TO_PHRASE_GROUNDING task; add debug print

---
 vision_agent/tools/__init__.py | 2 ++
 vision_agent/tools/tools.py    | 5 ++++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/vision_agent/tools/__init__.py b/vision_agent/tools/__init__.py
index e10080d7..29f684ac 100644
--- a/vision_agent/tools/__init__.py
+++ b/vision_agent/tools/__init__.py
@@ -61,6 +61,8 @@ def register_tool(imports: Optional[List] = None) -> Callable:
     def decorator(tool: Callable) -> Callable:
         import inspect
 
+        print("try")
+
         from .tools import get_tool_descriptions, get_tools_df, get_tools_info
 
         global TOOLS, TOOLS_DF, TOOL_DESCRIPTIONS, TOOL_DOCSTRING, TOOLS_INFO
diff --git a/vision_agent/tools/tools.py b/vision_agent/tools/tools.py
index e85c248e..bab02c6c 100644
--- a/vision_agent/tools/tools.py
+++ b/vision_agent/tools/tools.py
@@ -1639,7 +1639,10 @@ def florence2_phrase_grounding_fine_tune(
     if status is not JobStatus.SUCCEEDED:
         raise FineTuneModelIsNotReady()
 
-    task = PromptTask[task]
+    task = PromptTask[
+        "CAPTION_TO_PHRASE_GROUNDING"
+    ]  # hardcode to <CAPTION_TO_PHRASE_GROUNDING> for now
+
     if task is PromptTask.OBJECT_DETECTION:
         prompt = ""
 

From b93cd89d1c7447ba791917fe0d81bff1bcdb4d05 Mon Sep 17 00:00:00 2001
From: Mingrui Zhang <mingruizhang50@gmail.com>
Date: Tue, 3 Sep 2024 18:07:26 +0800
Subject: [PATCH 3/3] Remove debug print from register_tool decorator

---
 vision_agent/tools/__init__.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/vision_agent/tools/__init__.py b/vision_agent/tools/__init__.py
index 29f684ac..e10080d7 100644
--- a/vision_agent/tools/__init__.py
+++ b/vision_agent/tools/__init__.py
@@ -61,8 +61,6 @@ def register_tool(imports: Optional[List] = None) -> Callable:
     def decorator(tool: Callable) -> Callable:
         import inspect
 
-        print("try")
-
         from .tools import get_tool_descriptions, get_tools_df, get_tools_info
 
         global TOOLS, TOOLS_DF, TOOL_DESCRIPTIONS, TOOL_DOCSTRING, TOOLS_INFO