From fd87fa51cb001001817c2668db02b51f156f4a64 Mon Sep 17 00:00:00 2001
From: Dayanne Fernandes <dayannefernandesc@gmail.com>
Date: Tue, 13 Aug 2024 18:27:55 -0300
Subject: [PATCH 1/7] check status and run prediction with fine tuned model

---
 vision_agent/agent/vision_agent.py         |   2 +-
 vision_agent/clients/http.py               |  15 +++
 vision_agent/clients/landing_public_api.py |   6 +-
 vision_agent/tools/__init__.py             |   7 +-
 vision_agent/tools/meta_tools.py           | 113 ++++++++++++++++++++-
 vision_agent/tools/meta_tools_types.py     |  58 ++++++++++-
 vision_agent/tools/tool_utils.py           |  24 +++--
 7 files changed, 209 insertions(+), 16 deletions(-)

diff --git a/vision_agent/agent/vision_agent.py b/vision_agent/agent/vision_agent.py
index 9090b706..375202db 100644
--- a/vision_agent/agent/vision_agent.py
+++ b/vision_agent/agent/vision_agent.py
@@ -28,7 +28,7 @@ class DefaultImports:
     code = [
         "from typing import *",
         "from vision_agent.utils.execute import CodeInterpreter",
-        "from vision_agent.tools.meta_tools import generate_vision_code, edit_vision_code, open_file, create_file, scroll_up, scroll_down, edit_file, get_tool_descriptions, florencev2_fine_tuning",
+        "from vision_agent.tools.meta_tools import generate_vision_code, edit_vision_code, open_file, create_file, scroll_up, scroll_down, edit_file, get_tool_descriptions, florencev2_fine_tuning, florencev2_fine_tuned_object_detection, check_if_fine_tuned_florencev2_is_ready",
     ]
 
     @staticmethod
diff --git a/vision_agent/clients/http.py b/vision_agent/clients/http.py
index 678148a9..dc969595 100644
--- a/vision_agent/clients/http.py
+++ b/vision_agent/clients/http.py
@@ -44,3 +44,18 @@ def post(self, url: str, payload: Dict[str, Any]) -> Dict[str, Any]:
             resp_text = response.text
             _LOGGER.warning(f"Response seems incorrect: '{resp_text}'.")
         return result
+
+    def get(self, url: str) -> Dict[str, Any]:
+        formatted_url = f"{self._base_endpoint}/{url}"
+        _LOGGER.info(f"Sending data to {formatted_url}")
+        try:
+            response = self._session.get(url=formatted_url, timeout=self._TIMEOUT)
+            response.raise_for_status()
+            result: Dict[str, Any] = response.json()
+            _LOGGER.info(json.dumps(result))
+        except (ConnectionError, Timeout, RequestException) as err:
+            _LOGGER.warning(f"Error: {err}.")
+        except json.JSONDecodeError:
+            resp_text = response.text
+            _LOGGER.warning(f"Response seems incorrect: '{resp_text}'.")
+        return result
diff --git a/vision_agent/clients/landing_public_api.py b/vision_agent/clients/landing_public_api.py
index 4c50c388..09f98b44 100644
--- a/vision_agent/clients/landing_public_api.py
+++ b/vision_agent/clients/landing_public_api.py
@@ -4,7 +4,7 @@
 
 from vision_agent.clients.http import BaseHTTP
 from vision_agent.utils.type_defs import LandingaiAPIKey
-from vision_agent.tools.meta_tools_types import BboxInputBase64, PromptTask
+from vision_agent.tools.meta_tools_types import BboxInputBase64, PromptTask, JobStatus
 
 
 class LandingPublicAPI(BaseHTTP):
@@ -24,3 +24,7 @@ def launch_fine_tuning_job(
         }
         response = self.post(url, payload=data)
         return UUID(response["jobId"])
+
+    def check_fine_tuning_job(self, job_id: UUID) -> JobStatus:
+        url = f"v1/agent/jobs/fine-tuning/{job_id}/status"
+        return JobStatus(self.get(url)["status"])
diff --git a/vision_agent/tools/__init__.py b/vision_agent/tools/__init__.py
index f9879626..4a863994 100644
--- a/vision_agent/tools/__init__.py
+++ b/vision_agent/tools/__init__.py
@@ -1,6 +1,11 @@
 from typing import Callable, List, Optional
 
-from .meta_tools import META_TOOL_DOCSTRING, florencev2_fine_tuning
+from .meta_tools import (
+    META_TOOL_DOCSTRING,
+    florencev2_fine_tuning,
+    florencev2_fine_tuned_object_detection,
+    check_if_fine_tuned_florencev2_is_ready,
+)
 from .prompts import CHOOSE_PARAMS, SYSTEM_PROMPT
 from .tools import (
     TOOL_DESCRIPTIONS,
diff --git a/vision_agent/tools/meta_tools.py b/vision_agent/tools/meta_tools.py
index 851aab18..2ff6df3c 100644
--- a/vision_agent/tools/meta_tools.py
+++ b/vision_agent/tools/meta_tools.py
@@ -4,13 +4,22 @@
 from pathlib import Path
 from typing import Any, Dict, List, Union
 
+import numpy as np
+
 import vision_agent as va
 from vision_agent.lmm.types import Message
-from vision_agent.tools.tool_utils import get_tool_documentation
+from vision_agent.tools.tool_utils import get_tool_documentation, send_inference_request
 from vision_agent.tools.tools import TOOL_DESCRIPTIONS
-from vision_agent.utils.image_utils import convert_to_b64
+from vision_agent.utils.image_utils import convert_to_b64, normalize_bbox
 from vision_agent.clients.landing_public_api import LandingPublicAPI
-from vision_agent.tools.meta_tools_types import BboxInput, BboxInputBase64, PromptTask
+from vision_agent.tools.meta_tools_types import (
+    BboxInput,
+    BboxInputBase64,
+    PromptTask,
+    Florencev2FtRequest,
+    FineTuning,
+    JobStatus,
+)
 
 # These tools are adapted from SWE-Agent https://github.com/princeton-nlp/SWE-agent
 
@@ -384,7 +393,7 @@ def edit_file(file_path: str, start: int, end: int, content: str) -> str:
 
 def get_tool_descriptions() -> str:
     """Returns a description of all the tools that `generate_vision_code` has access to.
-    Helpful for answerings questions about what types of vision tasks you can do with
+    Helpful for answering questions about what types of vision tasks you can do with
     `generate_vision_code`."""
     return TOOL_DESCRIPTIONS
 
@@ -429,6 +438,100 @@ def florencev2_fine_tuning(bboxes: List[Dict[str, Any]], task: str) -> UUID:
     )
 
 
+def check_if_fine_tuned_florencev2_is_ready(model_id: UUID) -> bool:
+    """'check_if_fine_tuned_florencev2_is_ready' is a tool that checks whether
+    is possible to use a certain florencev2 model. It checks if the status
+    is SUCCEEDED.
+
+    Parameters:
+        model_id (UUID): The fine-tuned model id.
+
+    Returns:
+        bool: The indication if the model is ready to be used or not. If this
+        is False, it's recommended to wait 5 seconds before checking again.
+
+    Example
+    -------
+        >>> check_if_fine_tuned_florencev2_is_ready(UUID("381cd5f9-5dc4-472d-9260-f3bb89d31f83"))
+        True
+    """
+    # check if job succeeded first
+    landing_api = LandingPublicAPI()
+    status = landing_api.check_fine_tuning_job(model_id)
+    return status is JobStatus.SUCCEEDED
+
+
+def florencev2_fine_tuned_object_detection(
+    image: np.ndarray, prompt: str, model_id: UUID, task: str, model_is_ready: bool
+) -> List[Dict[str, Any]]:
+    """'florencev2_fine_tuned_object_detection' is a tool that uses a fine tuned model
+    to detect objects given a text prompt such as a phrase or class names separated by
+    commas. It returns a list of detected objects as labels and their location as
+    bounding boxes with score of 1.0.
+
+    Parameters:
+        image (np.ndarray): The image to used to detect objects.
+        prompt (str): The prompt to help find objects in the image.
+        model_id (UUID): The fine-tuned model id.
+        task (PromptTask): The florencev2 fine-tuning task. The options are
+            CAPTION, CAPTION_TO_PHRASE_GROUNDING and OBJECT_DETECTION.
+        model_is_ready (bool): If the model is ready to be used. It's recommended
+            to get this value from the function check_if_fine_tuned_florencev2_is_ready.
+
+    Returns:
+        List[Dict[str, Any]]: A list of dictionaries containing the score, label, and
+            bounding box of the detected objects with normalized coordinates between 0
+            and 1 (xmin, ymin, xmax, ymax). xmin and ymin are the coordinates of the
+            top-left and xmax and ymax are the coordinates of the bottom-right of the
+            bounding box. The scores are always 1.0 and cannot be thresholded
+
+    Example
+    -------
+        >>> florencev2_fine_tuned_object_detection(
+            image,
+            'person looking at a coyote',
+            UUID("381cd5f9-5dc4-472d-9260-f3bb89d31f83"),
+            model_is_ready=True
+        )
+        [
+            {'score': 1.0, 'label': 'person', 'bbox': [0.1, 0.11, 0.35, 0.4]},
+            {'score': 1.0, 'label': 'coyote', 'bbox': [0.34, 0.21, 0.85, 0.5},
+        ]
+    """
+    if not model_is_ready:
+        return []
+
+    task = PromptTask[task]
+    if task is PromptTask.OBJECT_DETECTION:
+        prompt = ""
+
+    data_obj = Florencev2FtRequest(
+        image=convert_to_b64(image),
+        task=task,
+        tool="florencev2_fine_tuning",
+        prompt=prompt,
+        fine_tuning=FineTuning(job_id=model_id),
+    )
+    data = data_obj.model_dump(by_alias=True)
+    metadata_payload = {"function_name": "florencev2_fine_tuned_object_detection"}
+    detections = send_inference_request(
+        data, "tools", v2=False, metadata_payload=metadata_payload
+    )
+
+    detections = detections[task.value]
+    return_data = []
+    image_size = image.shape[:2]
+    for i in range(len(detections["bboxes"])):
+        return_data.append(
+            {
+                "score": 1.0,
+                "label": detections["labels"][i],
+                "bbox": normalize_bbox(detections["bboxes"][i], image_size),
+            }
+        )
+    return return_data
+
+
 META_TOOL_DOCSTRING = get_tool_documentation(
     [
         get_tool_descriptions,
@@ -443,5 +546,7 @@ def florencev2_fine_tuning(bboxes: List[Dict[str, Any]], task: str) -> UUID:
         search_file,
         find_file,
         florencev2_fine_tuning,
+        florencev2_fine_tuned_object_detection,
+        check_if_fine_tuned_florencev2_is_ready,
     ]
 )
diff --git a/vision_agent/tools/meta_tools_types.py b/vision_agent/tools/meta_tools_types.py
index 4c60923e..6b34750b 100644
--- a/vision_agent/tools/meta_tools_types.py
+++ b/vision_agent/tools/meta_tools_types.py
@@ -1,7 +1,8 @@
+from uuid import UUID
 from enum import Enum
-from typing import List, Tuple
+from typing import List, Tuple, Optional
 
-from pydantic import BaseModel
+from pydantic import BaseModel, ConfigDict, Field, field_serializer
 
 
 class BboxInput(BaseModel):
@@ -28,3 +29,56 @@ class PromptTask(str, Enum):
     """"""
     OBJECT_DETECTION = "<OD>"
     """"""
+
+
+class FineTuning(BaseModel):
+    model_config = ConfigDict(populate_by_name=True)
+
+    job_id: UUID = Field(alias="jobId")
+
+    @field_serializer("job_id")
+    def serialize_job_id(self, job_id: UUID, _info):
+        return str(job_id)
+
+
+class Florencev2FtRequest(BaseModel):
+    model_config = ConfigDict(populate_by_name=True)
+
+    image: str
+    task: PromptTask
+    tool: str
+    prompt: Optional[str] = ""
+    fine_tuning: Optional[FineTuning] = Field(None, alias="fineTuning")
+
+
+class JobStatus(str, Enum):
+    """The status of a fine-tuning job.
+
+    CREATED:
+        The job has been created and is waiting to be scheduled to run.
+    STARTING:
+        The job has started running, but has not yet entered the training phase.
+    TRAINING:
+        The job is training a model.
+    EVALUATING:
+        The job is evaluating the model and computing metrics.
+    PUBLISHING:
+        The job is exporting the artifact(s) to an external directory (s3 or local).
+    SUCCEEDED:
+        The job has finished, including training, evaluation and publishing the
+        artifact(s).
+    FAILED:
+        The job has failed for some reason internally, it can be due to resources
+        issues or the code itself.
+    STOPPED:
+        The job has been stopped by the user locally or in the cloud.
+    """
+
+    CREATED = "CREATED"
+    STARTING = "STARTING"
+    TRAINING = "TRAINING"
+    EVALUATING = "EVALUATING"
+    PUBLISHING = "PUBLISHING"
+    SUCCEEDED = "SUCCEEDED"
+    FAILED = "FAILED"
+    STOPPED = "STOPPED"
diff --git a/vision_agent/tools/tool_utils.py b/vision_agent/tools/tool_utils.py
index 0ff56177..e6dfc67d 100644
--- a/vision_agent/tools/tool_utils.py
+++ b/vision_agent/tools/tool_utils.py
@@ -15,9 +15,10 @@
 from vision_agent.utils.type_defs import LandingaiAPIKey
 
 _LOGGER = logging.getLogger(__name__)
-_LND_API_KEY = LandingaiAPIKey().api_key
-_LND_API_URL = "https://api.landing.ai/v1/agent/model"
-_LND_API_URL_v2 = "https://api.landing.ai/v1/tools"
+_LND_API_KEY = os.environ.get("LANDINGAI_API_KEY", LandingaiAPIKey().api_key)
+_LND_BASE_URL = os.environ.get("LANDINGAI_URL", "https://api.landing.ai")
+_LND_API_URL = f"{_LND_BASE_URL}/v1/agent/model"
+_LND_API_URL_v2 = f"{_LND_BASE_URL}/v1/tools"
 
 
 class ToolCallTrace(BaseModel):
@@ -28,8 +29,13 @@ class ToolCallTrace(BaseModel):
 
 
 def send_inference_request(
-    payload: Dict[str, Any], endpoint_name: str, v2: bool = False
+    payload: Dict[str, Any],
+    endpoint_name: str,
+    v2: bool = False,
+    metadata_payload: Optional[Dict[str, Any]] = None,
 ) -> Dict[str, Any]:
+    # TODO: runtime_tag and function_name should be passed via metadata_payload
+    # instead of being included in the service payload
     try:
         if runtime_tag := os.environ.get("RUNTIME_TAG", ""):
             payload["runtime_tag"] = runtime_tag
@@ -62,9 +68,13 @@ def send_inference_request(
                 traceback_raw=[],
             )
             _LOGGER.error(f"Request failed: {res.status_code} {res.text}")
-            raise RemoteToolCallFailed(
-                payload["function_name"], res.status_code, res.text
-            )
+            # TODO: function_name should be in metadata_payload
+            function_name = "unknown"
+            if "function_name" in payload:
+                function_name = payload["function_name"]
+            elif metadata_payload is not None and "function_name" in metadata_payload:
+                function_name = metadata_payload["function_name"]
+            raise RemoteToolCallFailed(function_name, res.status_code, res.text)
 
         resp = res.json()
         tool_call_trace.response = resp

From 4bee8d76a84981cbb247567dd5dba78bc6ae7325 Mon Sep 17 00:00:00 2001
From: Dayanne Fernandes <dayannefernandesc@gmail.com>
Date: Tue, 13 Aug 2024 19:04:07 -0300
Subject: [PATCH 2/7] fix linter

---
 pyproject.toml                         | 2 ++
 vision_agent/tools/meta_tools_types.py | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 664534e2..9f971210 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -78,6 +78,8 @@ line_length = 88
 profile = "black"
 
 [tool.mypy]
+plugins = "pydantic.mypy"
+
 exclude = "tests"
 show_error_context = true
 pretty = true
diff --git a/vision_agent/tools/meta_tools_types.py b/vision_agent/tools/meta_tools_types.py
index 6b34750b..aeb45c95 100644
--- a/vision_agent/tools/meta_tools_types.py
+++ b/vision_agent/tools/meta_tools_types.py
@@ -2,7 +2,7 @@
 from enum import Enum
 from typing import List, Tuple, Optional
 
-from pydantic import BaseModel, ConfigDict, Field, field_serializer
+from pydantic import BaseModel, ConfigDict, Field, field_serializer, SerializationInfo
 
 
 class BboxInput(BaseModel):
@@ -37,7 +37,7 @@ class FineTuning(BaseModel):
     job_id: UUID = Field(alias="jobId")
 
     @field_serializer("job_id")
-    def serialize_job_id(self, job_id: UUID, _info):
+    def serialize_job_id(self, job_id: UUID, _info: SerializationInfo) -> str:
         return str(job_id)
 
 

From 21d5ee82a01c15250309ef47c1a0098581177e1c Mon Sep 17 00:00:00 2001
From: Dayanne Fernandes <dayannefernandesc@gmail.com>
Date: Tue, 13 Aug 2024 19:09:58 -0300
Subject: [PATCH 3/7] improve docstring

---
 vision_agent/tools/meta_tools.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/vision_agent/tools/meta_tools.py b/vision_agent/tools/meta_tools.py
index 2ff6df3c..925de2d4 100644
--- a/vision_agent/tools/meta_tools.py
+++ b/vision_agent/tools/meta_tools.py
@@ -491,7 +491,9 @@ def florencev2_fine_tuned_object_detection(
             image,
             'person looking at a coyote',
             UUID("381cd5f9-5dc4-472d-9260-f3bb89d31f83"),
-            model_is_ready=True
+            model_is_ready=check_if_fine_tuned_florencev2_is_ready(
+                UUID("381cd5f9-5dc4-472d-9260-f3bb89d31f83")
+            )
         )
         [
             {'score': 1.0, 'label': 'person', 'bbox': [0.1, 0.11, 0.35, 0.4]},

From a61ac2667d69674265036dab7cbbc7dfc37b6124 Mon Sep 17 00:00:00 2001
From: Dayanne Fernandes <dayannefernandesc@gmail.com>
Date: Fri, 23 Aug 2024 18:20:20 -0300
Subject: [PATCH 4/7] move fine-tuning tools from meta_tools to tools

---
 vision_agent/agent/vision_agent.py            |   2 +-
 vision_agent/clients/landing_public_api.py    |   2 +-
 vision_agent/tools/__init__.py                |   3 -
 vision_agent/tools/meta_tools.py              | 155 +-----------------
 vision_agent/tools/tools.py                   | 147 +++++++++++++++++
 .../{meta_tools_types.py => tools_types.py}   |   0
 6 files changed, 151 insertions(+), 158 deletions(-)
 rename vision_agent/tools/{meta_tools_types.py => tools_types.py} (100%)

diff --git a/vision_agent/agent/vision_agent.py b/vision_agent/agent/vision_agent.py
index 996e5eac..cfb482e1 100644
--- a/vision_agent/agent/vision_agent.py
+++ b/vision_agent/agent/vision_agent.py
@@ -28,7 +28,7 @@ class DefaultImports:
     code = [
         "from typing import *",
         "from vision_agent.utils.execute import CodeInterpreter",
-        "from vision_agent.tools.meta_tools import generate_vision_code, edit_vision_code, open_file, create_file, scroll_up, scroll_down, edit_file, get_tool_descriptions, florencev2_fine_tuning, florencev2_fine_tuned_object_detection, check_if_fine_tuned_florencev2_is_ready",
+        "from vision_agent.tools.meta_tools import generate_vision_code, edit_vision_code, open_file, create_file, scroll_up, scroll_down, edit_file, get_tool_descriptions",
     ]
 
     @staticmethod
diff --git a/vision_agent/clients/landing_public_api.py b/vision_agent/clients/landing_public_api.py
index 09f98b44..f9d52389 100644
--- a/vision_agent/clients/landing_public_api.py
+++ b/vision_agent/clients/landing_public_api.py
@@ -4,7 +4,7 @@
 
 from vision_agent.clients.http import BaseHTTP
 from vision_agent.utils.type_defs import LandingaiAPIKey
-from vision_agent.tools.meta_tools_types import BboxInputBase64, PromptTask, JobStatus
+from vision_agent.tools.tools_types import BboxInputBase64, PromptTask, JobStatus
 
 
 class LandingPublicAPI(BaseHTTP):
diff --git a/vision_agent/tools/__init__.py b/vision_agent/tools/__init__.py
index 4a863994..53b64ffb 100644
--- a/vision_agent/tools/__init__.py
+++ b/vision_agent/tools/__init__.py
@@ -2,9 +2,6 @@
 
 from .meta_tools import (
     META_TOOL_DOCSTRING,
-    florencev2_fine_tuning,
-    florencev2_fine_tuned_object_detection,
-    check_if_fine_tuned_florencev2_is_ready,
 )
 from .prompts import CHOOSE_PARAMS, SYSTEM_PROMPT
 from .tools import (
diff --git a/vision_agent/tools/meta_tools.py b/vision_agent/tools/meta_tools.py
index 925de2d4..7c857550 100644
--- a/vision_agent/tools/meta_tools.py
+++ b/vision_agent/tools/meta_tools.py
@@ -1,25 +1,13 @@
 import os
 import subprocess
-from uuid import UUID
 from pathlib import Path
 from typing import Any, Dict, List, Union
 
-import numpy as np
-
 import vision_agent as va
 from vision_agent.lmm.types import Message
-from vision_agent.tools.tool_utils import get_tool_documentation, send_inference_request
+from vision_agent.tools.tool_utils import get_tool_documentation
 from vision_agent.tools.tools import TOOL_DESCRIPTIONS
-from vision_agent.utils.image_utils import convert_to_b64, normalize_bbox
-from vision_agent.clients.landing_public_api import LandingPublicAPI
-from vision_agent.tools.meta_tools_types import (
-    BboxInput,
-    BboxInputBase64,
-    PromptTask,
-    Florencev2FtRequest,
-    FineTuning,
-    JobStatus,
-)
+
 
 # These tools are adapted from SWE-Agent https://github.com/princeton-nlp/SWE-agent
 
@@ -398,142 +386,6 @@ def get_tool_descriptions() -> str:
     return TOOL_DESCRIPTIONS
 
 
-def florencev2_fine_tuning(bboxes: List[Dict[str, Any]], task: str) -> UUID:
-    """'florencev2_fine_tuning' is a tool that fine-tune florencev2 to be able
-    to detect objects in an image based on a given dataset. It returns the fine
-    tuning job id.
-
-    Parameters:
-        bboxes (List[BboxInput]): A list of BboxInput containing the
-            image path, labels and bounding boxes.
-        task (PromptTask): The florencev2 fine-tuning task. The options are
-            CAPTION, CAPTION_TO_PHRASE_GROUNDING and OBJECT_DETECTION.
-
-    Returns:
-        UUID: The fine tuning job id, this id will used to retrieve the fine
-            tuned model.
-
-    Example
-    -------
-        >>> fine_tuning_job_id = florencev2_fine_tuning(
-            [{'image_path': 'filename.png', 'labels': ['screw'], 'bboxes': [[370, 30, 560, 290]]},
-             {'image_path': 'filename.png', 'labels': ['screw'], 'bboxes': [[120, 0, 300, 170]]}],
-             "OBJECT_DETECTION"
-        )
-    """
-    bboxes_input = [BboxInput.model_validate(bbox) for bbox in bboxes]
-    task_input = PromptTask[task]
-    fine_tuning_request = [
-        BboxInputBase64(
-            image=convert_to_b64(bbox_input.image_path),
-            filename=bbox_input.image_path.split("/")[-1],
-            labels=bbox_input.labels,
-            bboxes=bbox_input.bboxes,
-        )
-        for bbox_input in bboxes_input
-    ]
-    landing_api = LandingPublicAPI()
-    return landing_api.launch_fine_tuning_job(
-        "florencev2", task_input, fine_tuning_request
-    )
-
-
-def check_if_fine_tuned_florencev2_is_ready(model_id: UUID) -> bool:
-    """'check_if_fine_tuned_florencev2_is_ready' is a tool that checks whether
-    is possible to use a certain florencev2 model. It checks if the status
-    is SUCCEEDED.
-
-    Parameters:
-        model_id (UUID): The fine-tuned model id.
-
-    Returns:
-        bool: The indication if the model is ready to be used or not. If this
-        is False, it's recommended to wait 5 seconds before checking again.
-
-    Example
-    -------
-        >>> check_if_fine_tuned_florencev2_is_ready(UUID("381cd5f9-5dc4-472d-9260-f3bb89d31f83"))
-        True
-    """
-    # check if job succeeded first
-    landing_api = LandingPublicAPI()
-    status = landing_api.check_fine_tuning_job(model_id)
-    return status is JobStatus.SUCCEEDED
-
-
-def florencev2_fine_tuned_object_detection(
-    image: np.ndarray, prompt: str, model_id: UUID, task: str, model_is_ready: bool
-) -> List[Dict[str, Any]]:
-    """'florencev2_fine_tuned_object_detection' is a tool that uses a fine tuned model
-    to detect objects given a text prompt such as a phrase or class names separated by
-    commas. It returns a list of detected objects as labels and their location as
-    bounding boxes with score of 1.0.
-
-    Parameters:
-        image (np.ndarray): The image to used to detect objects.
-        prompt (str): The prompt to help find objects in the image.
-        model_id (UUID): The fine-tuned model id.
-        task (PromptTask): The florencev2 fine-tuning task. The options are
-            CAPTION, CAPTION_TO_PHRASE_GROUNDING and OBJECT_DETECTION.
-        model_is_ready (bool): If the model is ready to be used. It's recommended
-            to get this value from the function check_if_fine_tuned_florencev2_is_ready.
-
-    Returns:
-        List[Dict[str, Any]]: A list of dictionaries containing the score, label, and
-            bounding box of the detected objects with normalized coordinates between 0
-            and 1 (xmin, ymin, xmax, ymax). xmin and ymin are the coordinates of the
-            top-left and xmax and ymax are the coordinates of the bottom-right of the
-            bounding box. The scores are always 1.0 and cannot be thresholded
-
-    Example
-    -------
-        >>> florencev2_fine_tuned_object_detection(
-            image,
-            'person looking at a coyote',
-            UUID("381cd5f9-5dc4-472d-9260-f3bb89d31f83"),
-            model_is_ready=check_if_fine_tuned_florencev2_is_ready(
-                UUID("381cd5f9-5dc4-472d-9260-f3bb89d31f83")
-            )
-        )
-        [
-            {'score': 1.0, 'label': 'person', 'bbox': [0.1, 0.11, 0.35, 0.4]},
-            {'score': 1.0, 'label': 'coyote', 'bbox': [0.34, 0.21, 0.85, 0.5},
-        ]
-    """
-    if not model_is_ready:
-        return []
-
-    task = PromptTask[task]
-    if task is PromptTask.OBJECT_DETECTION:
-        prompt = ""
-
-    data_obj = Florencev2FtRequest(
-        image=convert_to_b64(image),
-        task=task,
-        tool="florencev2_fine_tuning",
-        prompt=prompt,
-        fine_tuning=FineTuning(job_id=model_id),
-    )
-    data = data_obj.model_dump(by_alias=True)
-    metadata_payload = {"function_name": "florencev2_fine_tuned_object_detection"}
-    detections = send_inference_request(
-        data, "tools", v2=False, metadata_payload=metadata_payload
-    )
-
-    detections = detections[task.value]
-    return_data = []
-    image_size = image.shape[:2]
-    for i in range(len(detections["bboxes"])):
-        return_data.append(
-            {
-                "score": 1.0,
-                "label": detections["labels"][i],
-                "bbox": normalize_bbox(detections["bboxes"][i], image_size),
-            }
-        )
-    return return_data
-
-
 META_TOOL_DOCSTRING = get_tool_documentation(
     [
         get_tool_descriptions,
@@ -547,8 +399,5 @@ def florencev2_fine_tuned_object_detection(
         search_dir,
         search_file,
         find_file,
-        florencev2_fine_tuning,
-        florencev2_fine_tuned_object_detection,
-        check_if_fine_tuned_florencev2_is_ready,
     ]
 )
diff --git a/vision_agent/tools/tools.py b/vision_agent/tools/tools.py
index 0254a455..52f3c6d9 100644
--- a/vision_agent/tools/tools.py
+++ b/vision_agent/tools/tools.py
@@ -2,6 +2,7 @@
 import json
 import logging
 import tempfile
+from uuid import UUID
 from pathlib import Path
 from importlib import resources
 from typing import Any, Dict, List, Optional, Tuple, Union, cast
@@ -31,6 +32,15 @@
     convert_quad_box_to_bbox,
     rle_decode,
 )
+from vision_agent.tools.tools_types import (
+    BboxInput,
+    BboxInputBase64,
+    PromptTask,
+    Florencev2FtRequest,
+    FineTuning,
+    JobStatus,
+)
+from vision_agent.clients.landing_public_api import LandingPublicAPI
 
 register_heif_opener()
 
@@ -1285,6 +1295,143 @@ def overlay_heat_map(
     return np.array(combined)
 
 
+# TODO: add this function to the imports so that it is picked up by the agent
+def florencev2_fine_tuning(bboxes: List[Dict[str, Any]], task: str) -> UUID:
+    """'florencev2_fine_tuning' is a tool that fine-tunes florencev2 to be able
+    to detect objects in an image based on a given dataset. It returns the fine
+    tuning job id.
+
+    Parameters:
+        bboxes (List[BboxInput]): A list of BboxInput containing the
+            image path, labels and bounding boxes.
+        task (PromptTask): The florencev2 fine-tuning task. The options are
+            CAPTION, CAPTION_TO_PHRASE_GROUNDING and OBJECT_DETECTION.
+
+    Returns:
+        UUID: The fine tuning job id, this id will be used to retrieve the fine
+            tuned model.
+
+    Example
+    -------
+        >>> fine_tuning_job_id = florencev2_fine_tuning(
+            [{'image_path': 'filename.png', 'labels': ['screw'], 'bboxes': [[370, 30, 560, 290]]},
+             {'image_path': 'filename.png', 'labels': ['screw'], 'bboxes': [[120, 0, 300, 170]]}],
+             "OBJECT_DETECTION"
+        )
+    """
+    bboxes_input = [BboxInput.model_validate(bbox) for bbox in bboxes]
+    task_input = PromptTask[task]
+    fine_tuning_request = [
+        BboxInputBase64(
+            image=convert_to_b64(bbox_input.image_path),
+            filename=bbox_input.image_path.split("/")[-1],
+            labels=bbox_input.labels,
+            bboxes=bbox_input.bboxes,
+        )
+        for bbox_input in bboxes_input
+    ]
+    landing_api = LandingPublicAPI()
+    return landing_api.launch_fine_tuning_job(
+        "florencev2", task_input, fine_tuning_request
+    )
+
+
+def check_if_fine_tuned_florencev2_is_ready(model_id: UUID) -> bool:
+    """'check_if_fine_tuned_florencev2_is_ready' is a tool that checks whether
+    is possible to use a certain florencev2 model. It checks if the status
+    is SUCCEEDED.
+
+    Parameters:
+        model_id (UUID): The fine-tuned model id.
+
+    Returns:
+        bool: The indication if the model is ready to be used or not. If this
+        is False, it's recommended to wait 5 seconds before checking again.
+
+    Example
+    -------
+        >>> check_if_fine_tuned_florencev2_is_ready(UUID("381cd5f9-5dc4-472d-9260-f3bb89d31f83"))
+        True
+    """
+    # check if job succeeded first
+    landing_api = LandingPublicAPI()
+    status = landing_api.check_fine_tuning_job(model_id)
+    return status is JobStatus.SUCCEEDED
+
+
+def florencev2_fine_tuned_object_detection(
+    image: np.ndarray, prompt: str, model_id: UUID, task: str, model_is_ready: bool
+) -> List[Dict[str, Any]]:
+    """'florencev2_fine_tuned_object_detection' is a tool that uses a fine tuned model
+    to detect objects given a text prompt such as a phrase or class names separated by
+    commas. It returns a list of detected objects as labels and their location as
+    bounding boxes with score of 1.0.
+
+    Parameters:
+        image (np.ndarray): The image used to detect objects.
+        prompt (str): The prompt to help find objects in the image.
+        model_id (UUID): The fine-tuned model id.
+        task (PromptTask): The florencev2 fine-tuning task. The options are
+            CAPTION, CAPTION_TO_PHRASE_GROUNDING and OBJECT_DETECTION.
+        model_is_ready (bool): If the model is ready to be used. It's recommended
+            to get this value from the function check_if_fine_tuned_florencev2_is_ready.
+
+    Returns:
+        List[Dict[str, Any]]: A list of dictionaries containing the score, label, and
+            bounding box of the detected objects with normalized coordinates between 0
+            and 1 (xmin, ymin, xmax, ymax). xmin and ymin are the coordinates of the
+            top-left and xmax and ymax are the coordinates of the bottom-right of the
+            bounding box. The scores are always 1.0 and cannot be thresholded
+
+    Example
+    -------
+        >>> florencev2_fine_tuned_object_detection(
+            image,
+            'person looking at a coyote',
+            UUID("381cd5f9-5dc4-472d-9260-f3bb89d31f83"),
+            model_is_ready=check_if_fine_tuned_florencev2_is_ready(
+                UUID("381cd5f9-5dc4-472d-9260-f3bb89d31f83")
+            )
+        )
+        [
+            {'score': 1.0, 'label': 'person', 'bbox': [0.1, 0.11, 0.35, 0.4]},
+            {'score': 1.0, 'label': 'coyote', 'bbox': [0.34, 0.21, 0.85, 0.5]},
+        ]
+    """
+    if not model_is_ready:
+        return []
+
+    task = PromptTask[task]
+    if task is PromptTask.OBJECT_DETECTION:
+        prompt = ""
+
+    data_obj = Florencev2FtRequest(
+        image=convert_to_b64(image),
+        task=task,
+        tool="florencev2_fine_tuning",
+        prompt=prompt,
+        fine_tuning=FineTuning(job_id=model_id),
+    )
+    data = data_obj.model_dump(by_alias=True)
+    metadata_payload = {"function_name": "florencev2_fine_tuned_object_detection"}
+    detections = send_inference_request(
+        data, "tools", v2=False, metadata_payload=metadata_payload
+    )
+
+    detections = detections[task.value]
+    return_data = []
+    image_size = image.shape[:2]
+    for i in range(len(detections["bboxes"])):
+        return_data.append(
+            {
+                "score": 1.0,
+                "label": detections["labels"][i],
+                "bbox": normalize_bbox(detections["bboxes"][i], image_size),
+            }
+        )
+    return return_data
+
+
 TOOLS = [
     owl_v2,
     grounding_sam,
diff --git a/vision_agent/tools/meta_tools_types.py b/vision_agent/tools/tools_types.py
similarity index 100%
rename from vision_agent/tools/meta_tools_types.py
rename to vision_agent/tools/tools_types.py

From f88cd6c236b0c957349eaa54cccaef3987ef7dd4 Mon Sep 17 00:00:00 2001
From: Dayanne Fernandes <dayannefernandesc@gmail.com>
Date: Fri, 23 Aug 2024 18:29:19 -0300
Subject: [PATCH 5/7] raise exception when model is not ready

---
 vision_agent/tools/tools.py      | 40 +++++++-------------------------
 vision_agent/utils/exceptions.py |  7 ++++++
 2 files changed, 15 insertions(+), 32 deletions(-)

diff --git a/vision_agent/tools/tools.py b/vision_agent/tools/tools.py
index 52f3c6d9..6cb697a3 100644
--- a/vision_agent/tools/tools.py
+++ b/vision_agent/tools/tools.py
@@ -21,6 +21,7 @@
     get_tool_documentation,
     get_tools_df,
 )
+from vision_agent.utils.exceptions import FineTuneModelIsNotReady
 from vision_agent.utils import extract_frames_from_video
 from vision_agent.utils.execute import FileSerializer, MimeType
 from vision_agent.utils.image_utils import (
@@ -1336,31 +1337,8 @@ def florencev2_fine_tuning(bboxes: List[Dict[str, Any]], task: str) -> UUID:
     )
 
 
-def check_if_fine_tuned_florencev2_is_ready(model_id: UUID) -> bool:
-    """'check_if_fine_tuned_florencev2_is_ready' is a tool that checks whether
-    is possible to use a certain florencev2 model. It checks if the status
-    is SUCCEEDED.
-
-    Parameters:
-        model_id (UUID): The fine-tuned model id.
-
-    Returns:
-        bool: The indication if the model is ready to be used or not. If this
-        is False, it's recommended to wait 5 seconds before checking again.
-
-    Example
-    -------
-        >>> check_if_fine_tuned_florencev2_is_ready(UUID("381cd5f9-5dc4-472d-9260-f3bb89d31f83"))
-        True
-    """
-    # check if job succeeded first
-    landing_api = LandingPublicAPI()
-    status = landing_api.check_fine_tuning_job(model_id)
-    return status is JobStatus.SUCCEEDED
-
-
 def florencev2_fine_tuned_object_detection(
-    image: np.ndarray, prompt: str, model_id: UUID, task: str, model_is_ready: bool
+    image: np.ndarray, prompt: str, model_id: UUID, task: str
 ) -> List[Dict[str, Any]]:
     """'florencev2_fine_tuned_object_detection' is a tool that uses a fine tuned model
     to detect objects given a text prompt such as a phrase or class names separated by
@@ -1373,8 +1351,6 @@ def florencev2_fine_tuned_object_detection(
         model_id (UUID): The fine-tuned model id.
         task (PromptTask): The florencev2 fine-tuning task. The options are
             CAPTION, CAPTION_TO_PHRASE_GROUNDING and OBJECT_DETECTION.
-        model_is_ready (bool): If the model is ready to be used. It's recommended
-            to get this value from the function check_if_fine_tuned_florencev2_is_ready.
 
     Returns:
         List[Dict[str, Any]]: A list of dictionaries containing the score, label, and
@@ -1388,18 +1364,18 @@ def florencev2_fine_tuned_object_detection(
         >>> florencev2_fine_tuned_object_detection(
             image,
             'person looking at a coyote',
-            UUID("381cd5f9-5dc4-472d-9260-f3bb89d31f83"),
-            model_is_ready=check_if_fine_tuned_florencev2_is_ready(
-                UUID("381cd5f9-5dc4-472d-9260-f3bb89d31f83")
-            )
+            UUID("381cd5f9-5dc4-472d-9260-f3bb89d31f83")
         )
         [
             {'score': 1.0, 'label': 'person', 'bbox': [0.1, 0.11, 0.35, 0.4]},
             {'score': 1.0, 'label': 'coyote', 'bbox': [0.34, 0.21, 0.85, 0.5},
         ]
     """
-    if not model_is_ready:
-        return []
+    # check if job succeeded first
+    landing_api = LandingPublicAPI()
+    status = landing_api.check_fine_tuning_job(model_id)
+    if status is not JobStatus.SUCCEEDED:
+        raise FineTuneModelIsNotReady()
 
     task = PromptTask[task]
     if task is PromptTask.OBJECT_DETECTION:
diff --git a/vision_agent/utils/exceptions.py b/vision_agent/utils/exceptions.py
index 41f81dad..ce2066b2 100644
--- a/vision_agent/utils/exceptions.py
+++ b/vision_agent/utils/exceptions.py
@@ -49,3 +49,10 @@ class RemoteSandboxClosedError(RemoteSandboxError):
     """
 
     is_retryable = True
+
+
+class FineTuneModelIsNotReady(Exception):
+    """Exception raised when the fine-tune model is not ready (job not SUCCEEDED).
+    If this is raised, it's recommended to wait 5 seconds before trying to use
+    the model again.
+    """

From 653bfd210eb634168b859e10a914b853887c8a2a Mon Sep 17 00:00:00 2001
From: Dayanne Fernandes <dayannefernandesc@gmail.com>
Date: Fri, 23 Aug 2024 19:29:58 -0300
Subject: [PATCH 6/7] comment

---
 vision_agent/tools/tools.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/vision_agent/tools/tools.py b/vision_agent/tools/tools.py
index 6cb697a3..6d20f4f9 100644
--- a/vision_agent/tools/tools.py
+++ b/vision_agent/tools/tools.py
@@ -1337,6 +1337,7 @@ def florencev2_fine_tuning(bboxes: List[Dict[str, Any]], task: str) -> UUID:
     )
 
 
+# TODO: add this function to the imports so that it is picked up by the agent
 def florencev2_fine_tuned_object_detection(
     image: np.ndarray, prompt: str, model_id: UUID, task: str
 ) -> List[Dict[str, Any]]:

From 14126f7269c8635464acbc7af25181e08f5716c2 Mon Sep 17 00:00:00 2001
From: Dayanne Fernandes <dayannefernandesc@gmail.com>
Date: Fri, 23 Aug 2024 21:57:43 -0300
Subject: [PATCH 7/7] handle exceptions

---
 vision_agent/clients/http.py               |  7 ++-----
 vision_agent/clients/landing_public_api.py | 10 +++++++++-
 vision_agent/utils/exceptions.py           |  6 ++++++
 3 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/vision_agent/clients/http.py b/vision_agent/clients/http.py
index dc969595..fd6b3e32 100644
--- a/vision_agent/clients/http.py
+++ b/vision_agent/clients/http.py
@@ -4,7 +4,6 @@
 
 from requests import Session
 from requests.adapters import HTTPAdapter
-from requests.exceptions import ConnectionError, RequestException, Timeout
 
 _LOGGER = logging.getLogger(__name__)
 
@@ -38,11 +37,10 @@ def post(self, url: str, payload: Dict[str, Any]) -> Dict[str, Any]:
             response.raise_for_status()
             result: Dict[str, Any] = response.json()
             _LOGGER.info(json.dumps(result))
-        except (ConnectionError, Timeout, RequestException) as err:
-            _LOGGER.warning(f"Error: {err}.")
         except json.JSONDecodeError:
             resp_text = response.text
             _LOGGER.warning(f"Response seems incorrect: '{resp_text}'.")
+            raise
         return result
 
     def get(self, url: str) -> Dict[str, Any]:
@@ -53,9 +51,8 @@ def get(self, url: str) -> Dict[str, Any]:
             response.raise_for_status()
             result: Dict[str, Any] = response.json()
             _LOGGER.info(json.dumps(result))
-        except (ConnectionError, Timeout, RequestException) as err:
-            _LOGGER.warning(f"Error: {err}.")
         except json.JSONDecodeError:
             resp_text = response.text
             _LOGGER.warning(f"Response seems incorrect: '{resp_text}'.")
+            raise
         return result
diff --git a/vision_agent/clients/landing_public_api.py b/vision_agent/clients/landing_public_api.py
index f9d52389..3fd1928e 100644
--- a/vision_agent/clients/landing_public_api.py
+++ b/vision_agent/clients/landing_public_api.py
@@ -2,8 +2,11 @@
 from uuid import UUID
 from typing import List
 
+from requests.exceptions import HTTPError
+
 from vision_agent.clients.http import BaseHTTP
 from vision_agent.utils.type_defs import LandingaiAPIKey
+from vision_agent.utils.exceptions import FineTuneModelNotFound
 from vision_agent.tools.tools_types import BboxInputBase64, PromptTask, JobStatus
 
 
@@ -27,4 +30,9 @@ def launch_fine_tuning_job(
 
     def check_fine_tuning_job(self, job_id: UUID) -> JobStatus:
         url = f"v1/agent/jobs/fine-tuning/{job_id}/status"
-        return JobStatus(self.get(url)["status"])
+        try:
+            return JobStatus(self.get(url)["status"])
+        except HTTPError as err:
+            if err.response.status_code == 404:  # unknown job id
+                raise FineTuneModelNotFound() from err
+            raise  # other HTTP errors must propagate, not fall through
diff --git a/vision_agent/utils/exceptions.py b/vision_agent/utils/exceptions.py
index ce2066b2..22def208 100644
--- a/vision_agent/utils/exceptions.py
+++ b/vision_agent/utils/exceptions.py
@@ -56,3 +56,9 @@ class FineTuneModelIsNotReady(Exception):
     If this is raised, it's recommended to wait 5 seconds before trying to use
     the model again.
     """
+
+
+class FineTuneModelNotFound(Exception):
+    """Exception raised when the fine-tune model is not found (HTTP 404 on lookup).
+    If this is raised, it's recommended to try another model id.
+    """