diff --git a/vision_agent/tools/meta_tools.py b/vision_agent/tools/meta_tools.py index 73d7a020..4a82436d 100644 --- a/vision_agent/tools/meta_tools.py +++ b/vision_agent/tools/meta_tools.py @@ -2,16 +2,11 @@ import subprocess from pathlib import Path from typing import Any, Dict, List, Union -from uuid import UUID import vision_agent as va -from vision_agent.clients.landing_public_api import LandingPublicAPI from vision_agent.lmm.types import Message -from vision_agent.tools.meta_tools_types import BboxInput, BboxInputBase64, PromptTask from vision_agent.tools.tool_utils import get_tool_documentation from vision_agent.tools.tools import TOOL_DESCRIPTIONS -from vision_agent.utils.image_utils import convert_to_b64 - # These tools are adapted from SWE-Agent https://github.com/princeton-nlp/SWE-agent diff --git a/vision_agent/tools/tools.py b/vision_agent/tools/tools.py index e4e8262c..2dade7f7 100644 --- a/vision_agent/tools/tools.py +++ b/vision_agent/tools/tools.py @@ -2,11 +2,10 @@ import json import logging import tempfile -from uuid import UUID -from pathlib import Path from importlib import resources from pathlib import Path from typing import Any, Dict, List, Optional, Tuple, Union, cast +from uuid import UUID import cv2 import numpy as np @@ -16,15 +15,24 @@ from pillow_heif import register_heif_opener # type: ignore from pytube import YouTube # type: ignore +from vision_agent.clients.landing_public_api import LandingPublicAPI from vision_agent.tools.tool_utils import ( get_tool_descriptions, get_tool_documentation, get_tools_df, - send_inference_request, get_tools_info, + send_inference_request, +) +from vision_agent.tools.tools_types import ( + BboxInput, + BboxInputBase64, + FineTuning, + Florencev2FtRequest, + JobStatus, + PromptTask, ) -from vision_agent.utils.exceptions import FineTuneModelIsNotReady from vision_agent.utils import extract_frames_from_video +from vision_agent.utils.exceptions import FineTuneModelIsNotReady from vision_agent.utils.execute import FileSerializer, MimeType from vision_agent.utils.image_utils import ( b64_to_pil, @@ -38,15 +46,6 @@ rle_decode, rle_decode_array, ) -from vision_agent.tools.tools_types import ( - BboxInput, - BboxInputBase64, - PromptTask, - Florencev2FtRequest, - FineTuning, - JobStatus, -) -from vision_agent.clients.landing_public_api import LandingPublicAPI register_heif_opener()