From 0b1c88665bb3787d973946e96a26b366e22632ea Mon Sep 17 00:00:00 2001
From: Dayanne Fernandes <dayannefernandesc@gmail.com>
Date: Tue, 1 Oct 2024 18:20:52 -0300
Subject: [PATCH] linter: fix video grounding return type, use Optional defaults

---
 vision_agent/tools/tools.py       | 2 +-
 vision_agent/tools/tools_types.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/vision_agent/tools/tools.py b/vision_agent/tools/tools.py
index ff360d87..7faa123a 100644
--- a/vision_agent/tools/tools.py
+++ b/vision_agent/tools/tools.py
@@ -1221,7 +1221,7 @@ def florence2_phrase_grounding_image(
 
 def florence2_phrase_grounding_video(
     prompt: str, frames: List[np.ndarray], fine_tune_id: Optional[str] = None
-) -> List[Dict[str, Any]]:
+) -> List[List[Dict[str, Any]]]:
     """'florence2_phrase_grounding_video' will run florence2 on each frame of a video.
     It can detect multiple objects given a text prompt which can be object names or
     caption. You can optionally separate the object names in the text with commas.
diff --git a/vision_agent/tools/tools_types.py b/vision_agent/tools/tools_types.py
index 4b24aabb..1cc765b6 100644
--- a/vision_agent/tools/tools_types.py
+++ b/vision_agent/tools/tools_types.py
@@ -27,8 +27,8 @@ class PromptTask(str, Enum):
 class Florence2FtRequest(BaseModel):
     model_config = ConfigDict(populate_by_name=True)
 
-    image: str | None
-    video: bytes | None
+    image: Optional[str] = None
+    video: Optional[bytes] = None
     task: PromptTask
     prompt: Optional[str] = ""
     chunk_length_frames: Optional[int] = None