From 0b1c88665bb3787d973946e96a26b366e22632ea Mon Sep 17 00:00:00 2001 From: Dayanne Fernandes Date: Tue, 1 Oct 2024 18:20:52 -0300 Subject: [PATCH] linter --- vision_agent/tools/tools.py | 2 +- vision_agent/tools/tools_types.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/vision_agent/tools/tools.py b/vision_agent/tools/tools.py index ff360d87..7faa123a 100644 --- a/vision_agent/tools/tools.py +++ b/vision_agent/tools/tools.py @@ -1221,7 +1221,7 @@ def florence2_phrase_grounding_image( def florence2_phrase_grounding_video( prompt: str, frames: List[np.ndarray], fine_tune_id: Optional[str] = None -) -> List[Dict[str, Any]]: +) -> List[List[Dict[str, Any]]]: """'florence2_phrase_grounding_video' will run florence2 on each frame of a video. It can detect multiple objects given a text prompt which can be object names or caption. You can optionally separate the object names in the text with commas. diff --git a/vision_agent/tools/tools_types.py b/vision_agent/tools/tools_types.py index 4b24aabb..1cc765b6 100644 --- a/vision_agent/tools/tools_types.py +++ b/vision_agent/tools/tools_types.py @@ -27,8 +27,8 @@ class PromptTask(str, Enum): class Florence2FtRequest(BaseModel): model_config = ConfigDict(populate_by_name=True) - image: str | None - video: bytes | None + image: Optional[str] = None + video: Optional[bytes] = None task: PromptTask prompt: Optional[str] = "" chunk_length_frames: Optional[int] = None