diff --git a/vision_agent/agent/vision_agent.py b/vision_agent/agent/vision_agent.py index caba0533..d02ee0a9 100644 --- a/vision_agent/agent/vision_agent.py +++ b/vision_agent/agent/vision_agent.py @@ -2,7 +2,6 @@ import logging import sys import tempfile -from os import walk from pathlib import Path from typing import Any, Callable, Dict, List, Optional, Tuple, Union @@ -349,7 +348,7 @@ def parse_reflect(reflect: str) -> bool: def visualize_result(all_tool_results: List[Dict]) -> List[str]: - image_to_data = {} + image_to_data: Dict[str, Dict] = {} for tool_result in all_tool_results: if not tool_result["tool_name"] in ["grounding_sam_", "grounding_dino_"]: continue @@ -373,7 +372,7 @@ def visualize_result(all_tool_results: List[Dict]) -> List[str]: # calls can fail, so we need to check if the call was successful if not isinstance(call_result, dict): continue - if not "bboxes" in call_result: + if "bboxes" not in call_result: continue # if the call was successful, then we can add the image data diff --git a/vision_agent/tools/tools.py b/vision_agent/tools/tools.py index 55bbd477..f13c14dd 100644 --- a/vision_agent/tools/tools.py +++ b/vision_agent/tools/tools.py @@ -488,7 +488,7 @@ def __call__(self, mask1: Union[str, Path], mask2: Union[str, Path]) -> float: intersection = np.logical_and(np_mask1, np_mask2) union = np.logical_or(np_mask1, np_mask2) iou = np.sum(intersection) / np.sum(union) - return round(iou, 2) + return cast(float, round(iou, 2)) class ExtractFrames(Tool):