From 98482e9a45b4a02ee4370021333a74826f47f026 Mon Sep 17 00:00:00 2001
From: Dillon Laird
Date: Wed, 27 Mar 2024 13:27:34 -0700
Subject: [PATCH] added iou tools

---
Review notes (this section is ignored by `git am`):
- BboxIoU and SegIoU return 0.0 when the union is empty instead of dividing by zero.
- SegIoU closes both mask files after reading and returns a plain Python float.
- Bounding-box parameters are described as List[float], matching the examples.
- The post-image blob ids on the `index` lines below predate these edits.

 tests/test_tools.py         | 40 +++++++++++++++++++++++
 vision_agent/tools/tools.py | 63 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 102 insertions(+), 1 deletion(-)
 create mode 100644 tests/test_tools.py

diff --git a/tests/test_tools.py b/tests/test_tools.py
new file mode 100644
index 00000000..00f85072
--- /dev/null
+++ b/tests/test_tools.py
@@ -0,0 +1,40 @@
+import os
+import tempfile
+
+import numpy as np
+from PIL import Image
+
+from vision_agent.tools.tools import BboxIoU, SegIoU
+
+
+def test_bbox_iou():
+    bbox1 = [0, 0, 0.75, 0.75]
+    bbox2 = [0.25, 0.25, 1, 1]
+    assert BboxIoU()(bbox1, bbox2) == 0.29
+
+
+def test_bbox_iou_degenerate_boxes():
+    # Zero-area boxes have an empty union; the tool must not divide by zero.
+    assert BboxIoU()([0.5, 0.5, 0.5, 0.5], [0.5, 0.5, 0.5, 0.5]) == 0.0
+
+
+def test_seg_iou():
+    mask1 = np.zeros((10, 10), dtype=np.uint8)
+    mask1[2:4, 2:4] = 255
+    mask2 = np.zeros((10, 10), dtype=np.uint8)
+    mask2[3:5, 3:5] = 255
+    with tempfile.TemporaryDirectory() as tmpdir:
+        mask1_path = os.path.join(tmpdir, "mask1.png")
+        mask2_path = os.path.join(tmpdir, "mask2.png")
+        Image.fromarray(mask1).save(mask1_path)
+        Image.fromarray(mask2).save(mask2_path)
+        assert SegIoU()(mask1_path, mask2_path) == 0.14
+
+
+def test_seg_iou_empty_masks():
+    # Two all-zero masks have an empty union; the tool must return 0.0, not nan.
+    empty = np.zeros((10, 10), dtype=np.uint8)
+    with tempfile.TemporaryDirectory() as tmpdir:
+        mask_path = os.path.join(tmpdir, "empty.png")
+        Image.fromarray(empty).save(mask_path)
+        assert SegIoU()(mask_path, mask_path) == 0.0
diff --git a/vision_agent/tools/tools.py b/vision_agent/tools/tools.py
index 6e55f210..b543fa9d 100644
--- a/vision_agent/tools/tools.py
+++ b/vision_agent/tools/tools.py
@@ -388,7 +388,7 @@ class BboxArea(Tool):
         "required_parameters": [{"name": "bbox", "type": "List[int]"}],
         "examples": [
             {
-                "scenario": "If you want to calculate the area of the bounding box [0, 0, 100, 100]",
+                "scenario": "If you want to calculate the area of the bounding box [0.2, 0.21, 0.34, 0.42]",
                 "parameters": {"bboxes": [0.2, 0.21, 0.34, 0.42]},
             }
         ],
@@ -430,6 +430,65 @@ def __call__(self, masks: Union[str, Path]) -> float:
         return cast(float, round(np.sum(np_mask) / 255, 2))
 
 
+class BboxIoU(Tool):
+    r"""BboxIoU returns the intersection over union of two bounding boxes, normalized to 2 decimal places."""
+
+    name = "bbox_iou_"
+    description = "'bbox_iou_' returns the intersection over union of two bounding boxes."
+    usage = {
+        "required_parameters": [{"name": "bbox1", "type": "List[float]"}, {"name": "bbox2", "type": "List[float]"}],
+        "examples": [
+            {
+                "scenario": "If you want to calculate the intersection over union of the bounding boxes [0.2, 0.21, 0.34, 0.42] and [0.3, 0.31, 0.44, 0.52]",
+                "parameters": {"bbox1": [0.2, 0.21, 0.34, 0.42], "bbox2": [0.3, 0.31, 0.44, 0.52]},
+            }
+        ],
+    }
+
+    def __call__(self, bbox1: List[float], bbox2: List[float]) -> float:
+        # Boxes are [x_min, y_min, x_max, y_max]; the result is rounded to 2 decimal places.
+        x1, y1, x2, y2 = bbox1
+        x3, y3, x4, y4 = bbox2
+        # Clamp at zero so disjoint boxes contribute no overlap.
+        inter_w = max(0.0, min(x2, x4) - max(x1, x3))
+        inter_h = max(0.0, min(y2, y4) - max(y1, y3))
+        inter_area = inter_w * inter_h
+        union_area = (x2 - x1) * (y2 - y1) + (x4 - x3) * (y4 - y3) - inter_area
+        # Zero-area boxes give an empty union; return 0.0 instead of raising ZeroDivisionError.
+        if union_area <= 0:
+            return 0.0
+        return round(inter_area / union_area, 2)
+
+
+class SegIoU(Tool):
+    r"""SegIoU returns the intersection over union of two segmentation masks, normalized to 2 decimal places."""
+
+    name = "seg_iou_"
+    description = "'seg_iou_' returns the intersection over union of two segmentation masks."
+    usage = {
+        "required_parameters": [{"name": "mask1", "type": "str"}, {"name": "mask2", "type": "str"}],
+        "examples": [
+            {
+                "scenario": "If you want to calculate the intersection over union of the segmentation masks for mask1.png and mask2.png",
+                "parameters": {"mask1": "mask1.png", "mask2": "mask2.png"},
+            }
+        ],
+    }
+
+    def __call__(self, mask1: Union[str, Path], mask2: Union[str, Path]) -> float:
+        # Context managers close both files once the pixels are copied into arrays.
+        with Image.open(str(mask1)) as pil_mask1, Image.open(str(mask2)) as pil_mask2:
+            np_mask1 = np.array(pil_mask1) > 0
+            np_mask2 = np.array(pil_mask2) > 0
+        union = np.sum(np.logical_or(np_mask1, np_mask2))
+        # Two empty masks give an empty union; return 0.0 instead of dividing by zero.
+        if union == 0:
+            return 0.0
+        intersection = np.sum(np.logical_and(np_mask1, np_mask2))
+        # float() converts the NumPy scalar so callers get a plain Python float.
+        return round(float(intersection / union), 2)
+
+
 class Add(Tool):
     r"""Add returns the sum of all the arguments passed to it, normalized to 2 decimal places."""
 
@@ -558,6 +617,8 @@ def __call__(self, video_uri: str) -> list[tuple[str, float]]:
             Crop,
             BboxArea,
             SegArea,
+            BboxIoU,
+            SegIoU,
             Add,
             Subtract,
             Multiply,