From c19f1458619c7f55079ed047d6a29b38f2165138 Mon Sep 17 00:00:00 2001 From: Shankar <90070882+shankar-vision-eng@users.noreply.github.com> Date: Tue, 4 Jun 2024 15:44:27 -0700 Subject: [PATCH] Optimize mask distance (#111) * adding new algorithm for mask calculation * added test case * fixed flake8 error --- tests/test_tools.py | 21 ++++++++++++++++++++ vision_agent/tools/tools.py | 39 ++++++++++++++++++++++++++++++++----- 2 files changed, 55 insertions(+), 5 deletions(-) diff --git a/tests/test_tools.py b/tests/test_tools.py index e5ebe4f3..410ae561 100644 --- a/tests/test_tools.py +++ b/tests/test_tools.py @@ -1,4 +1,5 @@ import skimage as ski +import numpy as np from vision_agent.tools import ( clip, @@ -9,6 +10,7 @@ ocr, visual_prompt_counting, zero_shot_counting, + closest_mask_distance, ) @@ -82,3 +84,22 @@ def test_ocr() -> None: image=img, ) assert any("Region-based segmentation" in res["label"] for res in result) + + +def test_mask_distance(): + # Create two binary masks + mask1 = np.zeros((100, 100), dtype=np.uint8) + mask1[:10, :10] = 1 # Top left + mask2 = np.zeros((100, 100), dtype=np.uint8) + mask2[-10:, -10:] = 1 # Bottom right + + # Calculate the distance between the masks + distance = closest_mask_distance(mask1, mask2) + print(f"Distance between the masks: {distance}") + + # Check the result + assert np.isclose( + distance, + np.sqrt(2) * 81, + atol=1e-2, + ), f"Expected {np.sqrt(2) * 81}, got {distance}" diff --git a/vision_agent/tools/tools.py b/vision_agent/tools/tools.py index 9f44e536..3a5c0c51 100644 --- a/vision_agent/tools/tools.py +++ b/vision_agent/tools/tools.py @@ -7,11 +7,11 @@ from pathlib import Path from typing import Any, Callable, Dict, List, Tuple, Union, cast +import cv2 import numpy as np import pandas as pd import requests from PIL import Image, ImageDraw, ImageFont -from scipy.spatial import distance # type: ignore from vision_agent.tools.tool_utils import _send_inference_request from vision_agent.utils import extract_frames_from_video @@ -421,10 +421,39 @@ def closest_mask_distance(mask1: np.ndarray, mask2: np.ndarray) -> float: mask1 = np.clip(mask1, 0, 1) mask2 = np.clip(mask2, 0, 1) - mask1_points = np.transpose(np.nonzero(mask1)) - mask2_points = np.transpose(np.nonzero(mask2)) - dist_matrix = distance.cdist(mask1_points, mask2_points, "euclidean") - return cast(float, np.min(dist_matrix)) + contours1, _ = cv2.findContours(mask1, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + contours2, _ = cv2.findContours(mask2, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + largest_contour1 = max(contours1, key=cv2.contourArea) + largest_contour2 = max(contours2, key=cv2.contourArea) + polygon1 = cv2.approxPolyDP(largest_contour1, 1.0, True) + polygon2 = cv2.approxPolyDP(largest_contour2, 1.0, True) + min_distance = np.inf + + small_polygon, larger_contour = ( + (polygon1, largest_contour2) + if len(largest_contour1) < len(largest_contour2) + else (polygon2, largest_contour1) + ) + + # For each point in the first polygon + for point in small_polygon: + # Calculate the distance to the second polygon, -1 is to invert result as point inside the polygon is positive + + distance = ( + cv2.pointPolygonTest( + larger_contour, (point[0, 0].item(), point[0, 1].item()), True + ) + * -1 + ) + + # If the distance is negative, the point is inside the polygon, so the distance is 0 + if distance < 0: + continue + else: + # Update the minimum distance if the point is outside the polygon + min_distance = min(min_distance, distance) + + return min_distance if min_distance != np.inf else 0.0 def closest_box_distance(