Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize mask distance #111

Merged
merged 3 commits into from
Jun 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions tests/test_tools.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import skimage as ski
import numpy as np

from vision_agent.tools import (
clip,
Expand All @@ -9,6 +10,7 @@
ocr,
visual_prompt_counting,
zero_shot_counting,
closest_mask_distance,
)


Expand Down Expand Up @@ -82,3 +84,22 @@ def test_ocr() -> None:
image=img,
)
assert any("Region-based segmentation" in res["label"] for res in result)


def test_mask_distance() -> None:
    """Check closest_mask_distance on two squares in opposite image corners.

    Two 10x10 squares are placed in the top-left and bottom-right corners of
    a 100x100 canvas. Their nearest pixels are (9, 9) and (90, 90), which are
    81 pixels apart along each axis, so the expected Euclidean distance is
    81 * sqrt(2).
    """
    # Create two binary masks
    mask1 = np.zeros((100, 100), dtype=np.uint8)
    mask1[:10, :10] = 1  # Top left
    mask2 = np.zeros((100, 100), dtype=np.uint8)
    mask2[-10:, -10:] = 1  # Bottom right

    # Compute the expectation once so the check and the failure message
    # cannot drift apart.
    expected = np.sqrt(2) * 81

    # Calculate the distance between the masks
    distance = closest_mask_distance(mask1, mask2)

    # Small absolute tolerance: the implementation may work on approximated
    # contours rather than exact pixel pairs.
    assert np.isclose(
        distance,
        expected,
        atol=1e-2,
    ), f"Expected {expected}, got {distance}"
39 changes: 34 additions & 5 deletions vision_agent/tools/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@
from pathlib import Path
from typing import Any, Callable, Dict, List, Tuple, Union, cast

import cv2
import numpy as np
import pandas as pd
import requests
from PIL import Image, ImageDraw, ImageFont
from scipy.spatial import distance # type: ignore

from vision_agent.tools.tool_utils import _send_inference_request
from vision_agent.utils import extract_frames_from_video
Expand Down Expand Up @@ -421,10 +421,39 @@ def closest_mask_distance(mask1: np.ndarray, mask2: np.ndarray) -> float:

mask1 = np.clip(mask1, 0, 1)
mask2 = np.clip(mask2, 0, 1)
mask1_points = np.transpose(np.nonzero(mask1))
mask2_points = np.transpose(np.nonzero(mask2))
dist_matrix = distance.cdist(mask1_points, mask2_points, "euclidean")
return cast(float, np.min(dist_matrix))
contours1, _ = cv2.findContours(mask1, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
contours2, _ = cv2.findContours(mask2, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
largest_contour1 = max(contours1, key=cv2.contourArea)
largest_contour2 = max(contours2, key=cv2.contourArea)
polygon1 = cv2.approxPolyDP(largest_contour1, 1.0, True)
polygon2 = cv2.approxPolyDP(largest_contour2, 1.0, True)
min_distance = np.inf

small_polygon, larger_contour = (
(polygon1, largest_contour2)
if len(largest_contour1) < len(largest_contour2)
else (polygon2, largest_contour1)
)

# For each point in the first polygon
for point in small_polygon:
# Calculate the distance to the second polygon, -1 is to invert result as point inside the polygon is positive

distance = (
cv2.pointPolygonTest(
larger_contour, (point[0, 0].item(), point[0, 1].item()), True
)
* -1
)

# If the distance is negative, the point is inside the polygon, so the distance is 0
if distance < 0:
continue
else:
# Update the minimum distance if the point is outside the polygon
min_distance = min(min_distance, distance)

return min_distance if min_distance != np.inf else 0.0


def closest_box_distance(
Expand Down
Loading