From bbed08925aa278b323dbe030e07957a24a5856f1 Mon Sep 17 00:00:00 2001
From: Alexander Berger <alexander.berger@jax.org>
Date: Thu, 10 Jul 2025 14:36:20 -0400
Subject: [PATCH] Rewriting VideoObservations.stitch_greedy_tracklets method
 and adding unit and benchmark tests

---
 src/mouse_tracking/utils/matching.py          | 146 +++++-
 tests/utils/matching/__init__.py              |   1 +
 .../matching/video_observations/__init__.py   |   1 +
 .../matching/video_observations/conftest.py   | 362 +++++++++++++
 .../test_benchmark_stich_greedy_tracklets.py  | 295 +++++++++++
 .../test_stitch_greedy_tracklets.py           | 483 ++++++++++++++++++
 6 files changed, 1270 insertions(+), 18 deletions(-)
 create mode 100644 tests/utils/matching/__init__.py
 create mode 100644 tests/utils/matching/video_observations/__init__.py
 create mode 100644 tests/utils/matching/video_observations/conftest.py
 create mode 100644 tests/utils/matching/video_observations/test_benchmark_stich_greedy_tracklets.py
 create mode 100644 tests/utils/matching/video_observations/test_stitch_greedy_tracklets.py

diff --git a/src/mouse_tracking/utils/matching.py b/src/mouse_tracking/utils/matching.py
index 685118c..60b5dea 100644
--- a/src/mouse_tracking/utils/matching.py
+++ b/src/mouse_tracking/utils/matching.py
@@ -1070,29 +1070,137 @@ def generate_greedy_tracklets(self, max_cost: float = -np.log(1e-3), rotate_pose
 		self._tracklet_gen_method = 'greedy'
 		self._make_tracklets()
 
-	def stitch_greedy_tracklets(self, num_tracks: int = None, all_embeds: bool = True, prioritize_long: bool = False):
-		"""Greedy method that links merges tracklets 1 at a time based on lowest cost.
 
-		Args:
-			num_tracks: number of tracks to produce
-			all_embeds: bool to include original tracklet centers as merges are made
-			prioritize_long: bool to adjust cost of linking with length of tracklets
-		"""
+	def stitch_greedy_tracklets(
+			self,
+			num_tracks: int | None = None,
+			all_embeds: bool = True,
+			prioritize_long: bool = False,
+	):
+		"""Optimized greedy method that links merges tracklets 1 at a time based on lowest cost.
+
+			Args:
+					num_tracks: number of tracks to produce
+					all_embeds: bool to include original tracklet centers as merges are made
+					prioritize_long: bool to adjust cost of linking with length of tracklets
+
+			Notes:
+					Optimized version eliminates O(n³) pandas DataFrame recreation bottleneck.
+					Uses numpy arrays and incremental cost matrix updates for O(n²) complexity.
+			"""
 		if num_tracks is None:
 			num_tracks = self._avg_observation
 
 		# copy original tracklet list, so that we can revert at the end
 		original_tracklets = self._tracklets
 
-		# We can use pandas to do slightly easier searching
-		current_costs = pd.DataFrame(self._get_transition_costs(all_embeds, True, longer_track_priority=float(prioritize_long)))
-		while not np.all(np.isinf(current_costs.to_numpy(na_value=np.inf))):
-			t1, t2 = np.unravel_index(np.argmin(current_costs.to_numpy(na_value=np.inf)), current_costs.shape)
-			tracklet_1 = current_costs.index[t1]
-			tracklet_2 = current_costs.columns[t2]
-			new_tracklet = Tracklet.from_tracklets([self._tracklets[tracklet_1], self._tracklets[tracklet_2]], True)
-			self._tracklets = [x for i, x in enumerate(self._tracklets) if i not in [tracklet_1, tracklet_2]] + [new_tracklet]
-			current_costs = pd.DataFrame(self._get_transition_costs(all_embeds, True, longer_track_priority=float(prioritize_long)))
+		# Early exit if no tracklets or only one tracklet
+		if len(self._tracklets) <= 1:
+			self._stitch_translation = {0: 0}
+			self._tracklets = original_tracklets
+			self._tracklet_stitch_method = "greedy"
+			return
+
+		# Get initial transition costs as dict and convert to numpy matrix
+		cost_dict = self._get_transition_costs(
+			all_embeds, True, longer_track_priority=float(prioritize_long)
+		)
+
+		# Build numpy cost matrix - work with a copy of tracklets for merging
+		working_tracklets = list(
+			self._tracklets
+		)  # Copy for modifications during merging
+		n_tracklets = len(working_tracklets)
+
+		# Initialize cost matrix with infinity
+		cost_matrix = np.full((n_tracklets, n_tracklets), np.inf, dtype=np.float64)
+
+		# Fill cost matrix from cost_dict
+		for i, costs_for_i in cost_dict.items():
+			for j, cost in costs_for_i.items():
+				cost_matrix[i, j] = cost
+				cost_matrix[j, i] = cost  # Matrix should be symmetric
+
+		# Track which tracklets are still active (not merged)
+		active_tracklets = set(range(n_tracklets))
+
+		# Main stitching loop - continues until no more valid merges
+		while len(active_tracklets) > 1:
+			# Find minimum cost among active tracklets
+			min_cost = np.inf
+			best_pair = None
+
+			for i in active_tracklets:
+				for j in active_tracklets:
+					if i < j and cost_matrix[i, j] < min_cost:
+						min_cost = cost_matrix[i, j]
+						best_pair = (i, j)
+
+			# If no finite cost found, break (no more valid merges)
+			if best_pair is None or np.isinf(min_cost):
+				break
+
+			tracklet_1_idx, tracklet_2_idx = best_pair
+
+			# Create new merged tracklet
+			new_tracklet = Tracklet.from_tracklets(
+				[working_tracklets[tracklet_1_idx], working_tracklets[tracklet_2_idx]],
+				True,
+			)
+
+			# Remove merged tracklets from active set
+			active_tracklets.remove(tracklet_1_idx)
+			active_tracklets.remove(tracklet_2_idx)
+
+			# Add new tracklet to working list and get its index
+			working_tracklets.append(new_tracklet)
+			new_tracklet_idx = len(working_tracklets) - 1
+			active_tracklets.add(new_tracklet_idx)
+
+			# Extend cost matrix for new tracklet if needed
+			if new_tracklet_idx >= cost_matrix.shape[0]:
+				# Extend matrix size
+				old_size = cost_matrix.shape[0]
+				new_size = max(old_size * 2, new_tracklet_idx + 1)
+				new_matrix = np.full((new_size, new_size), np.inf, dtype=np.float64)
+				new_matrix[:old_size, :old_size] = cost_matrix
+				cost_matrix = new_matrix
+
+			# Calculate costs for new tracklet with all remaining active tracklets
+			for other_idx in active_tracklets:
+				if other_idx != new_tracklet_idx and other_idx < len(working_tracklets):
+					# Calculate cost between new tracklet and existing tracklet
+					match_cost = new_tracklet.compare_to(
+						working_tracklets[other_idx], other_anchors=all_embeds
+					)
+
+					# Apply priority adjustment if enabled
+					if match_cost is not None and prioritize_long:
+						longer_track_length = 100  # Default from _get_transition_costs
+						sigmoid_length_new = 1 / (
+								1 + np.exp(longer_track_length - new_tracklet.n_frames)
+						)
+						sigmoid_length_other = 1 / (
+								1
+								+ np.exp(
+							longer_track_length
+							- working_tracklets[other_idx].n_frames
+						)
+						)
+						match_cost += (
+											  1 - sigmoid_length_new * sigmoid_length_other
+									  ) * float(prioritize_long)
+
+					# Update cost matrix
+					if match_cost is not None and not np.isinf(match_cost):
+						cost_matrix[new_tracklet_idx, other_idx] = match_cost
+						cost_matrix[other_idx, new_tracklet_idx] = match_cost
+					else:
+						cost_matrix[new_tracklet_idx, other_idx] = np.inf
+						cost_matrix[other_idx, new_tracklet_idx] = np.inf
+
+		# Update self._tracklets with the merged result for ID assignment
+		self._tracklets = [working_tracklets[i] for i in active_tracklets]
 
 		# Tracklets are formed. Now we should assign the longest ones IDs.
 		tracklet_lengths = [len(x.frames) for x in self._tracklets]
@@ -1102,9 +1210,11 @@ def stitch_greedy_tracklets(self, num_tracks: int = None, all_embeds: bool = Tru
 		for cur_assignment in assignment_order:
 			ids_to_assign = self._tracklets[cur_assignment].track_id
 			for cur_tracklet_id in ids_to_assign:
-				track_to_longterm_id[int(cur_tracklet_id + 1)] = current_id if current_id > 0 else 0
+				track_to_longterm_id[int(cur_tracklet_id + 1)] = (
+					current_id if current_id > 0 else 0
+				)
 			current_id -= 1
 
 		self._stitch_translation = track_to_longterm_id
 		self._tracklets = original_tracklets
-		self._tracklet_stitch_method = 'greedy'
+		self._tracklet_stitch_method = "greedy"
diff --git a/tests/utils/matching/__init__.py b/tests/utils/matching/__init__.py
new file mode 100644
index 0000000..822c2e4
--- /dev/null
+++ b/tests/utils/matching/__init__.py
@@ -0,0 +1 @@
+"""Tests for the matching utils module."""
diff --git a/tests/utils/matching/video_observations/__init__.py b/tests/utils/matching/video_observations/__init__.py
new file mode 100644
index 0000000..8333a3c
--- /dev/null
+++ b/tests/utils/matching/video_observations/__init__.py
@@ -0,0 +1 @@
+"""Tests for the VideoObservations class."""
diff --git a/tests/utils/matching/video_observations/conftest.py b/tests/utils/matching/video_observations/conftest.py
new file mode 100644
index 0000000..b816a49
--- /dev/null
+++ b/tests/utils/matching/video_observations/conftest.py
@@ -0,0 +1,362 @@
+"""Shared fixtures for VideoObservations testing.
+
+This module provides shared test fixtures and utilities for testing the VideoObservations
+class and its methods, particularly the stitch_greedy_tracklets functionality.
+"""
+
+import numpy as np
+import pytest
+
+from mouse_tracking.utils.matching import Detection, Tracklet, VideoObservations
+
+
+@pytest.fixture
+def basic_detection():
+    """Create a function that generates basic Detection objects with configurable parameters."""
+
+    def _create_detection(
+        frame_idx: int = 0,
+        pose_idx: int = 0,
+        embed_size: int = 128,
+        pose_shape: tuple = (12, 2),
+        seg_shape: tuple = (100, 2),
+        embed_value: float | None = None,
+        pose_coords: tuple | None = None,
+    ):
+        """Create a Detection with specified parameters.
+
+        Args:
+            frame_idx: Frame index for the detection
+            pose_idx: Pose index within the frame
+            embed_size: Size of the embedding vector
+            pose_shape: Shape of pose data
+            seg_shape: Shape of segmentation data
+            embed_value: Fixed value for embedding (random if None)
+            pose_coords: Fixed coordinates for pose center (random if None)
+
+        Returns:
+            Detection object with specified parameters
+        """
+        # Create pose data
+        if pose_coords is not None:
+            pose = np.zeros(pose_shape, dtype=np.float32)
+            center_x, center_y = pose_coords
+            # Create pose keypoints around the center
+            for i in range(pose_shape[0]):
+                pose[i] = [
+                    center_x + np.random.uniform(-10, 10),
+                    center_y + np.random.uniform(-10, 10),
+                ]
+        else:
+            pose = np.random.rand(*pose_shape) * 100
+
+        # Create embedding
+        if embed_value is not None:
+            embed = np.full(embed_size, embed_value, dtype=np.float32)
+        else:
+            embed = np.random.rand(embed_size).astype(np.float32)
+
+        # Create segmentation data
+        seg = np.random.randint(-1, 100, size=seg_shape, dtype=np.int32)
+
+        return Detection(
+            frame=frame_idx,
+            pose_idx=pose_idx,
+            pose=pose,
+            embed=embed,
+            seg_idx=pose_idx,
+            seg=seg,
+        )
+
+    return _create_detection
+
+
+@pytest.fixture
+def simple_tracklet(basic_detection):
+    """Create a simple tracklet with a few detections."""
+
+    def _create_tracklet(
+        track_id: int = 1,
+        frame_range: tuple = (0, 5),
+        pose_coords: tuple = (50, 50),
+        embed_value: float = 0.5,
+    ):
+        """Create a tracklet with detections across specified frames.
+
+        Args:
+            track_id: ID for the tracklet
+            frame_range: (start_frame, end_frame) for the tracklet
+            pose_coords: Center coordinates for poses
+            embed_value: Fixed embedding value for all detections
+
+        Returns:
+            Tracklet object
+        """
+        detections = []
+        for frame in range(frame_range[0], frame_range[1]):
+            detection = basic_detection(
+                frame_idx=frame,
+                pose_idx=0,
+                embed_value=embed_value,
+                pose_coords=pose_coords,
+            )
+            detections.append(detection)
+
+        return Tracklet(track_id, detections)
+
+    return _create_tracklet
+
+
+@pytest.fixture
+def minimal_video_observations(basic_detection):
+    """Create VideoObservations with minimal data (2 tracklets)."""
+    observations = []
+
+    # Create two simple tracklets
+    # Tracklet 1: frames 0-4
+    for frame in range(5):
+        detection = basic_detection(
+            frame_idx=frame,
+            pose_idx=0,
+            embed_value=0.1,
+            pose_coords=(20, 20),
+        )
+        observations.append([detection])
+
+    # Gap (no detections)
+    for _ in range(5, 10):
+        observations.append([])
+
+    # Tracklet 2: frames 10-14
+    for frame in range(10, 15):
+        detection = basic_detection(
+            frame_idx=frame,
+            pose_idx=0,
+            embed_value=0.9,
+            pose_coords=(80, 80),
+        )
+        observations.append([detection])
+
+    video_obs = VideoObservations(observations)
+    video_obs.generate_greedy_tracklets(rotate_pose=False, num_threads=1)
+    return video_obs
+
+
+@pytest.fixture
+def fragmented_video_observations(basic_detection):
+    """Create VideoObservations with many small tracklets that can be stitched."""
+    observations = []
+
+    # Create several small tracklets with similar embeddings that should be stitched
+    tracklet_configs = [
+        # (start_frame, duration, embed_value, pose_coords)
+        (0, 3, 0.1, (10, 10)),  # Tracklet 1
+        (5, 2, 0.11, (10, 10)),  # Similar to tracklet 1, should stitch
+        (10, 4, 0.2, (50, 50)),  # Tracklet 2
+        (16, 3, 0.21, (50, 50)),  # Similar to tracklet 2, should stitch
+        (25, 2, 0.3, (90, 90)),  # Tracklet 3
+        (30, 3, 0.31, (90, 90)),  # Similar to tracklet 3, should stitch
+    ]
+
+    # Initialize all frames as empty
+    total_frames = 35
+    for _ in range(total_frames):
+        observations.append([])
+
+    # Add detections according to tracklet configs
+    for start_frame, duration, embed_value, pose_coords in tracklet_configs:
+        for offset in range(duration):
+            frame = start_frame + offset
+            detection = basic_detection(
+                frame_idx=frame,
+                pose_idx=0,
+                embed_value=embed_value,
+                pose_coords=pose_coords,
+            )
+            observations[frame] = [detection]
+
+    video_obs = VideoObservations(observations)
+    video_obs.generate_greedy_tracklets(rotate_pose=False, num_threads=1)
+    return video_obs
+
+
+@pytest.fixture
+def single_tracklet_video_observations(basic_detection):
+    """Create VideoObservations with only one tracklet (edge case)."""
+    observations = []
+
+    # Single tracklet: frames 0-9
+    for frame in range(10):
+        detection = basic_detection(
+            frame_idx=frame,
+            pose_idx=0,
+            embed_value=0.5,
+            pose_coords=(50, 50),
+        )
+        observations.append([detection])
+
+    video_obs = VideoObservations(observations)
+    video_obs.generate_greedy_tracklets(rotate_pose=False, num_threads=1)
+    return video_obs
+
+
+@pytest.fixture
+def empty_video_observations():
+    """Create VideoObservations with no tracklets (edge case)."""
+    observations = []
+
+    # Create empty frames
+    for _ in range(10):
+        observations.append([])
+
+    video_obs = VideoObservations(observations)
+    # Don't call generate_greedy_tracklets for empty data - it will fail
+    # Instead, manually set up the minimal state
+    video_obs._tracklets = []
+    video_obs._tracklet_gen_method = None
+    return video_obs
+
+
+@pytest.fixture
+def complex_video_observations(basic_detection):
+    """Create VideoObservations with complex stitching scenarios."""
+    observations = []
+    total_frames = 100
+
+    # Initialize all frames as empty
+    for _ in range(total_frames):
+        observations.append([])
+
+    # Create complex tracklet patterns
+    tracklet_patterns = [
+        # Long tracklets that should remain separate
+        (0, 20, 0.1, (10, 10)),  # Long tracklet 1
+        (25, 25, 0.9, (90, 90)),  # Long tracklet 2 (different embedding)
+        # Short tracklets that should stitch together
+        (55, 3, 0.2, (30, 30)),  # Part 1 of animal
+        (60, 4, 0.21, (30, 30)),  # Part 2 of same animal
+        (67, 2, 0.19, (30, 30)),  # Part 3 of same animal
+        # Overlapping tracklets (should not stitch)
+        (75, 10, 0.3, (60, 60)),  # Overlapping tracklet 1
+        (80, 10, 0.31, (60, 60)),  # Overlapping tracklet 2 (slight overlap)
+        # Very short tracklets
+        (92, 1, 0.4, (70, 70)),  # Single frame
+        (95, 2, 0.41, (70, 70)),  # Two frames
+    ]
+
+    # Add detections according to patterns
+    for start_frame, duration, embed_value, pose_coords in tracklet_patterns:
+        for offset in range(duration):
+            frame = start_frame + offset
+            if frame < total_frames:
+                detection = basic_detection(
+                    frame_idx=frame,
+                    pose_idx=0,
+                    embed_value=embed_value,
+                    pose_coords=pose_coords,
+                )
+                observations[frame] = [detection]
+
+    video_obs = VideoObservations(observations)
+    video_obs.generate_greedy_tracklets(rotate_pose=False, num_threads=1)
+    return video_obs
+
+
+@pytest.fixture
+def tracklet_lengths_fixture():
+    """Return function to calculate tracklet lengths."""
+
+    def _get_tracklet_lengths(video_observations):
+        """Get lengths of all tracklets in VideoObservations."""
+        return [len(tracklet.frames) for tracklet in video_observations._tracklets]
+
+    return _get_tracklet_lengths
+
+
+@pytest.fixture
+def tracklet_ids_fixture():
+    """Return function to extract tracklet IDs."""
+
+    def _get_tracklet_ids(video_observations):
+        """Get all tracklet IDs from VideoObservations."""
+        return [tracklet.track_id for tracklet in video_observations._tracklets]
+
+    return _get_tracklet_ids
+
+
+@pytest.fixture
+def verify_no_overlaps_fixture():
+    """Return function to verify tracklets don't overlap."""
+
+    def _verify_no_overlaps(video_observations):
+        """Verify that no tracklets overlap in frames."""
+        tracklets = video_observations._tracklets
+        for i, tracklet_1 in enumerate(tracklets):
+            for j, tracklet_2 in enumerate(tracklets[i + 1 :], i + 1):
+                assert not tracklet_1.overlaps_with(tracklet_2), (
+                    f"Tracklet {i} overlaps with tracklet {j}"
+                )
+
+    return _verify_no_overlaps
+
+
+@pytest.fixture
+def stitching_verification_fixture():
+    """Return function to verify stitching results are valid."""
+
+    def _verify_stitching_results(
+        original_tracklets, stitched_tracklets, original_count, final_count
+    ):
+        """Verify that stitching results are valid.
+
+        Args:
+            original_tracklets: List of tracklets before stitching
+            stitched_tracklets: List of tracklets after stitching
+            original_count: Original number of tracklets
+            final_count: Final number of tracklets after stitching
+
+        Returns:
+            dict with verification results
+        """
+        # Basic count check
+        assert len(stitched_tracklets) == final_count, (
+            f"Expected {final_count} tracklets, got {len(stitched_tracklets)}"
+        )
+
+        # Should have fewer or same number of tracklets
+        assert final_count <= original_count, (
+            "Stitching should not increase tracklet count"
+        )
+
+        # All frames should still be covered
+        original_frames = set()
+        for tracklet in original_tracklets:
+            original_frames.update(tracklet.frames)
+
+        stitched_frames = set()
+        for tracklet in stitched_tracklets:
+            stitched_frames.update(tracklet.frames)
+
+        assert original_frames == stitched_frames, (
+            "Frame coverage should not change after stitching"
+        )
+
+        # No overlaps should exist
+        for i, tracklet_1 in enumerate(stitched_tracklets):
+            for j, tracklet_2 in enumerate(stitched_tracklets[i + 1 :], i + 1):
+                assert not tracklet_1.overlaps_with(tracklet_2), (
+                    f"Stitched tracklet {i} overlaps with tracklet {j}"
+                )
+
+        return {
+            "original_count": original_count,
+            "final_count": final_count,
+            "reduction": original_count - final_count,
+            "reduction_percentage": (original_count - final_count)
+            / original_count
+            * 100
+            if original_count > 0
+            else 0,
+        }
+
+    return _verify_stitching_results
diff --git a/tests/utils/matching/video_observations/test_benchmark_stich_greedy_tracklets.py b/tests/utils/matching/video_observations/test_benchmark_stich_greedy_tracklets.py
new file mode 100644
index 0000000..545b563
--- /dev/null
+++ b/tests/utils/matching/video_observations/test_benchmark_stich_greedy_tracklets.py
@@ -0,0 +1,295 @@
+"""Benchmark tests for VideoObservations.stitch_greedy_tracklets method.
+
+This module contains performance benchmarks to measure the efficiency of tracklet stitching
+and help identify performance bottlenecks. Uses pytest-benchmark plugin.
+
+Run with: pytest tests/utils/matching/video_observations/test_benchmark_stich_greedy_tracklets.py --benchmark-only
+"""
+
+import numpy as np
+import pytest
+
+from mouse_tracking.utils.matching import Detection, VideoObservations
+
+
+@pytest.fixture
+def mock_detection():
+    """Create a mock detection with realistic data."""
+
+    def _create_detection(frame_idx, pose_idx, embed_size=128):
+        pose = np.random.rand(12, 2) * 100  # Random pose keypoints
+        embed = np.random.rand(embed_size)  # Random embedding vector
+        seg = np.random.randint(-1, 100, size=(100, 2))  # Random segmentation contour
+        return Detection(
+            frame=frame_idx,
+            pose_idx=pose_idx,
+            pose=pose,
+            embed=embed,
+            seg_idx=pose_idx,
+            seg=seg,
+        )
+
+    return _create_detection
+
+
+@pytest.fixture
+def small_video_observations(mock_detection):
+    """Create VideoObservations with small number of tracklets (10-15 tracklets)."""
+    observations = []
+    num_frames = 100
+    animals_per_frame = 2
+
+    for frame_idx in range(num_frames):
+        frame_observations = []
+        for animal_idx in range(animals_per_frame):
+            detection = mock_detection(frame_idx, animal_idx)
+            frame_observations.append(detection)
+        observations.append(frame_observations)
+
+    video_obs = VideoObservations(observations)
+    # Generate tracklets
+    video_obs.generate_greedy_tracklets(rotate_pose=True, num_threads=1)
+    return video_obs
+
+
+@pytest.fixture
+def medium_video_observations(mock_detection):
+    """Create VideoObservations with medium number of tracklets (30-50 tracklets)."""
+    observations = []
+    num_frames = 200
+    animals_per_frame = 3
+
+    for frame_idx in range(num_frames):
+        frame_observations = []
+        for animal_idx in range(animals_per_frame):
+            # Add some noise to create more tracklets by making some detections inconsistent
+            if np.random.random() > 0.8:  # 20% chance to skip detection
+                continue
+            detection = mock_detection(frame_idx, animal_idx)
+            frame_observations.append(detection)
+        observations.append(frame_observations)
+
+    video_obs = VideoObservations(observations)
+    # Generate tracklets
+    video_obs.generate_greedy_tracklets(rotate_pose=True, num_threads=1)
+    return video_obs
+
+
+@pytest.fixture
+def large_video_observations(mock_detection):
+    """Create VideoObservations with large number of tracklets (80-120 tracklets)."""
+    observations = []
+    num_frames = 300
+    animals_per_frame = 4
+
+    for frame_idx in range(num_frames):
+        frame_observations = []
+        for animal_idx in range(animals_per_frame):
+            # Add more noise to create many fragmented tracklets
+            if np.random.random() > 0.7:  # 30% chance to skip detection
+                continue
+            detection = mock_detection(frame_idx, animal_idx)
+            frame_observations.append(detection)
+        observations.append(frame_observations)
+
+    video_obs = VideoObservations(observations)
+    # Generate tracklets
+    video_obs.generate_greedy_tracklets(rotate_pose=True, num_threads=1)
+    return video_obs
+
+
+class TestStitchGreedyTrackletsBenchmark:
+    """Benchmark tests for stitch_greedy_tracklets method."""
+
+    def test_benchmark_small_tracklets(self, benchmark, small_video_observations):
+        """Benchmark stitching with small number of tracklets (~10-15)."""
+        # Store original tracklets for verification
+        original_tracklet_count = len(small_video_observations._tracklets)
+
+        def run_stitch():
+            # Reset tracklets before each run
+            small_video_observations._make_tracklets()
+            small_video_observations.stitch_greedy_tracklets(
+                num_tracks=None, all_embeds=True, prioritize_long=True
+            )
+            return len(small_video_observations._tracklets)
+
+        result = benchmark(run_stitch)
+
+        # Verify that stitching actually happened
+        assert result <= original_tracklet_count
+        print(f"Small test: {original_tracklet_count} -> {result} tracklets")
+
+    def test_benchmark_medium_tracklets(self, benchmark, medium_video_observations):
+        """Benchmark stitching with medium number of tracklets (~30-50)."""
+        original_tracklet_count = len(medium_video_observations._tracklets)
+
+        def run_stitch():
+            # Reset tracklets before each run
+            medium_video_observations._make_tracklets()
+            medium_video_observations.stitch_greedy_tracklets(
+                num_tracks=None, all_embeds=True, prioritize_long=True
+            )
+            return len(medium_video_observations._tracklets)
+
+        result = benchmark(run_stitch)
+
+        # Verify that stitching actually happened
+        assert result <= original_tracklet_count
+        print(f"Medium test: {original_tracklet_count} -> {result} tracklets")
+
+    def test_benchmark_large_tracklets(self, benchmark, large_video_observations):
+        """Benchmark stitching with large number of tracklets (~80-120)."""
+        original_tracklet_count = len(large_video_observations._tracklets)
+
+        def run_stitch():
+            # Reset tracklets before each run
+            large_video_observations._make_tracklets()
+            large_video_observations.stitch_greedy_tracklets(
+                num_tracks=None, all_embeds=True, prioritize_long=True
+            )
+            return len(large_video_observations._tracklets)
+
+        result = benchmark(run_stitch)
+
+        # Verify that stitching actually happened
+        assert result <= original_tracklet_count
+        print(f"Large test: {original_tracklet_count} -> {result} tracklets")
+
+    def test_benchmark_get_transition_costs(self, benchmark, medium_video_observations):
+        """Benchmark the _get_transition_costs method specifically."""
+
+        def run_get_costs():
+            return medium_video_observations._get_transition_costs(
+                all_comparisons=True, include_inf=True, longer_track_priority=1.0
+            )
+
+        result = benchmark(run_get_costs)
+
+        # Verify result is reasonable
+        assert isinstance(result, dict)
+        assert len(result) > 0
+        print(f"Transition costs calculated for {len(result)} tracklets")
+
+    def test_scaling_comparison(
+        self,
+        benchmark,
+        small_video_observations,
+        medium_video_observations,
+        large_video_observations,
+    ):
+        """Compare performance scaling across different tracklet counts."""
+        import time
+
+        test_cases = [
+            ("small", small_video_observations),
+            ("medium", medium_video_observations),
+            ("large", large_video_observations),
+        ]
+
+        results = {}
+
+        for name, video_obs in test_cases:
+            original_count = len(video_obs._tracklets)
+
+            # Reset tracklets
+            video_obs._make_tracklets()
+
+            # Time the stitching
+            start_time = time.time()
+            video_obs.stitch_greedy_tracklets(
+                num_tracks=None, all_embeds=True, prioritize_long=True
+            )
+            end_time = time.time()
+
+            final_count = len(video_obs._tracklets)
+            duration = end_time - start_time
+
+            results[name] = {
+                "original_tracklets": original_count,
+                "final_tracklets": final_count,
+                "duration_seconds": duration,
+                "tracklets_per_second": original_count / duration
+                if duration > 0
+                else float("inf"),
+            }
+
+            print(
+                f"{name}: {original_count} -> {final_count} tracklets in {duration:.3f}s"
+            )
+
+        # Check for quadratic or worse scaling
+        small_time = results["small"]["duration_seconds"]
+        medium_time = results["medium"]["duration_seconds"]
+        large_time = results["large"]["duration_seconds"]
+
+        small_tracklets = results["small"]["original_tracklets"]
+        medium_tracklets = results["medium"]["original_tracklets"]
+        large_tracklets = results["large"]["original_tracklets"]
+
+        if medium_time > 0 and small_time > 0:
+            scaling_factor_small_to_medium = (medium_time / small_time) / (
+                (medium_tracklets / small_tracklets) ** 2
+            )
+            print(
+                f"Scaling factor (small->medium): {scaling_factor_small_to_medium:.2f} (1.0 = quadratic)"
+            )
+
+        if large_time > 0 and medium_time > 0:
+            scaling_factor_medium_to_large = (large_time / medium_time) / (
+                (large_tracklets / medium_tracklets) ** 2
+            )
+            print(
+                f"Scaling factor (medium->large): {scaling_factor_medium_to_large:.2f} (1.0 = quadratic)"
+            )
+
+
+@pytest.mark.parametrize(
+    "num_tracklets,expected_complexity",
+    [(10, "linear"), (30, "quadratic"), (50, "quadratic"), (100, "cubic")],
+)
+def test_complexity_analysis(
+    benchmark, mock_detection, num_tracklets, expected_complexity
+):
+    """Test performance complexity with different numbers of tracklets."""
+    # Create observations that will result in approximately num_tracklets tracklets
+    observations = []
+    frames_per_tracklet = 5
+    num_frames = num_tracklets * frames_per_tracklet
+
+    for frame_idx in range(num_frames):
+        frame_observations = []
+        # Create sparse detections to generate many short tracklets
+        if frame_idx % frames_per_tracklet < 2:  # Only 2 out of every 5 frames
+            detection = mock_detection(frame_idx, frame_idx // frames_per_tracklet)
+            frame_observations.append(detection)
+        observations.append(frame_observations)
+
+    video_obs = VideoObservations(observations)
+    video_obs.generate_greedy_tracklets(rotate_pose=True, num_threads=1)
+
+    actual_tracklets = len(video_obs._tracklets)
+    print(f"Created {actual_tracklets} tracklets (target: {num_tracklets})")
+
+    # Measure time
+    import time
+
+    start_time = time.time()
+    video_obs.stitch_greedy_tracklets(
+        num_tracks=None, all_embeds=True, prioritize_long=True
+    )
+    duration = time.time() - start_time
+
+    print(f"Processed {actual_tracklets} tracklets in {duration:.3f}s")
+
+    # Basic complexity check - this is more for documentation than assertion
+    if actual_tracklets > 0:
+        time_per_tracklet = duration / actual_tracklets
+        time_per_tracklet_squared = duration / (actual_tracklets**2)
+        print(f"Time per tracklet: {time_per_tracklet:.6f}s")
+        print(f"Time per tracklet²: {time_per_tracklet_squared:.6f}s")
+
+
+if __name__ == "__main__":
+    # Allow running benchmark tests directly
+    pytest.main([__file__, "--benchmark-only", "-v"])
diff --git a/tests/utils/matching/video_observations/test_stitch_greedy_tracklets.py b/tests/utils/matching/video_observations/test_stitch_greedy_tracklets.py
new file mode 100644
index 0000000..512acdc
--- /dev/null
+++ b/tests/utils/matching/video_observations/test_stitch_greedy_tracklets.py
@@ -0,0 +1,483 @@
+"""Comprehensive unit tests for VideoObservations.stitch_greedy_tracklets method.
+
+This module provides thorough test coverage for the stitch_greedy_tracklets functionality,
+including normal operation, edge cases, error conditions, and parameter variations.
+"""
+
+import copy
+from unittest.mock import patch
+
+import numpy as np
+import pytest
+
+from mouse_tracking.utils.matching import VideoObservations
+
+
+def test_stitch_greedy_tracklets_basic_functionality(
+    minimal_video_observations, stitching_verification_fixture
+):
+    """Test basic stitching functionality with minimal data."""
+    # Arrange
+    video_obs = minimal_video_observations
+    original_count = len(video_obs._tracklets)
+    original_tracklets = copy.deepcopy(video_obs._tracklets)
+
+    # Act
+    video_obs.stitch_greedy_tracklets(
+        num_tracks=None, all_embeds=True, prioritize_long=False
+    )
+
+    # Assert
+    final_count = len(video_obs._tracklets)
+    assert final_count <= original_count, "Stitching should not increase tracklet count"
+
+    # Verify stitching results
+    stitching_verification_fixture(
+        original_tracklets, video_obs._tracklets, original_count, final_count
+    )
+
+    # Check that method attributes were set correctly
+    assert video_obs._tracklet_stitch_method == "greedy"
+    assert hasattr(video_obs, "_stitch_translation")
+    assert isinstance(video_obs._stitch_translation, dict)
+
+
+def test_stitch_greedy_tracklets_parameter_variations(minimal_video_observations):
+    """Test different parameter combinations for stitch_greedy_tracklets."""
+    # Test cases with different parameter combinations
+    test_cases = [
+        {"num_tracks": None, "all_embeds": True, "prioritize_long": False},
+        {"num_tracks": None, "all_embeds": False, "prioritize_long": False},
+        {"num_tracks": None, "all_embeds": True, "prioritize_long": True},
+        {"num_tracks": 1, "all_embeds": True, "prioritize_long": False},
+        {"num_tracks": 2, "all_embeds": False, "prioritize_long": True},
+    ]
+
+    for params in test_cases:
+        # Arrange - reset tracklets for each test
+        video_obs = minimal_video_observations
+        video_obs._make_tracklets()
+        original_count = len(video_obs._tracklets)
+
+        # Act
+        video_obs.stitch_greedy_tracklets(**params)
+
+        # Assert
+        final_count = len(video_obs._tracklets)
+        assert final_count <= original_count, f"Failed for params: {params}"
+        assert video_obs._tracklet_stitch_method == "greedy"
+        assert hasattr(video_obs, "_stitch_translation")
+
+
+def test_stitch_greedy_tracklets_fragmented_data(
+    fragmented_video_observations, stitching_verification_fixture
+):
+    """Test stitching with fragmented tracklets that should be combined."""
+    # Arrange
+    video_obs = fragmented_video_observations
+    original_count = len(video_obs._tracklets)
+    original_tracklets = copy.deepcopy(video_obs._tracklets)
+
+    # Should have multiple small tracklets initially
+    assert original_count >= 6, "Should have multiple fragmented tracklets"
+
+    # Act
+    video_obs.stitch_greedy_tracklets(
+        num_tracks=None, all_embeds=True, prioritize_long=False
+    )
+
+    # Assert
+    final_count = len(video_obs._tracklets)
+    reduction = original_count - final_count
+
+    # May see reduction in tracklet count (depends on similarity thresholds)
+    # The important thing is that no tracklets are added
+    assert reduction >= 0, "Should not increase tracklet count"
+    assert final_count <= original_count, "Should not increase the number of tracklets"
+
+    # Verify stitching results
+    verification_result = stitching_verification_fixture(
+        original_tracklets, video_obs._tracklets, original_count, final_count
+    )
+
+    # May see meaningful reduction depending on similarity thresholds
+    # At minimum, should not increase tracklet count
+    assert verification_result["reduction_percentage"] >= 0, (
+        "Should not increase tracklet count"
+    )
+
+
+def test_stitch_greedy_tracklets_single_tracklet(
+    single_tracklet_video_observations, verify_no_overlaps_fixture
+):
+    """Test stitching behavior with only one tracklet (edge case)."""
+    # Arrange
+    video_obs = single_tracklet_video_observations
+    original_count = len(video_obs._tracklets)
+
+    # Should have exactly one tracklet
+    assert original_count == 1, "Should start with exactly one tracklet"
+
+    # Act
+    video_obs.stitch_greedy_tracklets(
+        num_tracks=None, all_embeds=True, prioritize_long=False
+    )
+
+    # Assert
+    final_count = len(video_obs._tracklets)
+    assert final_count == 1, "Should still have exactly one tracklet"
+
+    # Verify state is consistent
+    verify_no_overlaps_fixture(video_obs)
+    assert video_obs._tracklet_stitch_method == "greedy"
+    assert hasattr(video_obs, "_stitch_translation")
+
+
+def test_stitch_greedy_tracklets_empty_tracklets(
+    empty_video_observations, verify_no_overlaps_fixture
+):
+    """Test stitching behavior with no tracklets (edge case)."""
+    # Arrange
+    video_obs = empty_video_observations
+    original_count = len(video_obs._tracklets)
+
+    # Should have no tracklets
+    assert original_count == 0, "Should start with no tracklets"
+
+    # Act
+    video_obs.stitch_greedy_tracklets(
+        num_tracks=None, all_embeds=True, prioritize_long=False
+    )
+
+    # Assert
+    final_count = len(video_obs._tracklets)
+    assert final_count == 0, "Should still have no tracklets"
+
+    # Verify state is consistent
+    verify_no_overlaps_fixture(video_obs)
+    assert video_obs._tracklet_stitch_method == "greedy"
+    assert hasattr(video_obs, "_stitch_translation")
+
+
+def test_stitch_greedy_tracklets_complex_scenarios(
+    complex_video_observations,
+    stitching_verification_fixture,
+    verify_no_overlaps_fixture,
+):
+    """Test stitching with complex scenarios including overlaps and various lengths."""
+    # Arrange
+    video_obs = complex_video_observations
+    original_count = len(video_obs._tracklets)
+    original_tracklets = copy.deepcopy(video_obs._tracklets)
+
+    # Should have multiple tracklets of various lengths
+    assert original_count >= 5, "Should have multiple tracklets for complex test"
+
+    # Act
+    video_obs.stitch_greedy_tracklets(
+        num_tracks=None, all_embeds=True, prioritize_long=True
+    )
+
+    # Assert
+    final_count = len(video_obs._tracklets)
+
+    # Verify no overlaps exist
+    verify_no_overlaps_fixture(video_obs)
+
+    # Verify stitching results
+    stitching_verification_fixture(
+        original_tracklets, video_obs._tracklets, original_count, final_count
+    )
+
+    # Complex scenarios should show some reduction
+    assert final_count <= original_count, "Should not increase tracklet count"
+
+
+def test_stitch_greedy_tracklets_with_num_tracks_parameter(minimal_video_observations):
+    """Test stitching with specific num_tracks parameter."""
+    # Arrange
+    video_obs = minimal_video_observations
+    video_obs._make_tracklets()
+    original_count = len(video_obs._tracklets)
+
+    target_tracks = 1
+
+    # Act
+    video_obs.stitch_greedy_tracklets(
+        num_tracks=target_tracks, all_embeds=True, prioritize_long=False
+    )
+
+    # Assert
+    final_count = len(video_obs._tracklets)
+
+    # Should respect the target when possible
+    assert final_count <= original_count, "Should not increase tracklet count"
+    assert video_obs._tracklet_stitch_method == "greedy"
+
+
+def test_stitch_greedy_tracklets_preserves_original_tracklets(
+    minimal_video_observations,
+):
+    """Test that original tracklets are preserved after stitching."""
+    # Arrange
+    video_obs = minimal_video_observations
+    original_tracklets = copy.deepcopy(video_obs._tracklets)
+
+    # Act
+    video_obs.stitch_greedy_tracklets(
+        num_tracks=None, all_embeds=True, prioritize_long=False
+    )
+
+    # Assert - implementation should restore original tracklets
+    # This is based on the line: self._tracklets = original_tracklets
+    for i, (original, current) in enumerate(
+        zip(original_tracklets, video_obs._tracklets, strict=False)
+    ):
+        assert original.track_id == current.track_id, (
+            f"Tracklet {i} ID should be preserved"
+        )
+        assert len(original.frames) == len(current.frames), (
+            f"Tracklet {i} frame count should be preserved"
+        )
+
+
+def test_stitch_greedy_tracklets_translation_mapping(minimal_video_observations):
+    """Test that stitch translation mapping is correctly created."""
+    # Arrange
+    video_obs = minimal_video_observations
+
+    # Act
+    video_obs.stitch_greedy_tracklets(
+        num_tracks=None, all_embeds=True, prioritize_long=False
+    )
+
+    # Assert
+    assert hasattr(video_obs, "_stitch_translation")
+    assert isinstance(video_obs._stitch_translation, dict)
+
+    # Should contain mapping for track ID 0 (background)
+    assert 0 in video_obs._stitch_translation.values()
+
+    # Should have entries for original tracklets
+    translation = video_obs._stitch_translation
+    assert len(translation) >= 1, "Should have at least background translation"
+
+
+def test_stitch_greedy_tracklets_prioritize_long_parameter(
+    fragmented_video_observations,
+):
+    """Test that prioritize_long parameter affects stitching behavior."""
+    # Test without prioritizing long tracklets
+    video_obs_no_priority = fragmented_video_observations
+    video_obs_no_priority._make_tracklets()
+    video_obs_no_priority.stitch_greedy_tracklets(
+        num_tracks=None, all_embeds=True, prioritize_long=False
+    )
+    result_no_priority = len(video_obs_no_priority._tracklets)
+
+    # Test with prioritizing long tracklets
+    video_obs_with_priority = fragmented_video_observations
+    video_obs_with_priority._make_tracklets()
+    video_obs_with_priority.stitch_greedy_tracklets(
+        num_tracks=None, all_embeds=True, prioritize_long=True
+    )
+    result_with_priority = len(video_obs_with_priority._tracklets)
+
+    # Both should be valid results
+    assert result_no_priority >= 0
+    assert result_with_priority >= 0
+
+    # Results may differ based on prioritization
+    # (This is hard to test deterministically without knowing the exact algorithm)
+
+
+def test_stitch_greedy_tracklets_all_embeds_parameter(minimal_video_observations):
+    """Test that all_embeds parameter affects behavior."""
+    # Test with all_embeds=True
+    video_obs_all_embeds = minimal_video_observations
+    video_obs_all_embeds._make_tracklets()
+    video_obs_all_embeds.stitch_greedy_tracklets(
+        num_tracks=None, all_embeds=True, prioritize_long=False
+    )
+    result_all_embeds = len(video_obs_all_embeds._tracklets)
+
+    # Test with all_embeds=False
+    video_obs_no_all_embeds = minimal_video_observations
+    video_obs_no_all_embeds._make_tracklets()
+    video_obs_no_all_embeds.stitch_greedy_tracklets(
+        num_tracks=None, all_embeds=False, prioritize_long=False
+    )
+    result_no_all_embeds = len(video_obs_no_all_embeds._tracklets)
+
+    # Both should be valid results
+    assert result_all_embeds >= 0
+    assert result_no_all_embeds >= 0
+
+
+@pytest.mark.parametrize(
+    "num_tracks, all_embeds, prioritize_long",
+    [
+        (None, True, False),
+        (1, True, False),
+        (2, False, True),
+        (5, True, True),
+        (None, False, False),
+    ],
+)
+def test_stitch_greedy_tracklets_parameter_combinations(
+    minimal_video_observations, num_tracks, all_embeds, prioritize_long
+):
+    """Test various parameter combinations for stitch_greedy_tracklets."""
+    # Arrange
+    video_obs = minimal_video_observations
+    video_obs._make_tracklets()
+    original_count = len(video_obs._tracklets)
+
+    # Act
+    video_obs.stitch_greedy_tracklets(
+        num_tracks=num_tracks, all_embeds=all_embeds, prioritize_long=prioritize_long
+    )
+
+    # Assert
+    final_count = len(video_obs._tracklets)
+    assert final_count <= original_count, "Should not increase tracklet count"
+    assert video_obs._tracklet_stitch_method == "greedy"
+    assert hasattr(video_obs, "_stitch_translation")
+
+
+def test_stitch_greedy_tracklets_idempotent(minimal_video_observations):
+    """Test that running stitch_greedy_tracklets multiple times is safe."""
+    # Arrange
+    video_obs = minimal_video_observations
+
+    # Act - run stitching twice
+    video_obs.stitch_greedy_tracklets(
+        num_tracks=None, all_embeds=True, prioritize_long=False
+    )
+    first_result = len(video_obs._tracklets)
+
+    video_obs.stitch_greedy_tracklets(
+        num_tracks=None, all_embeds=True, prioritize_long=False
+    )
+    second_result = len(video_obs._tracklets)
+    second_translation = video_obs._stitch_translation
+
+    # Assert - should be consistent
+    assert first_result == second_result, "Multiple runs should give same result"
+    # Translation might change, but should still be valid
+    assert isinstance(second_translation, dict)
+
+
+def test_stitch_greedy_tracklets_state_consistency(minimal_video_observations):
+    """Test that object state remains consistent after stitching."""
+    # Arrange
+    video_obs = minimal_video_observations
+    original_num_frames = video_obs.num_frames
+
+    # Act
+    video_obs.stitch_greedy_tracklets(
+        num_tracks=None, all_embeds=True, prioritize_long=False
+    )
+
+    # Assert - verify object state is consistent
+    assert video_obs.num_frames == original_num_frames, "Frame count should not change"
+    assert video_obs._tracklet_stitch_method == "greedy"
+    assert hasattr(video_obs, "_stitch_translation")
+    assert isinstance(video_obs._tracklets, list)
+
+
+def test_stitch_greedy_tracklets_tracklet_properties(minimal_video_observations):
+    """Test that tracklet properties are maintained after stitching."""
+    # Arrange
+    video_obs = minimal_video_observations
+    video_obs.stitch_greedy_tracklets(
+        num_tracks=None, all_embeds=True, prioritize_long=False
+    )
+
+    # Assert - verify tracklet properties
+    for tracklet in video_obs._tracklets:
+        assert hasattr(tracklet, "frames"), "Tracklet should have frames"
+        assert hasattr(tracklet, "track_id"), "Tracklet should have track_id"
+        assert hasattr(tracklet, "detection_list"), (
+            "Tracklet should have detection_list"
+        )
+
+        # Verify frame consistency
+        assert len(tracklet.frames) > 0, "Tracklet should have frames"
+        assert len(tracklet.detection_list) == len(tracklet.frames), (
+            "Detection count should match frame count"
+        )
+
+
+def test_stitch_greedy_tracklets_error_handling_invalid_parameters():
+    """Test that method handles edge cases gracefully."""
+    # Create minimal video observations for testing
+    from mouse_tracking.utils.matching import Detection
+
+    detection = Detection(frame=0, pose_idx=0, pose=np.random.rand(12, 2))
+    video_obs = VideoObservations([[detection]])
+    video_obs.generate_greedy_tracklets()
+
+    # The method should handle edge cases gracefully rather than raising exceptions
+    # Test with unusual but valid parameters
+
+    # Very large num_tracks should work
+    video_obs.stitch_greedy_tracklets(num_tracks=1000)
+    assert len(video_obs._tracklets) >= 0
+
+    # Reset for next test
+    video_obs._make_tracklets()
+
+    # All valid parameter combinations should work
+    video_obs.stitch_greedy_tracklets(
+        num_tracks=0, all_embeds=False, prioritize_long=True
+    )
+    assert len(video_obs._tracklets) >= 0
+
+
+def test_stitch_greedy_tracklets_memory_efficiency(complex_video_observations):
+    """Test that stitching doesn't cause memory leaks or excessive usage."""
+    # Arrange
+    video_obs = complex_video_observations
+
+    # Act - measure memory usage indirectly by checking object sizes
+    import sys
+
+    initial_size = sys.getsizeof(video_obs)
+
+    video_obs.stitch_greedy_tracklets(
+        num_tracks=None, all_embeds=True, prioritize_long=False
+    )
+
+    final_size = sys.getsizeof(video_obs)
+
+    # Assert - size should not grow excessively
+    size_increase = final_size - initial_size
+    assert size_increase < initial_size, (
+        "Memory usage should not double after stitching"
+    )
+
+
+def test_stitch_greedy_tracklets_with_get_transition_costs_called(
+    minimal_video_observations,
+):
+    """Test that _get_transition_costs is called during stitching."""
+    # Arrange
+    video_obs = minimal_video_observations
+
+    # Act & Assert - using patch to verify method is called
+    with patch.object(
+        video_obs, "_get_transition_costs", wraps=video_obs._get_transition_costs
+    ) as mock_costs:
+        video_obs.stitch_greedy_tracklets(
+            num_tracks=None, all_embeds=True, prioritize_long=False
+        )
+
+        # Should call _get_transition_costs at least once
+        assert mock_costs.call_count > 0, (
+            "_get_transition_costs should be called during stitching"
+        )
+
+        # Verify it was called with correct parameters
+        call_args = mock_costs.call_args_list[0]
+        assert "all_comparisons" in call_args[1] or len(call_args[0]) > 0