Skip to content

Fix/mAP #1834

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 13 commits into
base: develop
Choose a base branch
from
Open

Fix/mAP #1834

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion supervision/dataset/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -574,7 +574,6 @@ def from_coco(
force_masks (bool): If True,
forces masks to be loaded for all annotations,
regardless of whether they are present.

Returns:
DetectionDataset: A DetectionDataset instance containing
the loaded images and annotations.
Expand Down
39 changes: 34 additions & 5 deletions supervision/dataset/formats/coco.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,10 @@ def coco_annotations_to_masks(


def coco_annotations_to_detections(
image_annotations: List[dict], resolution_wh: Tuple[int, int], with_masks: bool
image_annotations: List[dict],
resolution_wh: Tuple[int, int],
with_masks: bool,
use_iscrowd: bool = False,
) -> Detections:
if not image_annotations:
return Detections.empty()
Expand All @@ -102,15 +105,26 @@ def coco_annotations_to_detections(
xyxy = np.asarray(xyxy)
xyxy[:, 2:4] += xyxy[:, 0:2]

data = dict()
if use_iscrowd:
iscrowd = [
image_annotation["iscrowd"] for image_annotation in image_annotations
]
area = [image_annotation["area"] for image_annotation in image_annotations]
data = dict(
iscrowd=np.asarray(iscrowd, dtype=int), area=np.asarray(area, dtype=float)
)

if with_masks:
mask = coco_annotations_to_masks(
image_annotations=image_annotations, resolution_wh=resolution_wh
)
return Detections(
class_id=np.asarray(class_ids, dtype=int), xyxy=xyxy, mask=mask
)
else:
mask = None

return Detections(xyxy=xyxy, class_id=np.asarray(class_ids, dtype=int))
return Detections(
class_id=np.asarray(class_ids, dtype=int), xyxy=xyxy, mask=mask, data=data
)


def detections_to_coco_annotations(
Expand Down Expand Up @@ -159,16 +173,29 @@ def detections_to_coco_annotations(
return coco_annotations, annotation_id


def get_coco_class_index_mapping(annotations_path: str) -> Dict[int, int]:
    """
    Build the inverted class-index mapping for a COCO annotations file.

    COCO's official annotations intentionally skip some category IDs, whereas
    some models use sequential class IDs (e.g. 0 to 79 for 80 classes). This
    helper reads the categories from the annotations file, builds the mapping
    with `build_coco_class_index_mapping`, and returns it with keys and values
    swapped so callers do not have to reverse it themselves.

    Args:
        annotations_path (str): Path to the COCO annotations JSON file.

    Returns:
        Dict[int, int]: The mapping produced by
            `build_coco_class_index_mapping`, inverted. Presumably this maps
            a sequential class index back to the original COCO category ID;
            confirm against `build_coco_class_index_mapping`.
    """
    annotations = read_json_file(annotations_path)
    target_classes = coco_categories_to_classes(
        coco_categories=annotations["categories"]
    )
    forward_mapping = build_coco_class_index_mapping(
        coco_categories=annotations["categories"], target_classes=target_classes
    )
    # Swap keys and values of the forward mapping.
    return {value: key for key, value in forward_mapping.items()}


def load_coco_annotations(
images_directory_path: str,
annotations_path: str,
force_masks: bool = False,
# use_iscrowd: bool = True,
) -> Tuple[List[str], List[str], Dict[str, Detections]]:
coco_data = read_json_file(file_path=annotations_path)
classes = coco_categories_to_classes(coco_categories=coco_data["categories"])

class_index_mapping = build_coco_class_index_mapping(
coco_categories=coco_data["categories"], target_classes=classes
)

coco_images = coco_data["images"]
coco_annotations_groups = group_coco_annotations_by_image_id(
coco_annotations=coco_data["annotations"]
Expand All @@ -190,7 +217,9 @@ def load_coco_annotations(
image_annotations=image_annotations,
resolution_wh=(image_width, image_height),
with_masks=force_masks,
use_iscrowd=True,
)

annotation = map_detections_class_id(
source_to_target_mapping=class_index_mapping,
detections=annotation,
Expand Down
74 changes: 74 additions & 0 deletions supervision/detection/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1325,3 +1325,77 @@ def spread_out_boxes(
xyxy_padded[:, [2, 3]] += force_vectors

return pad_boxes(xyxy_padded, px=-1)


def _jaccard(box_a: List[float], box_b: List[float], is_crowd: bool) -> float:
    """
    Compute the Jaccard index (intersection over union) of two boxes.

    When `is_crowd` is set, the intersection is divided by the area of
    `box_a` alone rather than by the union. This follows the COCO convention
    in which a detection may match any sub-region of a crowd ground truth:
    iou(gt, dt, iscrowd) = area(intersect(gt, dt)) / area(dt).

    Args:
        box_a (List[float]): Box in the format [x, y, width, height]. Its
            area is the denominator when `is_crowd` is True.
        box_b (List[float]): Box in the format [x, y, width, height].
        is_crowd (bool): Flag indicating if the second box is a crowd region.

    Returns:
        float: Jaccard index between the two bounding boxes.
    """

    def _clamped_area(left, top, right, bottom):
        # Inverted or empty extents contribute zero area.
        return max(right - left, 0.0) * max(bottom - top, 0.0)

    # Smallest positive spacing at 1.0; keeps the denominator non-zero.
    epsilon = np.spacing(1)

    # Convert both boxes from (x, y, w, h) to corner coordinates.
    a_left, a_top = box_a[0], box_a[1]
    a_right, a_bottom = box_a[0] + box_a[2], box_a[1] + box_a[3]
    b_left, b_top = box_b[0], box_b[1]
    b_right, b_bottom = box_b[0] + box_b[2], box_b[1] + box_b[3]

    area_a = _clamped_area(a_left, a_top, a_right, a_bottom)
    area_b = _clamped_area(b_left, b_top, b_right, b_bottom)

    # The overlap rectangle is bounded by the innermost edges of both boxes.
    overlap = _clamped_area(
        max(a_left, b_left),
        max(a_top, b_top),
        min(a_right, b_right),
        min(a_bottom, b_bottom),
    )

    denominator = (
        (area_a + epsilon) if is_crowd else (area_a + area_b - overlap + epsilon)
    )
    return overlap / denominator


def iou_with_jaccard(
    boxes_true: List[List[float]],
    boxes_detection: List[List[float]],
    is_crowd: List[bool],
) -> np.ndarray:
    """
    Calculate the intersection over union (IoU) between detection bounding boxes (dt)
    and ground-truth bounding boxes (gt).
    Reference: https://github.com/rafaelpadilla/review_object_detection_metrics

    For a ground truth flagged as crowd, a detection may match any sub-region
    of it, so the IoU for that pair is area(intersect(gt, dt)) / area(dt)
    instead of the usual intersection over union.

    All pairs are computed at once with NumPy broadcasting; the arithmetic
    follows the same order of operations as a scalar pairwise computation.

    Args:
        boxes_true (List[List[float]]): List of ground-truth bounding boxes in the \
            format [x, y, width, height].
        boxes_detection (List[List[float]]): List of detection bounding boxes in the \
            format [x, y, width, height].
        is_crowd (List[bool]): List indicating if each ground-truth bounding box \
            is a crowd region or not.

    Returns:
        np.ndarray: Array of IoU values of shape (len(dt), len(gt)). If either
            list of boxes is empty, an empty array of shape (0,) is returned.

    Raises:
        ValueError: If `is_crowd` and `boxes_true` have different lengths.
    """
    # Explicit raise instead of `assert`, which is stripped under `python -O`.
    if len(is_crowd) != len(boxes_true):
        raise ValueError("iou(iscrowd=) must have the same length as boxes_true")
    # `len(...) == 0` rather than truthiness so NumPy array inputs also work.
    if len(boxes_detection) == 0 or len(boxes_true) == 0:
        return np.array([])

    # Smallest number to avoid division by zero
    eps = np.spacing(1)

    dt = np.asarray(boxes_detection, dtype=np.float64)
    gt = np.asarray(boxes_true, dtype=np.float64)
    crowd = np.asarray(is_crowd, dtype=bool)

    # Convert (x, y, w, h) to corner coordinates.
    dt_x1, dt_y1 = dt[:, 0], dt[:, 1]
    dt_x2, dt_y2 = dt_x1 + dt[:, 2], dt_y1 + dt[:, 3]
    gt_x1, gt_y1 = gt[:, 0], gt[:, 1]
    gt_x2, gt_y2 = gt_x1 + gt[:, 2], gt_y1 + gt[:, 3]

    area_dt = np.maximum(dt_x2 - dt_x1, 0.0) * np.maximum(dt_y2 - dt_y1, 0.0)
    area_gt = np.maximum(gt_x2 - gt_x1, 0.0) * np.maximum(gt_y2 - gt_y1, 0.0)

    # Pairwise intersection: detections along rows, ground truths along columns.
    inter_w = np.maximum(
        np.minimum(dt_x2[:, None], gt_x2[None, :])
        - np.maximum(dt_x1[:, None], gt_x1[None, :]),
        0.0,
    )
    inter_h = np.maximum(
        np.minimum(dt_y2[:, None], gt_y2[None, :])
        - np.maximum(dt_y1[:, None], gt_y1[None, :]),
        0.0,
    )
    intersection = inter_w * inter_h

    union = area_dt[:, None] + area_gt[None, :] - intersection + eps
    # Crowd ground truths are normalised by the detection area only.
    denominator = np.where(crowd[None, :], area_dt[:, None] + eps, union)
    return intersection / denominator
Loading