Skip to content

Commit

Permalink
πŸ› [Fix] image size order inconsistencies (#118)
Browse files Browse the repository at this point in the history
* πŸ› [Fix] image size order inconsistencies

Fixes the expected order of image_size to be [width, height] in bounding_box_utils.py/generate_anchors function as in the rest of the repository.

Additionally fixes the same issue in the create_auto_anchor functions of the Vec2Box and Anc2Box classes, where the order does not affect the result, but is corrected for the sake of consistency.

* πŸ› [Fix] incorrect image_size order in ValidateModel

Fixes an additional image_size order related bug in ValidateModel, in which the PostProcessor received the image_size in [height, width] instead of [width, height] order.

* πŸ› [Fix] a variable spelling mistake

Fixes a bug in Anc2Box, where its update function was updating "self.anchor_grid" instead of "self.anchor_grids" as initialized in the __init__() function.

* πŸ› [Fix] test_anc2box_autoanchor

The asserted Tensor shapes were wrong.
The number of anchors in each row should be half the number in each column for the updated resolution of (320, 640) [width, height].
  • Loading branch information
Adamusen authored Nov 7, 2024
1 parent 2522f72 commit 959b9b0
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 11 deletions.
6 changes: 3 additions & 3 deletions tests/test_utils/test_bounding_box_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,9 +138,9 @@ def test_anc2box_autoanchor(inference_v7_cfg: Config):
anc2box.update((320, 640))
anchor_grids_shape = [anchor_grid.shape for anchor_grid in anc2box.anchor_grids]
assert anchor_grids_shape == [
torch.Size([1, 1, 80, 80, 2]),
torch.Size([1, 1, 40, 40, 2]),
torch.Size([1, 1, 20, 20, 2]),
torch.Size([1, 1, 80, 40, 2]),
torch.Size([1, 1, 40, 20, 2]),
torch.Size([1, 1, 20, 10, 2]),
]
assert anc2box.anchor_scale.shape == torch.Size([3, 1, 3, 1, 1, 2])

Expand Down
3 changes: 2 additions & 1 deletion yolo/tools/solver.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@ def val_dataloader(self):

def validation_step(self, batch, batch_idx):
batch_size, images, targets, rev_tensor, img_paths = batch
predicts = self.post_process(self(images), image_size=images.shape[2:])
H, W = images.shape[2:]
predicts = self.post_process(self(images), image_size=[W, H])
batch_metrics = self.metric(
[to_metrics_format(predict) for predict in predicts], [to_metrics_format(target) for target in targets]
)
Expand Down
16 changes: 9 additions & 7 deletions yolo/utils/bounding_box_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def generate_anchors(image_size: List[int], strides: List[int]):
all_anchors [HW x 2]:
all_scalers [HW]: The index of the best targets for each anchors
"""
H, W = image_size
W, H = image_size
anchors = []
scaler = []
for stride in strides:
Expand Down Expand Up @@ -312,17 +312,18 @@ def __init__(self, model: YOLO, anchor_cfg: AnchorConfig, image_size, device):
self.anchor_grid, self.scaler = anchor_grid.to(device), scaler.to(device)

def create_auto_anchor(self, model: YOLO, image_size):
dummy_input = torch.zeros(1, 3, *image_size).to(self.device)
W, H = image_size
dummy_input = torch.zeros(1, 3, H, W).to(self.device)
dummy_output = model(dummy_input)
strides = []
for predict_head in dummy_output["Main"]:
_, _, *anchor_num = predict_head[2].shape
strides.append(image_size[1] // anchor_num[1])
strides.append(W // anchor_num[1])
return strides

def update(self, image_size):
"""
image_size: H, W
image_size: W, H
"""
if self.image_size == image_size:
return
Expand Down Expand Up @@ -365,12 +366,13 @@ def __init__(self, model: YOLO, anchor_cfg: AnchorConfig, image_size, device):
self.class_num = model.num_classes

def create_auto_anchor(self, model: YOLO, image_size):
dummy_input = torch.zeros(1, 3, *image_size).to(self.device)
W, H = image_size
dummy_input = torch.zeros(1, 3, H, W).to(self.device)
dummy_output = model(dummy_input)
strides = []
for predict_head in dummy_output["Main"]:
_, _, *anchor_num = predict_head.shape
strides.append(image_size[1] // anchor_num[1])
strides.append(W // anchor_num[1])
return strides

def generate_anchors(self, image_size: List[int]):
Expand All @@ -383,7 +385,7 @@ def generate_anchors(self, image_size: List[int]):
return anchor_grids

def update(self, image_size):
self.anchor_grid = self.generate_anchors(image_size)
self.anchor_grids = self.generate_anchors(image_size)

def __call__(self, predicts: List[Tensor]):
preds_box, preds_cls, preds_cnf = [], [], []
Expand Down

0 comments on commit 959b9b0

Please sign in to comment.