From 959b9b05667f6b9a1f349bc2c9843d039e405f60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kun=C3=A1k?= <38215643+Adamusen@users.noreply.github.com> Date: Thu, 7 Nov 2024 09:29:52 +0100 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20[Fix]=20image=20size=20order=20i?= =?UTF-8?q?nconsistencies=20(#118)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 🐛 [Fix] image size order inconsistencies Fixes the expected order of image_size to be [width, height] in bounding_box_utils.py/generate_anchors function as in the rest of the repository. Additionally fixes the same issue in the create_auto_anchor functions of the Vec2Box and Anc2Box classes, where the order does not matter, but was changed for the sake of consistency. * 🐛 [Fix] incorrect image_size order in ValidateModel Fixes an additional image_size order related bug in ValidateModel, in which the PostProcessor received the image_size in [height, width] instead of [width, height] order. * 🐛 [Fix] a variable spelling mistake Fixes a bug in Anc2Box, where its update function was updating "self.anchor_grid" instead of "self.anchor_grids" as initialized in the __init__() function. * 🐛 [Fix] test_anc2box_autoanchor The asserted Tensor shapes were wrong. The number of anchors in each row should be half the number in each column for the updated resolution of (320, 640) [width, height]. 
--- tests/test_utils/test_bounding_box_utils.py | 6 +++--- yolo/tools/solver.py | 3 ++- yolo/utils/bounding_box_utils.py | 16 +++++++++------- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/tests/test_utils/test_bounding_box_utils.py b/tests/test_utils/test_bounding_box_utils.py index 88d8fe59..68cb5ea8 100644 --- a/tests/test_utils/test_bounding_box_utils.py +++ b/tests/test_utils/test_bounding_box_utils.py @@ -138,9 +138,9 @@ def test_anc2box_autoanchor(inference_v7_cfg: Config): anc2box.update((320, 640)) anchor_grids_shape = [anchor_grid.shape for anchor_grid in anc2box.anchor_grids] assert anchor_grids_shape == [ - torch.Size([1, 1, 80, 80, 2]), - torch.Size([1, 1, 40, 40, 2]), - torch.Size([1, 1, 20, 20, 2]), + torch.Size([1, 1, 80, 40, 2]), + torch.Size([1, 1, 40, 20, 2]), + torch.Size([1, 1, 20, 10, 2]), ] assert anc2box.anchor_scale.shape == torch.Size([3, 1, 3, 1, 1, 2]) diff --git a/yolo/tools/solver.py b/yolo/tools/solver.py index 7d038f0a..1bf8f037 100644 --- a/yolo/tools/solver.py +++ b/yolo/tools/solver.py @@ -45,7 +45,8 @@ def val_dataloader(self): def validation_step(self, batch, batch_idx): batch_size, images, targets, rev_tensor, img_paths = batch - predicts = self.post_process(self(images), image_size=images.shape[2:]) + H, W = images.shape[2:] + predicts = self.post_process(self(images), image_size=[W, H]) batch_metrics = self.metric( [to_metrics_format(predict) for predict in predicts], [to_metrics_format(target) for target in targets] ) diff --git a/yolo/utils/bounding_box_utils.py b/yolo/utils/bounding_box_utils.py index a3ef478a..20539be3 100644 --- a/yolo/utils/bounding_box_utils.py +++ b/yolo/utils/bounding_box_utils.py @@ -122,7 +122,7 @@ def generate_anchors(image_size: List[int], strides: List[int]): all_anchors [HW x 2]: all_scalers [HW]: The index of the best targets for each anchors """ - H, W = image_size + W, H = image_size anchors = [] scaler = [] for stride in strides: @@ -312,17 +312,18 @@ def __init__(self, 
model: YOLO, anchor_cfg: AnchorConfig, image_size, device): self.anchor_grid, self.scaler = anchor_grid.to(device), scaler.to(device) def create_auto_anchor(self, model: YOLO, image_size): - dummy_input = torch.zeros(1, 3, *image_size).to(self.device) + W, H = image_size + dummy_input = torch.zeros(1, 3, H, W).to(self.device) dummy_output = model(dummy_input) strides = [] for predict_head in dummy_output["Main"]: _, _, *anchor_num = predict_head[2].shape - strides.append(image_size[1] // anchor_num[1]) + strides.append(W // anchor_num[1]) return strides def update(self, image_size): """ - image_size: H, W + image_size: W, H """ if self.image_size == image_size: return @@ -365,12 +366,13 @@ def __init__(self, model: YOLO, anchor_cfg: AnchorConfig, image_size, device): self.class_num = model.num_classes def create_auto_anchor(self, model: YOLO, image_size): - dummy_input = torch.zeros(1, 3, *image_size).to(self.device) + W, H = image_size + dummy_input = torch.zeros(1, 3, H, W).to(self.device) dummy_output = model(dummy_input) strides = [] for predict_head in dummy_output["Main"]: _, _, *anchor_num = predict_head.shape - strides.append(image_size[1] // anchor_num[1]) + strides.append(W // anchor_num[1]) return strides def generate_anchors(self, image_size: List[int]): @@ -383,7 +385,7 @@ def generate_anchors(self, image_size: List[int]): return anchor_grids def update(self, image_size): - self.anchor_grid = self.generate_anchors(image_size) + self.anchor_grids = self.generate_anchors(image_size) def __call__(self, predicts: List[Tensor]): preds_box, preds_cls, preds_cnf = [], [], []