Support GradCAM (#7626)

xs1997zju · nemonameless · web-flow · commit ffce1218c1f1 · 2023-02-01T11:43:25.000+08:00
* fix slice infer one image save_results (#7654) * Support GradCAM Cascade_rcnn forward bugfix * code style fix * BBoxCAM class name fix * Add gradcam tutorial and demo --------- Co-authored-by: Feng Ni <nemonameless@qq.com>
diff --git a/docs/images/grad_cam_ppyoloe_demo.jpg b/docs/images/grad_cam_ppyoloe_demo.jpg
diff --git a/docs/tutorials/GradCAM_cn.md b/docs/tutorials/GradCAM_cn.md
@@ -0,0 +1,37 @@
+# 目标检测热力图
+
+## 1.简介
+
+基于backbone特征图计算物体预测框的cam(类激活图)
+
+## 2.使用方法
+* 以PP-YOLOE为例，准备好数据之后，指定网络配置文件、模型权重地址和图片路径以及输出文件夹路径，使用脚本调用tools/cam_ppdet.py计算图片中物体预测框的grad_cam热力图。下面为运行脚本示例。
+```shell
+python tools/cam_ppdet.py -c configs/ppyoloe/ppyoloe_crn_l_300e_coco.yml --infer_img demo/000000014439.jpg --cam_out cam_ppyoloe -o weights=https://paddledet.bj.bcebos.com/models/ppyoloe_crn_l_300e_coco.pdparams
+```
+
+* **参数**
+
+|         FLAG             |                                                  用途                                                   |
+| :----------------------: |:-----------------------------------------------------------------------------------------------------:|
+|          -c              |                                                指定配置文件                                                 |
+|          --infer_img              |                                               用于预测的图片路径                                               |
+|          --cam_out              |                                                指定输出路径                                                 |
+|          -o              | 设置或更改配置文件里的参数内容, 如 -o weights=https://paddledet.bj.bcebos.com/models/ppyoloe_crn_l_300e_coco.pdparams |
+
+* 运行效果
+
+<center>
+<img src="../images/grad_cam_ppyoloe_demo.jpg" width="500" >
+</center>
+<br><center>cam_ppyoloe/225.jpg</center><br>
+
+## 3. 目前支持基于FasterRCNN和YOLOv3系列的网络。
+* FasterRCNN网络热图可视化脚本
+```bash
+python tools/cam_ppdet.py -c configs/faster_rcnn/faster_rcnn_r50_vd_fpn_2x_coco.yml --infer_img demo/000000014439.jpg  --cam_out cam_faster_rcnn -o weights=https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_vd_fpn_ssld_2x_coco.pdparams
+```
+* PPYOLOE网络热图可视化脚本
+```bash
+python tools/cam_ppdet.py -c configs/ppyoloe/ppyoloe_crn_l_300e_coco.yml --infer_img demo/000000014439.jpg --cam_out cam_ppyoloe -o weights=https://paddledet.bj.bcebos.com/models/ppyoloe_crn_l_300e_coco.pdparams
+```
diff --git a/docs/tutorials/GradCAM_en.md b/docs/tutorials/GradCAM_en.md
@@ -0,0 +1,37 @@
+# Object detection grad_cam heatmap
+
+## 1. Introduction
+Compute the CAM (class activation map) of each predicted object bounding box, based on the backbone feature map.
+
+## 2. Usage
+* Taking PP-YOLOE as an example: after preparing the data, specify the network configuration file, the model weights path, the image path and the output folder, then run tools/cam_ppdet.py to compute the Grad-CAM heat map of the predicted boxes in the image. An example command is shown below.
+```shell
+python tools/cam_ppdet.py -c configs/ppyoloe/ppyoloe_crn_l_300e_coco.yml --infer_img demo/000000014439.jpg --cam_out cam_ppyoloe -o weights=https://paddledet.bj.bcebos.com/models/ppyoloe_crn_l_300e_coco.pdparams
+```
+
+* **Arguments**
+
+|         FLAG             |                                                            Description                                                            |
+| :----------------------: |:---------------------------------------------------------------------------------------------------------------------------------:|
+|          -c              |                                                        Select config file                                                         |
+|          --infer_img              |                                               Path of the image used for inference                                                |
+|          --cam_out              |                                                       Directory for output                                                        |
+|          -o              | Set or override parameters in the config file, for example: -o weights=https://paddledet.bj.bcebos.com/models/ppyoloe_crn_l_300e_coco.pdparams |
+
+* Result
+
+<center>
+<img src="../images/grad_cam_ppyoloe_demo.jpg" width="500" >
+</center>
+<br><center>cam_ppyoloe/225.jpg</center><br>
+
+
+## 3. Supported networks: FasterRCNN-based and YOLOv3-series models
+* FasterRCNN bbox heat map visualization script
+```bash
+python tools/cam_ppdet.py -c configs/faster_rcnn/faster_rcnn_r50_vd_fpn_2x_coco.yml --infer_img demo/000000014439.jpg  --cam_out cam_faster_rcnn -o weights=https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_vd_fpn_ssld_2x_coco.pdparams
+```
+* PPYOLOE bbox heat map visualization script
+```bash
+python tools/cam_ppdet.py -c configs/ppyoloe/ppyoloe_crn_l_300e_coco.yml --infer_img demo/000000014439.jpg --cam_out cam_ppyoloe -o weights=https://paddledet.bj.bcebos.com/models/ppyoloe_crn_l_300e_coco.pdparams
+```
diff --git a/ppdet/engine/trainer.py b/ppdet/engine/trainer.py
@@ -1039,7 +1039,7 @@ def setup_metrics_for_loader():
                     image.save(save_name, quality=95)
 
                     start = end
-
+        return results
     def _get_save_image_name(self, output_dir, image_path):
         """
         Get save image name from source image path.
diff --git a/ppdet/modeling/architectures/cascade_rcnn.py b/ppdet/modeling/architectures/cascade_rcnn.py
@@ -108,7 +108,7 @@ def _forward(self):
             im_shape = self.inputs['im_shape']
             scale_factor = self.inputs['scale_factor']
 
-            bbox, bbox_num = self.bbox_post_process(
+            bbox, bbox_num, before_nms_indexes = self.bbox_post_process(
                 preds, (refined_rois, rois_num), im_shape, scale_factor)
             # rescale the prediction back to origin image
             bbox, bbox_pred, bbox_num = self.bbox_post_process.get_pred(
diff --git a/ppdet/modeling/architectures/faster_rcnn.py b/ppdet/modeling/architectures/faster_rcnn.py
@@ -82,18 +82,21 @@ def _forward(self):
                                           self.inputs)
             return rpn_loss, bbox_loss
         else:
+            cam_data = {}  # record bbox scores and index before nms
             rois, rois_num, _ = self.rpn_head(body_feats, self.inputs)
             preds, _ = self.bbox_head(body_feats, rois, rois_num, None)
+            cam_data['scores'] = preds[1]
 
             im_shape = self.inputs['im_shape']
             scale_factor = self.inputs['scale_factor']
-            bbox, bbox_num = self.bbox_post_process(preds, (rois, rois_num),
+            bbox, bbox_num, before_nms_indexes = self.bbox_post_process(preds, (rois, rois_num),
                                                     im_shape, scale_factor)
+            cam_data['before_nms_indexes'] = before_nms_indexes  # , bbox index before nms, for cam
 
             # rescale the prediction back to origin image
             bboxes, bbox_pred, bbox_num = self.bbox_post_process.get_pred(
                 bbox, bbox_num, im_shape, scale_factor)
-            return bbox_pred, bbox_num
+            return bbox_pred, bbox_num, cam_data
 
     def get_loss(self, ):
         rpn_loss, bbox_loss = self._forward()
@@ -105,8 +108,8 @@ def get_loss(self, ):
         return loss
 
     def get_pred(self):
-        bbox_pred, bbox_num = self._forward()
-        output = {'bbox': bbox_pred, 'bbox_num': bbox_num}
+        bbox_pred, bbox_num, cam_data = self._forward()
+        output = {'bbox': bbox_pred, 'bbox_num': bbox_num, 'cam_data': cam_data}
         return output
 
     def target_bbox_forward(self, data):
diff --git a/ppdet/modeling/architectures/yolo.py b/ppdet/modeling/architectures/yolo.py
@@ -98,7 +98,9 @@ def _forward(self):
                 return yolo_losses
 
         else:
+            cam_data = {} # record bbox scores and index before nms
             yolo_head_outs = self.yolo_head(neck_feats)
+            cam_data['scores'] = yolo_head_outs[0]
 
             if self.for_mot:
                 # the detection part of JDE MOT model
@@ -118,14 +120,17 @@ def _forward(self):
                         yolo_head_outs, self.yolo_head.mask_anchors)
                 elif self.post_process is not None:
                     # anchor based YOLOs: YOLOv3,PP-YOLO,PP-YOLOv2 use mask_anchors
-                    bbox, bbox_num = self.post_process(
+                    bbox, bbox_num, before_nms_indexes = self.post_process(
                         yolo_head_outs, self.yolo_head.mask_anchors,
                         self.inputs['im_shape'], self.inputs['scale_factor'])
+                    cam_data['before_nms_indexes'] = before_nms_indexes
                 else:
                     # anchor free YOLOs: PP-YOLOE, PP-YOLOE+
-                    bbox, bbox_num = self.yolo_head.post_process(
+                    bbox, bbox_num, before_nms_indexes = self.yolo_head.post_process(
                         yolo_head_outs, self.inputs['scale_factor'])
-                output = {'bbox': bbox, 'bbox_num': bbox_num}
+                    # data for cam 
+                    cam_data['before_nms_indexes'] = before_nms_indexes
+                output = {'bbox': bbox, 'bbox_num': bbox_num, 'cam_data': cam_data}
 
             return output
 
diff --git a/ppdet/modeling/heads/ppyoloe_head.py b/ppdet/modeling/heads/ppyoloe_head.py
@@ -462,8 +462,8 @@ def post_process(self, head_outs, scale_factor):
                 # `exclude_nms=True` just use in benchmark
                 return pred_bboxes, pred_scores
             else:
-                bbox_pred, bbox_num, _ = self.nms(pred_bboxes, pred_scores)
-                return bbox_pred, bbox_num
+                bbox_pred, bbox_num, before_nms_indexes = self.nms(pred_bboxes, pred_scores)
+                return bbox_pred, bbox_num, before_nms_indexes
 
 
 def get_activation(name="LeakyReLU"):
diff --git a/ppdet/modeling/post_process.py b/ppdet/modeling/post_process.py
@@ -67,7 +67,7 @@ def __call__(self, head_out, rois, im_shape, scale_factor):
         """
         if self.nms is not None:
             bboxes, score = self.decode(head_out, rois, im_shape, scale_factor)
-            bbox_pred, bbox_num, _ = self.nms(bboxes, score, self.num_classes)
+            bbox_pred, bbox_num, before_nms_indexes = self.nms(bboxes, score, self.num_classes)
 
         else:
             bbox_pred, bbox_num = self.decode(head_out, rois, im_shape,
@@ -82,7 +82,10 @@ def __call__(self, head_out, rois, im_shape, scale_factor):
             bbox_pred = paddle.concat([bbox_pred, fake_bboxes])
             bbox_num = bbox_num + 1
 
-        return bbox_pred, bbox_num
+        if self.nms is not None:
+            return bbox_pred, bbox_num, before_nms_indexes
+        else:
+            return bbox_pred, bbox_num
 
     def get_pred(self, bboxes, bbox_num, im_shape, scale_factor):
         """
diff --git a/ppdet/utils/cam_utils.py b/ppdet/utils/cam_utils.py
diff --git a/tools/cam_ppdet.py b/tools/cam_ppdet.py