diff --git a/.circleci/test.yml b/.circleci/test.yml
index 5498c3b20..7d08ccf3a 100644
--- a/.circleci/test.yml
+++ b/.circleci/test.yml
@@ -99,7 +99,7 @@ jobs:
type: string
cuda:
type: enum
- enum: ["10.1", "10.2", "11.1","11.0"]
+ enum: ["10.1", "10.2", "11.1", "11.0"]
cudnn:
type: integer
default: 7
@@ -151,8 +151,7 @@ workflows:
pr_stage_test:
when:
- not:
- << pipeline.parameters.lint_only >>
+ not: << pipeline.parameters.lint_only >>
jobs:
- lint:
name: lint
@@ -164,7 +163,7 @@ workflows:
name: minimum_version_cpu
torch: 1.8.0
torchvision: 0.9.0
- python: 3.8.0 # The lowest python 3.6.x version available on CircleCI images
+      python: 3.8.0 # The lowest python 3.8.x version available on CircleCI images
requires:
- lint
- build_cpu:
@@ -188,8 +187,7 @@ workflows:
- hold
merge_stage_test:
when:
- not:
- << pipeline.parameters.lint_only >>
+ not: << pipeline.parameters.lint_only >>
jobs:
- build_cuda:
name: minimum_version_gpu
diff --git a/README.md b/README.md
index 163985b57..e5e8af2c9 100644
--- a/README.md
+++ b/README.md
@@ -71,14 +71,32 @@ And the figure of P6 model is in [model_design.md](docs/en/algorithm_description
## What's New
-💎 **v0.2.0** was released on 1/12/2022:
+### Highlight
-1. Support [YOLOv7](https://github.com/open-mmlab/mmyolo/tree/dev/configs/yolov7) P5 and P6 model
-2. Support [YOLOv6](https://github.com/open-mmlab/mmyolo/blob/dev/configs/yolov6/README.md) M and L models
-3. Support [Grad-Based CAM and Grad-Free CAM](https://github.com/open-mmlab/mmyolo/blob/dev/demo/boxam_vis_demo.py)
-4. Support [large image inference](https://github.com/open-mmlab/mmyolo/blob/dev/demo/large_image_demo.py) based on sahi
-5. Add [easydeploy](https://github.com/open-mmlab/mmyolo/blob/dev/projects/easydeploy/README.md) project under the projects folder
-6. Add [custom dataset guide](https://github.com/open-mmlab/mmyolo/blob/dev/docs/zh_cn/user_guides/custom_dataset.md)
+We are excited to announce our latest work on real-time object recognition tasks, **RTMDet**, a family of fully convolutional single-stage detectors. RTMDet not only achieves the best parameter-accuracy trade-off on object detection from tiny to extra-large model sizes but also obtains new state-of-the-art performance on instance segmentation and rotated object detection tasks. Details can be found in the [technical report](https://arxiv.org/abs/2212.07784). Pre-trained models are [here](configs/rtmdet).
+
+[](https://paperswithcode.com/sota/real-time-instance-segmentation-on-mscoco?p=rtmdet-an-empirical-study-of-designing-real)
+[](https://paperswithcode.com/sota/object-detection-in-aerial-images-on-dota-1?p=rtmdet-an-empirical-study-of-designing-real)
+[](https://paperswithcode.com/sota/object-detection-in-aerial-images-on-hrsc2016?p=rtmdet-an-empirical-study-of-designing-real)
+
+| Task                     | Dataset | AP                                       | FPS (TRT FP16 BS1 3090) |
+| ------------------------ | ------- | ---------------------------------------- | ----------------------- |
+| Object Detection         | COCO    | 52.8                                     | 322                     |
+| Instance Segmentation    | COCO    | 44.6                                     | 188                     |
+| Rotated Object Detection | DOTA    | 78.9 (single-scale) / 81.3 (multi-scale) | 121                     |
+
+
+

+
+
+MMYOLO currently implements only the object detection algorithm, but it has significant training acceleration compared to the MMDetection version: the training speed is 2.6 times faster than the previous version.
+
+💎 **v0.3.0** was released on 8/1/2023:
+
+1. Implement a fast version of [RTMDet](https://github.com/open-mmlab/mmyolo/blob/dev/configs/rtmdet/README.md). RTMDet-s 8xA100 training takes only 14 hours, and the training speed is 2.6 times faster than the previous version.
+2. Support [PPYOLOE](https://github.com/open-mmlab/mmyolo/blob/dev/configs/ppyoloe/README.md) training
+3. Support `iscrowd` attribute training in [YOLOv5](https://github.com/open-mmlab/mmyolo/blob/dev/configs/yolov5/crowdhuman/yolov5_s-v61_8xb16-300e_ignore_crowdhuman.py)
+4. Support [YOLOv5 assigner result visualization](https://github.com/open-mmlab/mmyolo/blob/dev/projects/assigner_visualization/README.md)
For release history and update details, please refer to [changelog](https://mmyolo.readthedocs.io/en/latest/notes/changelog.html).
@@ -92,7 +110,7 @@ conda activate open-mmlab
pip install openmim
mim install "mmengine>=0.3.1"
mim install "mmcv>=2.0.0rc1,<2.1.0"
-mim install "mmdet>=3.0.0rc3,<3.1.0"
+mim install "mmdet>=3.0.0rc5,<3.1.0"
git clone https://github.com/open-mmlab/mmyolo.git
cd mmyolo
# Install albumentations
@@ -152,7 +170,7 @@ Results and models are available in the [model zoo](docs/en/model_zoo.md).
- [x] [RTMDet](configs/rtmdet)
- [x] [YOLOv6](configs/yolov6)
- [x] [YOLOv7](configs/yolov7)
-- [ ] [PPYOLOE](configs/ppyoloe)(Inference only)
+- [x] [PPYOLOE](configs/ppyoloe)
@@ -183,6 +201,8 @@ Results and models are available in the [model zoo](docs/en/model_zoo.md).
YOLOXCSPDarknet
EfficientRep
CSPNeXt
+ YOLOv7Backbone
+ PPYOLOECSPResNet
@@ -191,6 +211,8 @@ Results and models are available in the [model zoo](docs/en/model_zoo.md).
YOLOv6RepPAFPN
YOLOXPAFPN
CSPNeXtPAFPN
+ YOLOv7PAFPN
+ PPYOLOECSPPAFPN
|
diff --git a/README_zh-CN.md b/README_zh-CN.md
index 430400714..3c0fa5add 100644
--- a/README_zh-CN.md
+++ b/README_zh-CN.md
@@ -71,25 +71,46 @@ The P6 model figure is detailed in [model_design.md](docs/zh_CN/algorithm_descriptions/model_des
## What's New
-💎 **v0.2.0** was released on 1/12/2022:
+### Highlight
-1. Support [YOLOv7](https://github.com/open-mmlab/mmyolo/tree/dev/configs/yolov7) P5 and P6 models
-2. Support the M and L models of [YOLOv6](https://github.com/open-mmlab/mmyolo/blob/dev/configs/yolov6/README.md)
-3. Support [Grad-Based CAM and Grad-Free CAM](https://github.com/open-mmlab/mmyolo/blob/dev/demo/boxam_vis_demo.py)
-4. Support [large image inference](https://github.com/open-mmlab/mmyolo/blob/dev/demo/large_image_demo.py) based on sahi
-5. Add the [easydeploy](https://github.com/open-mmlab/mmyolo/blob/dev/projects/easydeploy/README.md) project under the projects folder
-6. Add the [custom dataset guide](https://github.com/open-mmlab/mmyolo/blob/dev/docs/zh_cn/user_guides/custom_dataset.md)
+We are excited to announce our latest work on real-time object recognition tasks, RTMDet, a family of fully convolutional single-stage detectors. RTMDet not only achieves the best parameter-accuracy trade-off on object detection from tiny to extra-large model sizes, but also obtains new state-of-the-art performance on real-time instance segmentation and rotated object detection tasks. Details can be found in the [technical report](https://arxiv.org/abs/2212.07784). Pre-trained models can be found [here](configs/rtmdet).
+
+[](https://paperswithcode.com/sota/real-time-instance-segmentation-on-mscoco?p=rtmdet-an-empirical-study-of-designing-real)
+[](https://paperswithcode.com/sota/object-detection-in-aerial-images-on-dota-1?p=rtmdet-an-empirical-study-of-designing-real)
+[](https://paperswithcode.com/sota/object-detection-in-aerial-images-on-hrsc2016?p=rtmdet-an-empirical-study-of-designing-real)
+
+| Task                     | Dataset | AP                                       | FPS (TRT FP16 BS1 3090) |
+| ------------------------ | ------- | ---------------------------------------- | ----------------------- |
+| Object Detection         | COCO    | 52.8                                     | 322                     |
+| Instance Segmentation    | COCO    | 44.6                                     | 188                     |
+| Rotated Object Detection | DOTA    | 78.9 (single-scale) / 81.3 (multi-scale) | 121                     |
+
+
+ 
+
+
+MMYOLO currently implements only the object detection algorithm, but it has significant training acceleration compared to the MMDetection version: the training speed is 2.6 times faster than the previous version.
+
+💎 **v0.3.0** was released on 8/1/2023:
+
+1. Implement a fast version of [RTMDet](https://github.com/open-mmlab/mmyolo/blob/dev/configs/rtmdet/README.md). RTMDet-s 8xA100 training takes only 14 hours, and the training speed is 2.6 times faster than the previous version.
+2. Support [PPYOLOE](https://github.com/open-mmlab/mmyolo/blob/dev/configs/ppyoloe/README.md) training
+3. Support `iscrowd` attribute training in [YOLOv5](https://github.com/open-mmlab/mmyolo/blob/dev/configs/yolov5/crowdhuman/yolov5_s-v61_8xb16-300e_ignore_crowdhuman.py)
+4. Support [YOLOv5 assigner result visualization](https://github.com/open-mmlab/mmyolo/blob/dev/projects/assigner_visualization/README.md)
+5. Add [a full interpretation document of the YOLOv6 algorithm and implementation](https://github.com/open-mmlab/mmyolo/blob/dev/docs/zh_cn/algorithm_descriptions/yolov6_description.md)
We have also released explainer videos:
-|     | Content                                                 | Video | Code in the course |
-| :-: | :-----------------------------------------------------: | :---: | :----------------: |
-| 🌟  | Feature map visualization                               | [](https://www.bilibili.com/video/BV188411s7o8) [](https://www.bilibili.com/video/BV188411s7o8) | [Feature map visualization.ipynb](https://github.com/open-mmlab/OpenMMLabCourse/blob/main/codes/MMYOLO_tutorials/%5B%E5%B7%A5%E5%85%B7%E7%B1%BB%E7%AC%AC%E4%B8%80%E6%9C%9F%5D%E7%89%B9%E5%BE%81%E5%9B%BE%E5%8F%AF%E8%A7%86%E5%8C%96.ipynb) |
-| 🌟  | Feature map visualization demo                          | [](https://www.bilibili.com/video/BV1je4y1478R/) [](https://www.bilibili.com/video/BV1je4y1478R/) | |
-| 🌟  | Full interpretation of configs                          | [](https://www.bilibili.com/video/BV1214y157ck) [](https://www.bilibili.com/video/BV1214y157ck) | [Full interpretation of configs doc](https://zhuanlan.zhihu.com/p/577715188) |
-| 🌟  | "Must-have" tips for source code reading and debugging  | [](https://www.bilibili.com/video/BV1N14y1V7mB) [](https://www.bilibili.com/video/BV1N14y1V7mB) | ["Must-have" tips for source code reading and debugging doc](https://zhuanlan.zhihu.com/p/580885852) |
-| 🌟  | Brief analysis of the project file structure            | [](https://www.bilibili.com/video/BV1LP4y117jS) [](https://www.bilibili.com/video/BV1LP4y117jS) | [Brief analysis of the project file structure doc](https://zhuanlan.zhihu.com/p/584807195) |
-| 🌟  | Replace the backbone network in 10 minutes              | [](https://www.bilibili.com/video/BV1JG4y1d7GC) [](https://www.bilibili.com/video/BV1JG4y1d7GC) | [Replace the backbone network in 10 minutes doc](https://zhuanlan.zhihu.com/p/585641598) [Replace the backbone network in 10 minutes.ipynb](https://github.com/open-mmlab/OpenMMLabCourse/blob/main/codes/MMYOLO_tutorials/[实用类第二期]10分钟换遍主干网络.ipynb) |
+|     | Content                                                                   | Video | Code in the course |
+| :-: | :-----------------------------------------------------------------------: | :---: | :----------------: |
+| 🌟  | Feature map visualization                                                 | [](https://www.bilibili.com/video/BV188411s7o8) [](https://www.bilibili.com/video/BV188411s7o8) | [Feature map visualization.ipynb](https://github.com/open-mmlab/OpenMMLabCourse/blob/main/codes/MMYOLO_tutorials/%5B%E5%B7%A5%E5%85%B7%E7%B1%BB%E7%AC%AC%E4%B8%80%E6%9C%9F%5D%E7%89%B9%E5%BE%81%E5%9B%BE%E5%8F%AF%E8%A7%86%E5%8C%96.ipynb) |
+| 🌟  | Feature map visualization demo                                            | [](https://www.bilibili.com/video/BV1je4y1478R/) [](https://www.bilibili.com/video/BV1je4y1478R/) | |
+| 🌟  | Full interpretation of configs                                            | [](https://www.bilibili.com/video/BV1214y157ck) [](https://www.bilibili.com/video/BV1214y157ck) | [Full interpretation of configs doc](https://zhuanlan.zhihu.com/p/577715188) |
+| 🌟  | "Must-have" tips for source code reading and debugging                    | [](https://www.bilibili.com/video/BV1N14y1V7mB) [](https://www.bilibili.com/video/BV1N14y1V7mB) | ["Must-have" tips for source code reading and debugging doc](https://zhuanlan.zhihu.com/p/580885852) |
+| 🌟  | Brief analysis of the project file structure                              | [](https://www.bilibili.com/video/BV1LP4y117jS) [](https://www.bilibili.com/video/BV1LP4y117jS) | [Brief analysis of the project file structure doc](https://zhuanlan.zhihu.com/p/584807195) |
+| 🌟  | Replace the backbone network in 10 minutes                                | [](https://www.bilibili.com/video/BV1JG4y1d7GC) [](https://www.bilibili.com/video/BV1JG4y1d7GC) | [Replace the backbone network in 10 minutes doc](https://zhuanlan.zhihu.com/p/585641598) [Replace the backbone network in 10 minutes.ipynb](https://github.com/open-mmlab/OpenMMLabCourse/blob/main/codes/MMYOLO_tutorials/[实用类第二期]10分钟换遍主干网络.ipynb) |
+| 🌟  | Large image inference based on sahi                                       | [](https://www.bilibili.com/video/BV1EK411R7Ws/) [](https://www.bilibili.com/video/BV1EK411R7Ws/) | [Master large image inference in 10 minutes.ipynb](https://github.com/open-mmlab/OpenMMLabCourse/blob/main/codes/MMYOLO_tutorials/[工具类第二期]10分钟轻松掌握大图推理.ipynb) |
+| 🌟  | Hand-holding tutorial for custom datasets: from annotation to deployment  | [](https://www.bilibili.com/video/BV1RG4y137i5) [](https://www.bilibili.com/video/BV1RG4y137i5) | [Hand-holding tutorial for custom datasets: from annotation to deployment](https://github.com/open-mmlab/mmyolo/blob/dev/docs/zh_cn/user_guides/custom_dataset.md) |
For release history and update details, please refer to the [changelog](https://mmyolo.readthedocs.io/zh_CN/latest/notes/changelog.html)
@@ -103,7 +124,7 @@ conda activate open-mmlab
pip install openmim
mim install "mmengine>=0.3.1"
mim install "mmcv>=2.0.0rc1,<2.1.0"
-mim install "mmdet>=3.0.0rc3,<3.1.0"
+mim install "mmdet>=3.0.0rc5,<3.1.0"
git clone https://github.com/open-mmlab/mmyolo.git
cd mmyolo
# Install albumentations
@@ -149,6 +170,7 @@ The usage of MMYOLO is almost identical to MMDetection, and all tutorials are universal. You can
- Advanced Guides
+  - [Module combination](docs/zh_cn/advanced_guides/module_combination.md)
  - [Data flow](docs/zh_cn/advanced_guides/data_flow.md)
  - [How to](docs/zh_cn/advanced_guides/how_to.md)
  - [Plugins](docs/zh_cn/advanced_guides/plugins.md)
@@ -167,7 +189,7 @@ The usage of MMYOLO is almost identical to MMDetection, and all tutorials are universal. You can
- [x] [RTMDet](configs/rtmdet)
- [x] [YOLOv6](configs/yolov6)
- [x] [YOLOv7](configs/yolov7)
-- [ ] [PPYOLOE](configs/ppyoloe)(Inference only)
+- [x] [PPYOLOE](configs/ppyoloe)
@@ -198,6 +220,8 @@ The usage of MMYOLO is almost identical to MMDetection, and all tutorials are universal. You can
YOLOXCSPDarknet
EfficientRep
CSPNeXt
+ YOLOv7Backbone
+ PPYOLOECSPResNet
|
@@ -206,6 +230,8 @@ The usage of MMYOLO is almost identical to MMDetection, and all tutorials are universal. You can
YOLOv6RepPAFPN
YOLOXPAFPN
CSPNeXtPAFPN
+ YOLOv7PAFPN
+ PPYOLOECSPPAFPN
|
diff --git a/configs/ppyoloe/README.md b/configs/ppyoloe/README.md
new file mode 100644
index 000000000..a7b232275
--- /dev/null
+++ b/configs/ppyoloe/README.md
@@ -0,0 +1,38 @@
+# PPYOLOE
+
+
+
+## Abstract
+
+PP-YOLOE is an excellent single-stage anchor-free model based on PP-YOLOv2, surpassing a variety of popular YOLO models. PP-YOLOE comes in a series of model sizes, named s/m/l/x, configured through width and depth multipliers. PP-YOLOE avoids special operators, such as Deformable Convolution or Matrix NMS, so that it remains friendly to deploy on a wide range of hardware.
+
+
+ 
+
+
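+The s/m/l/x variants differ only in their depth and width multipliers. As a quick
+reference, the following sketch collects the `deepen_factor`/`widen_factor` values
+used by the configs in this folder:
+
+```python
+# Depth/width multipliers of the PPYOLOE model family (from the configs here).
+PPYOLOE_SCALES = {
+    's': dict(deepen_factor=0.33, widen_factor=0.50),
+    'm': dict(deepen_factor=0.67, widen_factor=0.75),
+    'l': dict(deepen_factor=1.00, widen_factor=1.00),
+    'x': dict(deepen_factor=1.33, widen_factor=1.25),
+}
+```
+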
+## Results and models
+
+### PPYOLOE+ COCO
+
+| Backbone | Arch | Size | Epoch | SyncBN | Mem (GB) | Box AP | Config | Download |
+| :---------: | :--: | :--: | :---: | :----: | :------: | :----: | :-------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
+| PPYOLOE+ -s | P5 | 640 | 80 | Yes | 4.7 | 43.5 | [config](../ppyoloe/ppyoloe_plus_s_fast_8xb8-80e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/ppyoloe/ppyoloe_plus_s_fast_8xb8-80e_coco/ppyoloe_plus_s_fast_8xb8-80e_coco_20230101_154052-9fee7619.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/ppyoloe/ppyoloe_plus_s_fast_8xb8-80e_coco/ppyoloe_plus_s_fast_8xb8-80e_coco_20230101_154052.log.json) |
+| PPYOLOE+ -m | P5 | 640 | 80 | Yes | 8.4 | 49.5 | [config](../ppyoloe/ppyoloe_plus_m_fast_8xb8-80e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/ppyoloe/ppyoloe_plus_m_fast_8xb8-80e_coco/ppyoloe_plus_m_fast_8xb8-80e_coco_20230104_193132-e4325ada.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/ppyoloe/ppyoloe_plus_m_fast_8xb8-80e_coco/ppyoloe_plus_m_fast_8xb8-80e_coco_20230104_193132.log.json) |
+| PPYOLOE+ -l | P5 | 640 | 80 | Yes | 13.2 | 52.6 | [config](../ppyoloe/ppyoloe_plus_l_fast_8xb8-80e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/ppyoloe/ppyoloe_plus_l_fast_8xb8-80e_coco/ppyoloe_plus_l_fast_8xb8-80e_coco_20230102_203825-1864e7b3.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/ppyoloe/ppyoloe_plus_l_fast_8xb8-80e_coco/ppyoloe_plus_l_fast_8xb8-80e_coco_20230102_203825.log.json) |
+| PPYOLOE+ -x | P5 | 640 | 80 | Yes | 19.1 | 54.2 | [config](../ppyoloe/ppyoloe_plus_x_fast_8xb8-80e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/ppyoloe/ppyoloe_plus_x_fast_8xb8-80e_coco/ppyoloe_plus_x_fast_8xb8-80e_coco_20230104_194921-8c953949.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/ppyoloe/ppyoloe_plus_x_fast_8xb8-80e_coco/ppyoloe_plus_x_fast_8xb8-80e_coco_20230104_194921.log.json) |
+
+**Note**:
+
+1. The above box APs are from the best-performing checkpoints on COCO.
+2. The gap between the above performance and the official release is about 0.3 mAP. To speed up training in MMYOLO, we use PyTorch to implement the image resizing in `PPYOLOEBatchRandomResize` for multi-scale training, while the official PPYOLOE uses OpenCV; in addition, `lanczos4` interpolation is not yet supported in `PPYOLOEBatchRandomResize`. These two differences account for the gap, and we will continue to experiment and address it in future releases.
+3. The mAP of the non-Plus versions still needs more verification, and we will update more details of the non-Plus versions in future releases.
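+
+For intuition, the batch-level multi-scale behavior of `PPYOLOEBatchRandomResize`
+(configured in the training configs with `random_size_range=(320, 800)`,
+`size_divisor=32` and `random_interp=True`) can be sketched as below. This is a
+conceptual sketch only, not the actual implementation:
+
+```python
+import random
+
+import torch
+import torch.nn.functional as F
+
+
+def batch_random_resize(images: torch.Tensor,
+                        size_range=(320, 800),
+                        size_divisor=32,
+                        random_interp=True) -> torch.Tensor:
+    """Resize a whole (N, C, H, W) batch to one randomly chosen square size."""
+    target = random.randrange(size_range[0], size_range[1] + 1, size_divisor)
+    mode = random.choice(['nearest', 'bilinear', 'bicubic', 'area']) \
+        if random_interp else 'bilinear'
+    kwargs = {} if mode in ('nearest', 'area') else {'align_corners': False}
+    # The real transform also rescales the ground-truth boxes by the same ratio.
+    return F.interpolate(images, size=(target, target), mode=mode, **kwargs)
+```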
+
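+To try one of the released models, a minimal inference sketch (using the standard
+MMDet 3.x APIs, as in `demo/image_demo.py`; the checkpoint path is a placeholder
+for a locally downloaded file) looks like this:
+
+```python
+from mmdet.apis import inference_detector, init_detector
+
+from mmyolo.utils import register_all_modules
+
+# Register mmyolo modules into the mmengine/mmdet registries first.
+register_all_modules()
+
+model = init_detector(
+    'configs/ppyoloe/ppyoloe_plus_s_fast_8xb8-80e_coco.py',
+    'ppyoloe_plus_s_fast_8xb8-80e_coco.pth',  # placeholder checkpoint path
+    device='cuda:0')
+result = inference_detector(model, 'demo.jpg')  # any test image
+```
+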
+```latex
+@article{Xu2022PPYOLOEAE,
+ title={PP-YOLOE: An evolved version of YOLO},
+ author={Shangliang Xu and Xinxin Wang and Wenyu Lv and Qinyao Chang and Cheng Cui and Kaipeng Deng and Guanzhong Wang and Qingqing Dang and Shengyun Wei and Yuning Du and Baohua Lai},
+ journal={ArXiv},
+ year={2022},
+ volume={abs/2203.16250}
+}
+```
diff --git a/configs/ppyoloe/metafile.yml b/configs/ppyoloe/metafile.yml
new file mode 100644
index 000000000..5b7ed9487
--- /dev/null
+++ b/configs/ppyoloe/metafile.yml
@@ -0,0 +1,69 @@
+Collections:
+ - Name: PPYOLOE
+ Metadata:
+ Training Data: COCO
+ Training Techniques:
+      - SGD with Momentum
+ - Weight Decay
+ - Synchronize BN
+ Training Resources: 8x A100 GPUs
+ Architecture:
+ - PPYOLOECSPResNet
+ - PPYOLOECSPPAFPN
+ Paper:
+ URL: https://arxiv.org/abs/2203.16250
+ Title: 'PP-YOLOE: An evolved version of YOLO'
+ README: configs/ppyoloe/README.md
+ Code:
+ URL: https://github.com/open-mmlab/mmyolo/blob/v0.0.1/mmyolo/models/detectors/yolo_detector.py#L12
+ Version: v0.0.1
+
+Models:
+ - Name: ppyoloe_plus_s_fast_8xb8-80e_coco
+ In Collection: PPYOLOE
+ Config: configs/ppyoloe/ppyoloe_plus_s_fast_8xb8-80e_coco.py
+ Metadata:
+ Training Memory (GB): 4.7
+ Epochs: 80
+ Results:
+ - Task: Object Detection
+ Dataset: COCO
+ Metrics:
+ box AP: 43.5
+ Weights: https://download.openmmlab.com/mmyolo/v0/ppyoloe/ppyoloe_plus_s_fast_8xb8-80e_coco/ppyoloe_plus_s_fast_8xb8-80e_coco_20230101_154052-9fee7619.pth
+ - Name: ppyoloe_plus_m_fast_8xb8-80e_coco
+ In Collection: PPYOLOE
+ Config: configs/ppyoloe/ppyoloe_plus_m_fast_8xb8-80e_coco.py
+ Metadata:
+ Training Memory (GB): 8.4
+ Epochs: 80
+ Results:
+ - Task: Object Detection
+ Dataset: COCO
+ Metrics:
+ box AP: 49.5
+ Weights: https://download.openmmlab.com/mmyolo/v0/ppyoloe/ppyoloe_plus_m_fast_8xb8-80e_coco/ppyoloe_plus_m_fast_8xb8-80e_coco_20230104_193132-e4325ada.pth
+  - Name: ppyoloe_plus_l_fast_8xb8-80e_coco
+    In Collection: PPYOLOE
+    Config: configs/ppyoloe/ppyoloe_plus_l_fast_8xb8-80e_coco.py
+ Metadata:
+ Training Memory (GB): 13.2
+ Epochs: 80
+ Results:
+ - Task: Object Detection
+ Dataset: COCO
+ Metrics:
+ box AP: 52.6
+ Weights: https://download.openmmlab.com/mmyolo/v0/ppyoloe/ppyoloe_plus_l_fast_8xb8-80e_coco/ppyoloe_plus_l_fast_8xb8-80e_coco_20230102_203825-1864e7b3.pth
+ - Name: ppyoloe_plus_x_fast_8xb8-80e_coco
+ In Collection: PPYOLOE
+ Config: configs/ppyoloe/ppyoloe_plus_x_fast_8xb8-80e_coco.py
+ Metadata:
+ Training Memory (GB): 19.1
+ Epochs: 80
+ Results:
+ - Task: Object Detection
+ Dataset: COCO
+ Metrics:
+ box AP: 54.2
+ Weights: https://download.openmmlab.com/mmyolo/v0/ppyoloe/ppyoloe_plus_x_fast_8xb8-80e_coco/ppyoloe_plus_x_fast_8xb8-80e_coco_20230104_194921-8c953949.pth
diff --git a/configs/ppyoloe/ppyoloe_l_fast_8xb20-300e_coco.py b/configs/ppyoloe/ppyoloe_l_fast_8xb20-300e_coco.py
index 3ef870e5e..ef1b4eaae 100644
--- a/configs/ppyoloe/ppyoloe_l_fast_8xb20-300e_coco.py
+++ b/configs/ppyoloe/ppyoloe_l_fast_8xb20-300e_coco.py
@@ -1,15 +1,23 @@
_base_ = './ppyoloe_s_fast_8xb32-300e_coco.py'
+# The pretrained model is obtained and converted from the official PPYOLOE.
+# https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.5/configs/ppyoloe/README.md
+checkpoint = 'https://download.openmmlab.com/mmyolo/v0/ppyoloe/ppyoloe_pretrain/cspresnet_l_imagenet1k_pretrained-c0010e6c.pth' # noqa
+
deepen_factor = 1.0
widen_factor = 1.0
-# TODO: training on ppyoloe need to be implemented.
train_batch_size_per_gpu = 20
model = dict(
- backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
+ backbone=dict(
+ deepen_factor=deepen_factor,
+ widen_factor=widen_factor,
+ init_cfg=dict(checkpoint=checkpoint)),
neck=dict(
deepen_factor=deepen_factor,
widen_factor=widen_factor,
),
bbox_head=dict(head_module=dict(widen_factor=widen_factor)))
+
+train_dataloader = dict(batch_size=train_batch_size_per_gpu)
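
Two different pretrained-weight mechanisms appear in the PPYOLOE configs of this
PR. A sketch of the distinction, assuming standard MMEngine semantics (checkpoint
URLs elided):

```python
# 1) Backbone-only initialization (non-Plus PPYOLOE): ImageNet-pretrained
#    CSPResNet weights are copied into `model.backbone` when the model is built.
init_cfg = dict(
    type='Pretrained',
    prefix='backbone.',  # only checkpoint keys under 'backbone.' are loaded
    checkpoint='https://download.openmmlab.com/mmyolo/v0/ppyoloe/ppyoloe_pretrain/...',
    map_location='cpu')

# 2) Whole-detector initialization (PPYOLOE+): an Objects365-pretrained detector
#    is loaded by the runner via `load_from` before COCO training starts.
load_from = 'https://download.openmmlab.com/mmyolo/v0/ppyoloe/ppyoloe_pretrain/...'
```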
diff --git a/configs/ppyoloe/ppyoloe_m_fast_8xb28-300e_coco.py b/configs/ppyoloe/ppyoloe_m_fast_8xb28-300e_coco.py
index 77b49b762..abcfd7833 100644
--- a/configs/ppyoloe/ppyoloe_m_fast_8xb28-300e_coco.py
+++ b/configs/ppyoloe/ppyoloe_m_fast_8xb28-300e_coco.py
@@ -1,15 +1,23 @@
_base_ = './ppyoloe_s_fast_8xb32-300e_coco.py'
+# The pretrained model is obtained and converted from the official PPYOLOE.
+# https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.5/configs/ppyoloe/README.md
+checkpoint = 'https://download.openmmlab.com/mmyolo/v0/ppyoloe/ppyoloe_pretrain/cspresnet_m_imagenet1k_pretrained-09f1eba2.pth' # noqa
+
deepen_factor = 0.67
widen_factor = 0.75
-# TODO: training on ppyoloe need to be implemented.
train_batch_size_per_gpu = 28
model = dict(
- backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
+ backbone=dict(
+ deepen_factor=deepen_factor,
+ widen_factor=widen_factor,
+ init_cfg=dict(checkpoint=checkpoint)),
neck=dict(
deepen_factor=deepen_factor,
widen_factor=widen_factor,
),
bbox_head=dict(head_module=dict(widen_factor=widen_factor)))
+
+train_dataloader = dict(batch_size=train_batch_size_per_gpu)
diff --git a/configs/ppyoloe/ppyoloe_m_fast_8xb28-300e_coco_paddle.py b/configs/ppyoloe/ppyoloe_m_fast_8xb28-300e_coco_paddle.py
new file mode 100644
index 000000000..f79716051
--- /dev/null
+++ b/configs/ppyoloe/ppyoloe_m_fast_8xb28-300e_coco_paddle.py
@@ -0,0 +1,23 @@
+_base_ = './ppyoloe_s_fast_8xb32-300e_coco_paddle.py'
+
+# The pretrained model is obtained and converted from the official PPYOLOE.
+# https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.5/configs/ppyoloe/README.md
+checkpoint = 'https://download.openmmlab.com/mmyolo/v0/ppyoloe/ppyoloe_pretrain/cspresnet_m_imagenet1k_pretrained-09f1eba2.pth' # noqa
+
+deepen_factor = 0.67
+widen_factor = 0.75
+
+train_batch_size_per_gpu = 28
+
+model = dict(
+ backbone=dict(
+ deepen_factor=deepen_factor,
+ widen_factor=widen_factor,
+ init_cfg=dict(checkpoint=checkpoint)),
+ neck=dict(
+ deepen_factor=deepen_factor,
+ widen_factor=widen_factor,
+ ),
+ bbox_head=dict(head_module=dict(widen_factor=widen_factor)))
+
+train_dataloader = dict(batch_size=train_batch_size_per_gpu)
diff --git a/configs/ppyoloe/ppyoloe_plus_l_fast_8xb8-80e_coco.py b/configs/ppyoloe/ppyoloe_plus_l_fast_8xb8-80e_coco.py
index 3741d5f07..9db53e26f 100644
--- a/configs/ppyoloe/ppyoloe_plus_l_fast_8xb8-80e_coco.py
+++ b/configs/ppyoloe/ppyoloe_plus_l_fast_8xb8-80e_coco.py
@@ -1,5 +1,9 @@
_base_ = './ppyoloe_plus_s_fast_8xb8-80e_coco.py'
+# The pretrained model is obtained and converted from the official PPYOLOE.
+# https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.5/configs/ppyoloe/README.md
+load_from = 'https://download.openmmlab.com/mmyolo/v0/ppyoloe/ppyoloe_pretrain/ppyoloe_plus_l_obj365_pretrained-3dd89562.pth' # noqa
+
deepen_factor = 1.0
widen_factor = 1.0
diff --git a/configs/ppyoloe/ppyoloe_plus_m_fast_8xb8-80e_coco.py b/configs/ppyoloe/ppyoloe_plus_m_fast_8xb8-80e_coco.py
index af85f3104..17cb33556 100644
--- a/configs/ppyoloe/ppyoloe_plus_m_fast_8xb8-80e_coco.py
+++ b/configs/ppyoloe/ppyoloe_plus_m_fast_8xb8-80e_coco.py
@@ -1,5 +1,9 @@
_base_ = './ppyoloe_plus_s_fast_8xb8-80e_coco.py'
+# The pretrained model is obtained and converted from the official PPYOLOE.
+# https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.5/configs/ppyoloe/README.md
+load_from = 'https://download.openmmlab.com/mmyolo/v0/ppyoloe/ppyoloe_pretrain/ppyoloe_plus_m_ojb365_pretrained-03206892.pth' # noqa
+
deepen_factor = 0.67
widen_factor = 0.75
diff --git a/configs/ppyoloe/ppyoloe_plus_m_fast_8xb8-80e_coco_paddle.py b/configs/ppyoloe/ppyoloe_plus_m_fast_8xb8-80e_coco_paddle.py
new file mode 100644
index 000000000..98b60d209
--- /dev/null
+++ b/configs/ppyoloe/ppyoloe_plus_m_fast_8xb8-80e_coco_paddle.py
@@ -0,0 +1,16 @@
+_base_ = './ppyoloe_plus_s_fast_8xb8-80e_coco_paddle.py'
+
+# The pretrained model is obtained and converted from the official PPYOLOE.
+# https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.5/configs/ppyoloe/README.md
+load_from = 'https://download.openmmlab.com/mmyolo/v0/ppyoloe/ppyoloe_pretrain/ppyoloe_plus_m_ojb365_pretrained-03206892.pth' # noqa
+
+deepen_factor = 0.67
+widen_factor = 0.75
+
+model = dict(
+ backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
+ neck=dict(
+ deepen_factor=deepen_factor,
+ widen_factor=widen_factor,
+ ),
+ bbox_head=dict(head_module=dict(widen_factor=widen_factor)))
diff --git a/configs/ppyoloe/ppyoloe_plus_s_fast_8xb8-80e_coco.py b/configs/ppyoloe/ppyoloe_plus_s_fast_8xb8-80e_coco.py
index a5931ec38..7c5ce2980 100644
--- a/configs/ppyoloe/ppyoloe_plus_s_fast_8xb8-80e_coco.py
+++ b/configs/ppyoloe/ppyoloe_plus_s_fast_8xb8-80e_coco.py
@@ -5,25 +5,44 @@
dataset_type = 'YOLOv5CocoDataset'
# parameters that often need to be modified
-img_scale = (640, 640) # height, width
+img_scale = (640, 640) # width, height
deepen_factor = 0.33
widen_factor = 0.5
max_epochs = 80
-save_epoch_intervals = 10
+num_classes = 80
+save_epoch_intervals = 5
train_batch_size_per_gpu = 8
train_num_workers = 8
val_batch_size_per_gpu = 1
val_num_workers = 2
+# The pretrained model is obtained and converted from the official PPYOLOE.
+# https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.5/configs/ppyoloe/README.md
+load_from = 'https://download.openmmlab.com/mmyolo/v0/ppyoloe/ppyoloe_pretrain/ppyoloe_plus_s_obj365_pretrained-bcfe8478.pth' # noqa
+
# persistent_workers must be False if num_workers is 0.
persistent_workers = True
+# Base learning rate for optim_wrapper
+base_lr = 0.001
+
strides = [8, 16, 32]
model = dict(
type='YOLODetector',
data_preprocessor=dict(
- type='YOLOv5DetDataPreprocessor',
+        # use this to support multi-scale training
+ type='PPYOLOEDetDataPreprocessor',
+ pad_size_divisor=32,
+ batch_augments=[
+ dict(
+ type='PPYOLOEBatchRandomResize',
+ random_size_range=(320, 800),
+ interval=1,
+ size_divisor=32,
+ random_interp=True,
+ keep_ratio=False)
+ ],
mean=[0., 0., 0.],
std=[255., 255., 255.],
bgr_to_rgb=True),
@@ -56,11 +75,52 @@
type='PPYOLOEHead',
head_module=dict(
type='PPYOLOEHeadModule',
- num_classes=80,
+ num_classes=num_classes,
in_channels=[192, 384, 768],
widen_factor=widen_factor,
featmap_strides=strides,
- num_base_priors=1)),
+ reg_max=16,
+ norm_cfg=dict(type='BN', momentum=0.1, eps=1e-5),
+ act_cfg=dict(type='SiLU', inplace=True),
+ num_base_priors=1),
+ prior_generator=dict(
+ type='mmdet.MlvlPointGenerator', offset=0.5, strides=strides),
+ bbox_coder=dict(type='DistancePointBBoxCoder'),
+ loss_cls=dict(
+ type='mmdet.VarifocalLoss',
+ use_sigmoid=True,
+ alpha=0.75,
+ gamma=2.0,
+ iou_weighted=True,
+ reduction='sum',
+ loss_weight=1.0),
+ loss_bbox=dict(
+ type='IoULoss',
+ iou_mode='giou',
+ bbox_format='xyxy',
+ reduction='mean',
+ loss_weight=2.5,
+ return_iou=False),
+    # Since the DFL loss is implemented differently in the official
+    # repo and in mmdet, we divide loss_weight by 4.
+ loss_dfl=dict(
+ type='mmdet.DistributionFocalLoss',
+ reduction='mean',
+ loss_weight=0.5 / 4)),
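+    # Label assignment warm-up: the static ATSS assigner is used for the first
+    # `initial_epoch` epochs, after which training switches to the dynamic
+    # task-aligned assigner (TAL), following the official PPYOLOE recipe.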
+ train_cfg=dict(
+ initial_epoch=30,
+ initial_assigner=dict(
+ type='BatchATSSAssigner',
+ num_classes=num_classes,
+ topk=9,
+ iou_calculator=dict(type='mmdet.BboxOverlaps2D')),
+ assigner=dict(
+ type='BatchTaskAlignedAssigner',
+ num_classes=num_classes,
+ topk=13,
+ alpha=1,
+ beta=6,
+ eps=1e-9)),
test_cfg=dict(
multi_label=True,
nms_pre=1000,
@@ -68,14 +128,40 @@
nms=dict(type='nms', iou_threshold=0.7),
max_per_img=300))
-test_pipeline = [
+train_pipeline = [
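+    # PPYOLOE training pipeline: photometric distortion -> expand to a larger
+    # mean-padded canvas -> random crop -> random flip, as in the official recipe.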
+ dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(type='PPYOLOERandomDistort'),
+ dict(type='mmdet.Expand', mean=(103.53, 116.28, 123.675)),
+ dict(type='PPYOLOERandomCrop'),
+ dict(type='mmdet.RandomFlip', prob=0.5),
dict(
- type='LoadImageFromFile',
- file_client_args={{_base_.file_client_args}}),
+ type='mmdet.PackDetInputs',
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip',
+ 'flip_direction'))
+]
+
+train_dataloader = dict(
+ batch_size=train_batch_size_per_gpu,
+ num_workers=train_num_workers,
+ persistent_workers=persistent_workers,
+ pin_memory=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ collate_fn=dict(type='yolov5_collate', use_ms_training=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ ann_file='annotations/instances_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ filter_cfg=dict(filter_empty_gt=True, min_size=0),
+ pipeline=train_pipeline))
+
+test_pipeline = [
+ dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
dict(
type='mmdet.FixShapeResize',
- width=img_scale[1],
- height=img_scale[0],
+ width=img_scale[0],
+ height=img_scale[1],
keep_ratio=False,
interpolation='bicubic'),
dict(type='LoadAnnotations', with_bbox=True, _scope_='mmdet'),
@@ -103,6 +189,41 @@
test_dataloader = val_dataloader
+param_scheduler = None
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(
+ type='SGD',
+ lr=base_lr,
+ momentum=0.9,
+ weight_decay=5e-4,
+ nesterov=False),
+ paramwise_cfg=dict(norm_decay_mult=0.))
+
+default_hooks = dict(
+ param_scheduler=dict(
+ type='PPYOLOEParamSchedulerHook',
+ warmup_min_iter=1000,
+ start_factor=0.,
+ warmup_epochs=5,
+ min_lr_ratio=0.0,
+ total_epochs=int(max_epochs * 1.2)),
+ checkpoint=dict(
+ type='CheckpointHook',
+ interval=save_epoch_intervals,
+ save_best='auto',
+ max_keep_ckpts=3))
+
+custom_hooks = [
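+    # Keep an exponential-momentum EMA of the model weights; the EMA weights
+    # are the ones used for validation and testing.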
+ dict(
+ type='EMAHook',
+ ema_type='ExpMomentumEMA',
+ momentum=0.0002,
+ update_buffers=True,
+ strict_load=False,
+ priority=49)
+]
+
val_evaluator = dict(
type='mmdet.CocoMetric',
proposal_nums=(100, 1, 10),
@@ -110,5 +231,9 @@
metric='bbox')
test_evaluator = val_evaluator
+train_cfg = dict(
+ type='EpochBasedTrainLoop',
+ max_epochs=max_epochs,
+ val_interval=save_epoch_intervals)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
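
The `PPYOLOEParamSchedulerHook` above replaces a declarative `param_scheduler`. A
minimal sketch of the schedule it encodes, assuming linear warmup followed by
cosine decay to `min_lr_ratio * base_lr` (the official PPYOLOE recipe; the hook's
exact implementation may differ in detail):

```python
import math


def ppyoloe_lr(cur_iter: int,
               iters_per_epoch: int,
               base_lr: float = 0.001,
               start_factor: float = 0.0,
               warmup_epochs: int = 5,
               warmup_min_iter: int = 1000,
               min_lr_ratio: float = 0.0,
               total_epochs: int = 96) -> float:  # 96 = int(80 * 1.2)
    """Sketch: linear warmup, then cosine decay to min_lr_ratio * base_lr."""
    warmup_iters = max(warmup_epochs * iters_per_epoch, warmup_min_iter)
    if cur_iter < warmup_iters:
        alpha = cur_iter / warmup_iters
        return base_lr * (start_factor * (1 - alpha) + alpha)
    total_iters = total_epochs * iters_per_epoch
    progress = min((cur_iter - warmup_iters) / (total_iters - warmup_iters), 1.0)
    min_lr = base_lr * min_lr_ratio
    return min_lr + 0.5 * (base_lr - min_lr) * (1 + math.cos(math.pi * progress))
```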
diff --git a/configs/ppyoloe/ppyoloe_plus_s_fast_8xb8-80e_coco_paddle.py b/configs/ppyoloe/ppyoloe_plus_s_fast_8xb8-80e_coco_paddle.py
new file mode 100644
index 000000000..d57c5282d
--- /dev/null
+++ b/configs/ppyoloe/ppyoloe_plus_s_fast_8xb8-80e_coco_paddle.py
@@ -0,0 +1,23 @@
+_base_ = './ppyoloe_plus_s_fast_8xb8-80e_coco.py'
+
+# dataset settings
+dataset_type = 'PPYOLOECocoDataset'
+
+model = dict(
+ data_preprocessor=dict(
+ batch_augments=[
+ dict(
+ type='PPYOLOEBatchSyncRandomResizeallopencv',
+ random_size_range=(320, 800),
+ interval=1,
+ size_divisor=32,
+ random_interp=True,
+ keep_ratio=False)
+ ],
+ bgr_to_rgb=True))
+
+train_dataloader = dict(dataset=dict(type=dataset_type))
+
+val_dataloader = dict(dataset=dict(type=dataset_type))
+
+test_dataloader = val_dataloader
diff --git a/configs/ppyoloe/ppyoloe_plus_x_fast_8xb8-80e_coco.py b/configs/ppyoloe/ppyoloe_plus_x_fast_8xb8-80e_coco.py
index 1d5981771..b8e61120b 100644
--- a/configs/ppyoloe/ppyoloe_plus_x_fast_8xb8-80e_coco.py
+++ b/configs/ppyoloe/ppyoloe_plus_x_fast_8xb8-80e_coco.py
@@ -1,5 +1,9 @@
_base_ = './ppyoloe_plus_s_fast_8xb8-80e_coco.py'
+# The pretrained model is obtained and converted from the official PPYOLOE.
+# https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.5/configs/ppyoloe/README.md
+load_from = 'https://download.openmmlab.com/mmyolo/v0/ppyoloe/ppyoloe_pretrain/ppyoloe_plus_x_obj365_pretrained-43a8000d.pth' # noqa
+
deepen_factor = 1.33
widen_factor = 1.25
diff --git a/configs/ppyoloe/ppyoloe_plus_x_fast_8xb8-80e_coco_paddle.py b/configs/ppyoloe/ppyoloe_plus_x_fast_8xb8-80e_coco_paddle.py
new file mode 100644
index 000000000..88e667a2c
--- /dev/null
+++ b/configs/ppyoloe/ppyoloe_plus_x_fast_8xb8-80e_coco_paddle.py
@@ -0,0 +1,16 @@
+_base_ = './ppyoloe_plus_s_fast_8xb8-80e_coco_paddle.py'
+
+# The pretrained model is obtained and converted from the official PPYOLOE.
+# https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.5/configs/ppyoloe/README.md
+load_from = 'https://download.openmmlab.com/mmyolo/v0/ppyoloe/ppyoloe_pretrain/ppyoloe_plus_x_obj365_pretrained-43a8000d.pth' # noqa
+
+deepen_factor = 1.33
+widen_factor = 1.25
+
+model = dict(
+ backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
+ neck=dict(
+ deepen_factor=deepen_factor,
+ widen_factor=widen_factor,
+ ),
+ bbox_head=dict(head_module=dict(widen_factor=widen_factor)))
diff --git a/configs/ppyoloe/ppyoloe_s_fast_8xb32-300e_coco.py b/configs/ppyoloe/ppyoloe_s_fast_8xb32-300e_coco.py
index 002de203c..622332899 100644
--- a/configs/ppyoloe/ppyoloe_s_fast_8xb32-300e_coco.py
+++ b/configs/ppyoloe/ppyoloe_s_fast_8xb32-300e_coco.py
@@ -1,11 +1,36 @@
_base_ = './ppyoloe_plus_s_fast_8xb8-80e_coco.py'
-# TODO: training on ppyoloe need to be implemented.
+# The pretrained model is obtained and converted from the official PPYOLOE.
+# https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.5/configs/ppyoloe/README.md
+checkpoint = 'https://download.openmmlab.com/mmyolo/v0/ppyoloe/ppyoloe_pretrain/cspresnet_s_imagenet1k_pretrained-2be81763.pth' # noqa
+
train_batch_size_per_gpu = 32
max_epochs = 300
+# Base learning rate for optim_wrapper
+base_lr = 0.01
+
model = dict(
data_preprocessor=dict(
mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
std=[0.229 * 255., 0.224 * 255., 0.225 * 255.]),
- backbone=dict(block_cfg=dict(use_alpha=False)))
+ backbone=dict(
+ block_cfg=dict(use_alpha=False),
+ init_cfg=dict(
+ type='Pretrained',
+ prefix='backbone.',
+ checkpoint=checkpoint,
+ map_location='cpu')),
+ train_cfg=dict(initial_epoch=100))
+
+train_dataloader = dict(batch_size=train_batch_size_per_gpu)
+
+optim_wrapper = dict(optimizer=dict(lr=base_lr))
+
+default_hooks = dict(param_scheduler=dict(total_epochs=int(max_epochs * 1.2)))
+
+train_cfg = dict(max_epochs=max_epochs)
+
+# PPYOLOE+ uses an Objects365 pretrained model, but PPYOLOE does not,
+# so `load_from` needs to be set to None.
+load_from = None
diff --git a/configs/ppyoloe/ppyoloe_s_fast_8xb32-300e_coco_paddle.py b/configs/ppyoloe/ppyoloe_s_fast_8xb32-300e_coco_paddle.py
new file mode 100644
index 000000000..cfec80213
--- /dev/null
+++ b/configs/ppyoloe/ppyoloe_s_fast_8xb32-300e_coco_paddle.py
@@ -0,0 +1,36 @@
+_base_ = './ppyoloe_plus_s_fast_8xb8-80e_coco_paddle.py'
+
+# The pretrained model is obtained and converted from the official PPYOLOE.
+# https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.5/configs/ppyoloe/README.md
+checkpoint = 'https://download.openmmlab.com/mmyolo/v0/ppyoloe/ppyoloe_pretrain/cspresnet_s_imagenet1k_pretrained-2be81763.pth' # noqa
+
+train_batch_size_per_gpu = 32
+max_epochs = 300
+
+# Base learning rate for optim_wrapper
+base_lr = 0.01
+
+model = dict(
+ data_preprocessor=dict(
+ mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
+ std=[0.229 * 255., 0.224 * 255., 0.225 * 255.]),
+ backbone=dict(
+ block_cfg=dict(use_alpha=False),
+ init_cfg=dict(
+ type='Pretrained',
+ prefix='backbone.',
+ checkpoint=checkpoint,
+ map_location='cpu')),
+ train_cfg=dict(initial_epoch=100))
+
+train_dataloader = dict(batch_size=train_batch_size_per_gpu)
+
+optim_wrapper = dict(optimizer=dict(lr=base_lr))
+
+default_hooks = dict(param_scheduler=dict(total_epochs=int(max_epochs * 1.2)))
+
+train_cfg = dict(max_epochs=max_epochs)
+
+# PPYOLOE+ uses an Objects365 pretrained model, but PPYOLOE does not,
+# so `load_from` needs to be set to None.
+load_from = None
diff --git a/configs/ppyoloe/ppyoloe_s_fast_8xb32-400e_coco.py b/configs/ppyoloe/ppyoloe_s_fast_8xb32-400e_coco.py
index 9efb6402a..bef9e9130 100644
--- a/configs/ppyoloe/ppyoloe_s_fast_8xb32-400e_coco.py
+++ b/configs/ppyoloe/ppyoloe_s_fast_8xb32-400e_coco.py
@@ -1,4 +1,9 @@
_base_ = './ppyoloe_s_fast_8xb32-300e_coco.py'
-# TODO: training on ppyoloe need to be implemented.
max_epochs = 400
+
+model = dict(train_cfg=dict(initial_epoch=133))
+
+default_hooks = dict(param_scheduler=dict(total_epochs=int(max_epochs * 1.2)))
+
+train_cfg = dict(max_epochs=max_epochs)
diff --git a/configs/ppyoloe/ppyoloe_x_fast_8xb16-300e_coco.py b/configs/ppyoloe/ppyoloe_x_fast_8xb16-300e_coco.py
index 86cdfc190..fed594f0d 100644
--- a/configs/ppyoloe/ppyoloe_x_fast_8xb16-300e_coco.py
+++ b/configs/ppyoloe/ppyoloe_x_fast_8xb16-300e_coco.py
@@ -1,15 +1,23 @@
_base_ = './ppyoloe_s_fast_8xb32-300e_coco.py'
+# The pretrained model is obtained and converted from the official PPYOLOE.
+# https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.5/configs/ppyoloe/README.md
+checkpoint = 'https://download.openmmlab.com/mmyolo/v0/ppyoloe/ppyoloe_pretrain/cspresnet_x_imagenet1k_pretrained-81c33ccb.pth' # noqa
+
deepen_factor = 1.33
widen_factor = 1.25
-# TODO: training on ppyoloe need to be implemented.
train_batch_size_per_gpu = 16
model = dict(
- backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
+ backbone=dict(
+ deepen_factor=deepen_factor,
+ widen_factor=widen_factor,
+ init_cfg=dict(checkpoint=checkpoint)),
neck=dict(
deepen_factor=deepen_factor,
widen_factor=widen_factor,
),
bbox_head=dict(head_module=dict(widen_factor=widen_factor)))
+
+train_dataloader = dict(batch_size=train_batch_size_per_gpu)
diff --git a/configs/rtmdet/README.md b/configs/rtmdet/README.md
index e0b6bedc9..33ae6823f 100644
--- a/configs/rtmdet/README.md
+++ b/configs/rtmdet/README.md
@@ -14,6 +14,11 @@ In this paper, we aim to design an efficient real-time object detector that exce
+
+ 
+RTMDet-l model structure
+
+
## Results and Models
## Object Detection
@@ -30,3 +35,16 @@ In this paper, we aim to design an efficient real-time object detector that exce
1. The inference speed of RTMDet is measured on an NVIDIA 3090 GPU with TensorRT 8.4.3, cuDNN 8.2.0, FP16, batch size=1, and without NMS.
2. For a fair comparison, the config of bbox postprocessing is changed to be consistent with YOLOv5/6/7 after [PR#9494](https://github.com/open-mmlab/mmdetection/pull/9494), bringing about 0.1~0.3% AP improvement.
+
+## Citation
+
+```latex
+@misc{lyu2022rtmdet,
+ title={RTMDet: An Empirical Study of Designing Real-Time Object Detectors},
+ author={Chengqi Lyu and Wenwei Zhang and Haian Huang and Yue Zhou and Yudong Wang and Yanyi Liu and Shilong Zhang and Kai Chen},
+ year={2022},
+ eprint={2212.07784},
+ archivePrefix={arXiv},
+ primaryClass={cs.CV}
+}
+```
diff --git a/configs/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco.py b/configs/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco.py
index 95eb91d62..a5add2c50 100644
--- a/configs/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco.py
+++ b/configs/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco.py
@@ -3,7 +3,7 @@
data_root = 'data/coco/'
dataset_type = 'YOLOv5CocoDataset'
-img_scale = (640, 640) # height, width
+img_scale = (640, 640) # width, height
deepen_factor = 1.0
widen_factor = 1.0
max_epochs = 300
@@ -108,6 +108,7 @@
pad_val=114.0),
dict(
type='mmdet.RandomResize',
+ # img_scale is (width, height)
scale=(img_scale[0] * 2, img_scale[1] * 2),
ratio_range=(0.1, 2.0),
resize_type='mmdet.Resize',
diff --git a/configs/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco.py b/configs/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco.py
index 0edd276ac..8ea4847ed 100644
--- a/configs/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco.py
+++ b/configs/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco.py
@@ -34,6 +34,7 @@
pad_val=114.0),
dict(
type='mmdet.RandomResize',
+ # img_scale is (width, height)
scale=(img_scale[0] * 2, img_scale[1] * 2),
ratio_range=(0.5, 2.0), # note
resize_type='mmdet.Resize',
@@ -74,6 +75,6 @@
priority=49),
dict(
type='mmdet.PipelineSwitchHook',
- switch_epoch=280,
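+    # Switch to the stage-2 pipeline (weaker augmentation) for the last
+    # `stage2_num_epochs` epochs instead of hard-coding epoch 280.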
+ switch_epoch=_base_.max_epochs - _base_.stage2_num_epochs,
switch_pipeline=train_pipeline_stage2)
]
diff --git a/configs/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py b/configs/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py
index 855e7bb20..281062c13 100644
--- a/configs/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py
+++ b/configs/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py
@@ -18,9 +18,7 @@
bbox_head=dict(head_module=dict(widen_factor=widen_factor)))
train_pipeline = [
- dict(
- type='LoadImageFromFile',
- file_client_args={{_base_.file_client_args}}),
+ dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
dict(type='LoadAnnotations', with_bbox=True),
dict(
type='Mosaic',
@@ -31,7 +29,8 @@
pad_val=114.0),
dict(
type='mmdet.RandomResize',
- scale=(1280, 1280),
+ # img_scale is (width, height)
+ scale=(img_scale[0] * 2, img_scale[1] * 2),
ratio_range=(0.5, 2.0),
resize_type='mmdet.Resize',
keep_ratio=True),
diff --git a/configs/yolov5/README.md b/configs/yolov5/README.md
index aec0843e9..641813c27 100644
--- a/configs/yolov5/README.md
+++ b/configs/yolov5/README.md
@@ -6,6 +6,16 @@
YOLOv5 is a family of object detection architectures and models pretrained on the COCO dataset, and represents Ultralytics open-source research into future vision AI methods, incorporating lessons learned and best practices evolved over thousands of hours of research and development.
+
+ 
+YOLOv5-l-P5 model structure
+
+
+
+ 
+YOLOv5-l-P6 model structure
+
+
## Results and models
### COCO
diff --git a/configs/yolov5/crowdhuman/yolov5_s-v61_8xb16-300e_ignore_crowdhuman.py b/configs/yolov5/crowdhuman/yolov5_s-v61_8xb16-300e_ignore_crowdhuman.py
index 0a2cd7bf9..90ba758a5 100644
--- a/configs/yolov5/crowdhuman/yolov5_s-v61_8xb16-300e_ignore_crowdhuman.py
+++ b/configs/yolov5/crowdhuman/yolov5_s-v61_8xb16-300e_ignore_crowdhuman.py
@@ -36,6 +36,7 @@
max_rotate_degree=0.0,
max_shear_degree=0.0,
scaling_ratio_range=(0.5, 1.5),
+ # img_scale is (width, height)
border=(-img_scale[0] // 2, -img_scale[1] // 2),
border_val=(114, 114, 114)),
dict(
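
The `# img_scale is (width, height)` comments added across these configs flag a
recurring pattern: Mosaic first builds a double-sized canvas, and the negative
`border` of `RandomAffine` then crops it back to the target size. A worked example
for `img_scale = (640, 640)`:

```python
img_scale = (640, 640)  # width, height
mosaic_canvas = (img_scale[0] * 2, img_scale[1] * 2)  # (1280, 1280)
border = (-img_scale[0] // 2, -img_scale[1] // 2)     # (-320, -320)
# RandomAffine shifts the 1280x1280 mosaic output by -320 px on each axis and
# shrinks the output canvas by 2 * 320 px, yielding a 640x640 training image.
```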
diff --git a/configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py b/configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py
index b70f3c780..54f6cdeb7 100644
--- a/configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py
+++ b/configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py
@@ -6,7 +6,7 @@
# parameters that often need to be modified
num_classes = 20
-img_scale = (512, 512)
+img_scale = (512, 512) # width, height
max_epochs = 50
train_batch_size_per_gpu = 64
train_num_workers = 8
@@ -62,6 +62,7 @@
max_translate_ratio=0.04591,
max_shear_degree=0.0,
scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
+ # img_scale is (width, height)
border=(-img_scale[0] // 2, -img_scale[1] // 2),
border_val=(114, 114, 114)),
dict(
@@ -80,6 +81,7 @@
max_translate_ratio=0.04591,
max_shear_degree=0.0,
scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
+ # img_scale is (width, height)
border=(-img_scale[0] // 2, -img_scale[1] // 2),
border_val=(114, 114, 114))
])
@@ -164,9 +166,7 @@
collate_fn=dict(type='yolov5_collate'))
test_pipeline = [
- dict(
- type='LoadImageFromFile',
- file_client_args={{_base_.file_client_args}}),
+ dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
dict(type='YOLOv5KeepRatioResize', scale=img_scale),
dict(
type='LetterResize',
diff --git a/configs/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco.py b/configs/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco.py
index 2974418af..f2ccf787a 100644
--- a/configs/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco.py
+++ b/configs/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco.py
@@ -39,6 +39,7 @@
max_rotate_degree=0.0,
max_shear_degree=0.0,
scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
+ # img_scale is (width, height)
border=(-img_scale[0] // 2, -img_scale[1] // 2),
border_val=(114, 114, 114))
]
diff --git a/configs/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco.py b/configs/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco.py
index dcd55ac4f..fdce96036 100644
--- a/configs/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco.py
+++ b/configs/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco.py
@@ -39,6 +39,7 @@
max_rotate_degree=0.0,
max_shear_degree=0.0,
scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
+ # img_scale is (width, height)
border=(-img_scale[0] // 2, -img_scale[1] // 2),
border_val=(114, 114, 114))
]
diff --git a/configs/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco.py b/configs/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco.py
index 787317c8e..d7cb09259 100644
--- a/configs/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco.py
+++ b/configs/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco.py
@@ -1,6 +1,6 @@
_base_ = 'yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py'
-img_scale = (1280, 1280) # height, width
+img_scale = (1280, 1280) # width, height
num_classes = 80
# only on Val
batch_shapes_cfg = dict(img_size=img_scale[0], size_divisor=64)
@@ -45,6 +45,7 @@
max_rotate_degree=0.0,
max_shear_degree=0.0,
scaling_ratio_range=(0.5, 1.5),
+ # img_scale is (width, height)
border=(-img_scale[0] // 2, -img_scale[1] // 2),
border_val=(114, 114, 114)),
dict(
diff --git a/configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py b/configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py
index 51376b649..d06f75c4e 100644
--- a/configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py
+++ b/configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py
@@ -6,7 +6,7 @@
# parameters that often need to be modified
num_classes = 80
-img_scale = (640, 640) # height, width
+img_scale = (640, 640) # width, height
deepen_factor = 0.33
widen_factor = 0.5
max_epochs = 300
@@ -129,6 +129,7 @@
max_rotate_degree=0.0,
max_shear_degree=0.0,
scaling_ratio_range=(0.5, 1.5),
+ # img_scale is (width, height)
border=(-img_scale[0] // 2, -img_scale[1] // 2),
border_val=(114, 114, 114)),
dict(
diff --git a/configs/yolov6/README.md b/configs/yolov6/README.md
index 4070a1996..603ba555f 100644
--- a/configs/yolov6/README.md
+++ b/configs/yolov6/README.md
@@ -12,6 +12,16 @@ For years, YOLO series have been de facto industry-level standard for efficient
+
+ 
+YOLOv6-s model structure
+
+
+
+ 
+YOLOv6-l model structure
+
+
## Results and models
### COCO
diff --git a/configs/yolov6/yolov6_m_syncbn_fast_8xb32-300e_coco.py b/configs/yolov6/yolov6_m_syncbn_fast_8xb32-300e_coco.py
index 28e1549d5..4f8e33ab6 100644
--- a/configs/yolov6/yolov6_m_syncbn_fast_8xb32-300e_coco.py
+++ b/configs/yolov6/yolov6_m_syncbn_fast_8xb32-300e_coco.py
@@ -33,6 +33,7 @@
max_rotate_degree=0.0,
max_shear_degree=0.0,
scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
+ # img_scale is (width, height)
border=(-_base_.img_scale[0] // 2, -_base_.img_scale[1] // 2),
border_val=(114, 114, 114))
]
diff --git a/configs/yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco.py b/configs/yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco.py
index 0b918caa4..21029f766 100644
--- a/configs/yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco.py
+++ b/configs/yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco.py
@@ -1,7 +1,7 @@
_base_ = '../_base_/default_runtime.py'
# dataset settings
data_root = 'data/coco/'
dataset_type = 'YOLOv5CocoDataset'
num_last_epochs = 15
@@ -9,12 +9,12 @@
num_classes = 80
# parameters that often need to be modified
-img_scale = (640, 640) # height, width
+img_scale = (640, 640) # width, height
deepen_factor = 0.33
widen_factor = 0.5
affine_scale = 0.5
save_epoch_intervals = 10
train_batch_size_per_gpu = 32
train_num_workers = 8
val_batch_size_per_gpu = 1
val_num_workers = 2
@@ -117,6 +117,7 @@
max_rotate_degree=0.0,
max_translate_ratio=0.1,
scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
+ # img_scale is (width, height)
border=(-img_scale[0] // 2, -img_scale[1] // 2),
border_val=(114, 114, 114),
max_shear_degree=0.0),
diff --git a/configs/yolov7/yolov7_l_syncbn_fast_8x16b-300e_coco.py b/configs/yolov7/yolov7_l_syncbn_fast_8x16b-300e_coco.py
index 659f3a0f1..2bf8cb7f8 100644
--- a/configs/yolov7/yolov7_l_syncbn_fast_8x16b-300e_coco.py
+++ b/configs/yolov7/yolov7_l_syncbn_fast_8x16b-300e_coco.py
@@ -5,7 +5,7 @@
dataset_type = 'YOLOv5CocoDataset'
# parameters that often need to be modified
-img_scale = (640, 640) # height, width
+img_scale = (640, 640) # width, height
max_epochs = 300
save_epoch_intervals = 10
train_batch_size_per_gpu = 16
@@ -123,6 +123,7 @@
max_shear_degree=0.0,
max_translate_ratio=0.2, # note
scaling_ratio_range=(0.1, 2.0), # note
+ # img_scale is (width, height)
border=(-img_scale[0] // 2, -img_scale[1] // 2),
border_val=(114, 114, 114)),
]
@@ -139,6 +140,7 @@
max_shear_degree=0.0,
max_translate_ratio=0.2, # note
scaling_ratio_range=(0.1, 2.0), # note
+ # img_scale is (width, height)
border=(-img_scale[0] // 2, -img_scale[1] // 2),
border_val=(114, 114, 114)),
]
diff --git a/configs/yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco.py b/configs/yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco.py
index db311b23a..afb004024 100644
--- a/configs/yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco.py
+++ b/configs/yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco.py
@@ -35,6 +35,7 @@
max_shear_degree=0.0,
max_translate_ratio=0.1, # change
scaling_ratio_range=(0.5, 1.6), # change
+ # img_scale is (width, height)
border=(-img_scale[0] // 2, -img_scale[1] // 2),
border_val=(114, 114, 114)),
]
diff --git a/configs/yolov7/yolov7_w-p6_syncbn_fast_8x16b-300e_coco.py b/configs/yolov7/yolov7_w-p6_syncbn_fast_8x16b-300e_coco.py
index f4c55a491..6536c0937 100644
--- a/configs/yolov7/yolov7_w-p6_syncbn_fast_8x16b-300e_coco.py
+++ b/configs/yolov7/yolov7_w-p6_syncbn_fast_8x16b-300e_coco.py
@@ -52,6 +52,7 @@
max_shear_degree=0.0,
max_translate_ratio=0.2, # note
scaling_ratio_range=(0.1, 2.0), # note
+ # img_scale is (width, height)
border=(-img_scale[0] // 2, -img_scale[1] // 2),
border_val=(114, 114, 114)),
]
@@ -68,6 +69,7 @@
max_shear_degree=0.0,
max_translate_ratio=0.2, # note
scaling_ratio_range=(0.1, 2.0), # note
+ # img_scale is (width, height)
border=(-img_scale[0] // 2, -img_scale[1] // 2),
border_val=(114, 114, 114)),
]
diff --git a/configs/yolox/README.md b/configs/yolox/README.md
index de51adf7d..eff2ef4de 100644
--- a/configs/yolox/README.md
+++ b/configs/yolox/README.md
@@ -11,9 +11,10 @@ In this report, we present some experienced improvements to YOLO series, forming
+
- 
-YOLOX_l model structure
+ 
+YOLOX-l model structure
## Results and Models
diff --git a/configs/yolox/yolox_s_8xb8-300e_coco.py b/configs/yolox/yolox_s_8xb8-300e_coco.py
index c9f8aa5e9..0cebbb0e3 100644
--- a/configs/yolox/yolox_s_8xb8-300e_coco.py
+++ b/configs/yolox/yolox_s_8xb8-300e_coco.py
@@ -3,7 +3,7 @@
data_root = 'data/coco/'
dataset_type = 'YOLOv5CocoDataset'
-img_scale = (640, 640) # height, width
+img_scale = (640, 640) # width, height
deepen_factor = 0.33
widen_factor = 0.5
@@ -100,9 +100,7 @@
nms=dict(type='nms', iou_threshold=0.65)))
pre_transform = [
- dict(
- type='LoadImageFromFile',
- file_client_args={{_base_.file_client_args}}),
+ dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
dict(type='LoadAnnotations', with_bbox=True)
]
@@ -116,6 +114,7 @@
dict(
type='mmdet.RandomAffine',
scaling_ratio_range=(0.1, 2),
+ # img_scale is (width, height)
border=(-img_scale[0] // 2, -img_scale[1] // 2)),
dict(
type='YOLOXMixUp',
@@ -168,9 +167,7 @@
pipeline=train_pipeline_stage1))
test_pipeline = [
- dict(
- type='LoadImageFromFile',
- file_client_args={{_base_.file_client_args}}),
+ dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
dict(type='mmdet.Resize', scale=img_scale, keep_ratio=True),
dict(
type='mmdet.Pad',
diff --git a/configs/yolox/yolox_tiny_8xb8-300e_coco.py b/configs/yolox/yolox_tiny_8xb8-300e_coco.py
index 2730c58ea..0fd0a15a9 100644
--- a/configs/yolox/yolox_tiny_8xb8-300e_coco.py
+++ b/configs/yolox/yolox_tiny_8xb8-300e_coco.py
@@ -29,6 +29,7 @@
dict(
type='mmdet.RandomAffine',
scaling_ratio_range=(0.5, 1.5), # note
+ # img_scale is (width, height)
border=(-img_scale[0] // 2, -img_scale[1] // 2)),
dict(type='mmdet.YOLOXHSVRandomAug'),
dict(type='mmdet.RandomFlip', prob=0.5),
diff --git a/demo/image_demo.py b/demo/image_demo.py
index 28989bd1b..8f9fde1a3 100644
--- a/demo/image_demo.py
+++ b/demo/image_demo.py
@@ -5,7 +5,7 @@
import mmcv
from mmdet.apis import inference_detector, init_detector
from mmengine.logging import print_log
-from mmengine.utils import ProgressBar
+from mmengine.utils import ProgressBar, path
from mmyolo.registry import VISUALIZERS
from mmyolo.utils import register_all_modules, switch_to_deploy
@@ -60,8 +60,8 @@ def main():
if args.deploy:
switch_to_deploy(model)
- if not os.path.exists(args.out_dir) and not args.show:
- os.mkdir(args.out_dir)
+ if not args.show:
+ path.mkdir_or_exist(args.out_dir)
# init visualizer
visualizer = VISUALIZERS.build(model.cfg.visualizer)
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 191273c10..2bd006971 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -26,7 +26,7 @@ RUN apt-get update \
# Install MMEngine , MMCV and MMDet
RUN pip install --no-cache-dir openmim && \
- mim install --no-cache-dir "mmengine>=0.3.1" "mmcv>=2.0.0rc1,<2.1.0" "mmdet>=3.0.0rc3,<3.1.0"
+ mim install --no-cache-dir "mmengine>=0.3.1" "mmcv>=2.0.0rc1,<2.1.0" "mmdet>=3.0.0rc5,<3.1.0"
# Install MMYOLO
RUN git clone https://github.com/open-mmlab/mmyolo.git /mmyolo && \
diff --git a/docker/Dockerfile_deployment b/docker/Dockerfile_deployment
index 7326cafab..7f63c1cc1 100644
--- a/docker/Dockerfile_deployment
+++ b/docker/Dockerfile_deployment
@@ -30,7 +30,7 @@ RUN wget -q https://github.com/microsoft/onnxruntime/releases/download/v${ONNXRU
# Install OPENMIM MMENGINE MMDET
RUN pip install --no-cache-dir openmim \
- && mim install --no-cache-dir "mmengine>=0.3.1" "mmdet>=3.0.0rc3,<3.1.0" \
+ && mim install --no-cache-dir "mmengine>=0.3.1" "mmdet>=3.0.0rc5,<3.1.0" \
&& mim install --no-cache-dir opencv-python==4.5.5.64 opencv-python-headless==4.5.5.64
RUN git clone https://github.com/open-mmlab/mmcv.git -b 2.x mmcv \
diff --git a/docs/en/algorithm_descriptions/yolov5_description.md b/docs/en/algorithm_descriptions/yolov5_description.md
index 3ccddc9ee..4d2ed512e 100644
--- a/docs/en/algorithm_descriptions/yolov5_description.md
+++ b/docs/en/algorithm_descriptions/yolov5_description.md
@@ -4,12 +4,12 @@

-Figure 1: YOLOv5-P5 model structure
+Figure 1: YOLOv5-l-P5 model structure
- 
-Figure 2: YOLOv5-P6 model structure
+ 
+Figure 2: YOLOv5-l-P6 model structure
RangeKing@github provides the graph above. Thanks, RangeKing!
diff --git a/docs/en/get_started.md b/docs/en/get_started.md
index ab04f5ba3..ab5a05fb8 100644
--- a/docs/en/get_started.md
+++ b/docs/en/get_started.md
@@ -6,7 +6,8 @@ Compatible MMEngine, MMCV and MMDetection versions are shown as below. Please in
| MMYOLO version | MMDetection version | MMEngine version | MMCV version |
| :------------: | :----------------------: | :----------------------: | :---------------------: |
-| main | mmdet>=3.0.0rc3, \<3.1.0 | mmengine>=0.3.1, \<1.0.0 | mmcv>=2.0.0rc0, \<2.1.0 |
+| main | mmdet>=3.0.0rc5, \<3.1.0 | mmengine>=0.3.1, \<1.0.0 | mmcv>=2.0.0rc0, \<2.1.0 |
+| 0.3.0 | mmdet>=3.0.0rc5, \<3.1.0 | mmengine>=0.3.1, \<1.0.0 | mmcv>=2.0.0rc0, \<2.1.0 |
| 0.2.0 | mmdet>=3.0.0rc3, \<3.1.0 | mmengine>=0.3.1, \<1.0.0 | mmcv>=2.0.0rc0, \<2.1.0 |
| 0.1.3 | mmdet>=3.0.0rc3, \<3.1.0 | mmengine>=0.3.1, \<1.0.0 | mmcv>=2.0.0rc0, \<2.1.0 |
| 0.1.2 | mmdet>=3.0.0rc2, \<3.1.0 | mmengine>=0.3.0, \<1.0.0 | mmcv>=2.0.0rc0, \<2.1.0 |
@@ -15,7 +16,7 @@ Compatible MMEngine, MMCV and MMDetection versions are shown as below. Please in
In this section, we demonstrate how to prepare an environment with PyTorch.
-MMDetection works on Linux, Windows, and macOS. It requires Python 3.6+, CUDA 9.2+, and PyTorch 1.7+.
+MMDetection works on Linux, Windows, and macOS. It requires Python 3.7+, CUDA 9.2+, and PyTorch 1.7+.
```{note}
If you are experienced with PyTorch and have already installed it, just skip this part and jump to the [next section](#installation). Otherwise, you can follow these steps for the preparation.
@@ -54,7 +55,7 @@ conda install pytorch torchvision cpuonly -c pytorch
pip install -U openmim
mim install "mmengine>=0.3.1"
mim install "mmcv>=2.0.0rc1,<2.1.0"
-mim install "mmdet>=3.0.0rc3,<3.1.0"
+mim install "mmdet>=3.0.0rc5,<3.1.0"
```
**Note:**
@@ -213,7 +214,7 @@ thus we only need to install MMEngine, MMCV, MMDetection, and MMYOLO with the fo
!pip3 install openmim
!mim install "mmengine>=0.3.1"
!mim install "mmcv>=2.0.0rc1,<2.1.0"
-!mim install "mmdet>=3.0.0.rc1"
+!mim install "mmdet>=3.0.0rc5,<3.1.0"
```
**Step 2.** Install MMYOLO from the source.
diff --git a/docs/en/index.rst b/docs/en/index.rst
index 7e29af975..123680de7 100644
--- a/docs/en/index.rst
+++ b/docs/en/index.rst
@@ -51,6 +51,7 @@ Welcome to MMYOLO's documentation!
notes/changelog.md
notes/faq.md
notes/compatibility.md
+ notes/conventions.md
.. toctree::
:maxdepth: 2
diff --git a/docs/en/notes/changelog.md b/docs/en/notes/changelog.md
index fc09f75d2..f05b39b1a 100644
--- a/docs/en/notes/changelog.md
+++ b/docs/en/notes/changelog.md
@@ -1,5 +1,67 @@
# Changelog
+## v0.3.0 (8/1/2023)
+
+### Highlights
+
+1. Implement fast version of [RTMDet](https://github.com/open-mmlab/mmyolo/blob/dev/configs/rtmdet/README.md). RTMDet-s 8xA100 training takes only 14 hours. The training speed is 2.6 times faster than the previous version.
+2. Support [PPYOLOE](https://github.com/open-mmlab/mmyolo/blob/dev/configs/ppyoloe/README.md) training
+3. Support `iscrowd` attribute training in [YOLOv5](https://github.com/open-mmlab/mmyolo/blob/dev/configs/yolov5/crowdhuman/yolov5_s-v61_8xb16-300e_ignore_crowdhuman.py)
+4. Support [YOLOv5 assigner result visualization](https://github.com/open-mmlab/mmyolo/blob/dev/projects/assigner_visualization/README.md)
+
+### New Features
+
+01. Add `crowdhuman` dataset (#368)
+02. Support TensorRT inference in EasyDeploy (#377)
+03. Add `YOLOX` structure description (#402)
+04. Add a feature for the video demo (#392)
+05. Support `YOLOv7` easy deploy (#427)
+06. Add resume from specific checkpoint in CLI (#393)
+07. Set `metainfo` fields to lower case (#362, #412)
+08. Add module combination doc (#349, #352, #345)
+09. Add docs about how to freeze the weight of backbone or neck (#418)
+10. Add doc on training without pre-trained weights in `how_to.md` (#404)
+11. Add docs about how to set the random seed (#386)
+12. Translate `rtmdet_description.md` document to English (#353)
+13. Add doc of `yolov6_description.md` (#382, #372)
+
+### Bug Fixes
+
+01. Fix bugs in the output annotation file when `--class-id-txt` is set (#430)
+02. Fix batch inference bug in `YOLOv5` head (#413)
+03. Fix typehint in some heads (#415, #416, #443)
+04. Fix RuntimeError of `torch.cat()` expected a non-empty list of Tensors (#376)
+05. Fix the device inconsistency error in `YOLOv7` training (#397)
+06. Fix the `scale_factor` and `pad_param` value in `LetterResize` (#387)
+07. Fix docstring graph rendering error of readthedocs (#400)
+08. Fix AssertionError when `YOLOv6` switches from training to val (#378)
+09. Fix CI error due to `np.int` and legacy builder.py (#389)
+10. Fix MMDeploy rewriter (#366)
+11. Fix MMYOLO unittest scope bug (#351)
+12. Fix `pad_param` error (#354)
+13. Fix twice head inference bug (#342)
+14. Fix customize dataset training (#428)
+
+### Improvements
+
+01. Update `useful_tools.md` (#384)
+02. Update the English version of `custom_dataset.md` (#381)
+03. Remove context argument from the rewriter function (#395)
+04. Deprecate `np.bool` type alias (#396)
+05. Add new video link for custom dataset (#365)
+06. Export onnx for model only (#361)
+07. Add MMYOLO regression test yml (#359)
+08. Update video tutorials in `article.md` (#350)
+09. Add deploy demo (#343)
+10. Optimize the vis results of large images in debug mode (#346)
+11. Improve args for `browse_dataset` and support `RepeatDataset` (#340, #338)
+
+### Contributors
+
+A total of 28 developers contributed to this release.
+
+Thank @RangeKing, @PeterH0323, @Nioolek, @triple-Mu, @matrixgame2018, @xin-li-67, @tang576225574, @kitecats, @Seperendity, @diplomatist, @vaew, @wzr-skn, @VoyagerXvoyagerx, @MambaWong, @tianleiSHI, @caj-github, @zhubochao, @lvhan028, @dsghaonan, @lyviva, @yuewangg, @wang-tf, @satuoqaq, @grimoire, @RunningLeon, @hanrui1sensetime, @RangiLyu, @hhaAndroid
+
## v0.2.0(1/12/2022)
### Highlights
diff --git a/docs/en/notes/compatibility.md b/docs/en/notes/compatibility.md
index 336a0fac9..7e6ad3da3 100644
--- a/docs/en/notes/compatibility.md
+++ b/docs/en/notes/compatibility.md
@@ -2,6 +2,8 @@
## MMYOLO 0.3.0
+### METAINFO modification
+
To unify with other OpenMMLab repositories, change all keys of `METAINFO` in Dataset from upper case to lower case.
| Before v0.3.0 | after v0.3.0 |
@@ -9,3 +11,36 @@ To unify with other OpenMMLab repositories, change all keys of `METAINFO` in Dat
| CLASSES | classes |
| PALETTE | palette |
| DATASET_TYPE | dataset_type |
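+
+If your code reads these keys directly, the migration is a rename (a minimal sketch):
+
+```python
+# before v0.3.0
+classes = dataset.metainfo['CLASSES']
+# v0.3.0 and later
+classes = dataset.metainfo['classes']
+```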
+
+### About the order of image shape
+
+In OpenMMLab 2.0, to stay consistent with the input arguments of OpenCV, image-shape arguments in the data transformation pipeline are always in `(width, height)` order. In contrast, for computational convenience, the fields flowing through the data pipeline and the model use the `(height, width)` order. Specifically, in the results processed by each data transform, the fields and the meaning of their values are as follows:
+
+- img_shape: (height, width)
+- ori_shape: (height, width)
+- pad_shape: (height, width)
+- batch_input_shape: (height, width)
+
+As an example, the initialization arguments of `Mosaic` are as below:
+
+```python
+@TRANSFORMS.register_module()
+class Mosaic(BaseTransform):
+ def __init__(self,
+ img_scale: Tuple[int, int] = (640, 640),
+ center_ratio_range: Tuple[float, float] = (0.5, 1.5),
+ bbox_clip_border: bool = True,
+ pad_val: float = 114.0,
+ prob: float = 1.0) -> None:
+ ...
+
+ # img_scale order should be (width, height)
+ self.img_scale = img_scale
+
+ def transform(self, results: dict) -> dict:
+ ...
+
+ results['img'] = mosaic_img
+ # (height, width)
+ results['img_shape'] = mosaic_img.shape[:2]
+```
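+
+A quick way to see both conventions side by side (a minimal sketch using `mmcv`; the sizes are arbitrary):
+
+```python
+import numpy as np
+import mmcv
+
+img = np.zeros((480, 640, 3), dtype=np.uint8)  # numpy shape is (height, width, channels)
+
+# mmcv/cv2 style size arguments are (width, height) ...
+resized = mmcv.imresize(img, (320, 240))
+
+# ... while shape-like result fields stay in (height, width) order
+assert resized.shape[:2] == (240, 320)
+```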
diff --git a/docs/en/notes/conventions.md b/docs/en/notes/conventions.md
new file mode 100644
index 000000000..40ca991c6
--- /dev/null
+++ b/docs/en/notes/conventions.md
@@ -0,0 +1,36 @@
+# Conventions
+
+Please check the following conventions if you would like to modify MMYOLO as your own project.
+
+## About the order of image shape
+
+In OpenMMLab 2.0, to stay consistent with the input arguments of OpenCV, image-shape arguments in the data transformation pipeline are always in `(width, height)` order. In contrast, for computational convenience, the fields flowing through the data pipeline and the model use the `(height, width)` order. Specifically, in the results processed by each data transform, the fields and the meaning of their values are as follows:
+
+- img_shape: (height, width)
+- ori_shape: (height, width)
+- pad_shape: (height, width)
+- batch_input_shape: (height, width)
+
+As an example, the initialization arguments of `Mosaic` are as below:
+
+```python
+@TRANSFORMS.register_module()
+class Mosaic(BaseTransform):
+ def __init__(self,
+ img_scale: Tuple[int, int] = (640, 640),
+ center_ratio_range: Tuple[float, float] = (0.5, 1.5),
+ bbox_clip_border: bool = True,
+ pad_val: float = 114.0,
+ prob: float = 1.0) -> None:
+ ...
+
+ # img_scale order should be (width, height)
+ self.img_scale = img_scale
+
+ def transform(self, results: dict) -> dict:
+ ...
+
+ results['img'] = mosaic_img
+ # (height, width)
+ results['img_shape'] = mosaic_img.shape[:2]
+```
diff --git a/docs/en/user_guides/yolov5_tutorial.md b/docs/en/user_guides/yolov5_tutorial.md
index c225757ff..ff9d703af 100644
--- a/docs/en/user_guides/yolov5_tutorial.md
+++ b/docs/en/user_guides/yolov5_tutorial.md
@@ -12,7 +12,7 @@ conda install pytorch torchvision -c pytorch
pip install -U openmim
mim install "mmengine>=0.3.1"
mim install "mmcv>=2.0.0rc1,<2.1.0"
-mim install "mmdet>=3.0.0rc3,<3.1.0"
+mim install "mmdet>=3.0.0rc5,<3.1.0"
git clone https://github.com/open-mmlab/mmyolo.git
cd mmyolo
# Install albumentations
diff --git a/docs/zh_cn/advanced_guides/index.rst b/docs/zh_cn/advanced_guides/index.rst
index 81810a88c..02b06e615 100644
--- a/docs/zh_cn/advanced_guides/index.rst
+++ b/docs/zh_cn/advanced_guides/index.rst
@@ -1,3 +1,11 @@
+模块组合
+************************
+
+.. toctree::
+ :maxdepth: 1
+
+ module_combination.md
+
数据流
************************
diff --git a/docs/zh_cn/advanced_guides/module_combination.md b/docs/zh_cn/advanced_guides/module_combination.md
index 1c4f824bd..265d926a3 100644
--- a/docs/zh_cn/advanced_guides/module_combination.md
+++ b/docs/zh_cn/advanced_guides/module_combination.md
@@ -171,3 +171,110 @@ model = dict(
beta=6)
))
```
+
+## Backbone + Neck + HeadModule 的组合替换
+
+### 1. YOLOv5 Backbone 替换
+
+(1) 假设想将 `RTMDet backbone + yolov5 neck + yolov5 head` 作为 `YOLOv5` 的完整网络,则配置文件如下:
+
+```python
+_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py'
+
+widen_factor = 0.5
+deepen_factor = 0.33
+
+model = dict(
+ backbone=dict(
+ _delete_=True,
+ type='CSPNeXt',
+ arch='P5',
+ expand_ratio=0.5,
+ deepen_factor=deepen_factor,
+ widen_factor=widen_factor,
+ channel_attention=True,
+ norm_cfg=dict(type='BN'),
+ act_cfg=dict(type='SiLU', inplace=True))
+)
+```
+
+(2) 假设想将 `YOLOv6EfficientRep backbone + yolov5 neck + yolov5 head` 作为 `YOLOv5` 的完整网络,则配置文件如下:
+
+```python
+_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py'
+
+model = dict(
+ backbone=dict(
+ type='YOLOv6EfficientRep',
+ norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
+ act_cfg=dict(type='ReLU', inplace=True))
+)
+```
+
+### 2. YOLOv5 Neck 替换
+
+(1) 假设想将 `yolov5 backbone + yolov6 neck + yolov5 head` 作为 `YOLOv5` 的完整网络,则配置文件如下:
+
+```python
+_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py'
+
+model = dict(
+    neck=dict(
+        type='YOLOv6RepPAFPN',
+        in_channels=[256, 512, 1024],
+        out_channels=[128, 256, 512],  # 注意 YOLOv6RepPAFPN 的输出通道是 [128, 256, 512]
+        num_csp_blocks=12,
+        act_cfg=dict(type='ReLU', inplace=True),
+    ),
+    bbox_head=dict(
+        head_module=dict(
+            in_channels=[128, 256, 512]))  # head 部分输入通道要做相应更改
+)
+```
+
+(2) 假设想将 `yolov5 backbone + yolov7 neck + yolov5 head` 作为 `YOLOv5` 的完整网络,则配置文件如下:
+
+```python
+_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py'
+
+deepen_factor = _base_.deepen_factor
+widen_factor = _base_.widen_factor
+
+model = dict(
+    neck=dict(
+        _delete_=True,  # 将 _base_ 中关于 neck 的字段删除
+        type='YOLOv7PAFPN',
+        deepen_factor=deepen_factor,
+        widen_factor=widen_factor,
+        upsample_feats_cat_first=False,
+        in_channels=[256, 512, 1024],
+        out_channels=[128, 256, 512],
+        norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
+        act_cfg=dict(type='SiLU', inplace=True),
+    ),
+    bbox_head=dict(
+        head_module=dict(
+            in_channels=[256, 512, 1024]))  # 注意使用 YOLOv7PAFPN 后 head 部分输入通道数是 neck 输出通道数的两倍
+)
+```
+
+### 3. YOLOv5 HeadModule 替换
+
+(1) 假设想将 `yolov5 backbone + yolov5 neck + yolov7 headmodule` 作为 `YOLOv5` 的完整网络,则配置文件如下:
+
+```python
+_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py'
+
+strides = [8, 16, 32]
+num_classes = 1 # 根据自己的数据集调整
+
+model = dict(
+ bbox_head=dict(
+ type='YOLOv7Head',
+ head_module=dict(
+ type='YOLOv7HeadModule',
+ num_classes=num_classes,
+ in_channels=[256, 512, 1024],
+ featmap_strides=strides,
+ num_base_priors=3)))
+```
diff --git a/docs/zh_cn/algorithm_descriptions/yolov5_description.md b/docs/zh_cn/algorithm_descriptions/yolov5_description.md
index e65a00b7b..2ef01111b 100644
--- a/docs/zh_cn/algorithm_descriptions/yolov5_description.md
+++ b/docs/zh_cn/algorithm_descriptions/yolov5_description.md
@@ -4,12 +4,12 @@

-图 1:YOLOv5-P5 模型结构
+图 1:YOLOv5-l-P5 模型结构
- 
-图 2:YOLOv5-P6 模型结构
+ 
+图 2:YOLOv5-l-P6 模型结构
以上结构图由 RangeKing@github 绘制。
diff --git a/docs/zh_cn/algorithm_descriptions/yolov6_description.md b/docs/zh_cn/algorithm_descriptions/yolov6_description.md
index 6731f7e01..60d59f89a 100644
--- a/docs/zh_cn/algorithm_descriptions/yolov6_description.md
+++ b/docs/zh_cn/algorithm_descriptions/yolov6_description.md
@@ -2,9 +2,19 @@
## 0 简介
-以上结构图 xxx 绘制。
+
+ 
+图 1:YOLOv6-S 模型结构
+
+
+
+ 
+图 2:YOLOv6-L 模型结构
+
-YOLOv6 有一系列适用于各种工业场景的模型,包括N/T/S/M/L,考虑到模型的大小,其架构有所不同,以获得更好的精度-速度权衡。 此外,还引入了一些 "Bag-of-freebies "方法来进一步提高性能,如自我渐变和更多的训练周期。 在工业部署方面,我们采用QAT与信道蒸馏和图形优化来追求极端的性能(后续支持)。
+以上结构图由 wzr-skn@github 绘制。
+
+YOLOv6 提出了一系列适用于各种工业场景的模型,包括 N/T/S/M/L,考虑到模型的大小,其架构有所不同,以获得更好的精度-速度权衡。本算法专注于检测的精度和推理效率,并在网络结构、训练策略等算法层面进行了多项改进和优化。
简单来说 YOLOv6 开源库的主要特点为:
@@ -20,7 +30,7 @@ MMYOLO 实现配置:https://github.com/open-mmlab/mmyolo/blob/main/configs/yol
YOLOv6 官方开源库地址:https://github.com/meituan/YOLOv6
-## 1 YLOLv6 2.0 算法原理和 MMYOLO 实现解析
+## 1 YOLOv6 2.0 算法原理和 MMYOLO 实现解析
YOLOv6 2.0 官方 release 地址:https://github.com/meituan/YOLOv6/releases/tag/0.2.0
@@ -57,12 +67,35 @@ YOLOv6 目标检测算法中使用的数据增强与 YOLOv5 基本一致,唯
### 1.2 网络结构
+YOLOv6 N/T/S 模型的网络结构由 `EfficientRep` + `Rep-PAN` + `Efficient decoupled Head` 构成,M/L 模型的网络结构则由 `CSPBep` + `CSPRepPAFPN` + `Efficient decoupled Head` 构成。其中,Backbone 和 Neck 部分的结构与 YOLOv5 较为相似,但不同的是其采用了重参数化结构 `RepVGG Block` 替换掉了原本的 `ConvModule`,在此基础上,将 `CSPLayer` 改进为了多个 `RepVGG` 堆叠的 `RepStageBlock`(N/T/S 模型)或 `BepC3StageBlock`(M/L 模型);Head 部分则参考了 FCOS 和 YOLOX 的检测头,将回归与分类分支解耦成两个分支进行预测。YOLOv6-S 和 YOLOv6-L 整体结构分别如图 1 和图 2 所示。
+
#### 1.2.1 Backbone
+已有研究表明,多分支的网络结构通常比单分支网络性能更加优异,例如 YOLOv5 的 `CSPDarknet`,但是这种结构会导致并行度降低进而增加推理延时;相反,类似于 `VGG` 的单分支网络则具有并行度高、内存占用小的优点,因此推理效率更高。而 `RepVGG` 则同时具备上述两种结构的优点,在训练时可解耦成多分支拓扑结构提升模型精度,实际部署时可等效融合为单个 3×3 卷积提升推理速度,`RepVGG` 示意图如下。因此,YOLOv6 基于 `RepVGG` 重参数化结构设计了高效的骨干网络 `EfficientRep` 和 `CSPBep`,其可以充分利用硬件算力,提升模型表征能力的同时降低推理延时。
+
+
+
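+
+`RepVGG`"训练时多分支、部署时单分支"的等效融合,可以用下面的极简示意代码理解(仅演示结构重参数化的原理,省略了 BN 融合等细节):
+
+```python
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class RepBlockSketch(nn.Module):
+    """训练时: 3x3 卷积 + 1x1 卷积 + identity 三分支相加;部署时: 单个 3x3 卷积。"""
+
+    def __init__(self, channels: int):
+        super().__init__()
+        self.conv3x3 = nn.Conv2d(channels, channels, 3, padding=1)
+        self.conv1x1 = nn.Conv2d(channels, channels, 1)
+        self.fused = None  # 部署时融合得到的单分支 3x3 卷积
+
+    def forward(self, x):
+        if self.fused is not None:
+            return F.relu(self.fused(x))
+        return F.relu(self.conv3x3(x) + self.conv1x1(x) + x)
+
+    def switch_to_deploy(self):
+        # 1x1 卷积核零填充为 3x3,identity 等效为中心为 1 的 3x3 卷积核,
+        # 三个分支的核与偏置直接相加即可得到等效的单个 3x3 卷积
+        fused = nn.Conv2d(self.conv3x3.in_channels,
+                          self.conv3x3.out_channels, 3, padding=1)
+        weight = self.conv3x3.weight.data + F.pad(
+            self.conv1x1.weight.data, [1, 1, 1, 1])
+        identity = torch.zeros_like(weight)
+        for c in range(identity.shape[0]):
+            identity[c, c, 1, 1] = 1.0
+        fused.weight.data = weight + identity
+        fused.bias.data = self.conv3x3.bias.data + self.conv1x1.bias.data
+        self.fused = fused
+```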
+在 N/T/S 模型中,YOLOv6 使用了 `EfficientRep` 作为骨干网络,其包含 1 个 `Stem Layer` 和 4 个 `Stage Layer`,具体细节如下:
+
+- `Stem Layer` 中采用 stride=2 的 `RepVGGBlock` 替换了 stride=2 的 6×6 `ConvModule`。
+- `Stage Layer` 结构与 YOLOv5 基本相似,将每个 `Stage layer` 的 1 个 `ConvModule` 和 1 个 `CSPLayer` 分别替换为 1 个 `RepVGGBlock` 和 1 个 `RepStageBlock`,如上图 Details 部分所示。其中,第一个 `RepVGGBlock` 会做下采样和 `Channel` 维度变换,而每个 `RepStageBlock` 则由 n 个 `RepVGGBlock` 组成。此外,仍然在第 4 个 `Stage Layer` 最后增加 `SPPF` 模块后输出。
+
+在 M/L 模型中,由于模型容量进一步增大,直接使用多个 `RepVGGBlock` 堆叠的 `RepStageBlock` 结构计算量和参数量呈现指数增长。因此,为了权衡计算负担和模型精度,在 M/L 模型中使用了 `CSPBep` 骨干网络,其采用 `BepC3StageBlock` 替换了小模型中的 `RepStageBlock` 。如下图所示,`BepC3StageBlock` 由 3 个 1×1 的 `ConvModule` 和多个子块(每个子块由两个 `RepVGGBlock` 残差连接)组成。
+
+
+
#### 1.2.2 Neck
+Neck 部分结构仍然在 YOLOv5 基础上进行了模块的改动,同样采用 `RepStageBlock` 或 `BepC3StageBlock` 对原本的 `CSPLayer` 进行了替换,需要注意的是,Neck 中 `Down Sample` 部分仍然使用了 stride=2 的 3×3 `ConvModule`,而不是像 Backbone 一样替换为 `RepVGGBlock`。
+
#### 1.2.3 Head
+不同于传统的 YOLO 系列检测头,YOLOv6 参考了 FCOS 和 YOLOX 中的做法,将分类和回归分支解耦成两个分支进行预测并且去掉了 obj 分支。同时,采用了 hybrid-channel 策略构建了更高效的解耦检测头,将中间 3×3 的 `ConvModule` 减少为 1 个,在维持精度的同时进一步减少了模型开销,降低了推理延时。此外,需要说明的是,YOLOv6 在 Backbone 和 Neck 部分使用的激活函数是 `ReLU`,而在 Head 部分则使用的是 `SiLU`。
+
+由于 YOLOv6 是解耦输出,分类和 bbox 检测通过不同卷积完成。以 COCO 80 类为例:
+
+- P5 模型在输入为 640x640 分辨率情况下,其 Head 模块输出的 shape 分别为 `(B,4,80,80)`, `(B,80,80,80)`, `(B,4,40,40)`, `(B,80,40,40)`, `(B,4,20,20)`, `(B,80,20,20)`。
+
### 1.3 正负样本匹配策略
YOLOv6 采用的标签匹配策略与 [TOOD](https://arxiv.org/abs/2108.07755)
diff --git a/docs/zh_cn/article.md b/docs/zh_cn/article.md
index 6c999e5cd..706f11d0e 100644
--- a/docs/zh_cn/article.md
+++ b/docs/zh_cn/article.md
@@ -7,18 +7,14 @@
### 文章
- [社区协作,简洁易用,快来开箱新一代 YOLO 系列开源库](https://zhuanlan.zhihu.com/p/575615805)
-
- [MMYOLO 社区倾情贡献,RTMDet 原理社区开发者解读来啦!](https://zhuanlan.zhihu.com/p/569777684)
-
- [玩转 MMYOLO 基础类第一期: 配置文件太复杂?继承用法看不懂?配置全解读来了](https://zhuanlan.zhihu.com/p/577715188)
-
- [玩转 MMYOLO 工具类第一期: 特征图可视化](https://zhuanlan.zhihu.com/p/578141381?)
-
- [玩转 MMYOLO 实用类第二期:源码阅读和调试「必备」技巧文档](https://zhuanlan.zhihu.com/p/580885852)
-
- [玩转 MMYOLO 基础类第二期:工程文件结构简析](https://zhuanlan.zhihu.com/p/584807195)
-
- [玩转 MMYOLO 实用类第二期:10分钟换遍主干网络文档](https://zhuanlan.zhihu.com/p/585641598)
+- [MMYOLO 自定义数据集从标注到部署保姆级教程](https://zhuanlan.zhihu.com/p/595497726)
+- [满足一切需求的 MMYOLO 可视化:测试过程可视化](https://zhuanlan.zhihu.com/p/593179372)
### 视频
diff --git a/docs/zh_cn/get_started.md b/docs/zh_cn/get_started.md
index c1371f379..880942c36 100644
--- a/docs/zh_cn/get_started.md
+++ b/docs/zh_cn/get_started.md
@@ -6,7 +6,8 @@
| MMYOLO version | MMDetection version | MMEngine version | MMCV version |
| :------------: | :----------------------: | :----------------------: | :---------------------: |
-| main | mmdet>=3.0.0rc3, \<3.1.0 | mmengine>=0.3.1, \<1.0.0 | mmcv>=2.0.0rc0, \<2.1.0 |
+| main | mmdet>=3.0.0rc5, \<3.1.0 | mmengine>=0.3.1, \<1.0.0 | mmcv>=2.0.0rc0, \<2.1.0 |
+| 0.3.0 | mmdet>=3.0.0rc5, \<3.1.0 | mmengine>=0.3.1, \<1.0.0 | mmcv>=2.0.0rc0, \<2.1.0 |
| 0.2.0 | mmdet>=3.0.0rc3, \<3.1.0 | mmengine>=0.3.1, \<1.0.0 | mmcv>=2.0.0rc0, \<2.1.0 |
| 0.1.3 | mmdet>=3.0.0rc3, \<3.1.0 | mmengine>=0.3.1, \<1.0.0 | mmcv>=2.0.0rc0, \<2.1.0 |
| 0.1.2 | mmdet>=3.0.0rc2, \<3.1.0 | mmengine>=0.3.0, \<1.0.0 | mmcv>=2.0.0rc0, \<2.1.0 |
@@ -15,7 +16,7 @@
本节中,我们将演示如何用 PyTorch 准备一个环境。
-MMYOLO 支持在 Linux,Windows 和 macOS 上运行。它需要 Python 3.6 以上,CUDA 9.2 以上和 PyTorch 1.7 以上。
+MMYOLO 支持在 Linux,Windows 和 macOS 上运行。它需要 Python 3.7 以上,CUDA 9.2 以上和 PyTorch 1.7 以上。
```{note}
如果你对 PyTorch 有经验并且已经安装了它,你可以直接跳转到[下一小节](#安装流程)。否则,你可以按照下述步骤进行准备
@@ -54,7 +55,7 @@ conda install pytorch torchvision cpuonly -c pytorch
pip install -U openmim
mim install "mmengine>=0.3.1"
mim install "mmcv>=2.0.0rc1,<2.1.0"
-mim install "mmdet>=3.0.0rc3,<3.1.0"
+mim install "mmdet>=3.0.0rc5,<3.1.0"
```
**注意:**
@@ -144,7 +145,7 @@ inference_detector(model, 'demo/demo.jpg')
- 对于 Ampere 架构的 NVIDIA GPU,例如 GeForce 30 系列 以及 NVIDIA A100,CUDA 11 是必需的。
- 对于更早的 NVIDIA GPU,CUDA 11 是向后兼容 (backward compatible) 的,但 CUDA 10.2 能够提供更好的兼容性,也更加轻量。
-请确保你的 GPU 驱动版本满足最低的版本需求,参阅 NVIDIA 官方的 [CUDA工具箱和相应的驱动版本关系表](https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html#cuda-major-component-versions__table-cuda-toolkit-driver-versions)。
+请确保你的 GPU 驱动版本满足最低的版本需求,参阅 NVIDIA 官方的 [CUDA 工具箱和相应的驱动版本关系表](https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html#cuda-major-component-versions__table-cuda-toolkit-driver-versions)。
```{note}
如果按照我们的最佳实践进行安装,CUDA 运行时库就足够了,因为我们提供相关 CUDA 代码的预编译,不需要进行本地编译。
@@ -214,7 +215,7 @@ pip install "mmcv>=2.0.0rc1" -f https://download.openmmlab.com/mmcv/dist/cu116/t
!pip3 install openmim
!mim install "mmengine==0.1.0"
!mim install "mmcv>=2.0.0rc1,<2.1.0"
-!mim install "mmdet>=3.0.0.rc1"
+!mim install "mmdet>=3.0.0rc5,<3.1.0"
```
**步骤 2.** 使用源码安装 MMYOLO:
@@ -239,7 +240,7 @@ print(mmyolo.__version__)
#### 通过 Docker 使用 MMYOLO
-我们提供了一个 [Dockerfile](https://github.com/open-mmlab/mmyolo/blob/master/docker/Dockerfile) 来构建一个镜像。请确保你的 [docker版本](https://docs.docker.com/engine/install/) >=`19.03`。
+我们提供了一个 [Dockerfile](https://github.com/open-mmlab/mmyolo/blob/master/docker/Dockerfile) 来构建一个镜像。请确保你的 [docker 版本](https://docs.docker.com/engine/install/) >=`19.03`。
温馨提示;国内用户建议取消掉 [Dockerfile](https://github.com/open-mmlab/mmyolo/blob/master/docker/Dockerfile#L19-L20) 里面 `Optional` 后两行的注释,可以获得火箭一般的下载提速:
diff --git a/docs/zh_cn/index.rst b/docs/zh_cn/index.rst
index 15bcae2a2..5ce41a6b7 100644
--- a/docs/zh_cn/index.rst
+++ b/docs/zh_cn/index.rst
@@ -57,6 +57,7 @@
notes/faq.md
notes/changelog.md
notes/compatibility.md
+ notes/conventions.md
.. toctree::
:maxdepth: 2
diff --git a/docs/zh_cn/notes/changelog.md b/docs/zh_cn/notes/changelog.md
index ac5df1dc2..7f5a9b3d8 100644
--- a/docs/zh_cn/notes/changelog.md
+++ b/docs/zh_cn/notes/changelog.md
@@ -1,5 +1,72 @@
# 更新日志
+## v0.3.0 (8/1/2023)
+
+### 亮点
+
+1. 实现了 [RTMDet](https://github.com/open-mmlab/mmyolo/blob/dev/configs/rtmdet/README.md) 的快速版本。RTMDet-s 8xA100 训练只需要 14 个小时,训练速度相比原先版本提升 2.6 倍。
+2. 支持 [PPYOLOE](https://github.com/open-mmlab/mmyolo/blob/dev/configs/ppyoloe/README.md) 训练。
+3. 支持 [YOLOv5](https://github.com/open-mmlab/mmyolo/blob/dev/configs/yolov5/crowdhuman/yolov5_s-v61_8xb16-300e_ignore_crowdhuman.py) 的 `iscrowd` 属性训练。
+4. 支持 [YOLOv5 正样本分配结果可视化](https://github.com/open-mmlab/mmyolo/blob/dev/projects/assigner_visualization/README.md)
+5. 新增 [YOLOv6 原理和实现全解析文档](https://github.com/open-mmlab/mmyolo/blob/dev/docs/zh_cn/algorithm_descriptions/yolov6_description.md)
+
+### 新特性
+
+01. 新增 `crowdhuman` 数据集 (#368)
+02. EasyDeploy 中支持 TensorRT 推理 (#377)
+03. 新增 `YOLOX` 结构图描述 (#402)
+04. 新增视频推理脚本 (#392)
+05. EasyDeploy 中支持 `YOLOv7` 部署 (#427)
+06. 支持从 CLI 中的特定检查点恢复训练 (#393)
+07. 将元信息字段设置为小写(#362、#412)
+08. 新增模块组合文档 (#349, #352, #345)
+09. 新增关于如何冻结 backbone 或 neck 权重的文档 (#418)
+10. 在 `how_to.md` 中添加不使用预训练权重的文档 (#404)
+11. 新增关于如何设置随机种子的文档 (#386)
+12. 将 `rtmdet_description.md` 文档翻译成英文 (#353)
+
+### Bug 修复
+
+01. 修复设置 `--class-id-txt` 时输出注释文件中的错误 (#430)
+02. 修复 `YOLOv5` head 中的批量推理错误 (#413)
+03. 修复某些 head 的类型提示(#415、#416、#443)
+04. 修复 expected a non-empty list of Tensors 错误 (#376)
+05. 修复 `YOLOv7` 训练中的设备不一致错误(#397)
+06. 修复 `LetterResize` 中的 `scale_factor` 和 `pad_param` 值 (#387)
+07. 修复 readthedocs 的 docstring 图形渲染错误 (#400)
+08. 修复 `YOLOv6` 从训练到验证时的断言错误 (#378)
+09. 修复 `np.int` 和旧版 builder.py 导致的 CI 错误 (#389)
+10. 修复 MMDeploy 重写器 (#366)
+11. 修复 MMYOLO 单元测试错误 (#351)
+12. 修复 `pad_param` 错误 (#354)
+13. 修复 head 推理两次的错误(#342)
+14. 修复自定义数据集训练 (#428)
+
+### 完善
+
+01. 更新 `useful_tools.md` (#384)
+02. 更新英文版 `custom_dataset.md` (#381)
+03. 重写函数删除上下文参数 (#395)
+04. 弃用 `np.bool` 类型别名 (#396)
+05. 为自定义数据集添加新的视频链接 (#365)
+06. 仅为模型导出 onnx (#361)
+07. 添加 MMYOLO 回归测试 yml (#359)
+08. 更新 `article.md` 中的视频教程 (#350)
+09. 添加部署 demo (#343)
+10. 优化 debug 模式下大图的可视化效果(#346)
+11. 改进 `browse_dataset` 的参数并支持 `RepeatDataset` (#340, #338)
+
+### 视频
+
+1. 发布了 [基于 sahi 的大图推理](https://www.bilibili.com/video/BV1EK411R7Ws/)
+2. 发布了 [自定义数据集从标注到部署保姆级教程](https://www.bilibili.com/video/BV1RG4y137i5)
+
+### 贡献者
+
+总共 28 位开发者参与了本次版本
+
+谢谢 @RangeKing, @PeterH0323, @Nioolek, @triple-Mu, @matrixgame2018, @xin-li-67, @tang576225574, @kitecats, @Seperendity, @diplomatist, @vaew, @wzr-skn, @VoyagerXvoyagerx, @MambaWong, @tianleiSHI, @caj-github, @zhubochao, @lvhan028, @dsghaonan, @lyviva, @yuewangg, @wang-tf, @satuoqaq, @grimoire, @RunningLeon, @hanrui1sensetime, @RangiLyu, @hhaAndroid
+
## v0.2.0(1/12/2022)
### 亮点
diff --git a/docs/zh_cn/notes/compatibility.md b/docs/zh_cn/notes/compatibility.md
index a3ad54d2d..a92521efc 100644
--- a/docs/zh_cn/notes/compatibility.md
+++ b/docs/zh_cn/notes/compatibility.md
@@ -2,6 +2,8 @@
## MMYOLO v0.3.0
+### METAINFO 修改
+
为了和 OpenMMLab 其他仓库统一,将 Dataset 里 `METAINFO` 的所有键从大写改为小写。
| 在 v0.3.0 之前 | v0.3.0 及之后 |
@@ -9,3 +11,37 @@
| CLASSES | classes |
| PALETTE | palette |
| DATASET_TYPE | dataset_type |
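+
+如果代码中直接读取了这些键,只需做相应的小写改名即可(迁移示意):
+
+```python
+# v0.3.0 之前
+classes = dataset.metainfo['CLASSES']
+# v0.3.0 及之后
+classes = dataset.metainfo['classes']
+```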
+
+### 关于图片 shape 顺序的说明
+
+在 OpenMMLab 2.0 中, 为了与 OpenCV 的输入参数相一致,图片处理 pipeline 中关于图像 shape 的输入参数总是以 `(width, height)` 的顺序排列。
+相反,为了计算方便,经过 pipeline 和 model 的字段的顺序是 `(height, width)`。具体来说在每个数据 pipeline 处理的结果中,字段和它们的值含义如下:
+
+- img_shape: (height, width)
+- ori_shape: (height, width)
+- pad_shape: (height, width)
+- batch_input_shape: (height, width)
+
+以 `Mosaic` 为例,其初始化参数如下所示:
+
+```python
+@TRANSFORMS.register_module()
+class Mosaic(BaseTransform):
+ def __init__(self,
+ img_scale: Tuple[int, int] = (640, 640),
+ center_ratio_range: Tuple[float, float] = (0.5, 1.5),
+ bbox_clip_border: bool = True,
+ pad_val: float = 114.0,
+ prob: float = 1.0) -> None:
+ ...
+
+ # img_scale 顺序应该是 (width, height)
+ self.img_scale = img_scale
+
+ def transform(self, results: dict) -> dict:
+ ...
+
+ results['img'] = mosaic_img
+ # (height, width)
+ results['img_shape'] = mosaic_img.shape[:2]
+```
diff --git a/docs/zh_cn/notes/conventions.md b/docs/zh_cn/notes/conventions.md
new file mode 100644
index 000000000..7c2370ffb
--- /dev/null
+++ b/docs/zh_cn/notes/conventions.md
@@ -0,0 +1,37 @@
+# 默认约定
+
+如果你想把 MMYOLO 修改为自己的项目,请遵循下面的约定。
+
+## 关于图片 shape 顺序的说明
+
+在 OpenMMLab 2.0 中,为了与 OpenCV 的输入参数相一致,图片处理 pipeline 中关于图像 shape 的输入参数总是以 `(width, height)` 的顺序排列。
+相反,为了计算方便,经过 pipeline 和 model 的字段的顺序是 `(height, width)`。具体来说在每个数据 pipeline 处理的结果中,字段和它们的值含义如下:
+
+- img_shape: (height, width)
+- ori_shape: (height, width)
+- pad_shape: (height, width)
+- batch_input_shape: (height, width)
+
+以 `Mosaic` 为例,其初始化参数如下所示:
+
+```python
+@TRANSFORMS.register_module()
+class Mosaic(BaseTransform):
+ def __init__(self,
+ img_scale: Tuple[int, int] = (640, 640),
+ center_ratio_range: Tuple[float, float] = (0.5, 1.5),
+ bbox_clip_border: bool = True,
+ pad_val: float = 114.0,
+ prob: float = 1.0) -> None:
+ ...
+
+ # img_scale 顺序应该是 (width, height)
+ self.img_scale = img_scale
+
+ def transform(self, results: dict) -> dict:
+ ...
+
+ results['img'] = mosaic_img
+ # (height, width)
+ results['img_shape'] = mosaic_img.shape[:2]
+```
diff --git a/docs/zh_cn/overview.md b/docs/zh_cn/overview.md
index 1515b038b..6856b132f 100644
--- a/docs/zh_cn/overview.md
+++ b/docs/zh_cn/overview.md
@@ -51,8 +51,9 @@ MMYOLO 文件结构和 MMDetection 完全一致。为了能够充分复用 MMDet
5. 参考以下教程深入了解:
- - [数据流](https://mmyolo.readthedocs.io/zh_CN/latest/advanced_guides/index.html#id1)
+ - [模块组合](https://mmyolo.readthedocs.io/zh_CN/latest/advanced_guides/index.html#id1)
+ - [数据流](https://mmyolo.readthedocs.io/zh_CN/latest/advanced_guides/index.html#id2)
- [How to](https://mmyolo.readthedocs.io/zh_CN/latest/advanced_guides/index.html#how-to)
- - [插件](https://mmyolo.readthedocs.io/zh_CN/latest/advanced_guides/index.html#id3)
+ - [插件](https://mmyolo.readthedocs.io/zh_CN/latest/advanced_guides/index.html#id4)
6. [解读文章和资源汇总](article.md)
diff --git a/docs/zh_cn/user_guides/useful_tools.md b/docs/zh_cn/user_guides/useful_tools.md
index 259854274..56243ed28 100644
--- a/docs/zh_cn/user_guides/useful_tools.md
+++ b/docs/zh_cn/user_guides/useful_tools.md
@@ -75,47 +75,62 @@ python tools/analysis_tools/browse_coco_json.py --data-root './data/coco' \
### 可视化数据集
-脚本 `tools/analysis_tools/browse_dataset.py` 能够帮助用户去直接窗口可视化 config 配置中数据处理部分,同时可以选择保存可视化图片到指定文件夹内。
-
```shell
-python tools/analysis_tools/browse_dataset.py ${CONFIG} \
- [--out-dir ${OUT_DIR}] \
- [--not-show] \
- [--show-interval ${SHOW_INTERVAL}]
+python tools/analysis_tools/browse_dataset.py \
+ ${CONFIG_FILE} \
+ [-o, --output-dir ${OUTPUT_DIR}] \
+ [-p, --phase ${DATASET_PHASE}] \
+ [-n, --show-number ${NUMBER_IMAGES_DISPLAY}] \
+    [-i, --show-interval ${SHOW_INTERVAL}] \
+ [-m, --mode ${DISPLAY_MODE}] \
+ [--cfg-options ${CFG_OPTIONS}]
```
-例子:
+**所有参数的说明**:
-1. 使用 `config` 文件 `configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py` 可视化图片,图片直接弹出显示,同时保存到目录 `work_dirs/browse_dataset`:
+- `config`: 模型配置文件的路径。
+- `-o, --output-dir`: 保存图片文件夹,如果没有指定,默认为 `'./output'`。
+- **`-p, --phase`**: 可视化数据集的阶段,只能为 `['train', 'val', 'test']` 之一,默认为 `'train'`。
+- **`-n, --show-number`**: 可视化样本数量。如果没有指定,默认展示数据集的所有图片。
+- `-i, --show-interval`: 图片展示的间隔时间。
+- **`-m, --mode`**: 可视化的模式,只能为 `['original', 'transformed', 'pipeline']` 之一。默认为 `'transformed'`。
+- `--cfg-options`: 对配置文件的修改,参考[学习配置文件](./config.md)。
```shell
-python tools/analysis_tools/browse_dataset.py 'configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py' \
- --out-dir 'work_dirs/browse_dataset'
+`-m, --mode` 用于设置可视化的模式,默认设置为 'transformed'。
+- 如果 `--mode` 设置为 'original',则获取原始图片;
+- 如果 `--mode` 设置为 'transformed',则获取预处理后的图片;
+- 如果 `--mode` 设置为 'pipeline',则获得数据流水线所有中间过程图片。
```
-2. 使用 `config` 文件 `configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py` 可视化图片,图片直接弹出显示,每张图片持续 `10` 秒,同时保存到目录 `work_dirs/browse_dataset`:
+**示例**:
+
+1. **'original'** 模式:
```shell
-python tools/analysis_tools/browse_dataset.py 'configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py' \
- --out-dir 'work_dirs/browse_dataset' \
- --show-interval 10
+python ./tools/analysis_tools/browse_dataset.py configs/yolov5/yolov5_balloon.py --phase val --output-dir tmp --mode original
```
-3. 使用 `config` 文件 `configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py` 可视化图片,图片直接弹出显示,每张图片持续 `10` 秒,图片不进行保存:
+- `--phase val`:可视化验证集,可简化为 `-p val`;
+- `--output-dir tmp`:可视化结果保存在 "tmp" 文件夹,可简化为 `-o tmp`;
+- `--mode original`:可视化原图,可简化为 `-m original`;
+- `--show-number 100`:可视化 100 张图,可简化为 `-n 100`。
+
+2. **'transformed'** 模式:
```shell
-python tools/analysis_tools/browse_dataset.py 'configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py' \
- --show-interval 10
+python ./tools/analysis_tools/browse_dataset.py configs/yolov5/yolov5_balloon.py
```
-4. 使用 `config` 文件 `configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py` 可视化图片,图片不直接弹出显示,仅保存到目录 `work_dirs/browse_dataset`:
+3. **'pipeline'** 模式:
```shell
-python tools/analysis_tools/browse_dataset.py 'configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py' \
- --out-dir 'work_dirs/browse_dataset' \
- --not-show
+python ./tools/analysis_tools/browse_dataset.py configs/yolov5/yolov5_balloon.py -m pipeline
```
+
+ 
+
+
### 可视化数据集分析
脚本 `tools/analysis_tools/dataset_analysis.py` 能够帮助用户得到四种功能的结果图,并将图片保存到当前运行目录下的 `dataset_analysis` 文件夹中。
diff --git a/docs/zh_cn/user_guides/yolov5_tutorial.md b/docs/zh_cn/user_guides/yolov5_tutorial.md
index 200498995..411c4cb44 100644
--- a/docs/zh_cn/user_guides/yolov5_tutorial.md
+++ b/docs/zh_cn/user_guides/yolov5_tutorial.md
@@ -12,7 +12,7 @@ conda install pytorch torchvision -c pytorch
pip install -U openmim
mim install "mmengine>=0.3.1"
mim install "mmcv>=2.0.0rc1,<2.1.0"
-mim install "mmdet>=3.0.0rc3,<3.1.0"
+mim install "mmdet>=3.0.0rc5,<3.1.0"
git clone https://github.com/open-mmlab/mmyolo.git
cd mmyolo
# Install albumentations
diff --git a/mmyolo/__init__.py b/mmyolo/__init__.py
index 67367292c..757c4084d 100644
--- a/mmyolo/__init__.py
+++ b/mmyolo/__init__.py
@@ -14,7 +14,7 @@
mmengine_maximum_version = '1.0.0'
mmengine_version = digit_version(mmengine.__version__)
-mmdet_minimum_version = '3.0.0rc3'
+mmdet_minimum_version = '3.0.0rc5'
mmdet_maximum_version = '3.1.0'
mmdet_version = digit_version(mmdet.__version__)
diff --git a/mmyolo/datasets/__init__.py b/mmyolo/datasets/__init__.py
index 592535eb0..7d99e375d 100644
--- a/mmyolo/datasets/__init__.py
+++ b/mmyolo/datasets/__init__.py
@@ -1,11 +1,11 @@
# Copyright (c) OpenMMLab. All rights reserved.
from .transforms import * # noqa: F401,F403
from .utils import BatchShapePolicy, yolov5_collate
-from .yolov5_coco import YOLOv5CocoDataset
+from .yolov5_coco import PPYOLOECocoDataset, YOLOv5CocoDataset
from .yolov5_crowdhuman import YOLOv5CrowdHumanDataset
from .yolov5_voc import YOLOv5VOCDataset
__all__ = [
'YOLOv5CocoDataset', 'YOLOv5VOCDataset', 'BatchShapePolicy',
- 'yolov5_collate', 'YOLOv5CrowdHumanDataset'
+ 'yolov5_collate', 'YOLOv5CrowdHumanDataset', 'PPYOLOECocoDataset'
]
diff --git a/mmyolo/datasets/transforms/__init__.py b/mmyolo/datasets/transforms/__init__.py
index 842ad641a..ea1cd41e0 100644
--- a/mmyolo/datasets/transforms/__init__.py
+++ b/mmyolo/datasets/transforms/__init__.py
@@ -1,10 +1,12 @@
# Copyright (c) OpenMMLab. All rights reserved.
from .mix_img_transforms import Mosaic, Mosaic9, YOLOv5MixUp, YOLOXMixUp
-from .transforms import (LetterResize, LoadAnnotations, YOLOv5HSVRandomAug,
+from .transforms import (LetterResize, LoadAnnotations, PPYOLOERandomCrop,
+ PPYOLOERandomDistort, YOLOv5HSVRandomAug,
YOLOv5KeepRatioResize, YOLOv5RandomAffine)
__all__ = [
'YOLOv5KeepRatioResize', 'LetterResize', 'Mosaic', 'YOLOXMixUp',
'YOLOv5MixUp', 'YOLOv5HSVRandomAug', 'LoadAnnotations',
- 'YOLOv5RandomAffine', 'Mosaic9'
+ 'YOLOv5RandomAffine', 'PPYOLOERandomDistort', 'PPYOLOERandomCrop',
+ 'Mosaic9'
]
diff --git a/mmyolo/datasets/transforms/mix_img_transforms.py b/mmyolo/datasets/transforms/mix_img_transforms.py
index 5fe730b08..9cd5ad981 100644
--- a/mmyolo/datasets/transforms/mix_img_transforms.py
+++ b/mmyolo/datasets/transforms/mix_img_transforms.py
@@ -234,7 +234,7 @@ class Mosaic(BaseMixImageTransform):
Args:
img_scale (Sequence[int]): Image size after mosaic pipeline of single
- image. The shape order should be (height, width).
+ image. The shape order should be (width, height).
Defaults to (640, 640).
center_ratio_range (Sequence[float]): Center ratio range of mosaic
output. Defaults to (0.5, 1.5).
@@ -317,22 +317,22 @@ def mix_img_transform(self, results: dict) -> dict:
mosaic_bboxes = []
mosaic_bboxes_labels = []
mosaic_ignore_flags = []
+ # self.img_scale is wh format
+ img_scale_w, img_scale_h = self.img_scale
+
if len(results['img'].shape) == 3:
mosaic_img = np.full(
- (int(self.img_scale[0] * 2), int(self.img_scale[1] * 2), 3),
+ (int(img_scale_h * 2), int(img_scale_w * 2), 3),
self.pad_val,
dtype=results['img'].dtype)
else:
- mosaic_img = np.full(
- (int(self.img_scale[0] * 2), int(self.img_scale[1] * 2)),
- self.pad_val,
- dtype=results['img'].dtype)
+ mosaic_img = np.full((int(img_scale_h * 2), int(img_scale_w * 2)),
+ self.pad_val,
+ dtype=results['img'].dtype)
# mosaic center x, y
- center_x = int(
- random.uniform(*self.center_ratio_range) * self.img_scale[1])
- center_y = int(
- random.uniform(*self.center_ratio_range) * self.img_scale[0])
+ center_x = int(random.uniform(*self.center_ratio_range) * img_scale_w)
+ center_y = int(random.uniform(*self.center_ratio_range) * img_scale_h)
center_position = (center_x, center_y)
loc_strs = ('top_left', 'top_right', 'bottom_left', 'bottom_right')
@@ -345,8 +345,7 @@ def mix_img_transform(self, results: dict) -> dict:
img_i = results_patch['img']
h_i, w_i = img_i.shape[:2]
# keep_ratio resize
- scale_ratio_i = min(self.img_scale[0] / h_i,
- self.img_scale[1] / w_i)
+ scale_ratio_i = min(img_scale_h / h_i, img_scale_w / w_i)
img_i = mmcv.imresize(
img_i, (int(w_i * scale_ratio_i), int(h_i * scale_ratio_i)))
@@ -377,11 +376,11 @@ def mix_img_transform(self, results: dict) -> dict:
mosaic_ignore_flags = np.concatenate(mosaic_ignore_flags, 0)
if self.bbox_clip_border:
- mosaic_bboxes.clip_([2 * self.img_scale[0], 2 * self.img_scale[1]])
+ mosaic_bboxes.clip_([2 * img_scale_h, 2 * img_scale_w])
else:
# remove outside bboxes
inside_inds = mosaic_bboxes.is_inside(
- [2 * self.img_scale[0], 2 * self.img_scale[1]]).numpy()
+ [2 * img_scale_h, 2 * img_scale_w]).numpy()
mosaic_bboxes = mosaic_bboxes[inside_inds]
mosaic_bboxes_labels = mosaic_bboxes_labels[inside_inds]
mosaic_ignore_flags = mosaic_ignore_flags[inside_inds]
@@ -427,7 +426,7 @@ def _mosaic_combine(
x1, y1, x2, y2 = center_position_xy[0], \
max(center_position_xy[1] - img_shape_wh[1], 0), \
min(center_position_xy[0] + img_shape_wh[0],
- self.img_scale[1] * 2), \
+ self.img_scale[0] * 2), \
center_position_xy[1]
crop_coord = 0, img_shape_wh[1] - (y2 - y1), min(
img_shape_wh[0], x2 - x1), img_shape_wh[1]
@@ -437,7 +436,7 @@ def _mosaic_combine(
x1, y1, x2, y2 = max(center_position_xy[0] - img_shape_wh[0], 0), \
center_position_xy[1], \
center_position_xy[0], \
- min(self.img_scale[0] * 2, center_position_xy[1] +
+ min(self.img_scale[1] * 2, center_position_xy[1] +
img_shape_wh[1])
crop_coord = img_shape_wh[0] - (x2 - x1), 0, img_shape_wh[0], min(
y2 - y1, img_shape_wh[1])
@@ -447,8 +446,8 @@ def _mosaic_combine(
x1, y1, x2, y2 = center_position_xy[0], \
center_position_xy[1], \
min(center_position_xy[0] + img_shape_wh[0],
- self.img_scale[1] * 2), \
- min(self.img_scale[0] * 2, center_position_xy[1] +
+ self.img_scale[0] * 2), \
+ min(self.img_scale[1] * 2, center_position_xy[1] +
img_shape_wh[1])
crop_coord = 0, 0, min(img_shape_wh[0],
x2 - x1), min(y2 - y1, img_shape_wh[1])
@@ -519,7 +518,7 @@ class Mosaic9(BaseMixImageTransform):
Args:
img_scale (Sequence[int]): Image size after mosaic pipeline of single
- image. The shape order should be (height, width).
+ image. The shape order should be (width, height).
Defaults to (640, 640).
bbox_clip_border (bool, optional): Whether to clip the objects outside
the border of the image. In some dataset like MOT17, the gt bboxes
@@ -605,7 +604,7 @@ def mix_img_transform(self, results: dict) -> dict:
mosaic_bboxes_labels = []
mosaic_ignore_flags = []
- img_scale_h, img_scale_w = self.img_scale
+ img_scale_w, img_scale_h = self.img_scale
if len(results['img'].shape) == 3:
mosaic_img = np.full(
@@ -691,7 +690,7 @@ def _mosaic_combine(self, loc: str,
assert loc in ('center', 'top', 'top_right', 'right', 'bottom_right',
'bottom', 'bottom_left', 'left', 'top_left')
- img_scale_h, img_scale_w = self.img_scale
+ img_scale_w, img_scale_h = self.img_scale
self._current_img_shape = img_shape_hw
current_img_h, current_img_w = self._current_img_shape
@@ -934,7 +933,7 @@ class YOLOXMixUp(BaseMixImageTransform):
Args:
img_scale (Sequence[int]): Image output size after mixup pipeline.
- The shape order should be (height, width). Defaults to (640, 640).
+ The shape order should be (width, height). Defaults to (640, 640).
ratio_range (Sequence[float]): Scale ratio of mixup image.
Defaults to (0.5, 1.5).
flip_ratio (float): Horizontal flip ratio of mixup image.
@@ -1025,15 +1024,15 @@ def mix_img_transform(self, results: dict) -> dict:
is_filp = random.uniform(0, 1) > self.flip_ratio
if len(retrieve_img.shape) == 3:
- out_img = np.ones((self.img_scale[0], self.img_scale[1], 3),
+ out_img = np.ones((self.img_scale[1], self.img_scale[0], 3),
dtype=retrieve_img.dtype) * self.pad_val
else:
out_img = np.ones(
- self.img_scale, dtype=retrieve_img.dtype) * self.pad_val
+ self.img_scale[::-1], dtype=retrieve_img.dtype) * self.pad_val
# 1. keep_ratio resize
- scale_ratio = min(self.img_scale[0] / retrieve_img.shape[0],
- self.img_scale[1] / retrieve_img.shape[1])
+ scale_ratio = min(self.img_scale[1] / retrieve_img.shape[0],
+ self.img_scale[0] / retrieve_img.shape[1])
retrieve_img = mmcv.imresize(
retrieve_img, (int(retrieve_img.shape[1] * scale_ratio),
int(retrieve_img.shape[0] * scale_ratio)))
diff --git a/mmyolo/datasets/transforms/transforms.py b/mmyolo/datasets/transforms/transforms.py
index e970c97aa..720f7756c 100644
--- a/mmyolo/datasets/transforms/transforms.py
+++ b/mmyolo/datasets/transforms/transforms.py
@@ -1,6 +1,6 @@
# Copyright (c) OpenMMLab. All rights reserved.
import math
-from typing import Tuple, Union
+from typing import List, Tuple, Union
import cv2
import mmcv
@@ -166,9 +166,9 @@ def _resize_img(self, results: dict):
# Use batch_shape if a batch_shape policy is configured
if 'batch_shape' in results:
- scale = tuple(results['batch_shape'])
+ scale = tuple(results['batch_shape']) # hw
else:
- scale = self.scale
+ scale = self.scale[::-1] # wh -> hw
image_shape = image.shape[:2] # height, width
@@ -240,7 +240,7 @@ def _resize_img(self, results: dict):
results['img_shape'] = image.shape
if 'pad_param' in results:
results['pad_param_origin'] = results['pad_param'] * \
- np.repeat(scale_factor, 2)
+ np.repeat(ratio, 2)
results['pad_param'] = np.array(padding_list, dtype=np.float32)
def _resize_masks(self, results: dict):
@@ -250,9 +250,9 @@ def _resize_masks(self, results: dict):
# resize the gt_masks
gt_mask_height = results['gt_masks'].height * \
- results['scale_factor'][0]
- gt_mask_width = results['gt_masks'].width * \
results['scale_factor'][1]
+ gt_mask_width = results['gt_masks'].width * \
+ results['scale_factor'][0]
gt_masks = results['gt_masks'].resize(
(int(round(gt_mask_height)), int(round(gt_mask_width))))
@@ -441,7 +441,7 @@ class YOLOv5RandomAffine(BaseTransform):
scaling transform. Defaults to (0.5, 1.5).
max_shear_degree (float): Maximum degrees of shear
transform. Defaults to 2.
- border (tuple[int]): Distance from height and width sides of input
+ border (tuple[int]): Distance from width and height sides of input
image to adjust output shape. Only used in mosaic dataset.
Defaults to (0, 0).
border_val (tuple[int]): Border padding values of 3 channels.
@@ -529,8 +529,9 @@ def transform(self, results: dict) -> dict:
dict: The result dict.
"""
img = results['img']
- height = img.shape[0] + self.border[0] * 2
- width = img.shape[1] + self.border[1] * 2
+ # self.border is wh format
+ height = img.shape[0] + self.border[1] * 2
+ width = img.shape[1] + self.border[0] * 2
# Note: Different from YOLOX
center_matrix = np.eye(3, dtype=np.float32)
@@ -675,3 +676,397 @@ def _get_translation_matrix(x: float, y: float) -> np.ndarray:
translation_matrix = np.array([[1, 0., x], [0., 1, y], [0., 0., 1.]],
dtype=np.float32)
return translation_matrix
+
+
+@TRANSFORMS.register_module()
+class PPYOLOERandomDistort(BaseTransform):
+ """Random hue, saturation, contrast and brightness distortion.
+
+ Required Keys:
+
+ - img
+
+ Modified Keys:
+
+ - img (np.float32)
+
+ Args:
+ hue_cfg (dict): Hue settings. Defaults to dict(min=-18,
+ max=18, prob=0.5).
+ saturation_cfg (dict): Saturation settings. Defaults to dict(
+ min=0.5, max=1.5, prob=0.5).
+ contrast_cfg (dict): Contrast settings. Defaults to dict(
+ min=0.5, max=1.5, prob=0.5).
+ brightness_cfg (dict): Brightness settings. Defaults to dict(
+ min=0.5, max=1.5, prob=0.5).
+        num_distort_func (int): The number of distort functions to
+            apply. Defaults to 4.
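+
+    Example:
+        Enable the transform in a data pipeline (a usage sketch)::
+
+            train_pipeline = [
+                dict(type='LoadImageFromFile'),
+                dict(type='LoadAnnotations', with_bbox=True),
+                dict(type='PPYOLOERandomDistort'),
+                ...
+            ]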
+ """
+
+ def __init__(self,
+ hue_cfg: dict = dict(min=-18, max=18, prob=0.5),
+ saturation_cfg: dict = dict(min=0.5, max=1.5, prob=0.5),
+ contrast_cfg: dict = dict(min=0.5, max=1.5, prob=0.5),
+ brightness_cfg: dict = dict(min=0.5, max=1.5, prob=0.5),
+ num_distort_func: int = 4):
+ self.hue_cfg = hue_cfg
+ self.saturation_cfg = saturation_cfg
+ self.contrast_cfg = contrast_cfg
+ self.brightness_cfg = brightness_cfg
+ self.num_distort_func = num_distort_func
+        assert 0 < self.num_distort_func <= 4, \
+            'num_distort_func must be > 0 and <= 4'
+ for cfg in [
+ self.hue_cfg, self.saturation_cfg, self.contrast_cfg,
+ self.brightness_cfg
+ ]:
+            assert 0. <= cfg['prob'] <= 1., 'prob must be >= 0 and <= 1'
+
+ def transform_hue(self, results):
+ """Transform hue randomly."""
+ if random.uniform(0., 1.) >= self.hue_cfg['prob']:
+ return results
+ img = results['img']
+ delta = random.uniform(self.hue_cfg['min'], self.hue_cfg['max'])
+ u = np.cos(delta * np.pi)
+ w = np.sin(delta * np.pi)
+        # Rotate the hue in YIQ color space: convert RGB -> YIQ, rotate
+        # the chrominance (IQ) plane by delta, then convert back to RGB.
+        delta_iq = np.array([[1.0, 0.0, 0.0], [0.0, u, -w], [0.0, w, u]])
+        rgb2yiq_matrix = np.array([[0.114, 0.587, 0.299],
+                                   [-0.321, -0.274, 0.596],
+                                   [0.311, -0.523, 0.211]])
+        yiq2rgb_matrix = np.array([[1.0, -1.107, 1.705],
+                                   [1.0, -0.272, -0.647],
+                                   [1.0, 0.956, 0.621]])
+        t = np.dot(np.dot(yiq2rgb_matrix, delta_iq), rgb2yiq_matrix).T
+ img = np.dot(img, t)
+ results['img'] = img
+ return results
+
+ def transform_saturation(self, results):
+ """Transform saturation randomly."""
+ if random.uniform(0., 1.) >= self.saturation_cfg['prob']:
+ return results
+ img = results['img']
+ delta = random.uniform(self.saturation_cfg['min'],
+ self.saturation_cfg['max'])
+
+ # convert bgr img to gray img
+ gray = img * np.array([[[0.114, 0.587, 0.299]]], dtype=np.float32)
+ gray = gray.sum(axis=2, keepdims=True)
+ gray *= (1.0 - delta)
+ img *= delta
+ img += gray
+ results['img'] = img
+ return results
+
+ def transform_contrast(self, results):
+ """Transform contrast randomly."""
+ if random.uniform(0., 1.) >= self.contrast_cfg['prob']:
+ return results
+ img = results['img']
+ delta = random.uniform(self.contrast_cfg['min'],
+ self.contrast_cfg['max'])
+ img *= delta
+ results['img'] = img
+ return results
+
+ def transform_brightness(self, results):
+ """Transform brightness randomly."""
+ if random.uniform(0., 1.) >= self.brightness_cfg['prob']:
+ return results
+ img = results['img']
+ delta = random.uniform(self.brightness_cfg['min'],
+ self.brightness_cfg['max'])
+ img += delta
+ results['img'] = img
+ return results
+
+ def transform(self, results: dict) -> dict:
+ """The hue, saturation, contrast and brightness distortion function.
+
+ Args:
+ results (dict): The result dict.
+
+ Returns:
+ dict: The result dict.
+ """
+ results['img'] = results['img'].astype(np.float32)
+
+ functions = [
+ self.transform_brightness, self.transform_contrast,
+ self.transform_saturation, self.transform_hue
+ ]
+ distortions = random.permutation(functions)[:self.num_distort_func]
+ for func in distortions:
+ results = func(results)
+ return results
+
+
+@TRANSFORMS.register_module()
+class PPYOLOERandomCrop(BaseTransform):
+ """Random crop the img and bboxes. Different thresholds are used in PPYOLOE
+ to judge whether the clipped image meets the requirements. This
+ implementation is different from the implementation of RandomCrop in mmdet.
+
+ Required Keys:
+
+ - img
+ - gt_bboxes (BaseBoxes[torch.float32]) (optional)
+ - gt_bboxes_labels (np.int64) (optional)
+ - gt_ignore_flags (bool) (optional)
+
+ Modified Keys:
+
+ - img
+ - img_shape
+ - gt_bboxes (optional)
+ - gt_bboxes_labels (optional)
+ - gt_ignore_flags (optional)
+
+    Added Keys:
+
+    - pad_param (np.float32)
+
+ Args:
+        aspect_ratio (List[float]): Aspect ratio of the cropped region.
+            Defaults to [.5, 2].
+        thresholds (List[float]): IoU thresholds to decide a valid bbox crop
+            in [min, max] format. Defaults to [.0, .1, .3, .5, .7, .9].
+        scaling (List[float]): Ratio between a cropped region and the original
+            image in [min, max] format. Defaults to [.3, 1.].
+        num_attempts (int): Number of tries for each threshold before
+            giving up. Defaults to 50.
+        allow_no_crop (bool): Whether returning without doing crop is allowed.
+            Defaults to True.
+        cover_all_box (bool): Ensure all bboxes are covered in the final crop.
+            Defaults to False.
+ """
+
+ def __init__(self,
+ aspect_ratio: List[float] = [.5, 2.],
+ thresholds: List[float] = [.0, .1, .3, .5, .7, .9],
+ scaling: List[float] = [.3, 1.],
+ num_attempts: int = 50,
+ allow_no_crop: bool = True,
+ cover_all_box: bool = False):
+ self.aspect_ratio = aspect_ratio
+ self.thresholds = thresholds
+ self.scaling = scaling
+ self.num_attempts = num_attempts
+ self.allow_no_crop = allow_no_crop
+ self.cover_all_box = cover_all_box
+
+ def _crop_data(self, results: dict, crop_box: Tuple[int, int, int, int],
+ valid_inds: np.ndarray) -> Union[dict, None]:
+ """Function to randomly crop images, bounding boxes, masks, semantic
+ segmentation maps.
+
+ Args:
+ results (dict): Result dict from loading pipeline.
+ crop_box (Tuple[int, int, int, int]): Expected absolute coordinates
+ for cropping, (x1, y1, x2, y2).
+ valid_inds (np.ndarray): The indexes of gt that needs to be
+ retained.
+
+ Returns:
+ results (Union[dict, None]): Randomly cropped results, 'img_shape'
+ key in result dict is updated according to crop size. None will
+ be returned when there is no valid bbox after cropping.
+ """
+ # crop the image
+ img = results['img']
+ crop_x1, crop_y1, crop_x2, crop_y2 = crop_box
+ img = img[crop_y1:crop_y2, crop_x1:crop_x2, ...]
+ results['img'] = img
+ img_shape = img.shape
+ results['img_shape'] = img.shape
+
+ # crop bboxes accordingly and clip to the image boundary
+ if results.get('gt_bboxes', None) is not None:
+ bboxes = results['gt_bboxes']
+ bboxes.translate_([-crop_x1, -crop_y1])
+ bboxes.clip_(img_shape[:2])
+
+ results['gt_bboxes'] = bboxes[valid_inds]
+
+ if results.get('gt_ignore_flags', None) is not None:
+ results['gt_ignore_flags'] = \
+ results['gt_ignore_flags'][valid_inds]
+
+ if results.get('gt_bboxes_labels', None) is not None:
+ results['gt_bboxes_labels'] = \
+ results['gt_bboxes_labels'][valid_inds]
+
+ if results.get('gt_masks', None) is not None:
+ results['gt_masks'] = results['gt_masks'][
+ valid_inds.nonzero()[0]].crop(
+ np.asarray([crop_x1, crop_y1, crop_x2, crop_y2]))
+
+ # crop semantic seg
+ if results.get('gt_seg_map', None) is not None:
+ results['gt_seg_map'] = results['gt_seg_map'][crop_y1:crop_y2,
+ crop_x1:crop_x2]
+
+ return results
+
+ @autocast_box_type()
+ def transform(self, results: dict) -> Union[dict, None]:
+ """The random crop transform function.
+
+ Args:
+ results (dict): The result dict.
+
+ Returns:
+ dict: The result dict.
+ """
+ if results.get('gt_bboxes', None) is None or len(
+ results['gt_bboxes']) == 0:
+ return results
+
+ orig_img_h, orig_img_w = results['img'].shape[:2]
+ gt_bboxes = results['gt_bboxes']
+
+ thresholds = list(self.thresholds)
+ if self.allow_no_crop:
+ thresholds.append('no_crop')
+ random.shuffle(thresholds)
+
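+        # Thresholds are tried in random order; the first threshold (or the
+        # 'no_crop' sentinel) that yields a valid crop wins.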
+ for thresh in thresholds:
+ # Determine the coordinates for cropping
+ if thresh == 'no_crop':
+ return results
+
+ found = False
+ for i in range(self.num_attempts):
+ crop_h, crop_w = self._get_crop_size((orig_img_h, orig_img_w))
+ if self.aspect_ratio is None:
+ if crop_h / crop_w < 0.5 or crop_h / crop_w > 2.0:
+ continue
+
+ # get image crop_box
+ margin_h = max(orig_img_h - crop_h, 0)
+ margin_w = max(orig_img_w - crop_w, 0)
+ offset_h, offset_w = self._rand_offset((margin_h, margin_w))
+ crop_y1, crop_y2 = offset_h, offset_h + crop_h
+ crop_x1, crop_x2 = offset_w, offset_w + crop_w
+
+ crop_box = [crop_x1, crop_y1, crop_x2, crop_y2]
+ # Calculate the iou between gt_bboxes and crop_boxes
+ iou = self._iou_matrix(gt_bboxes,
+ np.array([crop_box], dtype=np.float32))
+ # If the maximum value of the iou is less than thresh,
+ # the current crop_box is considered invalid.
+ if iou.max() < thresh:
+ continue
+
+ # If cover_all_box == True and the minimum value of
+ # the iou is less than thresh, the current crop_box
+ # is considered invalid.
+ if self.cover_all_box and iou.min() < thresh:
+ continue
+
+ # Get which gt_bboxes to keep after cropping.
+ valid_inds = self._get_valid_inds(
+ gt_bboxes, np.array(crop_box, dtype=np.float32))
+ if valid_inds.size > 0:
+ found = True
+ break
+
+ if found:
+ results = self._crop_data(results, crop_box, valid_inds)
+ return results
+ return results
+
+ @cache_randomness
+ def _rand_offset(self, margin: Tuple[int, int]) -> Tuple[int, int]:
+ """Randomly generate crop offset.
+
+ Args:
+ margin (Tuple[int, int]): The upper bound for the offset generated
+ randomly.
+
+ Returns:
+ Tuple[int, int]: The random offset for the crop.
+ """
+ margin_h, margin_w = margin
+ offset_h = np.random.randint(0, margin_h + 1)
+ offset_w = np.random.randint(0, margin_w + 1)
+
+ return (offset_h, offset_w)
+
+ @cache_randomness
+ def _get_crop_size(self, image_size: Tuple[int, int]) -> Tuple[int, int]:
+ """Randomly generates the crop size based on `image_size`.
+
+ Args:
+ image_size (Tuple[int, int]): (h, w).
+
+ Returns:
+ crop_size (Tuple[int, int]): (crop_h, crop_w) in absolute pixels.
+ """
+ h, w = image_size
+ scale = random.uniform(*self.scaling)
+ if self.aspect_ratio is not None:
+ min_ar, max_ar = self.aspect_ratio
+ aspect_ratio = random.uniform(
+ max(min_ar, scale**2), min(max_ar, scale**-2))
+ h_scale = scale / np.sqrt(aspect_ratio)
+ w_scale = scale * np.sqrt(aspect_ratio)
+ else:
+ h_scale = random.uniform(*self.scaling)
+ w_scale = random.uniform(*self.scaling)
+ crop_h = h * h_scale
+ crop_w = w * w_scale
+ return int(crop_h), int(crop_w)
+
+ def _iou_matrix(self,
+ gt_bbox: HorizontalBoxes,
+ crop_bbox: np.ndarray,
+ eps: float = 1e-10) -> np.ndarray:
+ """Calculate iou between gt and image crop box.
+
+ Args:
+ gt_bbox (HorizontalBoxes): Ground truth bounding boxes.
+ crop_bbox (np.ndarray): Image crop coordinates in
+ [x1, y1, x2, y2] format.
+ eps (float): Default to 1e-10.
+ Return:
+ (np.ndarray): IoU.
+ """
+ gt_bbox = gt_bbox.tensor.numpy()
+ lefttop = np.maximum(gt_bbox[:, np.newaxis, :2], crop_bbox[:, :2])
+ rightbottom = np.minimum(gt_bbox[:, np.newaxis, 2:], crop_bbox[:, 2:])
+
+ overlap = np.prod(
+ rightbottom - lefttop,
+ axis=2) * (lefttop < rightbottom).all(axis=2)
+        # Union area = gt area + crop area - overlap
+        area_gt_bbox = np.prod(gt_bbox[:, 2:] - gt_bbox[:, :2], axis=1)
+        area_crop_bbox = np.prod(crop_bbox[:, 2:] - crop_bbox[:, :2], axis=1)
+ area_o = (area_gt_bbox[:, np.newaxis] + area_crop_bbox - overlap)
+ return overlap / (area_o + eps)
+
+ def _get_valid_inds(self, gt_bbox: HorizontalBoxes,
+ img_crop_bbox: np.ndarray) -> np.ndarray:
+ """Get which Bboxes to keep at the current cropping coordinates.
+
+ Args:
+ gt_bbox (HorizontalBoxes): Ground truth bounding boxes.
+ img_crop_bbox (np.ndarray): Image crop coordinates in
+ [x1, y1, x2, y2] format.
+
+ Returns:
+ (np.ndarray): Valid indexes.
+ """
+ cropped_box = gt_bbox.tensor.numpy().copy()
+ gt_bbox = gt_bbox.tensor.numpy().copy()
+
+ cropped_box[:, :2] = np.maximum(gt_bbox[:, :2], img_crop_bbox[:2])
+ cropped_box[:, 2:] = np.minimum(gt_bbox[:, 2:], img_crop_bbox[2:])
+ cropped_box[:, :2] -= img_crop_bbox[:2]
+ cropped_box[:, 2:] -= img_crop_bbox[:2]
+
+ centers = (gt_bbox[:, :2] + gt_bbox[:, 2:]) / 2
+ valid = np.logical_and(img_crop_bbox[:2] <= centers,
+ centers < img_crop_bbox[2:]).all(axis=1)
+ valid = np.logical_and(
+ valid, (cropped_box[:, :2] < cropped_box[:, 2:]).all(axis=1))
+
+ return np.where(valid)[0]
diff --git a/mmyolo/datasets/utils.py b/mmyolo/datasets/utils.py
index 1d84d39b7..0cca341b4 100644
--- a/mmyolo/datasets/utils.py
+++ b/mmyolo/datasets/utils.py
@@ -9,8 +9,14 @@
@COLLATE_FUNCTIONS.register_module()
-def yolov5_collate(data_batch: Sequence) -> dict:
- """Rewrite collate_fn to get faster training speed."""
+def yolov5_collate(data_batch: Sequence,
+ use_ms_training: bool = False) -> dict:
+ """Rewrite collate_fn to get faster training speed.
+
+ Args:
+ data_batch (Sequence): Batch of data.
+ use_ms_training (bool): Whether to use multi-scale training.
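+
+    Example:
+        Select the collate function in a dataloader config (a usage
+        sketch following the ``COLLATE_FUNCTIONS`` registry convention)::
+
+            train_dataloader = dict(
+                collate_fn=dict(type='yolov5_collate', use_ms_training=True))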
+ """
batch_imgs = []
batch_bboxes_labels = []
for i in range(len(data_batch)):
@@ -25,10 +31,16 @@ def yolov5_collate(data_batch: Sequence) -> dict:
batch_bboxes_labels.append(bboxes_labels)
batch_imgs.append(inputs)
- return {
- 'inputs': torch.stack(batch_imgs, 0),
- 'data_samples': torch.cat(batch_bboxes_labels, 0)
- }
+ if use_ms_training:
+ return {
+ 'inputs': batch_imgs,
+ 'data_samples': torch.cat(batch_bboxes_labels, 0)
+ }
+ else:
+ return {
+ 'inputs': torch.stack(batch_imgs, 0),
+ 'data_samples': torch.cat(batch_bboxes_labels, 0)
+ }
@TASK_UTILS.register_module()
diff --git a/mmyolo/datasets/yolov5_coco.py b/mmyolo/datasets/yolov5_coco.py
index 55bc899ab..0c59ce4e9 100644
--- a/mmyolo/datasets/yolov5_coco.py
+++ b/mmyolo/datasets/yolov5_coco.py
@@ -1,5 +1,5 @@
# Copyright (c) OpenMMLab. All rights reserved.
-from typing import Any, Optional
+from typing import Any, List, Optional
from mmdet.datasets import BaseDetDataset, CocoDataset
@@ -63,3 +63,46 @@ class YOLOv5CocoDataset(BatchShapePolicyDataset, CocoDataset):
`mmyolo/datasets/utils.py#BatchShapePolicy` for details
"""
pass
+
+
+@DATASETS.register_module()
+class PPYOLOECocoDataset(YOLOv5CocoDataset):
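+    """Dataset for PPYOLOE that filters out invalid annotations.
+
+    Instances whose width or height is smaller than ``1e-5`` are removed,
+    as are images that have no valid (non-ignored) gt bbox.
+    """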
+
+ def filter_data(self) -> List[dict]:
+ """Filter annotations according to filter_cfg.
+
+ Returns:
+ List[dict]: Filtered results.
+ """
+
+        # Filter out instances whose width or height is smaller than 1e-5
+        eps = 1e-5
+        for i in self.data_list:
+            instances = i['instances']
+            filter_instances = []
+            for instance in instances:
+                bbox = instance['bbox']
+                x1, y1, x2, y2 = bbox
+                if (x2 - x1 > eps) and (y2 - y1 > eps):
+                    filter_instances.append(instance)
+                else:
+                    print('filter', x1, y1, x2, y2)
+            i['instances'] = filter_instances
+
+        filter_data_list = []
+        # Filter out images without any usable gt bbox
+        for i in self.data_list:
+            instances = i['instances']
+            ignore_flag_list = [k['ignore_flag'] for k in instances]
+            # Skip images that contain no gt bbox
+            if len(instances) == 0:
+                print('filter no gt img', i['img_id'], self.test_mode)
+                continue
+            # Also skip images whose gt bboxes are all marked as ignore
+            if sum(ignore_flag_list) == len(instances):
+                print('filter all bboxes are ignore img', i['img_id'])
+                continue
+            filter_data_list.append(i)
+
+ return filter_data_list
diff --git a/mmyolo/engine/hooks/__init__.py b/mmyolo/engine/hooks/__init__.py
index 466fa511f..0b8deebc8 100644
--- a/mmyolo/engine/hooks/__init__.py
+++ b/mmyolo/engine/hooks/__init__.py
@@ -1,8 +1,10 @@
# Copyright (c) OpenMMLab. All rights reserved.
+from .ppyoloe_param_scheduler_hook import PPYOLOEParamSchedulerHook
from .switch_to_deploy_hook import SwitchToDeployHook
from .yolov5_param_scheduler_hook import YOLOv5ParamSchedulerHook
from .yolox_mode_switch_hook import YOLOXModeSwitchHook
__all__ = [
- 'YOLOv5ParamSchedulerHook', 'YOLOXModeSwitchHook', 'SwitchToDeployHook'
+ 'YOLOv5ParamSchedulerHook', 'YOLOXModeSwitchHook', 'SwitchToDeployHook',
+ 'PPYOLOEParamSchedulerHook'
]
diff --git a/mmyolo/engine/hooks/ppyoloe_param_scheduler_hook.py b/mmyolo/engine/hooks/ppyoloe_param_scheduler_hook.py
new file mode 100644
index 000000000..26dfe6ef2
--- /dev/null
+++ b/mmyolo/engine/hooks/ppyoloe_param_scheduler_hook.py
@@ -0,0 +1,96 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import math
+from typing import Optional
+
+from mmengine.hooks import ParamSchedulerHook
+from mmengine.runner import Runner
+
+from mmyolo.registry import HOOKS
+
+
+@HOOKS.register_module()
+class PPYOLOEParamSchedulerHook(ParamSchedulerHook):
+ """A hook to update learning rate and momentum in optimizer of PPYOLOE. We
+ use this hook to implement adaptive computation for `warmup_total_iters`,
+ which is not possible with the built-in ParamScheduler in mmyolo.
+
+ Args:
+ warmup_min_iter (int): Minimum warmup iters. Defaults to 1000.
+ start_factor (float): The number we multiply learning rate in the
+ first epoch. The multiplication factor changes towards end_factor
+ in the following epochs. Defaults to 0.
+ warmup_epochs (int): Epochs for warmup. Defaults to 5.
+ min_lr_ratio (float): Minimum learning rate ratio.
+ total_epochs (int): In PPYOLOE, `total_epochs` is set to
+ training_epochs x 1.2. Defaults to 360.
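+
+    Example:
+        Replace the default param scheduler hook in a config (a usage
+        sketch following the MMEngine hook convention)::
+
+            default_hooks = dict(
+                param_scheduler=dict(
+                    type='PPYOLOEParamSchedulerHook',
+                    warmup_min_iter=1000,
+                    start_factor=0.,
+                    warmup_epochs=5,
+                    min_lr_ratio=0.0,
+                    total_epochs=360))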
+ """
+ priority = 9
+
+ def __init__(self,
+ warmup_min_iter: int = 1000,
+ start_factor: float = 0.,
+ warmup_epochs: int = 5,
+ min_lr_ratio: float = 0.0,
+ total_epochs: int = 360):
+
+ self.warmup_min_iter = warmup_min_iter
+ self.start_factor = start_factor
+ self.warmup_epochs = warmup_epochs
+ self.min_lr_ratio = min_lr_ratio
+ self.total_epochs = total_epochs
+
+ self._warmup_end = False
+ self._base_lr = None
+
+ def before_train(self, runner: Runner):
+ """Operations before train.
+
+ Args:
+ runner (Runner): The runner of the training process.
+ """
+ optimizer = runner.optim_wrapper.optimizer
+ for group in optimizer.param_groups:
+            # If the param group has never been scheduled, record its
+            # current value as the initial value.
+ group.setdefault('initial_lr', group['lr'])
+
+ self._base_lr = [
+ group['initial_lr'] for group in optimizer.param_groups
+ ]
+ self._min_lr = [i * self.min_lr_ratio for i in self._base_lr]
+
+ def before_train_iter(self,
+ runner: Runner,
+ batch_idx: int,
+ data_batch: Optional[dict] = None):
+ """Operations before each training iteration.
+
+ Args:
+ runner (Runner): The runner of the training process.
+ batch_idx (int): The index of the current batch in the train loop.
+ data_batch (dict or tuple or list, optional): Data from dataloader.
+ """
+ cur_iters = runner.iter
+ optimizer = runner.optim_wrapper.optimizer
+ dataloader_len = len(runner.train_dataloader)
+
+ # The minimum warmup is self.warmup_min_iter
+ warmup_total_iters = max(
+ round(self.warmup_epochs * dataloader_len), self.warmup_min_iter)
+
+ if cur_iters <= warmup_total_iters:
+ # warm up
+ alpha = cur_iters / warmup_total_iters
+ factor = self.start_factor * (1 - alpha) + alpha
+
+ for group_idx, param in enumerate(optimizer.param_groups):
+ param['lr'] = self._base_lr[group_idx] * factor
+ else:
+ for group_idx, param in enumerate(optimizer.param_groups):
+ total_iters = self.total_epochs * dataloader_len
+ lr = self._min_lr[group_idx] + (
+ self._base_lr[group_idx] -
+ self._min_lr[group_idx]) * 0.5 * (
+ math.cos((cur_iters - warmup_total_iters) * math.pi /
+ (total_iters - warmup_total_iters)) + 1.0)
+ param['lr'] = lr
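
For orientation, the schedule this hook implements — linear warmup to `base_lr`, then cosine decay to `base_lr * min_lr_ratio` — can be written as a pure function of the iteration counter. The following is a minimal standalone sketch mirroring the arithmetic above; `ppyoloe_lr` is a hypothetical helper for illustration only, not part of the patch:

```python
import math


def ppyoloe_lr(cur_iter: int, base_lr: float, iters_per_epoch: int,
               warmup_min_iter: int = 1000, start_factor: float = 0.,
               warmup_epochs: int = 5, min_lr_ratio: float = 0.,
               total_epochs: int = 360) -> float:
    """Hypothetical helper: learning rate at ``cur_iter`` under the
    PPYOLOE schedule sketched from the hook above."""
    # Warmup lasts at least `warmup_min_iter` iterations.
    warmup_total_iters = max(
        round(warmup_epochs * iters_per_epoch), warmup_min_iter)
    min_lr = base_lr * min_lr_ratio
    if cur_iter <= warmup_total_iters:
        # Linear ramp from base_lr * start_factor up to base_lr.
        alpha = cur_iter / warmup_total_iters
        return base_lr * (start_factor * (1 - alpha) + alpha)
    # Cosine decay from base_lr down to min_lr over the remaining iters.
    total_iters = total_epochs * iters_per_epoch
    cosine = math.cos((cur_iter - warmup_total_iters) * math.pi /
                      (total_iters - warmup_total_iters))
    return min_lr + (base_lr - min_lr) * 0.5 * (cosine + 1.0)
```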
diff --git a/mmyolo/models/data_preprocessors/__init__.py b/mmyolo/models/data_preprocessors/__init__.py
index d9edbfd24..1d11bfc6c 100644
--- a/mmyolo/models/data_preprocessors/__init__.py
+++ b/mmyolo/models/data_preprocessors/__init__.py
@@ -1,4 +1,10 @@
# Copyright (c) OpenMMLab. All rights reserved.
-from .data_preprocessor import YOLOv5DetDataPreprocessor
+from .data_preprocessor import (PPYOLOEBatchRandomResize,
+ PPYOLOEBatchSyncRandomResizeallopencv,
+ PPYOLOEDetDataPreprocessor,
+ YOLOv5DetDataPreprocessor)
-__all__ = ['YOLOv5DetDataPreprocessor']
+__all__ = [
+ 'YOLOv5DetDataPreprocessor', 'PPYOLOEDetDataPreprocessor',
+ 'PPYOLOEBatchRandomResize', 'PPYOLOEBatchSyncRandomResizeallopencv'
+]
diff --git a/mmyolo/models/data_preprocessors/data_preprocessor.py b/mmyolo/models/data_preprocessors/data_preprocessor.py
index 04a628212..3696a7ce1 100644
--- a/mmyolo/models/data_preprocessors/data_preprocessor.py
+++ b/mmyolo/models/data_preprocessors/data_preprocessor.py
@@ -1,6 +1,14 @@
# Copyright (c) OpenMMLab. All rights reserved.
+import random
+from typing import List, Tuple, Union
+
+import cv2
import torch
+import torch.nn.functional as F
+from mmdet.models import BatchSyncRandomResize
from mmdet.models.data_preprocessors import DetDataPreprocessor
+from mmengine import MessageHub, is_list_of
+from torch import Tensor
from mmyolo.registry import MODELS
@@ -50,3 +58,315 @@ def forward(self, data: dict, training: bool = False) -> dict:
data_samples = {'bboxes_labels': data_samples, 'img_metas': img_metas}
return {'inputs': inputs, 'data_samples': data_samples}
+
+
+@MODELS.register_module()
+class PPYOLOEDetDataPreprocessor(DetDataPreprocessor):
+ """Image pre-processor for detection tasks.
+
+ The main difference between PPYOLOEDetDataPreprocessor and
+ DetDataPreprocessor is the normalization order. The official
+    PPYOLOE resizes the image first and then normalizes it.
+ In DetDataPreprocessor, the order is reversed.
+
+ Note: It must be used together with
+ `mmyolo.datasets.utils.yolov5_collate`
+ """
+
+ def forward(self, data: dict, training: bool = False) -> dict:
+ """Perform normalization、padding and bgr2rgb conversion based on
+ ``BaseDataPreprocessor``. This class use batch_augments first, and then
+ normalize the image, which is different from the `DetDataPreprocessor`
+ .
+
+ Args:
+ data (dict): Data sampled from dataloader.
+ training (bool): Whether to enable training time augmentation.
+
+ Returns:
+ dict: Data in the same format as the model input.
+ """
+ if not training:
+ return super().forward(data, training)
+
+ assert isinstance(data['inputs'], list) and is_list_of(
+ data['inputs'], torch.Tensor), \
+ '"inputs" should be a list of Tensor, but got ' \
+ f'{type(data["inputs"])}. The possible reason for this ' \
+ 'is that you are not using it with ' \
+ '"mmyolo.datasets.utils.yolov5_collate". Please refer to ' \
+ '"cconfigs/ppyoloe/ppyoloe_plus_s_fast_8xb8-80e_coco.py".'
+
+ data = self.cast_data(data)
+ inputs, data_samples = data['inputs'], data['data_samples']
+
+ # Process data.
+ batch_inputs = []
+ for _batch_input, data_sample in zip(inputs, data_samples):
+ # channel transform
+ if self._channel_conversion:
+ _batch_input = _batch_input[[2, 1, 0], ...]
+ # Convert to float after channel conversion to ensure
+ # efficiency
+ _batch_input = _batch_input.float()
+
+ batch_inputs.append(_batch_input)
+
+ # Batch random resize image.
+ if self.batch_augments is not None:
+ for batch_aug in self.batch_augments:
+ inputs, data_samples = batch_aug(batch_inputs, data_samples)
+
+ if self._enable_normalize:
+ inputs = (inputs - self.mean) / self.std
+
+ img_metas = [{'batch_input_shape': inputs.shape[2:]}] * len(inputs)
+ data_samples = {'bboxes_labels': data_samples, 'img_metas': img_metas}
+
+ return {'inputs': inputs, 'data_samples': data_samples}
+
+
+# TODO: No generality. Its input data format is different from
+# mmdet's batch aug, and it must be made compatible in the future.
+@MODELS.register_module()
+class PPYOLOEBatchRandomResize(BatchSyncRandomResize):
+ """PPYOLOE batch random resize.
+
+ Args:
+ random_size_range (tuple): The multi-scale random range during
+ multi-scale training.
+ interval (int): The iter interval of change
+ image size. Defaults to 10.
+ size_divisor (int): Image size divisible factor.
+ Defaults to 32.
+ random_interp (bool): Whether to choose interp_mode randomly.
+ If set to True, the type of `interp_mode` must be list.
+ If set to False, the type of `interp_mode` must be str.
+ Defaults to True.
+ interp_mode (Union[List, str]): The modes available for resizing
+ are ('nearest', 'bilinear', 'bicubic', 'area').
+ keep_ratio (bool): Whether to keep the aspect ratio when resizing
+ the image. Now we only support keep_ratio=False.
+ Defaults to False.
+ """
+
+ def __init__(self,
+ random_size_range: Tuple[int, int],
+ interval: int = 1,
+ size_divisor: int = 32,
+ random_interp=True,
+ interp_mode: Union[List[str], str] = [
+ 'nearest', 'bilinear', 'bicubic', 'area'
+ ],
+ keep_ratio: bool = False) -> None:
+ super().__init__(random_size_range, interval, size_divisor)
+ self.random_interp = random_interp
+ self.keep_ratio = keep_ratio
+ # TODO: need to support keep_ratio==True
+ assert not self.keep_ratio, 'We do not yet support keep_ratio=True'
+
+ if self.random_interp:
+ assert isinstance(interp_mode, list) and len(interp_mode) > 1,\
+ 'While random_interp==True, the type of `interp_mode`' \
+                ' must be list and len(interp_mode) must be larger than 1'
+ self.interp_mode_list = interp_mode
+ self.interp_mode = None
+ else:
+ assert isinstance(interp_mode, str),\
+ 'While random_interp==False, the type of ' \
+ '`interp_mode` must be str'
+ assert interp_mode in ['nearest', 'bilinear', 'bicubic', 'area']
+ self.interp_mode_list = None
+ self.interp_mode = interp_mode
+
+ def forward(self, inputs: list,
+ data_samples: Tensor) -> Tuple[Tensor, Tensor]:
+ """Resize a batch of images and bboxes to shape ``self._input_size``.
+
+        The inputs and data_samples should be lists, and
+ ``PPYOLOEBatchRandomResize`` must be used with
+ ``PPYOLOEDetDataPreprocessor`` and ``yolov5_collate`` with
+ ``use_ms_training == True``.
+ """
+ assert isinstance(inputs, list),\
+ 'The type of inputs must be list. The possible reason for this ' \
+ 'is that you are not using it with `PPYOLOEDetDataPreprocessor` ' \
+ 'and `yolov5_collate` with use_ms_training == True.'
+ message_hub = MessageHub.get_current_instance()
+ if (message_hub.get_info('iter') + 1) % self._interval == 0:
+ # get current input size
+ self._input_size, interp_mode = self._get_random_size_and_interp()
+ if self.random_interp:
+ self.interp_mode = interp_mode
+
+ # TODO: need to support type(inputs)==Tensor
+ if isinstance(inputs, list):
+ outputs = []
+ for i in range(len(inputs)):
+ _batch_input = inputs[i]
+ h, w = _batch_input.shape[-2:]
+ scale_y = self._input_size[0] / h
+ scale_x = self._input_size[1] / w
+ if scale_x != 1. or scale_y != 1.:
+ if self.interp_mode in ('nearest', 'area'):
+ align_corners = None
+ else:
+ align_corners = False
+ _batch_input = F.interpolate(
+ _batch_input.unsqueeze(0),
+ size=self._input_size,
+ mode=self.interp_mode,
+ align_corners=align_corners)
+
+ # rescale boxes
+ indexes = data_samples[:, 0] == i
+ data_samples[indexes, 2] *= scale_x
+ data_samples[indexes, 3] *= scale_y
+ data_samples[indexes, 4] *= scale_x
+ data_samples[indexes, 5] *= scale_y
+ else:
+ _batch_input = _batch_input.unsqueeze(0)
+
+ outputs.append(_batch_input)
+
+ # convert to Tensor
+ return torch.cat(outputs, dim=0), data_samples
+ else:
+ raise NotImplementedError('Not implemented yet!')
+
+    def _get_random_size_and_interp(self) -> Tuple[Tuple[int, int], str]:
+        """Randomly generate a shape in ``_random_size_range`` and an
+        interp_mode in ``interp_mode_list``."""
+ size = random.randint(*self._random_size_range)
+ input_size = (self._size_divisor * size, self._size_divisor * size)
+
+ if self.random_interp:
+ interp_ind = random.randint(0, len(self.interp_mode_list) - 1)
+ interp_mode = self.interp_mode_list[interp_ind]
+ else:
+ interp_mode = None
+ return input_size, interp_mode
+
+
+@MODELS.register_module()
+class PPYOLOEBatchSyncRandomResizeallopencv(BatchSyncRandomResize):
+ """PPYOLOE batch random resize which synchronizes the random size across
+ ranks.
+
+ Args:
+ random_size_range (tuple): The multi-scale random range during
+ multi-scale training.
+ interval (int): The iter interval of change
+ image size. Defaults to 10.
+ size_divisor (int): Image size divisible factor.
+ Defaults to 32.
+ random_interp (bool): Whether to choose interp_mode randomly.
+ If set to True, the type of `interp_mode` must be list.
+ If set to False, the type of `interp_mode` must be str.
+ Defaults to True.
+ interp_mode (Union[List, str]): The modes available for resizing
+ are ('nearest', 'bilinear', 'bicubic', 'area').
+ keep_ratio (bool): Whether to keep the aspect ratio when resizing
+ the image. Now we only support keep_ratio=False.
+ Defaults to False.
+ """
+
+ def __init__(self,
+ random_size_range: Tuple[int, int],
+ interval: int = 1,
+ size_divisor: int = 32,
+ random_interp=True,
+                 interp_mode: Union[List[str], str] = [
+ 'nearest', 'bilinear', 'bicubic', 'area', 'lanczos4'
+ ],
+ keep_ratio: bool = False) -> None:
+ super().__init__(random_size_range, interval, size_divisor)
+ self.random_interp = random_interp
+ self.keep_ratio = keep_ratio
+ # TODO: need to support keep_ratio==True
+ assert not self.keep_ratio, 'We do not yet support keep_ratio=True'
+
+ if self.random_interp:
+ assert isinstance(interp_mode, list) and len(interp_mode) > 1,\
+ 'While random_interp==True, the type of `interp_mode`' \
+                ' must be list and len(interp_mode) must be larger than 1'
+ self.interp_mode_list = interp_mode
+ self.interp_mode = None
+ else:
+ assert isinstance(interp_mode, str),\
+ 'While random_interp==False, the type of ' \
+ '`interp_mode` must be str'
+ assert interp_mode in ['nearest', 'bilinear', 'bicubic', 'area']
+ self.interp_mode_list = None
+ self.interp_mode = interp_mode
+
+ self.interp_dict = {
+ 'nearest': cv2.INTER_NEAREST,
+ 'bilinear': cv2.INTER_LINEAR,
+ 'bicubic': cv2.INTER_CUBIC,
+ 'area': cv2.INTER_AREA,
+ 'lanczos4': cv2.INTER_LANCZOS4
+ }
+
+ def forward(self, inputs, data_samples):
+ assert isinstance(inputs, list)
+ message_hub = MessageHub.get_current_instance()
+ if (message_hub.get_info('iter') + 1) % self._interval == 0:
+ # get current input size
+ self._input_size, interp_mode = self._get_random_size_and_interp()
+ if self.random_interp:
+ self.interp_mode = interp_mode
+
+ # TODO: need to support type(inputs)==Tensor
+ if isinstance(inputs, list):
+ outputs = []
+ for i in range(len(inputs)):
+ _batch_input = inputs[i]
+ h, w = _batch_input.shape[-2:]
+ scale_y = self._input_size[0] / h
+ scale_x = self._input_size[1] / w
+ if scale_x != 1. or scale_y != 1.:
+ device = _batch_input.device
+ input_numpy = _batch_input.cpu().numpy().transpose(
+ (1, 2, 0))
+ input_numpy = cv2.resize(
+ input_numpy,
+                        (self._input_size[1], self._input_size[0]),  # (w, h)
+ interpolation=self.interp_dict[self.interp_mode])
+ _batch_input = input_numpy.transpose((2, 0, 1))
+ _batch_input = torch.from_numpy(_batch_input).to(
+ device).unsqueeze(0)
+
+ # rescale boxes
+ indexes = data_samples[:, 0] == i
+ data_samples[indexes, 2] *= scale_x
+ data_samples[indexes, 3] *= scale_y
+ data_samples[indexes, 4] *= scale_x
+ data_samples[indexes, 5] *= scale_y
+ else:
+ _batch_input = _batch_input.unsqueeze(0)
+
+ outputs.append(_batch_input)
+
+ # convert to Tensor
+ return torch.cat(outputs, dim=0), data_samples
+ else:
+ raise NotImplementedError('Not implemented yet!')
+
+    def _get_random_size_and_interp(self) -> Tuple[Tuple[int, int], str]:
+        """Randomly generate a shape in ``_random_size_range`` and an
+        interp_mode in ``interp_mode_list``."""
+ size = random.randint(*self._random_size_range)
+ input_size = (self._size_divisor * size, self._size_divisor * size)
+
+ if self.random_interp:
+ interp_ind = random.randint(0, len(self.interp_mode_list) - 1)
+ interp_mode = self.interp_mode_list[interp_ind]
+ else:
+ interp_mode = None
+ return input_size, interp_mode
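
For orientation, the pieces above are meant to be wired together in a model config roughly as follows. This is a hedged sketch: the assertion messages above point at `configs/ppyoloe/ppyoloe_plus_s_fast_8xb8-80e_coco.py` for the real settings, so treat the numbers here as illustrative only:

```python
# Illustrative values only; consult the shipped PPYOLOE configs for the
# settings actually used in training.
model = dict(
    data_preprocessor=dict(
        type='PPYOLOEDetDataPreprocessor',
        pad_size_divisor=32,
        batch_augments=[
            dict(
                type='PPYOLOEBatchRandomResize',
                random_size_range=(320, 800),
                interval=1,
                size_divisor=32,
                random_interp=True,
                keep_ratio=False)
        ],
        mean=[0., 0., 0.],
        std=[255., 255., 255.],
        bgr_to_rgb=True))

# The preprocessor expects list inputs, so the dataloader must use the
# matching collate function with multi-scale training enabled.
train_dataloader = dict(
    collate_fn=dict(type='yolov5_collate', use_ms_training=True))
```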
diff --git a/mmyolo/models/dense_heads/ppyoloe_head.py b/mmyolo/models/dense_heads/ppyoloe_head.py
index f643a1d58..bd246c4d0 100644
--- a/mmyolo/models/dense_heads/ppyoloe_head.py
+++ b/mmyolo/models/dense_heads/ppyoloe_head.py
@@ -1,24 +1,27 @@
# Copyright (c) OpenMMLab. All rights reserved.
-from typing import Sequence, Union
+from typing import Sequence, Tuple, Union
import torch
import torch.nn as nn
import torch.nn.functional as F
from mmdet.models.utils import multi_apply
from mmdet.utils import (ConfigType, OptConfigType, OptInstanceList,
- OptMultiConfig)
+ OptMultiConfig, reduce_mean)
+from mmengine import MessageHub
from mmengine.model import BaseModule, bias_init_with_prob
from mmengine.structures import InstanceData
from torch import Tensor
from mmyolo.registry import MODELS
from ..layers.yolo_bricks import PPYOLOESELayer
-from .yolov5_head import YOLOv5Head
+from .yolov6_head import YOLOv6Head
@MODELS.register_module()
class PPYOLOEHeadModule(BaseModule):
- """PPYOLOEHead head module used in `PPYOLOE`
+ """PPYOLOEHead head module used in `PPYOLOE.
+
+ `_.
Args:
num_classes (int): Number of categories excluding the background
@@ -30,7 +33,8 @@ class PPYOLOEHeadModule(BaseModule):
on the feature grid.
featmap_strides (Sequence[int]): Downsample factor of each feature map.
Defaults to (8, 16, 32).
- reg_max (int): TOOD reg_max param.
+        reg_max (int): Max value of integral set :math:`{0, ..., reg_max}`
+            in QFL setting. Defaults to 16.
norm_cfg (dict): Config dict for normalization layer.
Defaults to dict(type='BN', momentum=0.03, eps=0.001).
act_cfg (dict): Config dict for activation layer.
@@ -100,15 +104,12 @@ def _init_layers(self):
self.reg_preds.append(
nn.Conv2d(in_channel, 4 * (self.reg_max + 1), 3, padding=1))
- self.proj_conv = nn.Conv2d(self.reg_max + 1, 1, 1, bias=False)
- self.proj = nn.Parameter(
- torch.linspace(0, self.reg_max, self.reg_max + 1),
- requires_grad=False)
- self.proj_conv.weight = nn.Parameter(
- self.proj.view([1, self.reg_max + 1, 1, 1]).clone().detach(),
- requires_grad=False)
+ # init proj
+ proj = torch.linspace(0, self.reg_max, self.reg_max + 1).view(
+ [1, self.reg_max + 1, 1, 1])
+ self.register_buffer('proj', proj, persistent=False)
- def forward(self, x: Tensor) -> Tensor:
+ def forward(self, x: Tuple[Tensor]) -> Tensor:
"""Forward features from the upstream network.
Args:
@@ -131,17 +132,24 @@ def forward_single(self, x: Tensor, cls_stem: nn.ModuleList,
hw = h * w
avg_feat = F.adaptive_avg_pool2d(x, (1, 1))
cls_logit = cls_pred(cls_stem(x, avg_feat) + x)
- reg_dist = reg_pred(reg_stem(x, avg_feat))
- reg_dist = reg_dist.reshape([-1, 4, self.reg_max + 1,
- hw]).permute(0, 2, 3, 1)
- reg_dist = self.proj_conv(F.softmax(reg_dist, dim=1))
+ bbox_dist_preds = reg_pred(reg_stem(x, avg_feat))
+        # TODO: Test whether using matmul instead of conv speeds up training.
+ bbox_dist_preds = bbox_dist_preds.reshape(
+ [-1, 4, self.reg_max + 1, hw]).permute(0, 2, 3, 1)
+
+ bbox_preds = F.conv2d(F.softmax(bbox_dist_preds, dim=1), self.proj)
- return cls_logit, reg_dist
+ if self.training:
+ return cls_logit, bbox_preds, bbox_dist_preds
+ else:
+ return cls_logit, bbox_preds
@MODELS.register_module()
-class PPYOLOEHead(YOLOv5Head):
- """PPYOLOEHead head used in `PPYOLOE`.
+class PPYOLOEHead(YOLOv6Head):
+ """PPYOLOEHead head used in `PPYOLOE `_.
+ The YOLOv6 head and the PPYOLOE head are only slightly different.
+ Distribution focal loss is extra used in PPYOLOE, but not in YOLOv6.
Args:
head_module(ConfigType): Base module used for YOLOv5Head
@@ -150,7 +158,8 @@ class PPYOLOEHead(YOLOv5Head):
bbox_coder (:obj:`ConfigDict` or dict): Config of bbox coder.
loss_cls (:obj:`ConfigDict` or dict): Config of classification loss.
loss_bbox (:obj:`ConfigDict` or dict): Config of localization loss.
- loss_obj (:obj:`ConfigDict` or dict): Config of objectness loss.
+ loss_dfl (:obj:`ConfigDict` or dict): Config of distribution focal
+ loss.
train_cfg (:obj:`ConfigDict` or dict, optional): Training config of
anchor head. Defaults to None.
test_cfg (:obj:`ConfigDict` or dict, optional): Testing config of
@@ -168,17 +177,24 @@ def __init__(self,
strides=[8, 16, 32]),
bbox_coder: ConfigType = dict(type='DistancePointBBoxCoder'),
loss_cls: ConfigType = dict(
- type='mmdet.CrossEntropyLoss',
+ type='mmdet.VarifocalLoss',
use_sigmoid=True,
+ alpha=0.75,
+ gamma=2.0,
+ iou_weighted=True,
reduction='sum',
loss_weight=1.0),
loss_bbox: ConfigType = dict(
- type='mmdet.GIoULoss', reduction='sum', loss_weight=5.0),
- loss_obj: ConfigType = dict(
- type='mmdet.CrossEntropyLoss',
- use_sigmoid=True,
- reduction='sum',
- loss_weight=1.0),
+ type='IoULoss',
+ iou_mode='giou',
+ bbox_format='xyxy',
+ reduction='mean',
+ loss_weight=2.5,
+ return_iou=False),
+ loss_dfl: ConfigType = dict(
+ type='mmdet.DistributionFocalLoss',
+ reduction='mean',
+ loss_weight=0.5 / 4),
train_cfg: OptConfigType = None,
test_cfg: OptConfigType = None,
init_cfg: OptMultiConfig = None):
@@ -188,19 +204,18 @@ def __init__(self,
bbox_coder=bbox_coder,
loss_cls=loss_cls,
loss_bbox=loss_bbox,
- loss_obj=loss_obj,
train_cfg=train_cfg,
test_cfg=test_cfg,
init_cfg=init_cfg)
-
- def special_init(self):
- """Not Implenented."""
- pass
+ self.loss_dfl = MODELS.build(loss_dfl)
+ # ppyoloe doesn't need loss_obj
+ self.loss_obj = None
def loss_by_feat(
self,
cls_scores: Sequence[Tensor],
bbox_preds: Sequence[Tensor],
+ bbox_dist_preds: Sequence[Tensor],
batch_gt_instances: Sequence[InstanceData],
batch_img_metas: Sequence[dict],
batch_gt_instances_ignore: OptInstanceList = None) -> dict:
@@ -214,6 +229,8 @@ def loss_by_feat(
bbox_preds (Sequence[Tensor]): Box energies / deltas for each scale
level, each is a 4D-tensor, the channel number is
num_priors * 4.
+ bbox_dist_preds (Sequence[Tensor]): Box distribution logits for
+ each scale level with shape (bs, reg_max + 1, H*W, 4).
batch_gt_instances (list[:obj:`InstanceData`]): Batch of
gt_instance. It usually includes ``bboxes`` and ``labels``
attributes.
@@ -226,4 +243,131 @@ def loss_by_feat(
Returns:
dict[str, Tensor]: A dictionary of losses.
"""
- raise NotImplementedError('Not implemented yet!')
+
+ # get epoch information from message hub
+ message_hub = MessageHub.get_current_instance()
+ current_epoch = message_hub.get_info('epoch')
+
+ num_imgs = len(batch_img_metas)
+
+ current_featmap_sizes = [
+ cls_score.shape[2:] for cls_score in cls_scores
+ ]
+ # If the shape does not equal, generate new one
+ if current_featmap_sizes != self.featmap_sizes_train:
+ self.featmap_sizes_train = current_featmap_sizes
+
+ mlvl_priors_with_stride = self.prior_generator.grid_priors(
+ self.featmap_sizes_train,
+ dtype=cls_scores[0].dtype,
+ device=cls_scores[0].device,
+ with_stride=True)
+
+ self.num_level_priors = [len(n) for n in mlvl_priors_with_stride]
+ self.flatten_priors_train = torch.cat(
+ mlvl_priors_with_stride, dim=0)
+ self.stride_tensor = self.flatten_priors_train[..., [2]]
+
+ # gt info
+ gt_info = self.gt_instances_preprocess(batch_gt_instances, num_imgs)
+ gt_labels = gt_info[:, :, :1]
+ gt_bboxes = gt_info[:, :, 1:] # xyxy
+ pad_bbox_flag = (gt_bboxes.sum(-1, keepdim=True) > 0).float()
+
+ # pred info
+ flatten_cls_preds = [
+ cls_pred.permute(0, 2, 3, 1).reshape(num_imgs, -1,
+ self.num_classes)
+ for cls_pred in cls_scores
+ ]
+ flatten_pred_bboxes = [
+ bbox_pred.permute(0, 2, 3, 1).reshape(num_imgs, -1, 4)
+ for bbox_pred in bbox_preds
+ ]
+        # (bs, reg_max + 1, hw, 4) -> (bs, hw, 4 * (reg_max + 1))
+ flatten_pred_dists = [
+ bbox_pred_org.permute(0, 2, 3, 1).reshape(
+ num_imgs, -1, (self.head_module.reg_max + 1) * 4)
+ for bbox_pred_org in bbox_dist_preds
+ ]
+
+ flatten_dist_preds = torch.cat(flatten_pred_dists, dim=1)
+ flatten_cls_preds = torch.cat(flatten_cls_preds, dim=1)
+ flatten_pred_bboxes = torch.cat(flatten_pred_bboxes, dim=1)
+ flatten_pred_bboxes = self.bbox_coder.decode(
+ self.flatten_priors_train[..., :2], flatten_pred_bboxes,
+ self.stride_tensor[..., 0])
+ pred_scores = torch.sigmoid(flatten_cls_preds)
+
+ if current_epoch < self.initial_epoch:
+ assigned_result = self.initial_assigner(
+ flatten_pred_bboxes.detach(), self.flatten_priors_train,
+ self.num_level_priors, gt_labels, gt_bboxes, pad_bbox_flag)
+ else:
+ assigned_result = self.assigner(flatten_pred_bboxes.detach(),
+ pred_scores.detach(),
+ self.flatten_priors_train,
+ gt_labels, gt_bboxes,
+ pad_bbox_flag)
+
+ assigned_bboxes = assigned_result['assigned_bboxes']
+ assigned_scores = assigned_result['assigned_scores']
+ fg_mask_pre_prior = assigned_result['fg_mask_pre_prior']
+
+ # cls loss
+ with torch.cuda.amp.autocast(enabled=False):
+ loss_cls = self.loss_cls(flatten_cls_preds, assigned_scores)
+
+ # rescale bbox
+ assigned_bboxes /= self.stride_tensor
+ flatten_pred_bboxes /= self.stride_tensor
+
+ assigned_scores_sum = assigned_scores.sum()
+ # reduce_mean between all gpus
+ assigned_scores_sum = torch.clamp(
+ reduce_mean(assigned_scores_sum), min=1)
+ loss_cls /= assigned_scores_sum
+
+ # select positive samples mask
+ num_pos = fg_mask_pre_prior.sum()
+ if num_pos > 0:
+            # When num_pos > 0, assigned_scores_sum will be > 0, so
+            # loss_bbox will not raise an error.
+ # iou loss
+ prior_bbox_mask = fg_mask_pre_prior.unsqueeze(-1).repeat([1, 1, 4])
+ pred_bboxes_pos = torch.masked_select(
+ flatten_pred_bboxes, prior_bbox_mask).reshape([-1, 4])
+ assigned_bboxes_pos = torch.masked_select(
+ assigned_bboxes, prior_bbox_mask).reshape([-1, 4])
+ bbox_weight = torch.masked_select(
+ assigned_scores.sum(-1), fg_mask_pre_prior).unsqueeze(-1)
+ loss_bbox = self.loss_bbox(
+ pred_bboxes_pos,
+ assigned_bboxes_pos,
+ weight=bbox_weight,
+ avg_factor=assigned_scores_sum)
+
+ # dfl loss
+ dist_mask = fg_mask_pre_prior.unsqueeze(-1).repeat(
+ [1, 1, (self.head_module.reg_max + 1) * 4])
+
+ pred_dist_pos = torch.masked_select(
+ flatten_dist_preds,
+ dist_mask).reshape([-1, 4, self.head_module.reg_max + 1])
+ assigned_ltrb = self.bbox_coder.encode(
+ self.flatten_priors_train[..., :2] / self.stride_tensor,
+ assigned_bboxes,
+ max_dis=self.head_module.reg_max,
+ eps=0.01)
+ assigned_ltrb_pos = torch.masked_select(
+ assigned_ltrb, prior_bbox_mask).reshape([-1, 4])
+ loss_dfl = self.loss_dfl(
+ pred_dist_pos.reshape(-1, self.head_module.reg_max + 1),
+ assigned_ltrb_pos.reshape(-1),
+ weight=bbox_weight.expand(-1, 4).reshape(-1),
+ avg_factor=assigned_scores_sum)
+ else:
+ loss_bbox = flatten_pred_bboxes.sum() * 0
+ loss_dfl = flatten_pred_bboxes.sum() * 0
+
+ return dict(loss_cls=loss_cls, loss_bbox=loss_bbox, loss_dfl=loss_dfl)
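
The integral-distribution trick used by `forward_single` above (softmax over the `reg_max + 1` bins, then an expectation taken with the fixed `proj` weights) can be sketched in isolation. A minimal sketch with a hypothetical helper name; shapes follow the permuted layout in the head:

```python
import torch
import torch.nn.functional as F


def dfl_expectation(bbox_dist_preds: torch.Tensor,
                    reg_max: int = 16) -> torch.Tensor:
    """Hypothetical helper: convert DFL logits to expected distances.

    Args:
        bbox_dist_preds: (bs, reg_max + 1, hw, 4) distribution logits,
            matching the permuted layout used in ``forward_single``.
    Returns:
        (bs, 1, hw, 4) expected l/t/r/b distances in grid units.
    """
    # Fixed projection weights 0, 1, ..., reg_max as a 1x1 conv kernel.
    proj = torch.linspace(0, reg_max, reg_max + 1).view(
        1, reg_max + 1, 1, 1)
    # Softmax over the bin dimension, then take the expectation via a
    # 1x1 convolution with the (non-trainable) projection weights.
    return F.conv2d(F.softmax(bbox_dist_preds, dim=1), proj)
```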
diff --git a/mmyolo/models/dense_heads/rtmdet_head.py b/mmyolo/models/dense_heads/rtmdet_head.py
index b88147c37..a6651bd09 100644
--- a/mmyolo/models/dense_heads/rtmdet_head.py
+++ b/mmyolo/models/dense_heads/rtmdet_head.py
@@ -260,6 +260,9 @@ def special_init(self):
else:
self.sampler = PseudoSampler(context=self)
+ self.featmap_sizes_train = None
+ self.flatten_priors_train = None
+
def forward(self, x: Tuple[Tensor]) -> Tuple[List]:
"""Forward features from the upstream network.
@@ -312,12 +315,12 @@ def loss_by_feat(
device = cls_scores[0].device
# If the shape does not equal, generate new one
- if featmap_sizes != self.featmap_sizes:
- self.featmap_sizes = featmap_sizes
- mlvl_priors = self.prior_generator.grid_priors(
+ if featmap_sizes != self.featmap_sizes_train:
+ self.featmap_sizes_train = featmap_sizes
+ mlvl_priors_with_stride = self.prior_generator.grid_priors(
featmap_sizes, device=device, with_stride=True)
- self.flatten_priors = torch.cat(mlvl_priors, dim=0)
- self.mlvl_priors = [mlvl[:, :2] for mlvl in mlvl_priors]
+ self.flatten_priors_train = torch.cat(
+ mlvl_priors_with_stride, dim=0)
flatten_cls_scores = torch.cat([
cls_score.permute(0, 2, 3, 1).reshape(num_imgs, -1,
@@ -329,13 +332,14 @@ def loss_by_feat(
bbox_pred.permute(0, 2, 3, 1).reshape(num_imgs, -1, 4)
for bbox_pred in bbox_preds
], 1)
- flatten_bboxes = flatten_bboxes * self.flatten_priors[..., -1, None]
- flatten_bboxes = distance2bbox(self.flatten_priors[..., :2],
+ flatten_bboxes = flatten_bboxes * self.flatten_priors_train[..., -1,
+ None]
+ flatten_bboxes = distance2bbox(self.flatten_priors_train[..., :2],
flatten_bboxes)
assigned_result = self.assigner(flatten_bboxes.detach(),
flatten_cls_scores.detach(),
- self.flatten_priors, gt_labels,
+ self.flatten_priors_train, gt_labels,
gt_bboxes, pad_bbox_flag)
labels = assigned_result['assigned_labels'].reshape(-1)
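
The renamed `flatten_priors_train` comes from `grid_priors(..., with_stride=True)`, so its last column carries the stride and the decode step above is a plain scale-then-offset. A minimal sketch with made-up numbers; only `distance2bbox` is the real mmdet helper, the tensors are hypothetical:

```python
import torch
from mmdet.structures.bbox import distance2bbox

# Hypothetical priors generated with with_stride=True:
# columns are (cx, cy, stride_w, stride_h).
priors = torch.tensor([[4., 4., 8., 8.],
                       [12., 4., 8., 8.]])
# Raw head outputs: distances in stride units, shape (bs, num_priors, 4).
pred = torch.tensor([[[0.5, 0.5, 0.5, 0.5],
                      [1.0, 1.0, 1.0, 1.0]]])

# Scale distances to pixels with the stride column, then decode to xyxy.
pred_pix = pred * priors[..., -1, None]
boxes = distance2bbox(priors[..., :2], pred_pix)
# boxes[0, 0] == [0., 0., 8., 8.]; boxes[0, 1] == [4., -4., 20., 12.]
```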
diff --git a/mmyolo/models/dense_heads/yolov6_head.py b/mmyolo/models/dense_heads/yolov6_head.py
index e85cd8284..60abf29d6 100644
--- a/mmyolo/models/dense_heads/yolov6_head.py
+++ b/mmyolo/models/dense_heads/yolov6_head.py
@@ -169,7 +169,6 @@ class YOLOv6Head(YOLOv5Head):
in 2D points-based detectors.
loss_cls (:obj:`ConfigDict` or dict): Config of classification loss.
loss_bbox (:obj:`ConfigDict` or dict): Config of localization loss.
- loss_obj (:obj:`ConfigDict` or dict): Config of objectness loss.
train_cfg (:obj:`ConfigDict` or dict, optional): Training config of
anchor head. Defaults to None.
test_cfg (:obj:`ConfigDict` or dict, optional): Testing config of
@@ -201,11 +200,6 @@ def __init__(self,
reduction='mean',
loss_weight=2.5,
return_iou=False),
- loss_obj: ConfigType = dict(
- type='mmdet.CrossEntropyLoss',
- use_sigmoid=True,
- reduction='sum',
- loss_weight=1.0),
train_cfg: OptConfigType = None,
test_cfg: OptConfigType = None,
init_cfg: OptMultiConfig = None):
@@ -215,13 +209,11 @@ def __init__(self,
bbox_coder=bbox_coder,
loss_cls=loss_cls,
loss_bbox=loss_bbox,
- loss_obj=loss_obj,
train_cfg=train_cfg,
test_cfg=test_cfg,
init_cfg=init_cfg)
-
- self.loss_bbox = MODELS.build(loss_bbox)
- self.loss_cls = MODELS.build(loss_cls)
+ # yolov6 doesn't need loss_obj
+ self.loss_obj = None
def special_init(self):
"""Since YOLO series algorithms will inherit from YOLOv5Head, but
@@ -236,10 +228,9 @@ def special_init(self):
self.assigner = TASK_UTILS.build(self.train_cfg.assigner)
# Add common attributes to reduce calculation
- self.featmap_sizes = None
- self.mlvl_priors = None
+ self.featmap_sizes_train = None
self.num_level_priors = None
- self.flatten_priors = None
+ self.flatten_priors_train = None
self.stride_tensor = None
def loss_by_feat(
@@ -284,19 +275,19 @@ def loss_by_feat(
cls_score.shape[2:] for cls_score in cls_scores
]
# If the shape does not equal, generate new one
- if current_featmap_sizes != self.featmap_sizes:
- self.featmap_sizes = current_featmap_sizes
+ if current_featmap_sizes != self.featmap_sizes_train:
+ self.featmap_sizes_train = current_featmap_sizes
- mlvl_priors = self.prior_generator.grid_priors(
- self.featmap_sizes,
+ mlvl_priors_with_stride = self.prior_generator.grid_priors(
+ self.featmap_sizes_train,
dtype=cls_scores[0].dtype,
device=cls_scores[0].device,
with_stride=True)
- self.num_level_priors = [len(n) for n in mlvl_priors]
- self.flatten_priors = torch.cat(mlvl_priors, dim=0)
- self.stride_tensor = self.flatten_priors[..., [2]]
- self.mlvl_priors = [mlvl[:, :2] for mlvl in mlvl_priors]
+ self.num_level_priors = [len(n) for n in mlvl_priors_with_stride]
+ self.flatten_priors_train = torch.cat(
+ mlvl_priors_with_stride, dim=0)
+ self.stride_tensor = self.flatten_priors_train[..., [2]]
# gt info
gt_info = self.gt_instances_preprocess(batch_gt_instances, num_imgs)
@@ -319,19 +310,20 @@ def loss_by_feat(
flatten_cls_preds = torch.cat(flatten_cls_preds, dim=1)
flatten_pred_bboxes = torch.cat(flatten_pred_bboxes, dim=1)
flatten_pred_bboxes = self.bbox_coder.decode(
- self.flatten_priors[..., :2], flatten_pred_bboxes,
- self.flatten_priors[..., 2])
+ self.flatten_priors_train[..., :2], flatten_pred_bboxes,
+ self.stride_tensor[:, 0])
pred_scores = torch.sigmoid(flatten_cls_preds)
if current_epoch < self.initial_epoch:
assigned_result = self.initial_assigner(
- flatten_pred_bboxes.detach(), self.flatten_priors,
+ flatten_pred_bboxes.detach(), self.flatten_priors_train,
self.num_level_priors, gt_labels, gt_bboxes, pad_bbox_flag)
else:
assigned_result = self.assigner(flatten_pred_bboxes.detach(),
pred_scores.detach(),
- self.flatten_priors, gt_labels,
- gt_bboxes, pad_bbox_flag)
+ self.flatten_priors_train,
+ gt_labels, gt_bboxes,
+ pad_bbox_flag)
assigned_bboxes = assigned_result['assigned_bboxes']
assigned_scores = assigned_result['assigned_scores']
diff --git a/mmyolo/models/detectors/yolo_detector.py b/mmyolo/models/detectors/yolo_detector.py
index 9d38fb467..e6783fbab 100644
--- a/mmyolo/models/detectors/yolo_detector.py
+++ b/mmyolo/models/detectors/yolo_detector.py
@@ -17,9 +17,9 @@ class YOLODetector(SingleStageDetector):
neck (:obj:`ConfigDict` or dict): The neck config.
bbox_head (:obj:`ConfigDict` or dict): The bbox head config.
train_cfg (:obj:`ConfigDict` or dict, optional): The training config
- of YOLOX. Defaults to None.
+ of YOLO. Defaults to None.
test_cfg (:obj:`ConfigDict` or dict, optional): The testing config
- of YOLOX. Defaults to None.
+ of YOLO. Defaults to None.
data_preprocessor (:obj:`ConfigDict` or dict, optional): Config of
:class:`DetDataPreprocessor` to process the input data.
Defaults to None.
diff --git a/mmyolo/models/layers/yolo_bricks.py b/mmyolo/models/layers/yolo_bricks.py
index f284acfa3..cd91bd6d0 100644
--- a/mmyolo/models/layers/yolo_bricks.py
+++ b/mmyolo/models/layers/yolo_bricks.py
@@ -140,7 +140,7 @@ class RepVGGBlock(nn.Module):
In PPYOLOE+ model backbone, `use_alpha` will be set to True.
Default: False.
use_bn_first (bool): Whether to use bn layer before conv.
- In YOLOv6 and YLOv7, this will be set to True.
+ In YOLOv6 and YOLOv7, this will be set to True.
In PPYOLOE, this will be set to False.
Default: True.
deploy (bool): Whether in deploy mode. Default: False
diff --git a/mmyolo/models/necks/cspnext_pafpn.py b/mmyolo/models/necks/cspnext_pafpn.py
index de944db88..7736432e9 100644
--- a/mmyolo/models/necks/cspnext_pafpn.py
+++ b/mmyolo/models/necks/cspnext_pafpn.py
@@ -126,7 +126,7 @@ def build_top_down_layer(self, idx: int) -> nn.Module:
conv_cfg=self.conv_cfg,
norm_cfg=self.norm_cfg,
act_cfg=self.act_cfg)
- elif idx == 2:
+ else:
return nn.Sequential(
CSPLayer(
self.in_channels[idx - 1] * 2,
diff --git a/mmyolo/models/task_modules/assigners/batch_atss_assigner.py b/mmyolo/models/task_modules/assigners/batch_atss_assigner.py
index 5b2ed50ec..45b3069af 100644
--- a/mmyolo/models/task_modules/assigners/batch_atss_assigner.py
+++ b/mmyolo/models/task_modules/assigners/batch_atss_assigner.py
@@ -92,7 +92,7 @@ def forward(self, pred_bboxes: Tensor, priors: Tensor,
Args:
pred_bboxes (Tensor): Predicted bounding boxes,
shape(batch_size, num_priors, 4)
- priors (Tensor): Model priors, shape(num_priors, 4)
+ priors (Tensor): Model priors with stride, shape(num_priors, 4)
num_level_priors (List): Number of bboxes in each level, len(3)
gt_labels (Tensor): Ground truth label,
shape(batch_size, num_gt, 1)
diff --git a/mmyolo/models/task_modules/assigners/batch_yolov7_assigner.py b/mmyolo/models/task_modules/assigners/batch_yolov7_assigner.py
index 2bfb05623..6709968ee 100644
--- a/mmyolo/models/task_modules/assigners/batch_yolov7_assigner.py
+++ b/mmyolo/models/task_modules/assigners/batch_yolov7_assigner.py
@@ -254,6 +254,9 @@ def simota_assigner(self, pred_results, batch_targets_normed,
_mlvl_decoderd_bboxes = torch.cat(_mlvl_decoderd_bboxes, dim=0)
num_pred_positive = _mlvl_decoderd_bboxes.shape[0]
+ if num_pred_positive == 0:
+ continue
+
# scaled xywh
batch_input_shape_wh = pred_results[0].new_tensor(
batch_input_shape[::-1]).repeat((1, 2))
diff --git a/mmyolo/models/task_modules/coders/distance_point_bbox_coder.py b/mmyolo/models/task_modules/coders/distance_point_bbox_coder.py
index f43890ecc..16417b8ab 100644
--- a/mmyolo/models/task_modules/coders/distance_point_bbox_coder.py
+++ b/mmyolo/models/task_modules/coders/distance_point_bbox_coder.py
@@ -4,7 +4,7 @@
import torch
from mmdet.models.task_modules.coders import \
DistancePointBBoxCoder as MMDET_DistancePointBBoxCoder
-from mmdet.structures.bbox import distance2bbox
+from mmdet.structures.bbox import bbox2distance, distance2bbox
from mmyolo.registry import TASK_UTILS
@@ -51,3 +51,29 @@ def decode(
pred_bboxes = pred_bboxes * stride[None, :, None]
return distance2bbox(points, pred_bboxes, max_shape)
+
+ def encode(self,
+ points: torch.Tensor,
+ gt_bboxes: torch.Tensor,
+ max_dis: float = 16.,
+ eps: float = 0.01) -> torch.Tensor:
+ """Encode bounding box to distances. The rewrite is to support batch
+ operations.
+
+ Args:
+ points (Tensor): Shape (B, N, 2) or (N, 2), The format is [x, y].
+ gt_bboxes (Tensor or :obj:`BaseBoxes`): Shape (N, 4), The format
+ is "xyxy"
+            max_dis (float): Upper bound of the distance. Defaults to 16.0.
+            eps (float): A small value to ensure target < max_dis rather
+                than <=. Defaults to 0.01.
+
+ Returns:
+ Tensor: Box transformation deltas. The shape is (N, 4) or
+ (B, N, 4).
+ """
+
+ assert points.size(-2) == gt_bboxes.size(-2)
+ assert points.size(-1) == 2
+ assert gt_bboxes.size(-1) == 4
+ return bbox2distance(points, gt_bboxes, max_dis, eps)
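
A quick round-trip through the new `encode` alongside the existing `decode` reads as follows. This is a hypothetical usage sketch; it assumes the coder is registered as `DistancePointBBoxCoder` in `TASK_UTILS`, and the distances are chosen small enough that the `max_dis - eps` clamp never triggers:

```python
import torch

from mmyolo.registry import TASK_UTILS
from mmyolo.utils import register_all_modules

register_all_modules()
coder = TASK_UTILS.build(dict(type='DistancePointBBoxCoder'))

points = torch.tensor([[8., 8.], [24., 8.]])                  # prior centers
gt = torch.tensor([[0., 0., 16., 16.], [16., 0., 32., 16.]])  # xyxy boxes

# Encode boxes into (left, top, right, bottom) distances from the points;
# targets are clamped to max_dis - eps, matching the DFL integral range.
ltrb = coder.encode(points, gt, max_dis=16., eps=0.01)

# Decoding with stride 1 recovers the original boxes.
boxes = coder.decode(points, ltrb, torch.ones(len(points)))
assert torch.allclose(boxes.squeeze(0), gt)
```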
diff --git a/mmyolo/version.py b/mmyolo/version.py
index f823adabf..f3c663b44 100644
--- a/mmyolo/version.py
+++ b/mmyolo/version.py
@@ -1,6 +1,6 @@
# Copyright (c) OpenMMLab. All rights reserved.
-__version__ = '0.2.0'
+__version__ = '0.3.0'
from typing import Tuple
diff --git a/model-index.yml b/model-index.yml
index de8794ca9..d804a9392 100644
--- a/model-index.yml
+++ b/model-index.yml
@@ -4,3 +4,4 @@ Import:
- configs/yolox/metafile.yml
- configs/rtmdet/metafile.yml
- configs/yolov7/metafile.yml
+ - configs/ppyoloe/metafile.yml
diff --git a/projects/assigner_visualization/assigner_visualization.py b/projects/assigner_visualization/assigner_visualization.py
index df489c430..0086985fe 100644
--- a/projects/assigner_visualization/assigner_visualization.py
+++ b/projects/assigner_visualization/assigner_visualization.py
@@ -111,6 +111,7 @@ def main():
# make output dir
os.makedirs(args.output_dir, exist_ok=True)
+    print('Results will be saved to', args.output_dir)
# init visualization image number
assert args.show_number > 0
diff --git a/projects/easydeploy/model/model.py b/projects/easydeploy/model/model.py
index 95f573ad7..0e3c9a559 100644
--- a/projects/easydeploy/model/model.py
+++ b/projects/easydeploy/model/model.py
@@ -9,7 +9,7 @@
from torch import Tensor
from mmyolo.models import RepVGGBlock
-from mmyolo.models.dense_heads import RTMDetHead, YOLOv5Head
+from mmyolo.models.dense_heads import RTMDetHead, YOLOv5Head, YOLOv7Head
from ..backbone import DeployFocus, GConvFocus, NcnnFocus
from ..bbox_code import rtmdet_bbox_decoder, yolov5_bbox_decoder
from ..nms import batched_nms, efficient_nms, onnx_nms
@@ -68,7 +68,7 @@ def pred_by_feat(self,
device = cls_scores[0].device
nms_func = self.select_nms()
- if self.detector_type is YOLOv5Head:
+ if self.detector_type in (YOLOv5Head, YOLOv7Head):
bbox_decoder = yolov5_bbox_decoder
elif self.detector_type is RTMDetHead:
bbox_decoder = rtmdet_bbox_decoder
@@ -130,7 +130,7 @@ def select_nms(self):
nms_func = batched_nms
else:
raise NotImplementedError
- if type(self.baseHead) is YOLOv5Head:
+ if type(self.baseHead) in (YOLOv5Head, YOLOv7Head):
nms_func = partial(nms_func, box_coding=1)
return nms_func
diff --git a/projects/easydeploy/tools/export.py b/projects/easydeploy/tools/export.py
index 39d9fcfce..341d697ae 100644
--- a/projects/easydeploy/tools/export.py
+++ b/projects/easydeploy/tools/export.py
@@ -7,6 +7,7 @@
import torch
from mmdet.apis import init_detector
from mmengine.config import ConfigDict
+from mmengine.utils.path import mkdir_or_exist
from mmyolo.utils import register_all_modules
from projects.easydeploy.model import DeployModel
@@ -77,8 +78,7 @@ def main():
args = parse_args()
register_all_modules()
- if not os.path.exists(args.work_dir):
- os.mkdir(args.work_dir)
+ mkdir_or_exist(args.work_dir)
if args.model_only:
postprocess_cfg = None
diff --git a/projects/easydeploy/tools/image-demo.py b/projects/easydeploy/tools/image-demo.py
index 2b1da95f1..d900d6556 100644
--- a/projects/easydeploy/tools/image-demo.py
+++ b/projects/easydeploy/tools/image-demo.py
@@ -1,5 +1,4 @@
# Copyright (c) OpenMMLab. All rights reserved.
-from projects.easydeploy.model import ORTWrapper, TRTWrapper # isort:skip
import os
import random
from argparse import ArgumentParser
@@ -10,12 +9,14 @@
import torch
from mmcv.transforms import Compose
from mmdet.utils import get_test_pipeline_cfg
-from mmengine.config import Config
-from mmengine.utils import ProgressBar
+from mmengine.config import Config, ConfigDict
+from mmengine.utils import ProgressBar, path
from mmyolo.utils import register_all_modules
from mmyolo.utils.misc import get_file_list
+from projects.easydeploy.model import ORTWrapper, TRTWrapper # isort:skip
+
def parse_args():
parser = ArgumentParser()
@@ -76,13 +77,13 @@ def main():
cfg = Config.fromfile(args.config)
test_pipeline = get_test_pipeline_cfg(cfg)
- test_pipeline[0].type = 'mmdet.LoadImageFromNDArray'
+ test_pipeline[0] = ConfigDict({'type': 'mmdet.LoadImageFromNDArray'})
test_pipeline = Compose(test_pipeline)
pre_pipeline = preprocess(cfg)
- if not os.path.exists(args.out_dir) and not args.show:
- os.mkdir(args.out_dir)
+ if not args.show:
+ path.mkdir_or_exist(args.out_dir)
# get file list
files, source_type = get_file_list(args.img)
@@ -90,8 +91,6 @@ def main():
# start detector inference
progress_bar = ProgressBar(len(files))
for i, file in enumerate(files):
- # result = inference_detector(model, file)
-
bgr = mmcv.imread(file)
rgb = mmcv.imconvert(bgr, 'bgr', 'rgb')
data, samples = test_pipeline(dict(img=rgb, img_id=i)).values()
diff --git a/projects/example_project/README.md b/projects/example_project/README.md
new file mode 100644
index 000000000..24c84d980
--- /dev/null
+++ b/projects/example_project/README.md
@@ -0,0 +1,141 @@
+# Dummy YOLOv5CSPDarknet Wrapper
+
+This is an example README for community `projects/`. We have provided detailed explanations for each field in the form of html comments, which are visible when you read the source of this README file. If you wish to submit your project to our main repository, then all the fields in this README are mandatory for others to understand what you have achieved in this implementation. For more details, read our [contribution guide](https://mmyolo.readthedocs.io/en/latest/community/contributing.html) or approach us in [Discussions](https://github.com/open-mmlab/mmyolo/discussions).
+
+## Description
+
+
+
+This project implements a dummy YOLOv5CSPDarknet wrapper, which literally does nothing new but prints "hello world" during initialization.
+
+## Usage
+
+
+
+### Training commands
+
+In MMYOLO's root directory, run the following command to train the model:
+
+```bash
+python tools/train.py projects/example_project/configs/yolov5_s_dummy-backbone_v61_syncbn_8xb16-300e_coco.py
+```
+
+### Testing commands
+
+In MMYOLO's root directory, run the following command to test the model:
+
+```bash
+python tools/test.py projects/example_project/configs/yolov5_s_dummy-backbone_v61_syncbn_8xb16-300e_coco.py ${CHECKPOINT_PATH}
+```
+
+## Results
+
+
+
+| Method | Backbone | Pretrained Model | Training set | Test set | #epoch | box AP | Download |
+| :---------------------------------------------------------------------------: | :-------------------: | :--------------: | :------------: | :----------: | :----: | :----: | :----------------------: |
+| [YOLOv5 dummy](configs/yolov5_s_dummy-backbone_v61_syncbn_8xb16-300e_coco.py) | DummyYOLOv5CSPDarknet | - | COCO2017 Train | COCO2017 Val | 300 | 37.7 | [model](<>) \| [log](<>) |
+
+## Citation
+
+
+
+```latex
+@software{glenn_jocher_2022_7002879,
+ author = {Glenn Jocher and
+ Ayush Chaurasia and
+ Alex Stoken and
+ Jirka Borovec and
+ NanoCode012 and
+ Yonghye Kwon and
+ TaoXie and
+ Kalen Michael and
+ Jiacong Fang and
+ imyhxy and
+ Lorna and
+ Colin Wong and
+ 曾逸夫(Zeng Yifu) and
+ Abhiram V and
+ Diego Montes and
+ Zhiqiang Wang and
+ Cristi Fati and
+ Jebastin Nadar and
+ Laughing and
+ UnglvKitDe and
+ tkianai and
+ yxNONG and
+ Piotr Skalski and
+ Adam Hogan and
+ Max Strobel and
+ Mrinal Jain and
+ Lorenzo Mammana and
+ xylieong},
+ title = {{ultralytics/yolov5: v6.2 - YOLOv5 Classification
+ Models, Apple M1, Reproducibility, ClearML and
+ Deci.ai integrations}},
+ month = aug,
+ year = 2022,
+ publisher = {Zenodo},
+ version = {v6.2},
+ doi = {10.5281/zenodo.7002879},
+ url = {https://doi.org/10.5281/zenodo.7002879}
+}
+```
+
+## Checklist
+
+
+
+- [ ] Milestone 1: PR-ready, and acceptable to be one of the `projects/`.
+
+ - [ ] Finish the code
+
+
+
+ - [ ] Basic docstrings & proper citation
+
+
+
+ - [ ] Test-time correctness
+
+
+
+ - [ ] A full README
+
+
+
+- [ ] Milestone 2: Indicates a successful model implementation.
+
+ - [ ] Training-time correctness
+
+
+
+- [ ] Milestone 3: Good to be a part of our core package!
+
+ - [ ] Type hints and docstrings
+
+
+
+ - [ ] Unit tests
+
+
+
+ - [ ] Code polishing
+
+
+
+ - [ ] Metafile.yml
+
+
+
+- [ ] Move your modules into the core package following the codebase's file hierarchy structure.
+
+
+
+- [ ] Refactor your modules into the core package following the codebase's file hierarchy structure.
diff --git a/projects/example_project/configs/yolov5_s_dummy-backbone_v61_syncbn_8xb16-300e_coco.py b/projects/example_project/configs/yolov5_s_dummy-backbone_v61_syncbn_8xb16-300e_coco.py
new file mode 100644
index 000000000..55b43bb3e
--- /dev/null
+++ b/projects/example_project/configs/yolov5_s_dummy-backbone_v61_syncbn_8xb16-300e_coco.py
@@ -0,0 +1,5 @@
+_base_ = '../../../configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py'
+
+custom_imports = dict(imports=['projects.example_project.dummy'])
+
+_base_.model.backbone.type = 'DummyYOLOv5CSPDarknet'
diff --git a/projects/example_project/dummy/__init__.py b/projects/example_project/dummy/__init__.py
new file mode 100644
index 000000000..ca1028c87
--- /dev/null
+++ b/projects/example_project/dummy/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .dummy_yolov5cspdarknet import DummyYOLOv5CSPDarknet
+
+__all__ = ['DummyYOLOv5CSPDarknet']
diff --git a/projects/example_project/dummy/dummy_yolov5cspdarknet.py b/projects/example_project/dummy/dummy_yolov5cspdarknet.py
new file mode 100644
index 000000000..c500abb42
--- /dev/null
+++ b/projects/example_project/dummy/dummy_yolov5cspdarknet.py
@@ -0,0 +1,16 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+
+from mmyolo.models import YOLOv5CSPDarknet
+from mmyolo.registry import MODELS
+
+
+@MODELS.register_module()
+class DummyYOLOv5CSPDarknet(YOLOv5CSPDarknet):
+ """Implements a dummy YOLOv5CSPDarknet wrapper for demonstration purpose.
+ Args:
+ **kwargs: All the arguments are passed to the parent class.
+ """
+
+ def __init__(self, **kwargs) -> None:
+ print('Hello world!')
+ super().__init__(**kwargs)
diff --git a/projects/misc/custom_dataset/yolov5_s-v61_syncbn_fast_1xb32-100e_cat.py b/projects/misc/custom_dataset/yolov5_s-v61_syncbn_fast_1xb32-100e_cat.py
index ae156d587..1d6a9d3b0 100644
--- a/projects/misc/custom_dataset/yolov5_s-v61_syncbn_fast_1xb32-100e_cat.py
+++ b/projects/misc/custom_dataset/yolov5_s-v61_syncbn_fast_1xb32-100e_cat.py
@@ -1,51 +1,38 @@
_base_ = '../yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py'
-max_epochs = 100 # 训练的最大 epoch
-data_root = './data/cat/' # 数据集目录的绝对路径
-# data_root = '/root/workspace/mmyolo/data/cat/' # Docker 容器里面数据集目录的绝对路径
+max_epochs = 100
+data_root = './data/cat/'
+# data_root = '/root/workspace/mmyolo/data/cat/'  # in Docker container
-# 结果保存的路径,可以省略,省略保存的文件名位于 work_dirs 下 config 同名的文件夹中
-# 如果某个 config 只是修改了部分参数,修改这个变量就可以将新的训练文件保存到其他地方
work_dir = './work_dirs/yolov5_s-v61_syncbn_fast_1xb32-100e_cat'
-# load_from 可以指定本地路径或者 URL,设置了 URL 会自动进行下载,因为上面已经下载过,我们这里设置本地路径
-# 因为本教程是在 cat 数据集上微调,故这里需要使用 `load_from` 来加载 MMYOLO 中的预训练模型,这样可以在加快收敛速度的同时保证精度
-load_from = './work_dirs/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth' # noqa
+load_from = 'https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth' # noqa
-# 根据自己的 GPU 情况,修改 batch size,YOLOv5-s 默认为 8卡 x 16bs
train_batch_size_per_gpu = 32
-train_num_workers = 4 # 推荐使用 train_num_workers = nGPU x 4
+train_num_workers = 4
-save_epoch_intervals = 2 # 每 interval 轮迭代进行一次保存一次权重
+save_epoch_intervals = 2
-# 根据自己的 GPU 情况,修改 base_lr,修改的比例是 base_lr_default * (your_bs / default_bs)
+# base_lr_default * (your_bs / default_bs)
base_lr = _base_.base_lr / 4
-anchors = [ # 此处已经根据数据集特点更新了 anchor,关于 anchor 的生成,后面小节会讲解
+anchors = [
[(68, 69), (154, 91), (143, 162)], # P3/8
[(242, 160), (189, 287), (391, 207)], # P4/16
[(353, 337), (539, 341), (443, 432)] # P5/32
]
-class_name = ('cat', ) # 根据 class_with_id.txt 类别信息,设置 class_name
+class_name = ('cat', )
num_classes = len(class_name)
-metainfo = dict(
- classes=class_name,
- palette=[(220, 20, 60)] # 画图时候的颜色,随便设置即可
-)
+metainfo = dict(classes=class_name, palette=[(220, 20, 60)])
train_cfg = dict(
- max_epochs=max_epochs,
- val_begin=20, # 第几个 epoch 后验证,这里设置 20 是因为前 20 个 epoch 精度不高,测试意义不大,故跳过
- val_interval=save_epoch_intervals # 每 val_interval 轮迭代进行一次测试评估
-)
+ max_epochs=max_epochs, val_begin=20, val_interval=save_epoch_intervals)
model = dict(
bbox_head=dict(
head_module=dict(num_classes=num_classes),
prior_generator=dict(base_sizes=anchors),
-
- # loss_cls 会根据 num_classes 动态调整,但是 num_classes = 1 的时候,loss_cls 恒为 0
loss_cls=dict(loss_weight=0.5 *
(num_classes / 80 * 3 / _base_.num_det_layers))))
@@ -55,7 +42,6 @@
dataset=dict(
_delete_=True,
type='RepeatDataset',
- # 数据量太少的话,可以使用 RepeatDataset ,在每个 epoch 内重复当前数据集 n 次,这里设置 5 是重复 5 次
times=5,
dataset=dict(
type=_base_.dataset_type,
@@ -81,12 +67,10 @@
optim_wrapper = dict(optimizer=dict(lr=base_lr))
default_hooks = dict(
- # 设置间隔多少个 epoch 保存模型,以及保存模型最多几个,`save_best` 是另外保存最佳模型(推荐)
checkpoint=dict(
type='CheckpointHook',
interval=save_epoch_intervals,
max_keep_ckpts=5,
save_best='auto'),
param_scheduler=dict(max_epochs=max_epochs),
- # logger 输出的间隔
logger=dict(type='LoggerHook', interval=10))
diff --git a/projects/misc/custom_dataset/yolov6_s_syncbn_fast_1xb32-100e_cat.py b/projects/misc/custom_dataset/yolov6_s_syncbn_fast_1xb32-100e_cat.py
index bd738bdd6..67d5638aa 100644
--- a/projects/misc/custom_dataset/yolov6_s_syncbn_fast_1xb32-100e_cat.py
+++ b/projects/misc/custom_dataset/yolov6_s_syncbn_fast_1xb32-100e_cat.py
@@ -1,36 +1,28 @@
_base_ = '../yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco.py'
-max_epochs = 100 # 训练的最大 epoch
-data_root = './data/cat/' # 数据集目录的绝对路径
+max_epochs = 100
+data_root = './data/cat/'
-# 结果保存的路径,可以省略,省略保存的文件名位于 work_dirs 下 config 同名的文件夹中
-# 如果某个 config 只是修改了部分参数,修改这个变量就可以将新的训练文件保存到其他地方
work_dir = './work_dirs/yolov6_s_syncbn_fast_1xb32-100e_cat'
-# load_from 可以指定本地路径或者 URL,设置了 URL 会自动进行下载,因为上面已经下载过,我们这里设置本地路径
-# 因为本教程是在 cat 数据集上微调,故这里需要使用 `load_from` 来加载 MMYOLO 中的预训练模型,这样可以在加快收敛速度的同时保证精度
-load_from = './work_dirs/yolov6_s_syncbn_fast_8xb32-400e_coco_20221102_203035-932e1d91.pth' # noqa
+load_from = 'https://download.openmmlab.com/mmyolo/v0/yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco/yolov6_s_syncbn_fast_8xb32-400e_coco_20221102_203035-932e1d91.pth' # noqa
-# 根据自己的 GPU 情况,修改 batch size,YOLOv6-s 默认为 8卡 x 32bs
train_batch_size_per_gpu = 32
-train_num_workers = 4 # 推荐使用 train_num_workers = nGPU x 4
+train_num_workers = 4 # train_num_workers = nGPU x 4
-save_epoch_intervals = 2 # 每 interval 轮迭代进行一次保存一次权重
+save_epoch_intervals = 2
-# 根据自己的 GPU 情况,修改 base_lr,修改的比例是 base_lr_default * (your_bs / default_bs)
+# base_lr_default * (your_bs / default_bs)
base_lr = _base_.base_lr / 8
-class_name = ('cat', ) # 根据 class_with_id.txt 类别信息,设置 class_name
+class_name = ('cat', )
num_classes = len(class_name)
-metainfo = dict(
- classes=class_name,
- palette=[(220, 20, 60)] # 画图时候的颜色,随便设置即可
-)
+metainfo = dict(classes=class_name, palette=[(220, 20, 60)])
train_cfg = dict(
max_epochs=max_epochs,
- val_begin=20, # 第几个 epoch 后验证,这里设置 20 是因为前 20 个 epoch 精度不高,测试意义不大,故跳过
- val_interval=save_epoch_intervals, # 每 val_interval 轮迭代进行一次测试评估
+ val_begin=20,
+ val_interval=save_epoch_intervals,
dynamic_intervals=[(max_epochs - _base_.num_last_epochs, 1)])
model = dict(
@@ -45,7 +37,6 @@
dataset=dict(
_delete_=True,
type='RepeatDataset',
- # 数据量太少的话,可以使用 RepeatDataset ,在每个 epoch 内重复当前数据集 n 次,这里设置 5 是重复 5 次
times=5,
dataset=dict(
type=_base_.dataset_type,
@@ -71,14 +62,12 @@
optim_wrapper = dict(optimizer=dict(lr=base_lr))
default_hooks = dict(
- # 设置间隔多少个 epoch 保存模型,以及保存模型最多几个,`save_best` 是另外保存最佳模型(推荐)
checkpoint=dict(
type='CheckpointHook',
interval=save_epoch_intervals,
max_keep_ckpts=5,
save_best='auto'),
param_scheduler=dict(max_epochs=max_epochs),
- # logger 输出的间隔
logger=dict(type='LoggerHook', interval=10))
custom_hooks = [
diff --git a/projects/misc/custom_dataset/yolov7_tiny_syncbn_fast_1xb32-100e_cat.py b/projects/misc/custom_dataset/yolov7_tiny_syncbn_fast_1xb32-100e_cat.py
new file mode 100644
index 000000000..fff59cb3d
--- /dev/null
+++ b/projects/misc/custom_dataset/yolov7_tiny_syncbn_fast_1xb32-100e_cat.py
@@ -0,0 +1,78 @@
+_base_ = '../yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco.py'
+
+max_epochs = 100
+data_root = './data/cat/'
+
+work_dir = './work_dirs/yolov7_tiny_syncbn_fast_1xb32-100e_cat'
+
+load_from = 'https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco/yolov7_tiny_syncbn_fast_8x16b-300e_coco_20221126_102719-0ee5bbdf.pth' # noqa
+
+train_batch_size_per_gpu = 32
+train_num_workers = 4 # train_num_workers = nGPU x 4
+
+save_epoch_intervals = 2
+
+# base_lr_default * (your_bs / default_bs)
+base_lr = 0.01 / 4
+
+anchors = [
+ [(68, 69), (154, 91), (143, 162)], # P3/8
+ [(242, 160), (189, 287), (391, 207)], # P4/16
+ [(353, 337), (539, 341), (443, 432)] # P5/32
+]
+
+class_name = ('cat', )
+num_classes = len(class_name)
+metainfo = dict(classes=class_name, palette=[(220, 20, 60)])
+
+train_cfg = dict(
+ max_epochs=max_epochs,
+ val_begin=20,
+ val_interval=save_epoch_intervals,
+ dynamic_intervals=[(max_epochs - 10, 1)])
+
+model = dict(
+ bbox_head=dict(
+ head_module=dict(num_classes=num_classes),
+ prior_generator=dict(base_sizes=anchors),
+ loss_cls=dict(loss_weight=0.5 *
+ (num_classes / 80 * 3 / _base_.num_det_layers))))
+
+train_dataloader = dict(
+ batch_size=train_batch_size_per_gpu,
+ num_workers=train_num_workers,
+ dataset=dict(
+ _delete_=True,
+ type='RepeatDataset',
+ times=5,
+ dataset=dict(
+ type=_base_.dataset_type,
+ data_root=data_root,
+ metainfo=metainfo,
+ ann_file='annotations/trainval.json',
+ data_prefix=dict(img='images/'),
+ filter_cfg=dict(filter_empty_gt=False, min_size=32),
+ pipeline=_base_.train_pipeline)))
+
+val_dataloader = dict(
+ dataset=dict(
+ metainfo=metainfo,
+ data_root=data_root,
+ ann_file='annotations/trainval.json',
+ data_prefix=dict(img='images/')))
+
+test_dataloader = val_dataloader
+
+val_evaluator = dict(ann_file=data_root + 'annotations/trainval.json')
+test_evaluator = val_evaluator
+
+optim_wrapper = dict(optimizer=dict(lr=base_lr))
+
+default_hooks = dict(
+ checkpoint=dict(
+ type='CheckpointHook',
+ interval=save_epoch_intervals,
+ max_keep_ckpts=2,
+ save_best='auto'),
+ param_scheduler=dict(max_epochs=max_epochs),
+ logger=dict(type='LoggerHook', interval=10))
diff --git a/setup.cfg b/setup.cfg
index c62d88cce..d30673d0f 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -18,4 +18,4 @@ SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true
[codespell]
skip = *.ipynb
quiet-level = 3
-ignore-words-list = patten,nd,ty,mot,hist,formating,winn,gool,datas,wan,confids,TOOD,tood,ba,warmup,elease
+ignore-words-list = patten,nd,ty,mot,hist,formating,winn,gool,datas,wan,confids,tood,ba,warmup,elease,dota
diff --git a/setup.py b/setup.py
index a87073590..f37c89791 100755
--- a/setup.py
+++ b/setup.py
@@ -174,7 +174,6 @@ def add_mim_extension():
'License :: OSI Approved :: Apache Software License',
'Operating System :: OS Independent',
'Programming Language :: Python :: 3',
- 'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9',
diff --git a/tests/test_datasets/test_transforms/test_mix_img_transforms.py b/tests/test_datasets/test_transforms/test_mix_img_transforms.py
index fa6ef7e58..253fd64ba 100644
--- a/tests/test_datasets/test_transforms/test_mix_img_transforms.py
+++ b/tests/test_datasets/test_transforms/test_mix_img_transforms.py
@@ -69,7 +69,7 @@ def test_transform(self):
transform = Mosaic(use_cached=True, max_cached_images=1)
transform = Mosaic(
- img_scale=(10, 12), pre_transform=self.pre_transform)
+ img_scale=(12, 10), pre_transform=self.pre_transform)
results = transform(copy.deepcopy(self.results))
self.assertTrue(results['img'].shape[:2] == (20, 24))
self.assertTrue(results['gt_bboxes_labels'].shape[0] ==
@@ -83,7 +83,7 @@ def test_transform_with_no_gt(self):
self.results['gt_bboxes_labels'] = np.empty((0, ), dtype=np.int64)
self.results['gt_ignore_flags'] = np.empty((0, ), dtype=bool)
transform = Mosaic(
- img_scale=(10, 12), pre_transform=self.pre_transform)
+ img_scale=(12, 10), pre_transform=self.pre_transform)
results = transform(copy.deepcopy(self.results))
self.assertIsInstance(results, dict)
self.assertTrue(results['img'].shape[:2] == (20, 24))
@@ -96,7 +96,7 @@ def test_transform_with_no_gt(self):
def test_transform_with_box_list(self):
transform = Mosaic(
- img_scale=(10, 12), pre_transform=self.pre_transform)
+ img_scale=(12, 10), pre_transform=self.pre_transform)
results = copy.deepcopy(self.results)
results['gt_bboxes'] = HorizontalBoxes(results['gt_bboxes'])
results = transform(results)
@@ -162,7 +162,7 @@ def test_transform(self):
transform = Mosaic9(use_cached=True, max_cached_images=1)
transform = Mosaic9(
- img_scale=(10, 12), pre_transform=self.pre_transform)
+ img_scale=(12, 10), pre_transform=self.pre_transform)
results = transform(copy.deepcopy(self.results))
self.assertTrue(results['img'].shape[:2] == (20, 24))
self.assertTrue(results['gt_bboxes_labels'].shape[0] ==
@@ -176,7 +176,7 @@ def test_transform_with_no_gt(self):
self.results['gt_bboxes_labels'] = np.empty((0, ), dtype=np.int64)
self.results['gt_ignore_flags'] = np.empty((0, ), dtype=bool)
transform = Mosaic9(
- img_scale=(10, 12), pre_transform=self.pre_transform)
+ img_scale=(12, 10), pre_transform=self.pre_transform)
results = transform(copy.deepcopy(self.results))
self.assertIsInstance(results, dict)
self.assertTrue(results['img'].shape[:2] == (20, 24))
@@ -189,7 +189,7 @@ def test_transform_with_no_gt(self):
def test_transform_with_box_list(self):
transform = Mosaic9(
- img_scale=(10, 12), pre_transform=self.pre_transform)
+ img_scale=(12, 10), pre_transform=self.pre_transform)
results = copy.deepcopy(self.results)
results['gt_bboxes'] = HorizontalBoxes(results['gt_bboxes'])
results = transform(results)
diff --git a/tests/test_datasets/test_transforms/test_transforms.py b/tests/test_datasets/test_transforms/test_transforms.py
index 8a793f8fb..d256dd9f6 100644
--- a/tests/test_datasets/test_transforms/test_transforms.py
+++ b/tests/test_datasets/test_transforms/test_transforms.py
@@ -13,6 +13,8 @@
YOLOv5HSVRandomAug,
YOLOv5KeepRatioResize,
YOLOv5RandomAffine)
+from mmyolo.datasets.transforms.transforms import (PPYOLOERandomCrop,
+ PPYOLOERandomDistort)
class TestLetterResize(unittest.TestCase):
@@ -126,22 +128,42 @@ def test_letter_resize(self):
# TODO: Testing the existence of multiple scale_factor and pad_param
transform = [
YOLOv5KeepRatioResize(scale=(32, 32)),
- LetterResize(scale=(64, 68))
+ LetterResize(scale=(64, 68), pad_val=dict(img=144))
]
for _ in range(5):
- input_h, input_w = np.random.randint(10, 50), np.random.randint(
- 10, 50)
- output_h, output_w = np.random.randint(10, 50), np.random.randint(
- 10, 50)
+ input_h, input_w = np.random.randint(100, 700), np.random.randint(
+ 100, 700)
+ output_h, output_w = np.random.randint(100,
+ 700), np.random.randint(
+ 100, 700)
data_info = dict(
img=np.random.random((input_h, input_w, 3)),
gt_bboxes=np.array([[0, 0, 5, 5]], dtype=np.float32),
batch_shape=np.array([output_h, output_w], dtype=np.int64))
for t in transform:
data_info = t(data_info)
-
+            # because of the rounding operation, the original input shape
+            # cannot be strictly restored, so we only validate the
+            # correctness of scale_factor and pad_param
self.assertIn('scale_factor', data_info)
self.assertIn('pad_param', data_info)
+ pad_param = data_info['pad_param'].reshape(-1, 2).sum(
+                1)  # (top, bottom, left, right) -> (h, w)
+ scale_factor = np.asarray(
+ data_info['scale_factor'])[::-1] # (w, h) -> (h, w)
+ scale_factor_keepratio = np.min(
+ np.asarray((32, 32)) / (input_h, input_w))
+ validate_shape = np.floor(
+ np.asarray((input_h, input_w)) * scale_factor_keepratio + 0.5)
+ scale_factor_keepratio = np.floor(scale_factor_keepratio *
+ input_h + 0.5) / input_h
+ scale_factor_letter = (output_h, output_w) / validate_shape
+ scale_factor_letter = (
+ scale_factor_letter -
+ (pad_param / validate_shape))[np.argmin(scale_factor_letter)]
+ self.assertTrue(data_info['img_shape'][:2] == (output_h, output_w))
+ self.assertTrue((scale_factor == (scale_factor_keepratio *
+ scale_factor_letter)).all())
class TestYOLOv5KeepRatioResize(unittest.TestCase):
@@ -335,3 +357,100 @@ def test_transform_with_boxlist(self):
self.assertTrue(results['gt_bboxes_labels'].dtype == np.int64)
self.assertTrue(results['gt_bboxes'].dtype == torch.float32)
self.assertTrue(results['gt_ignore_flags'].dtype == bool)
+
+
+class TestPPYOLOERandomCrop(unittest.TestCase):
+
+ def setUp(self):
+ """Setup the data info which are used in every test method.
+
+ TestCase calls functions in this order: setUp() -> testMethod() ->
+ tearDown() -> cleanUp()
+ """
+ self.results = {
+ 'img':
+ np.random.random((224, 224, 3)),
+ 'img_shape': (224, 224),
+ 'gt_bboxes_labels':
+ np.array([1, 2, 3], dtype=np.int64),
+ 'gt_bboxes':
+ np.array([[10, 10, 20, 20], [20, 20, 40, 40], [40, 40, 80, 80]],
+ dtype=np.float32),
+ 'gt_ignore_flags':
+ np.array([0, 0, 1], dtype=bool),
+ }
+
+ def test_transform(self):
+ transform = PPYOLOERandomCrop()
+ results = transform(copy.deepcopy(self.results))
+ self.assertTrue(results['gt_bboxes_labels'].shape[0] ==
+ results['gt_bboxes'].shape[0])
+ self.assertTrue(results['gt_bboxes_labels'].dtype == np.int64)
+ self.assertTrue(results['gt_bboxes'].dtype == np.float32)
+ self.assertTrue(results['gt_ignore_flags'].dtype == bool)
+
+ def test_transform_with_boxlist(self):
+ results = copy.deepcopy(self.results)
+ results['gt_bboxes'] = HorizontalBoxes(results['gt_bboxes'])
+
+ transform = PPYOLOERandomCrop()
+ results = transform(copy.deepcopy(results))
+ self.assertTrue(results['gt_bboxes_labels'].shape[0] ==
+ results['gt_bboxes'].shape[0])
+ self.assertTrue(results['gt_bboxes_labels'].dtype == np.int64)
+ self.assertTrue(results['gt_bboxes'].dtype == torch.float32)
+ self.assertTrue(results['gt_ignore_flags'].dtype == bool)
+
+
+class TestPPYOLOERandomDistort(unittest.TestCase):
+
+ def setUp(self):
+ """Setup the data info which are used in every test method.
+
+ TestCase calls functions in this order: setUp() -> testMethod() ->
+ tearDown() -> cleanUp()
+ """
+ self.results = {
+ 'img':
+ np.random.random((224, 224, 3)),
+ 'img_shape': (224, 224),
+ 'gt_bboxes_labels':
+ np.array([1, 2, 3], dtype=np.int64),
+ 'gt_bboxes':
+ np.array([[10, 10, 20, 20], [20, 20, 40, 40], [40, 40, 80, 80]],
+ dtype=np.float32),
+ 'gt_ignore_flags':
+ np.array([0, 0, 1], dtype=bool),
+ }
+
+ def test_transform(self):
+ # test assertion for invalid prob
+ with self.assertRaises(AssertionError):
+ transform = PPYOLOERandomDistort(
+ hue_cfg=dict(min=-18, max=18, prob=1.5))
+
+ # test assertion for invalid num_distort_func
+ with self.assertRaises(AssertionError):
+ transform = PPYOLOERandomDistort(num_distort_func=5)
+
+ transform = PPYOLOERandomDistort()
+ results = transform(copy.deepcopy(self.results))
+ self.assertTrue(results['img'].shape[:2] == (224, 224))
+ self.assertTrue(results['gt_bboxes_labels'].shape[0] ==
+ results['gt_bboxes'].shape[0])
+ self.assertTrue(results['gt_bboxes_labels'].dtype == np.int64)
+ self.assertTrue(results['gt_bboxes'].dtype == np.float32)
+ self.assertTrue(results['gt_ignore_flags'].dtype == bool)
+
+ def test_transform_with_boxlist(self):
+ results = copy.deepcopy(self.results)
+ results['gt_bboxes'] = HorizontalBoxes(results['gt_bboxes'])
+
+ transform = PPYOLOERandomDistort()
+ results = transform(copy.deepcopy(results))
+ self.assertTrue(results['img'].shape[:2] == (224, 224))
+ self.assertTrue(results['gt_bboxes_labels'].shape[0] ==
+ results['gt_bboxes'].shape[0])
+ self.assertTrue(results['gt_bboxes_labels'].dtype == np.int64)
+ self.assertTrue(results['gt_bboxes'].dtype == torch.float32)
+ self.assertTrue(results['gt_ignore_flags'].dtype == bool)
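
A minimal sketch of the pad_param bookkeeping the rewritten LetterResize test relies on: pad_param stores (top, bottom, left, right) padding, and reshape(-1, 2).sum(1) collapses it to per-axis totals (pad_h, pad_w). The values below are illustrative.

    import numpy as np

    pad_param = np.array([2., 3., 4., 5.])       # (top, bottom, left, right)
    pad_hw = pad_param.reshape(-1, 2).sum(1)     # [[2, 3], [4, 5]] -> [5, 9]
    assert (pad_hw == np.array([5., 9.])).all()  # (pad_h, pad_w)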
diff --git a/tests/test_datasets/test_utils.py b/tests/test_datasets/test_utils.py
index 136eda532..43c8e61f4 100644
--- a/tests/test_datasets/test_utils.py
+++ b/tests/test_datasets/test_utils.py
@@ -47,6 +47,36 @@ def test_yolov5_collate(self):
self.assertTrue(out['inputs'].shape == (2, 3, 10, 10))
self.assertTrue(out['data_samples'].shape == (8, 6))
+ def test_yolov5_collate_with_multi_scale(self):
+ rng = np.random.RandomState(0)
+
+ inputs = torch.randn((3, 10, 10))
+ data_samples = DetDataSample()
+ gt_instances = InstanceData()
+ bboxes = _rand_bboxes(rng, 4, 6, 8)
+ gt_instances.bboxes = HorizontalBoxes(bboxes, dtype=torch.float32)
+ labels = rng.randint(1, 2, size=len(bboxes))
+ gt_instances.labels = torch.LongTensor(labels)
+ data_samples.gt_instances = gt_instances
+
+ out = yolov5_collate([dict(inputs=inputs, data_samples=data_samples)],
+ use_ms_training=True)
+ self.assertIsInstance(out, dict)
+ self.assertTrue(out['inputs'][0].shape == (3, 10, 10))
+ self.assertTrue(out['data_samples'].shape == (4, 6))
+ self.assertIsInstance(out['inputs'], list)
+ self.assertIsInstance(out['data_samples'], torch.Tensor)
+
+ out = yolov5_collate(
+ [dict(inputs=inputs, data_samples=data_samples)] * 2,
+ use_ms_training=True)
+ self.assertIsInstance(out, dict)
+ self.assertTrue(out['inputs'][0].shape == (3, 10, 10))
+ self.assertTrue(out['data_samples'].shape == (8, 6))
+ self.assertIsInstance(out['inputs'], list)
+ self.assertIsInstance(out['data_samples'], torch.Tensor)
+
class TestBatchShapePolicy(unittest.TestCase):
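
A self-contained sketch of the collate contract the new test checks, assuming the mmyolo environment this diff targets: with use_ms_training=True, yolov5_collate keeps 'inputs' as a list of per-image CHW tensors (so a batch augment can resize them later) and stacks all ground truths into one (num_gts, 6) tensor whose first column is the image index within the batch.

    import torch
    from mmdet.structures import DetDataSample
    from mmdet.structures.bbox import HorizontalBoxes
    from mmengine.structures import InstanceData
    from mmyolo.datasets import yolov5_collate

    sample = DetDataSample()
    gt = InstanceData()
    gt.bboxes = HorizontalBoxes(torch.tensor([[0., 0., 4., 6.]]))
    gt.labels = torch.LongTensor([1])
    sample.gt_instances = gt

    out = yolov5_collate(
        [dict(inputs=torch.randn(3, 10, 10), data_samples=sample)] * 2,
        use_ms_training=True)
    assert isinstance(out['inputs'], list)      # one CHW tensor per image
    assert out['data_samples'].shape == (2, 6)  # (img_idx, label, 4 box coords)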
diff --git a/tests/test_models/test_data_preprocessor/test_data_preprocessor.py b/tests/test_models/test_data_preprocessor/test_data_preprocessor.py
index 85cdb7428..203660ae3 100644
--- a/tests/test_models/test_data_preprocessor/test_data_preprocessor.py
+++ b/tests/test_models/test_data_preprocessor/test_data_preprocessor.py
@@ -3,8 +3,13 @@
import torch
from mmdet.structures import DetDataSample
+from mmengine import MessageHub
+from mmyolo.models import PPYOLOEBatchRandomResize, PPYOLOEDetDataPreprocessor
from mmyolo.models.data_preprocessors import YOLOv5DetDataPreprocessor
+from mmyolo.utils import register_all_modules
+
+register_all_modules()
class TestYOLOv5DetDataPreprocessor(TestCase):
@@ -69,3 +74,51 @@ def test_forward(self):
# data_samples must be tensor
with self.assertRaises(AssertionError):
processor(data, training=True)
+
+
+class TestPPYOLOEDetDataPreprocessor(TestCase):
+
+ def test_batch_random_resize(self):
+ processor = PPYOLOEDetDataPreprocessor(
+ pad_size_divisor=32,
+ batch_augments=[
+ dict(
+ type='PPYOLOEBatchRandomResize',
+ random_size_range=(320, 480),
+ interval=1,
+ size_divisor=32,
+ random_interp=True,
+ keep_ratio=False)
+ ],
+ mean=[0., 0., 0.],
+ std=[255., 255., 255.],
+ bgr_to_rgb=True)
+ self.assertTrue(
+ isinstance(processor.batch_augments[0], PPYOLOEBatchRandomResize))
+ message_hub = MessageHub.get_instance('test_batch_random_resize')
+ message_hub.update_info('iter', 0)
+
+ # test training
+ data = {
+ 'inputs': [
+ torch.randint(0, 256, (3, 10, 11)),
+ torch.randint(0, 256, (3, 10, 11))
+ ],
+ 'data_samples':
+ torch.randint(0, 11, (18, 6)).float(),
+ }
+ out_data = processor(data, training=True)
+ batch_data_samples = out_data['data_samples']
+ self.assertIn('img_metas', batch_data_samples)
+ self.assertIn('bboxes_labels', batch_data_samples)
+ self.assertIsInstance(batch_data_samples['bboxes_labels'],
+ torch.Tensor)
+ self.assertIsInstance(batch_data_samples['img_metas'], list)
+
+ data = {
+ 'inputs': [torch.randint(0, 256, (3, 11, 10))],
+ 'data_samples': DetDataSample()
+ }
+        # data_samples must be a list
+ with self.assertRaises(TypeError):
+ processor(data, training=True)
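
The MessageHub seeding in the test above matters because PPYOLOEBatchRandomResize reads the current iteration from the message hub to decide when to draw a new target size; a minimal sketch of that pattern, with the interval gate paraphrased rather than copied from the implementation:

    from mmengine import MessageHub

    message_hub = MessageHub.get_instance('demo_batch_random_resize')
    message_hub.update_info('iter', 0)

    interval = 1  # from the batch_augments config above
    if message_hub.get_info('iter') % interval == 0:
        pass  # the augment would sample a new size-divisor-aligned shape here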
diff --git a/tests/test_models/test_dense_heads/test_ppyoloe_head.py b/tests/test_models/test_dense_heads/test_ppyoloe_head.py
index 15879bd8e..20e0c4576 100644
--- a/tests/test_models/test_dense_heads/test_ppyoloe_head.py
+++ b/tests/test_models/test_dense_heads/test_ppyoloe_head.py
@@ -2,6 +2,7 @@
from unittest import TestCase
import torch
+from mmengine import ConfigDict, MessageHub
from mmengine.config import Config
from mmengine.model import bias_init_with_prob
from mmengine.testing import assert_allclose
@@ -12,11 +13,14 @@
register_all_modules()
-class TestYOLOXHead(TestCase):
+class TestPPYOLOEHead(TestCase):
def setUp(self):
self.head_module = dict(
- type='PPYOLOEHeadModule', num_classes=4, in_channels=[32, 64, 128])
+ type='PPYOLOEHeadModule',
+ num_classes=4,
+ in_channels=[32, 64, 128],
+ featmap_strides=(8, 16, 32))
def test_init_weights(self):
head = PPYOLOEHead(head_module=self.head_module)
@@ -50,6 +54,7 @@ def test_predict_by_feat(self):
max_per_img=300))
head = PPYOLOEHead(head_module=self.head_module, test_cfg=test_cfg)
+ head.eval()
feat = [
torch.rand(1, in_channels, s // feat_size, s // feat_size)
for in_channels, feat_size in [[32, 8], [64, 16], [128, 32]]
@@ -71,3 +76,130 @@ def test_predict_by_feat(self):
cfg=test_cfg,
rescale=False,
with_nms=False)
+
+ def test_loss_by_feat(self):
+ message_hub = MessageHub.get_instance('test_ppyoloe_loss_by_feat')
+ message_hub.update_info('epoch', 1)
+
+ s = 256
+ img_metas = [{
+ 'img_shape': (s, s, 3),
+ 'batch_input_shape': (s, s),
+ 'scale_factor': 1,
+ }]
+
+ head = PPYOLOEHead(
+ head_module=self.head_module,
+ train_cfg=ConfigDict(
+ initial_epoch=31,
+ initial_assigner=dict(
+ type='BatchATSSAssigner',
+ num_classes=4,
+ topk=9,
+ iou_calculator=dict(type='mmdet.BboxOverlaps2D')),
+ assigner=dict(
+ type='BatchTaskAlignedAssigner',
+ num_classes=4,
+ topk=13,
+ alpha=1,
+ beta=6)))
+ head.train()
+
+ feat = []
+ for i in range(len(self.head_module['in_channels'])):
+ in_channel = self.head_module['in_channels'][i]
+ feat_size = self.head_module['featmap_strides'][i]
+ feat.append(
+ torch.rand(1, in_channel, s // feat_size, s // feat_size))
+
+ cls_scores, bbox_preds, bbox_dist_preds = head.forward(feat)
+
+ # Test that empty ground truth encourages the network to predict
+ # background
+ gt_instances = torch.empty((0, 6), dtype=torch.float32)
+
+ empty_gt_losses = head.loss_by_feat(cls_scores, bbox_preds,
+ bbox_dist_preds, gt_instances,
+ img_metas)
+ # When there is no truth, the cls loss should be nonzero but there
+ # should be no box loss.
+ empty_cls_loss = empty_gt_losses['loss_cls'].sum()
+ empty_box_loss = empty_gt_losses['loss_bbox'].sum()
+ empty_dfl_loss = empty_gt_losses['loss_dfl'].sum()
+ self.assertGreater(empty_cls_loss.item(), 0,
+ 'cls loss should be non-zero')
+ self.assertEqual(
+ empty_box_loss.item(), 0,
+ 'there should be no box loss when there are no true boxes')
+        self.assertEqual(
+            empty_dfl_loss.item(), 0,
+            'there should be no dfl loss when there are no true boxes')
+
+        # When truth is non-empty, both the cls and box losses should be
+        # nonzero for random inputs
+ head = PPYOLOEHead(
+ head_module=self.head_module,
+ train_cfg=ConfigDict(
+ initial_epoch=31,
+ initial_assigner=dict(
+ type='BatchATSSAssigner',
+ num_classes=4,
+ topk=9,
+ iou_calculator=dict(type='mmdet.BboxOverlaps2D')),
+ assigner=dict(
+ type='BatchTaskAlignedAssigner',
+ num_classes=4,
+ topk=13,
+ alpha=1,
+ beta=6)))
+ head.train()
+ gt_instances = torch.Tensor(
+ [[0., 0., 23.6667, 23.8757, 238.6326, 151.8874]])
+
+ one_gt_losses = head.loss_by_feat(cls_scores, bbox_preds,
+ bbox_dist_preds, gt_instances,
+ img_metas)
+ onegt_cls_loss = one_gt_losses['loss_cls'].sum()
+ onegt_box_loss = one_gt_losses['loss_bbox'].sum()
+ onegt_loss_dfl = one_gt_losses['loss_dfl'].sum()
+ self.assertGreater(onegt_cls_loss.item(), 0,
+ 'cls loss should be non-zero')
+ self.assertGreater(onegt_box_loss.item(), 0,
+ 'box loss should be non-zero')
+        self.assertGreater(onegt_loss_dfl.item(), 0,
+                           'dfl loss should be non-zero')
+
+        # test num_classes = 1
+ self.head_module['num_classes'] = 1
+ head = PPYOLOEHead(
+ head_module=self.head_module,
+ train_cfg=ConfigDict(
+ initial_epoch=31,
+ initial_assigner=dict(
+ type='BatchATSSAssigner',
+ num_classes=1,
+ topk=9,
+ iou_calculator=dict(type='mmdet.BboxOverlaps2D')),
+ assigner=dict(
+ type='BatchTaskAlignedAssigner',
+ num_classes=1,
+ topk=13,
+ alpha=1,
+ beta=6)))
+ head.train()
+ gt_instances = torch.Tensor(
+ [[0., 0., 23.6667, 23.8757, 238.6326, 151.8874]])
+ cls_scores, bbox_preds, bbox_dist_preds = head.forward(feat)
+
+ one_gt_losses = head.loss_by_feat(cls_scores, bbox_preds,
+ bbox_dist_preds, gt_instances,
+ img_metas)
+ onegt_cls_loss = one_gt_losses['loss_cls'].sum()
+ onegt_box_loss = one_gt_losses['loss_bbox'].sum()
+ onegt_loss_dfl = one_gt_losses['loss_dfl'].sum()
+ self.assertGreater(onegt_cls_loss.item(), 0,
+ 'cls loss should be non-zero')
+ self.assertGreater(onegt_box_loss.item(), 0,
+ 'box loss should be non-zero')
+        self.assertGreater(onegt_loss_dfl.item(), 0,
+                           'dfl loss should be non-zero')
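
The train_cfg used throughout these tests encodes PPYOLOE's assigner warm-up. A minimal sketch, assuming the head switches from BatchATSSAssigner to BatchTaskAlignedAssigner once the epoch read from MessageHub reaches initial_epoch; the helper below is hypothetical and only illustrates the gate:

    def pick_assigner(epoch: int, initial_epoch: int = 31) -> str:
        # hypothetical helper: epoch-gated assigner selection
        if epoch < initial_epoch:
            return 'BatchATSSAssigner'
        return 'BatchTaskAlignedAssigner'

    assert pick_assigner(1) == 'BatchATSSAssigner'   # the test seeds epoch=1
    assert pick_assigner(40) == 'BatchTaskAlignedAssigner'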
diff --git a/tools/dataset_converters/labelme2coco.py b/tools/dataset_converters/labelme2coco.py
index 20108b30e..e68b935db 100644
--- a/tools/dataset_converters/labelme2coco.py
+++ b/tools/dataset_converters/labelme2coco.py
@@ -171,6 +171,13 @@ def parse_labelme_to_coco(
category_to_id = all_classes_id
categories_labels = list(all_classes_id.keys())
+ # add class_ids and class_names to the categories list in coco_json
+ for class_name, class_id in category_to_id.items():
+ coco_json['categories'].append({
+ 'id': class_id,
+ 'name': class_name
+ })
+
# filter incorrect image file
img_file_list = [
img_file for img_file in Path(image_dir).iterdir()
@@ -283,7 +290,7 @@ def convert_labelme_to_coco(image_dir: str,
' words, like "1 Big house" -> "1 '
'Big-house".')
v, k = class_info
- all_classes_id.update({k: v})
+ all_classes_id.update({k: int(v)})
else:
all_classes_id = None
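
A minimal, self-contained sketch of what the two labelme2coco fixes produce, with hypothetical class names: 'categories' is now populated from category_to_id, and ids parsed from an "id name" classes file are cast with int(v) instead of remaining strings.

    coco_json = {'images': [], 'annotations': [], 'categories': []}
    category_to_id = {'house': int('1'), 'tree': int('2')}  # int(v) cast
    for class_name, class_id in category_to_id.items():
        coco_json['categories'].append({'id': class_id, 'name': class_name})
    assert coco_json['categories'][0] == {'id': 1, 'name': 'house'}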
diff --git a/tools/model_converters/ppyoloe_to_mmyolo.py b/tools/model_converters/ppyoloe_to_mmyolo.py
index fa8d22335..75c4af696 100644
--- a/tools/model_converters/ppyoloe_to_mmyolo.py
+++ b/tools/model_converters/ppyoloe_to_mmyolo.py
@@ -5,13 +5,13 @@
import torch
-def convert_bn(k):
+def convert_bn(k: str):
name = k.replace('._mean',
'.running_mean').replace('._variance', '.running_var')
return name
-def convert_repvgg(k):
+def convert_repvgg(k: str):
if '.conv2.conv1.' in k:
name = k.replace('.conv2.conv1.', '.conv2.rbr_dense.')
return name
@@ -22,111 +22,142 @@ def convert_repvgg(k):
return k
-def convert(src, dst):
- # TODO: add pretrained model convert
+def convert(src: str, dst: str, imagenet_pretrain: bool = False):
with open(src, 'rb') as f:
model = pickle.load(f)
new_state_dict = OrderedDict()
- for k, v in model.items():
- name = k
- if k.startswith('backbone.'):
- if '.stem.' in k:
+ if imagenet_pretrain:
+ for k, v in model.items():
+ if '@@' in k:
+ continue
+ if 'stem.' in k:
# backbone.stem.conv1.conv.weight
# -> backbone.stem.0.conv.weight
- org_ind = k.split('.')[2][-1]
+ org_ind = k.split('.')[1][-1]
new_ind = str(int(org_ind) - 1)
- name = k.replace('.stem.conv%s.' % org_ind,
- '.stem.%s.' % new_ind)
+ name = k.replace('stem.conv%s.' % org_ind,
+ 'stem.%s.' % new_ind)
else:
# backbone.stages.1.conv2.bn._variance
# -> backbone.stage2.0.conv2.bn.running_var
- org_stage_ind = k.split('.')[2]
+ org_stage_ind = k.split('.')[1]
new_stage_ind = str(int(org_stage_ind) + 1)
- name = k.replace('.stages.%s.' % org_stage_ind,
- '.stage%s.0.' % new_stage_ind)
+ name = k.replace('stages.%s.' % org_stage_ind,
+ 'stage%s.0.' % new_stage_ind)
name = convert_repvgg(name)
if '.attn.' in k:
name = name.replace('.attn.fc.', '.attn.fc.conv.')
name = convert_bn(name)
- elif k.startswith('neck.'):
- # fpn_stages
- if k.startswith('neck.fpn_stages.'):
- # neck.fpn_stages.0.0.conv1.conv.weight
- # -> neck.reduce_layers.2.0.conv1.conv.weight
- if k.startswith('neck.fpn_stages.0.0.'):
- name = k.replace('neck.fpn_stages.0.0.',
- 'neck.reduce_layers.2.0.')
- if '.spp.' in name:
- name = name.replace('.spp.conv.', '.spp.conv2.')
- # neck.fpn_stages.1.0.conv1.conv.weight
- # -> neck.top_down_layers.0.0.conv1.conv.weight
- elif k.startswith('neck.fpn_stages.1.0.'):
- name = k.replace('neck.fpn_stages.1.0.',
- 'neck.top_down_layers.0.0.')
- elif k.startswith('neck.fpn_stages.2.0.'):
- name = k.replace('neck.fpn_stages.2.0.',
- 'neck.top_down_layers.1.0.')
+ name = 'backbone.' + name
+
+ new_state_dict[name] = torch.from_numpy(v)
+ else:
+ for k, v in model.items():
+ name = k
+ if k.startswith('backbone.'):
+ if '.stem.' in k:
+ # backbone.stem.conv1.conv.weight
+ # -> backbone.stem.0.conv.weight
+ org_ind = k.split('.')[2][-1]
+ new_ind = str(int(org_ind) - 1)
+ name = k.replace('.stem.conv%s.' % org_ind,
+ '.stem.%s.' % new_ind)
else:
- raise NotImplementedError('Not implemented.')
- name = name.replace('.0.convs.', '.0.blocks.')
- elif k.startswith('neck.fpn_routes.'):
- # neck.fpn_routes.0.conv.weight
- # -> neck.upsample_layers.0.0.conv.weight
- index = k.split('.')[2]
- name = 'neck.upsample_layers.' + index + '.0.' + '.'.join(
- k.split('.')[-2:])
- name = name.replace('.0.convs.', '.0.blocks.')
- elif k.startswith('neck.pan_stages.'):
- # neck.pan_stages.0.0.conv1.conv.weight
- # -> neck.bottom_up_layers.1.0.conv1.conv.weight
- ind = k.split('.')[2]
- name = k.replace(
- 'neck.pan_stages.' + ind,
- 'neck.bottom_up_layers.' + ('0' if ind == '1' else '1'))
- name = name.replace('.0.convs.', '.0.blocks.')
- elif k.startswith('neck.pan_routes.'):
- # neck.pan_routes.0.conv.weight
- # -> neck.downsample_layers.0.conv.weight
- ind = k.split('.')[2]
- name = k.replace(
- 'neck.pan_routes.' + ind,
- 'neck.downsample_layers.' + ('0' if ind == '1' else '1'))
- name = name.replace('.0.convs.', '.0.blocks.')
+ # backbone.stages.1.conv2.bn._variance
+ # -> backbone.stage2.0.conv2.bn.running_var
+ org_stage_ind = k.split('.')[2]
+ new_stage_ind = str(int(org_stage_ind) + 1)
+ name = k.replace('.stages.%s.' % org_stage_ind,
+ '.stage%s.0.' % new_stage_ind)
+ name = convert_repvgg(name)
+ if '.attn.' in k:
+ name = name.replace('.attn.fc.', '.attn.fc.conv.')
+ name = convert_bn(name)
+ elif k.startswith('neck.'):
+ # fpn_stages
+ if k.startswith('neck.fpn_stages.'):
+ # neck.fpn_stages.0.0.conv1.conv.weight
+ # -> neck.reduce_layers.2.0.conv1.conv.weight
+ if k.startswith('neck.fpn_stages.0.0.'):
+ name = k.replace('neck.fpn_stages.0.0.',
+ 'neck.reduce_layers.2.0.')
+ if '.spp.' in name:
+ name = name.replace('.spp.conv.', '.spp.conv2.')
+ # neck.fpn_stages.1.0.conv1.conv.weight
+ # -> neck.top_down_layers.0.0.conv1.conv.weight
+ elif k.startswith('neck.fpn_stages.1.0.'):
+ name = k.replace('neck.fpn_stages.1.0.',
+ 'neck.top_down_layers.0.0.')
+ elif k.startswith('neck.fpn_stages.2.0.'):
+ name = k.replace('neck.fpn_stages.2.0.',
+ 'neck.top_down_layers.1.0.')
+ else:
+ raise NotImplementedError('Not implemented.')
+ name = name.replace('.0.convs.', '.0.blocks.')
+ elif k.startswith('neck.fpn_routes.'):
+ # neck.fpn_routes.0.conv.weight
+ # -> neck.upsample_layers.0.0.conv.weight
+ index = k.split('.')[2]
+ name = 'neck.upsample_layers.' + index + '.0.' + '.'.join(
+ k.split('.')[-2:])
+ name = name.replace('.0.convs.', '.0.blocks.')
+ elif k.startswith('neck.pan_stages.'):
+ # neck.pan_stages.0.0.conv1.conv.weight
+ # -> neck.bottom_up_layers.1.0.conv1.conv.weight
+ ind = k.split('.')[2]
+ name = k.replace(
+ 'neck.pan_stages.' + ind, 'neck.bottom_up_layers.' +
+ ('0' if ind == '1' else '1'))
+ name = name.replace('.0.convs.', '.0.blocks.')
+ elif k.startswith('neck.pan_routes.'):
+ # neck.pan_routes.0.conv.weight
+ # -> neck.downsample_layers.0.conv.weight
+ ind = k.split('.')[2]
+ name = k.replace(
+ 'neck.pan_routes.' + ind, 'neck.downsample_layers.' +
+ ('0' if ind == '1' else '1'))
+ name = name.replace('.0.convs.', '.0.blocks.')
+ else:
+                    raise NotImplementedError('Not implemented.')
+ name = convert_repvgg(name)
+ name = convert_bn(name)
+ elif k.startswith('yolo_head.'):
+ if ('anchor_points' in k) or ('stride_tensor' in k):
+ continue
+ if 'proj_conv' in k:
+ name = k.replace('yolo_head.proj_conv.',
+ 'bbox_head.head_module.proj_conv.')
+ else:
+ for org_key, rep_key in [
+ [
+ 'yolo_head.stem_cls.',
+ 'bbox_head.head_module.cls_stems.'
+ ],
+ [
+ 'yolo_head.stem_reg.',
+ 'bbox_head.head_module.reg_stems.'
+ ],
+ [
+ 'yolo_head.pred_cls.',
+ 'bbox_head.head_module.cls_preds.'
+ ],
+ [
+ 'yolo_head.pred_reg.',
+ 'bbox_head.head_module.reg_preds.'
+ ]
+ ]:
+ name = name.replace(org_key, rep_key)
+ name = name.split('.')
+ ind = name[3]
+ name[3] = str(2 - int(ind))
+ name = '.'.join(name)
+ name = convert_bn(name)
else:
- raise NotImplementedError('Not implement.')
- name = convert_repvgg(name)
- name = convert_bn(name)
- elif k.startswith('yolo_head.'):
- if ('anchor_points' in k) or ('stride_tensor' in k):
continue
- if 'proj_conv' in k:
- name = k.replace('yolo_head.proj_conv.',
- 'bbox_head.head_module.proj_conv.')
- else:
- for org_key, rep_key in [[
- 'yolo_head.stem_cls.',
- 'bbox_head.head_module.cls_stems.'
- ], ['yolo_head.stem_reg.', 'bbox_head.head_module.reg_stems.'],
- [
- 'yolo_head.pred_cls.',
- 'bbox_head.head_module.cls_preds.'
- ],
- [
- 'yolo_head.pred_reg.',
- 'bbox_head.head_module.reg_preds.'
- ]]:
- name = name.replace(org_key, rep_key)
- name = name.split('.')
- ind = name[3]
- name[3] = str(2 - int(ind))
- name = '.'.join(name)
- name = convert_bn(name)
- else:
- continue
- new_state_dict[name] = torch.from_numpy(v)
+ new_state_dict[name] = torch.from_numpy(v)
data = {'state_dict': new_state_dict}
torch.save(data, dst)
@@ -139,8 +170,14 @@ def main():
help='src ppyoloe model path')
parser.add_argument(
'--dst', default='mmppyoloe_plus_s.pt', help='save path')
+ parser.add_argument(
+ '--imagenet-pretrain',
+ action='store_true',
+ default=False,
+        help='Load a model pretrained on the ImageNet dataset, which '
+        'only contains backbone weights.')
args = parser.parse_args()
- convert(args.src, args.dst)
+ convert(args.src, args.dst, args.imagenet_pretrain)
if __name__ == '__main__':
    main()
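
As a quick check of the key-renaming helper this converter relies on, convert_bn (body copied from the diff) maps PaddlePaddle BN statistic names to their PyTorch equivalents; the example key is illustrative:

    def convert_bn(k: str):
        name = k.replace('._mean',
                         '.running_mean').replace('._variance', '.running_var')
        return name

    assert (convert_bn('backbone.stage2.0.conv2.bn._variance') ==
            'backbone.stage2.0.conv2.bn.running_var')

With the new flag, converting an ImageNet-pretrained backbone checkpoint would look something like python tools/model_converters/ppyoloe_to_mmyolo.py --src <backbone.pdparams> --dst <out.pt> --imagenet-pretrain, where both paths are placeholders.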