Support detection using only LiDAR data and modify the data reading logic #3101

Open

wants to merge 1 commit into base: main
154 changes: 154 additions & 0 deletions configs/_base_/datasets/lidar-only-3d.py
@@ -0,0 +1,154 @@
# dataset settings
dataset_type = 'CustomDataset'
data_root = 'data/Car/'
class_names = ['Car',]
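# point_cloud_range is [x_min, y_min, z_min, x_max, y_max, z_max] in the LiDAR frame.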
point_cloud_range = [-25, -16, 0, 23, 24, 120]
input_modality = dict(use_lidar=True)
metainfo = dict(classes=class_names)

# Example to use different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer from the prefix (LMDB and Memcached are not supported yet)

# data_root = 's3://openmmlab/datasets/detection3d/custom/'

# Method 2: use backend_args (named file_client_args in versions before 1.1.0)
# backend_args = dict(
# backend='petrel',
# path_mapping=dict({
# './data/': 's3://openmmlab/datasets/detection3d/',
# 'data/': 's3://openmmlab/datasets/detection3d/'
# }))
backend_args = None

db_sampler = dict(
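    # GT-sampling ("copy-paste") augmentation: paste objects from the pre-built
    # ground-truth database into each training scene.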
data_root=data_root,
info_path=data_root + 'custom_dbinfos_train.pkl',
rate=1.0,
    prepare=dict(filter_by_difficulty=[-1], filter_by_min_points=dict(Car=5)),
classes=class_names,
    sample_groups=dict(Car=15),
points_loader=dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=4,
use_dim=4,
backend_args=backend_args),
backend_args=backend_args)

train_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=4, # x, y, z, intensity
use_dim=4,
backend_args=backend_args),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(type='ObjectSample', db_sampler=db_sampler),
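    # Per-object augmentation: independently perturb each GT box and its points.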
dict(
type='ObjectNoise',
num_try=100,
translation_std=[1.0, 1.0, 0.5],
global_rot_range=[0.0, 0.0],
rot_range=[-0.78539816, 0.78539816]),
dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
dict(
type='GlobalRotScaleTrans',
rot_range=[-0.78539816, 0.78539816],
scale_ratio_range=[0.95, 1.05]),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(
type='Pack3DDetInputs',
keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=4,
use_dim=4,
backend_args=backend_args),
dict(type='Pack3DDetInputs', keys=['points'])
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=4,
use_dim=4,
backend_args=backend_args),
dict(type='Pack3DDetInputs', keys=['points'])
]
train_dataloader = dict(
batch_size=1,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=True),
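    # RepeatDataset makes each epoch iterate the underlying dataset twice (times=2).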
dataset=dict(
type='RepeatDataset',
times=2,
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file='custom_infos_train.pkl',
data_prefix=dict(pts='points'),
pipeline=train_pipeline,
modality=input_modality,
test_mode=False,
metainfo=metainfo,
# we use box_type_3d='LiDAR' in custom and nuscenes dataset
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
box_type_3d='LiDAR',
backend_args=backend_args)))
val_dataloader = dict(
batch_size=1,
num_workers=1,
persistent_workers=True,
drop_last=False,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(pts='points'),
ann_file='custom_infos_val.pkl',
pipeline=test_pipeline,
modality=input_modality,
test_mode=True,
metainfo=metainfo,
box_type_3d='LiDAR',
backend_args=backend_args))
test_dataloader = dict(
batch_size=1,
num_workers=1,
persistent_workers=True,
drop_last=False,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(pts='points'),
ann_file='custom_infos_test.pkl',
pipeline=test_pipeline,
modality=input_modality,
test_mode=True,
metainfo=metainfo,
box_type_3d='LiDAR',
backend_args=backend_args))
val_evaluator = dict(
type='CustomMetric',
ann_file=data_root + 'custom_infos_val.pkl',
metric='bbox',
backend_args=backend_args)
test_evaluator = dict(
type='CustomMetric',
ann_file=data_root + 'custom_infos_test.pkl',
metric='bbox',
backend_args=backend_args)

vis_backends = [dict(type='LocalVisBackend'), dict(type='TensorboardVisBackend')]
visualizer = dict(
type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
145 changes: 145 additions & 0 deletions configs/point_rcnn/point-rcnn_8xb2_lidar-only-3d.py
@@ -0,0 +1,145 @@
_base_ = [
'../_base_/datasets/lidar-only-3d.py', '../_base_/models/point_rcnn.py',
'../_base_/default_runtime.py', '../_base_/schedules/cyclic-40e.py'
]

# dataset settings (overriding the _base_ dataset config)
dataset_type = 'CustomDataset'
data_root = 'data/custom/'
class_names = ['Pedestrian', 'Cyclist', 'Car']
metainfo = dict(classes=class_names)
point_cloud_range = [0, -40, -3, 70.4, 40, 1]
input_modality = dict(use_lidar=True, use_camera=False)
backend_args = None

db_sampler = dict(
data_root=data_root,
info_path=data_root + 'custom_dbinfos_train.pkl',
rate=1.0,
prepare=dict(
filter_by_difficulty=[-1],
filter_by_min_points=dict(Car=5, Pedestrian=5, Cyclist=5)),
sample_groups=dict(Car=20, Pedestrian=15, Cyclist=15),
classes=class_names,
points_loader=dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=4,
use_dim=4,
backend_args=backend_args),
backend_args=backend_args)

train_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=4,
use_dim=4,
backend_args=backend_args),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectSample', db_sampler=db_sampler),
dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
dict(
type='ObjectNoise',
num_try=100,
translation_std=[1.0, 1.0, 0.5],
global_rot_range=[0.0, 0.0],
rot_range=[-0.78539816, 0.78539816]),
dict(
type='GlobalRotScaleTrans',
rot_range=[-0.78539816, 0.78539816],
scale_ratio_range=[0.95, 1.05]),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
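    # PointRCNN consumes a fixed-size input: sample the cloud to 16384 points.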
dict(type='PointSample', num_points=16384, sample_range=40.0),
dict(type='PointShuffle'),
dict(
type='Pack3DDetInputs',
keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=4,
use_dim=4,
backend_args=backend_args),
dict(
type='MultiScaleFlipAug3D',
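        # img_scale is required by this transform's signature but should have
        # no effect in a LiDAR-only setup, since no camera data is loaded.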
img_scale=(1333, 800),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(
type='GlobalRotScaleTrans',
rot_range=[0, 0],
scale_ratio_range=[1., 1.],
translation_std=[0, 0, 0]),
dict(type='RandomFlip3D'),
dict(
type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointSample', num_points=16384, sample_range=40.0)
]),
dict(type='Pack3DDetInputs', keys=['points'])
]
train_dataloader = dict(
batch_size=2,
num_workers=2,
dataset=dict(
type='RepeatDataset',
times=2,
dataset=dict(pipeline=train_pipeline, metainfo=metainfo)))
test_dataloader = dict(dataset=dict(pipeline=test_pipeline, metainfo=metainfo))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline, metainfo=metainfo))

lr = 0.001 # max learning rate
optim_wrapper = dict(optimizer=dict(lr=lr, betas=(0.95, 0.85)))
train_cfg = dict(by_epoch=True, max_epochs=80, val_interval=2)

# Default setting for scaling LR automatically
# - `enable` means enable scaling LR automatically
# or not by default.
# - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
auto_scale_lr = dict(enable=False, base_batch_size=16)
param_scheduler = [
# learning rate scheduler
# During the first 35 epochs, learning rate increases from 0 to lr * 10
# during the next 45 epochs, learning rate decreases from lr * 10 to
# lr * 1e-4
dict(
type='CosineAnnealingLR',
T_max=35,
eta_min=lr * 10,
begin=0,
end=35,
by_epoch=True,
convert_to_iter_based=True),
dict(
type='CosineAnnealingLR',
T_max=45,
eta_min=lr * 1e-4,
begin=35,
end=80,
by_epoch=True,
convert_to_iter_based=True),
# momentum scheduler
# During the first 35 epochs, momentum increases from 0 to 0.85 / 0.95
# during the next 45 epochs, momentum increases from 0.85 / 0.95 to 1
dict(
type='CosineAnnealingMomentum',
T_max=35,
eta_min=0.85 / 0.95,
begin=0,
end=35,
by_epoch=True,
convert_to_iter_based=True),
dict(
type='CosineAnnealingMomentum',
T_max=45,
eta_min=1,
begin=35,
end=80,
by_epoch=True,
convert_to_iter_based=True)
]
78 changes: 78 additions & 0 deletions docs/zh_cn/advanced_guides/datasets/lidar-only-3d.md
@@ -0,0 +1,78 @@
# LiDAR-Only Dataset

This page provides a tutorial on using a LiDAR-only dataset in MMDetection3D.

## Data preparation

As with the general approach to dataset preparation, it is recommended to symlink the dataset root to `$MMDETECTION3D/data`.

Before any processing, the folder structure should be organized as follows:

```
mmdetection3d
├── mmdet3d
├── tools
├── configs
├── data
│ ├── custom
│ │ ├── ImageSets
│ │ ├── testing
│ │ │ ├── velodyne
│ │ ├── training
│ │ │ ├── label_2
│ │ │ ├── velodyne
```

### Creating the custom dataset

To create the LiDAR-only point cloud data, first load the raw point clouds and generate the annotation files containing the object labels and bounding boxes. A per-object point cloud also needs to be generated for every individual training object in the custom dataset and stored as `.bin` files under `data/custom/custom_gt_database`. In addition, `.pkl` info files containing the data information need to be generated for the training and validation splits. Then create the final custom data by running:

```bash
mkdir ./data/custom/ && mkdir ./data/custom/ImageSets

python tools/create_data.py custom --root-path ./data/custom --out-dir ./data/custom --extra-tag custom --with-plane
```

Note that if your local disk does not have enough space to store the converted data, you can change `--out-dir` to any other path. If you have not prepared the `planes` data, remove the `--with-plane` flag.

After processing, the folder structure should be as follows:

```
custom
├── ImageSets
│ ├── test.txt
│ ├── train.txt
│ ├── trainval.txt
│ ├── val.txt
├── testing
│ ├── velodyne
├── training
│ ├── label_2
│ ├── velodyne
├── custom_gt_database
│ ├── xxxxx.bin
├── custom_infos_train.pkl
├── custom_infos_val.pkl
├── custom_dbinfos_train.pkl
├── custom_infos_test.pkl
├── custom_infos_trainval.pkl
```

- `custom_gt_database/xxxxx.bin`: point cloud data of the objects contained in the 3D bounding boxes of the training set.
- `custom_infos_train.pkl`: training dataset info. It is a dict with two keys: `metainfo` and `data_list`. `metainfo` contains basic information about the dataset, such as `categories`, `dataset` and `info_version`. `data_list` is a list of dicts, and each dict (referred to as `info` below) contains all the detailed information of a single sample (see the inspection sketch after this list).
  - info\['sample_idx'\]: index of this sample in the whole dataset.
  - info\['lidar_points'\]: a dict containing information related to the LiDAR points.
    - info\['lidar_points'\]\['lidar_path'\]: filename of the LiDAR point cloud file.
    - info\['lidar_points'\]\['num_pts_feats'\]: feature dimension of each point.
  - info\['instances'\]: a list of dicts, where each dict contains all the annotation information of a single instance. For the i-th instance:
    - info\['instances'\]\[i\]\['bbox'\]: a list of 4 numbers representing the 2D bounding box of the instance, in (x1, y1, x2, y2) order.
    - info\['instances'\]\[i\]\['bbox_3d'\]: a list of 7 numbers representing the 3D bounding box of the instance, in (x, y, z, l, h, w, yaw) order.
    - info\['instances'\]\[i\]\['bbox_label'\]: an integer indicating the 2D label of the instance; -1 means ignore.
    - info\['instances'\]\[i\]\['bbox_label_3d'\]: an integer indicating the 3D label of the instance; -1 means ignore.
    - info\['instances'\]\[i\]\['depth'\]: depth of the center of the 3D bounding box projected onto the related image plane.
    - info\['instances'\]\[i\]\['num_lidar_pts'\]: number of LiDAR points inside the 3D bounding box.
    - info\['instances'\]\[i\]\['center_2d'\]: projected 2D center of the 3D bounding box.
    - info\['instances'\]\[i\]\['difficulty'\]: difficulty level as defined officially for the custom dataset: easy, moderate, or hard.
    - info\['instances'\]\[i\]\['truncated'\]: a float from 0 (non-truncated) to 1 (truncated), where truncated refers to an object leaving the image boundary.
    - info\['instances'\]\[i\]\['occluded'\]: an integer in (0, 1, 2, 3) indicating the occlusion state: 0 = fully visible, 1 = partly occluded, 2 = largely occluded, 3 = unknown.
    - info\['instances'\]\[i\]\['group_ids'\]: used for multi-part objects.
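
A quick way to sanity-check the generated files is to load one and print a few fields. The following is a minimal sketch (not part of the conversion tooling; the path assumes the layout above):

```python
import pickle

# Load the generated training info file (path assumes the layout above).
with open('data/custom/custom_infos_train.pkl', 'rb') as f:
    infos = pickle.load(f)

print(infos['metainfo']['categories'])  # e.g. {'Car': 0}
info = infos['data_list'][0]            # first sample
print(info['sample_idx'])
print(info['lidar_points']['lidar_path'])
for i, inst in enumerate(info['instances']):
    print(i, inst['bbox_label_3d'], inst['bbox_3d'])
```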
2 changes: 2 additions & 0 deletions mmdet3d/datasets/__init__.py
@@ -2,6 +2,7 @@
from .dataset_wrappers import CBGSDataset
from .det3d_dataset import Det3DDataset
from .kitti_dataset import KittiDataset
from .custom_dataset import CustomDataset
from .lyft_dataset import LyftDataset
from .nuscenes_dataset import NuScenesDataset
# yapf: enable
@@ -26,6 +27,7 @@
from .waymo_dataset import WaymoDataset

__all__ = [
'CustomDataset',
'KittiDataset', 'CBGSDataset', 'NuScenesDataset', 'LyftDataset',
'ObjectSample', 'RandomFlip3D', 'ObjectNoise', 'GlobalRotScaleTrans',
'PointShuffle', 'ObjectRangeFilter', 'PointsRangeFilter',
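
The hunks above register `CustomDataset`, but the new `mmdet3d/datasets/custom_dataset.py` itself is not shown in this view. A minimal sketch of what such a class could look like, assuming it follows the `Det3DDataset` pattern used by `KittiDataset` (the class body and metainfo below are illustrative assumptions, not the PR's actual code):

```python
from mmdet3d.datasets import Det3DDataset
from mmdet3d.registry import DATASETS


@DATASETS.register_module()
class CustomDataset(Det3DDataset):
    """Sketch of a LiDAR-only dataset class (illustrative, not the PR's code)."""

    # Assumed class list, matching configs/_base_/datasets/lidar-only-3d.py.
    METAINFO = {'classes': ('Car', )}

    def parse_ann_info(self, info):
        # The base class already extracts gt_bboxes_3d / gt_labels_3d from
        # the info dict; a LiDAR-only dataset needs nothing camera-related.
        return super().parse_ann_info(info)
```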