# configs/cornernet/cornernet_hourglass104_8xb6-210e-mstest_coco.py

# Parent configs this file builds on: the default runtime config and the
# COCO detection dataset config.
_base_ = [
    '../_base_/default_runtime.py',
    '../_base_/datasets/coco_detection.py',
]

# Normalization settings handed to the model; the crop/pad transforms in the
# pipelines of this file read `mean`, `std` and `bgr_to_rgb` from here too.
data_preprocessor = {
    'type': 'DetDataPreprocessor',
    'mean': [123.675, 116.28, 103.53],
    'std': [58.395, 57.12, 57.375],
    'bgr_to_rgb': True,
}

# model settings
# CornerNet detector: HourglassNet backbone, no neck, CornerHead as the
# bounding-box head.
model = {
    'type': 'CornerNet',
    'data_preprocessor': data_preprocessor,
    'backbone': {
        'type': 'HourglassNet',
        'downsample_times': 5,
        'num_stacks': 2,
        'stage_channels': [256, 256, 384, 384, 384, 512],
        'stage_blocks': [2, 2, 2, 2, 2, 4],
        'norm_cfg': {'type': 'BN', 'requires_grad': True},
    },
    'neck': None,
    'bbox_head': {
        'type': 'CornerHead',
        'num_classes': 80,
        'in_channels': 256,
        'num_feat_levels': 2,
        'corner_emb_channels': 1,
        # Three loss terms: corner heatmap, associative embedding
        # (pull/push) and corner offset.
        'loss_heatmap': {
            'type': 'GaussianFocalLoss',
            'alpha': 2.0,
            'gamma': 4.0,
            'loss_weight': 1,
        },
        'loss_embedding': {
            'type': 'AssociativeEmbeddingLoss',
            'pull_weight': 0.10,
            'push_weight': 0.10,
        },
        'loss_offset': {
            'type': 'SmoothL1Loss',
            'beta': 1.0,
            'loss_weight': 1,
        },
    },
    # training and testing settings
    'train_cfg': None,
    'test_cfg': {
        'corner_topk': 100,
        'local_maximum_kernel': 3,
        'distance_threshold': 0.5,
        'score_thr': 0.05,
        'max_per_img': 100,
        'nms': {
            'type': 'soft_nms',
            'iou_threshold': 0.5,
            'method': 'gaussian',
        },
    },
}

# data settings
# Training pipeline: load image and boxes, photometric distortion, random
# center crop/pad, fixed-size resize, random flip, then packing.
train_pipeline = [
    {'type': 'LoadImageFromFile', 'backend_args': _base_.backend_args},
    {'type': 'LoadAnnotations', 'with_bbox': True},
    {
        'type': 'PhotoMetricDistortion',
        'brightness_delta': 32,
        'contrast_range': (0.5, 1.5),
        'saturation_range': (0.5, 1.5),
        'hue_delta': 18,
    },
    {
        # During training the crops get padded to squares, yet they can
        # still end up smaller than crop_size.
        'type': 'RandomCenterCropPad',
        'crop_size': (511, 511),
        'ratios': (0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3),
        'test_mode': False,
        'test_pad_mode': None,
        # Same normalization statistics as the data preprocessor.
        'mean': data_preprocessor['mean'],
        'std': data_preprocessor['std'],
        # The image data has not been converted to RGB at this point; the
        # flag just mirrors the preprocessor's `bgr_to_rgb` setting.
        'to_rgb': data_preprocessor['bgr_to_rgb'],
    },
    # Guarantee that the output always has exactly crop_size.
    {'type': 'Resize', 'scale': (511, 511), 'keep_ratio': False},
    {'type': 'RandomFlip', 'prob': 0.5},
    {'type': 'PackDetInputs'},
]

# Validation/test pipeline: the image is loaded as float32 and only padded
# (test_mode=True), never cropped or resized.
test_pipeline = [
    {
        'type': 'LoadImageFromFile',
        'to_float32': True,
        'backend_args': _base_.backend_args,
    },
    # No Resize step is needed here.
    {
        'type': 'RandomCenterCropPad',
        'crop_size': None,
        'ratios': None,
        'border': None,
        'test_mode': True,
        'test_pad_mode': ['logical_or', 127],
        # Same normalization statistics as the data preprocessor.
        'mean': data_preprocessor['mean'],
        'std': data_preprocessor['std'],
        # The image data has not been converted to RGB at this point; the
        # flag just mirrors the preprocessor's `bgr_to_rgb` setting.
        'to_rgb': data_preprocessor['bgr_to_rgb'],
    },
    {'type': 'LoadAnnotations', 'with_bbox': True},
    {
        'type': 'PackDetInputs',
        # `border` is carried along in the packed meta information.
        'meta_keys': (
            'img_id',
            'img_path',
            'ori_shape',
            'img_shape',
            'border',
        ),
    },
]

# Dataloader settings defined in this file: the training loader sets batch
# size, worker count, batch sampler and pipeline; the validation loader
# sets only its pipeline.
train_dataloader = {
    'batch_size': 6,
    'num_workers': 3,
    'batch_sampler': None,
    'dataset': {'pipeline': train_pipeline},
}
val_dataloader = {'dataset': {'pipeline': test_pipeline}}
# Testing reuses the very same dict object as validation.
test_dataloader = val_dataloader

# optimizer
# Adam with lr=0.0005; gradient clipping uses max_norm=35 and norm_type=2.
optim_wrapper = {
    'type': 'OptimWrapper',
    'optimizer': {'type': 'Adam', 'lr': 0.0005},
    'clip_grad': {'max_norm': 35, 'norm_type': 2},
}

# Number of training epochs; used by both the LR schedule and the train loop.
max_epochs = 210

# learning rate
param_scheduler = [
    # Iteration-based (by_epoch=False) LinearLR covering iterations 0-500,
    # starting from a factor of 1/3.
    {
        'type': 'LinearLR',
        'start_factor': 1.0 / 3,
        'by_epoch': False,
        'begin': 0,
        'end': 500,
    },
    # Epoch-based MultiStepLR over the whole run: gamma 0.1 at epoch 180.
    {
        'type': 'MultiStepLR',
        'begin': 0,
        'end': max_epochs,
        'by_epoch': True,
        'milestones': [180],
        'gamma': 0.1,
    },
]

# Loop configs: epoch-based training that validates every epoch.
train_cfg = {
    'type': 'EpochBasedTrainLoop',
    'max_epochs': max_epochs,
    'val_interval': 1,
}
val_cfg = {'type': 'ValLoop'}
test_cfg = {'type': 'TestLoop'}

# NOTE: `auto_scale_lr` is used for automatic LR scaling;
# users should leave its values untouched.
# The base batch size corresponds to 8 GPUs x 6 samples per GPU.
auto_scale_lr = {'base_batch_size': 48}

# Test-time-augmentation model wrapper; its `tta_cfg` uses gaussian soft-NMS
# and max_per_img=100.
tta_model = {
    'type': 'DetTTAModel',
    'tta_cfg': {
        'nms': {
            'type': 'soft_nms',
            'iou_threshold': 0.5,
            'method': 'gaussian',
        },
        'max_per_img': 100,
    },
}

# Test-time-augmentation pipeline: the first transform group offers a
# flipped (prob=1) and an unflipped (prob=0) variant; the remaining groups
# pad the image, load annotations and pack the inputs.
tta_pipeline = [
    {
        'type': 'LoadImageFromFile',
        'to_float32': True,
        'backend_args': _base_.backend_args,
    },
    {
        'type': 'TestTimeAug',
        'transforms': [
            [
                # Keep ``RandomFlip`` ahead of ``RandomCenterCropPad``;
                # with the opposite order the bounding box coordinates
                # cannot be recovered correctly after flipping.
                {'type': 'RandomFlip', 'prob': 1.},
                {'type': 'RandomFlip', 'prob': 0.},
            ],
            [
                {
                    'type': 'RandomCenterCropPad',
                    'crop_size': None,
                    'ratios': None,
                    'border': None,
                    'test_mode': True,
                    'test_pad_mode': ['logical_or', 127],
                    # Same normalization statistics as the preprocessor.
                    'mean': data_preprocessor['mean'],
                    'std': data_preprocessor['std'],
                    # The image data has not been converted to RGB at this
                    # point; the flag mirrors `bgr_to_rgb`.
                    'to_rgb': data_preprocessor['bgr_to_rgb'],
                },
            ],
            [{'type': 'LoadAnnotations', 'with_bbox': True}],
            [
                {
                    'type': 'PackDetInputs',
                    'meta_keys': (
                        'img_id',
                        'img_path',
                        'ori_shape',
                        'img_shape',
                        'flip',
                        'flip_direction',
                        'border',
                    ),
                },
            ],
        ],
    },
]