# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.nn as nn
import torch.nn.functional as F
from ..builder import LOSSES
from .utils import convert_to_one_hot, weight_reduce_loss
def sigmoid_focal_loss(pred,
target,
weight=None,
gamma=2.0,
alpha=0.25,
reduction='mean',
avg_factor=None):
r"""Sigmoid focal loss.
Args:
pred (torch.Tensor): The prediction with shape (N, \*).
target (torch.Tensor): The ground truth label of the prediction with
shape (N, \*).
weight (torch.Tensor, optional): Sample-wise loss weight with shape
(N, ). Defaults to None.
gamma (float): The gamma for calculating the modulating factor.
Defaults to 2.0.
alpha (float): A balanced form for Focal Loss. Defaults to 0.25.
reduction (str): The method used to reduce the loss.
Options are "none", "mean" and "sum". If reduction is 'none' ,
loss is same shape as pred and label. Defaults to 'mean'.
avg_factor (int, optional): Average factor that is used to average
the loss. Defaults to None.
Returns:
torch.Tensor: Loss.
"""
    assert pred.shape == \
        target.shape, 'pred and target should have the same shape.'
pred_sigmoid = pred.sigmoid()
target = target.type_as(pred)
pt = (1 - pred_sigmoid) * target + pred_sigmoid * (1 - target)
focal_weight = (alpha * target + (1 - alpha) *
(1 - target)) * pt.pow(gamma)
loss = F.binary_cross_entropy_with_logits(
pred, target, reduction='none') * focal_weight
if weight is not None:
assert weight.dim() == 1
weight = weight.float()
if pred.dim() > 1:
weight = weight.reshape(-1, 1)
loss = weight_reduce_loss(loss, weight, reduction, avg_factor)
return loss
@LOSSES.register_module()
class FocalLoss(nn.Module):
"""Focal loss.
Args:
gamma (float): Focusing parameter in focal loss.
Defaults to 2.0.
alpha (float): The parameter in balanced form of focal
loss. Defaults to 0.25.
reduction (str): The method used to reduce the loss into
a scalar. Options are "none" and "mean". Defaults to 'mean'.
loss_weight (float): Weight of loss. Defaults to 1.0.
"""
def __init__(self,
gamma=2.0,
alpha=0.25,
reduction='mean',
loss_weight=1.0):
super(FocalLoss, self).__init__()
self.gamma = gamma
self.alpha = alpha
self.reduction = reduction
self.loss_weight = loss_weight
def forward(self,
pred,
target,
weight=None,
avg_factor=None,
reduction_override=None):
r"""Sigmoid focal loss.
Args:
pred (torch.Tensor): The prediction with shape (N, \*).
target (torch.Tensor): The ground truth label of the prediction
with shape (N, \*), N or (N,1).
weight (torch.Tensor, optional): Sample-wise loss weight with shape
(N, \*). Defaults to None.
avg_factor (int, optional): Average factor that is used to average
the loss. Defaults to None.
reduction_override (str, optional): The method used to reduce the
loss into a scalar. Options are "none", "mean" and "sum".
Defaults to None.
Returns:
torch.Tensor: Loss.
"""
assert reduction_override in (None, 'none', 'mean', 'sum')
reduction = (
reduction_override if reduction_override else self.reduction)
if target.dim() == 1 or (target.dim() == 2 and target.shape[1] == 1):
target = convert_to_one_hot(target.view(-1, 1), pred.shape[-1])
loss_cls = self.loss_weight * sigmoid_focal_loss(
pred,
target,
weight,
gamma=self.gamma,
alpha=self.alpha,
reduction=reduction,
avg_factor=avg_factor)
return loss_cls
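For reference, this is roughly how the loss gets called in isolation (a minimal sketch; batch size 64 and the 9691 classes match my setup described further down, and the import path is the standard mmcls one, assumed here):
import torch

from mmcls.models.losses import FocalLoss  # standard mmcls import path (assumed)

criterion = FocalLoss(gamma=2.0, alpha=0.25, reduction='mean')

pred = torch.randn(64, 9691)             # (batch_size, num_classes) raw logits
target = torch.randint(0, 9691, (64, ))  # integer class labels

# forward() one-hot encodes the 1-D target to pred.shape[-1] columns and then
# reduces the element-wise focal loss according to `reduction` / `avg_factor`.
loss = criterion(pred, target)
print(loss.item())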
# Copyright (c) OpenMMLab. All rights reserved.
"""
Follows "Bag of Tricks and a Strong Baseline for Deep Person Re-identification" (CVPRW 2019, Oral).
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
from ..builder import HEADS
from .cls_head import ClsHead
def weights_init_kaiming(m):
classname = m.__class__.__name__
if classname.find('Linear') != -1:
nn.init.kaiming_normal_(m.weight, a=0, mode='fan_out')
nn.init.constant_(m.bias, 0.0)
elif classname.find('Conv') != -1:
nn.init.kaiming_normal_(m.weight, a=0, mode='fan_in')
if m.bias is not None:
nn.init.constant_(m.bias, 0.0)
elif classname.find('BatchNorm') != -1:
if m.affine:
nn.init.constant_(m.weight, 1.0)
nn.init.constant_(m.bias, 0.0)
def weights_init_classifier(m):
classname = m.__class__.__name__
if classname.find('Linear') != -1:
nn.init.normal_(m.weight, std=0.001)
        if m.bias is not None:
            nn.init.constant_(m.bias, 0.0)
@HEADS.register_module()
class FcBnnLinearClsHead(ClsHead):
"""Linear classifier head.
Args:
num_classes (int): Number of categories excluding the background
category.
in_channels (int): Number of channels in the input feature map.
init_cfg (dict | optional): The extra init config of layers.
Defaults to use dict(type='Normal', layer='Linear', std=0.01).
"""
def __init__(self,
num_classes,
in_channels,
init_cfg=dict(type='Normal', layer='Linear', std=0.01),
*args,
**kwargs):
super(FcBnnLinearClsHead, self).__init__(init_cfg=init_cfg, *args, **kwargs)
self.in_channels = in_channels
self.num_classes = num_classes
if self.num_classes <= 0:
raise ValueError(
f'num_classes={num_classes} must be a positive integer')
        # BNNeck: BatchNorm1d bottleneck, following the strong ReID baseline
self.bottleneck = nn.BatchNorm1d(self.in_channels)
self.bottleneck.bias.requires_grad_(False) # no shift
self.fc = nn.Linear(self.in_channels, self.num_classes, bias=False) # fc layer
self.bottleneck.apply(weights_init_kaiming)
self.fc.apply(weights_init_classifier)
def pre_logits(self, x):
if isinstance(x, tuple):
x = x[-1]
return x
def simple_test(self, x, softmax=True, post_process=True):
"""Inference without augmentation.
Args:
x (tuple[Tensor]): The input features.
Multi-stage inputs are acceptable but only the last stage will
be used to classify. The shape of every item should be
``(num_samples, in_channels)``.
softmax (bool): Whether to softmax the classification score.
post_process (bool): Whether to do post processing the
inference results. It will convert the output to a list.
Returns:
Tensor | list: The inference results.
- If no post processing, the output is a tensor with shape
``(num_samples, num_classes)``.
            - If post processing, the output is a multi-dimensional list of
float and the dimensions are ``(num_samples, num_classes)``.
"""
x = self.pre_logits(x)
cls_score = self.fc(x)
if softmax:
pred = (
F.softmax(cls_score, dim=1) if cls_score is not None else None)
else:
pred = cls_score
if post_process:
return self.post_process(pred)
else:
return pred
def forward_train(self, x, gt_label, **kwargs):
x = self.pre_logits(x)
feat = self.bottleneck(x) # normalize for angular softmax
cls_score = self.fc(feat)
        cat_score = torch.cat((x, cls_score), dim=1)  # concatenate the pre-BN feature with the fc scores
losses = self.loss(cat_score, gt_label, **kwargs)
return losses
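To make the data flow of forward_train concrete, here is a rough shape trace (a sketch only, not part of the head; in_channels=2048 assumes the globally pooled ResNet-50 feature, while the 9691 classes and batch size 64 match my setup described further down):
import torch
import torch.nn as nn

x = torch.randn(64, 2048)                     # pooled backbone feature (N, in_channels)
bottleneck = nn.BatchNorm1d(2048)
fc = nn.Linear(2048, 9691, bias=False)

feat = bottleneck(x)                          # (64, 2048), BN-normalised feature
cls_score = fc(feat)                          # (64, 9691), classification logits
cat_score = torch.cat((x, cls_score), dim=1)  # (64, 2048 + 9691) = (64, 11739)
# cat_score is the tensor that self.loss(...) receives as the prediction, so
# FocalLoss one-hot encodes the labels to cat_score.shape[-1] columns.
print(cat_score.shape)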
# Copyright (c) OpenMMLab. All rights reserved.
"""
Variant of the head above without the BNNeck (plain fc classifier).
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
from ..builder import HEADS
from .cls_head import ClsHead
@HEADS.register_module()
class FcLinearClsHead(ClsHead):
"""Linear classifier head.
Args:
num_classes (int): Number of categories excluding the background
category.
in_channels (int): Number of channels in the input feature map.
init_cfg (dict | optional): The extra init config of layers.
Defaults to use dict(type='Normal', layer='Linear', std=0.01).
"""
def __init__(self,
num_classes,
in_channels,
init_cfg=dict(type='Normal', layer='Linear', std=0.01),
*args,
**kwargs):
super(FcLinearClsHead, self).__init__(init_cfg=init_cfg, *args, **kwargs)
self.in_channels = in_channels
self.num_classes = num_classes
if self.num_classes <= 0:
raise ValueError(
f'num_classes={num_classes} must be a positive integer')
self.fc = nn.Linear(self.in_channels, self.num_classes) # fc layer
def pre_logits(self, x):
if isinstance(x, tuple):
x = x[-1]
return x
def simple_test(self, x, softmax=True, post_process=True):
"""Inference without augmentation.
Args:
x (tuple[Tensor]): The input features.
Multi-stage inputs are acceptable but only the last stage will
be used to classify. The shape of every item should be
``(num_samples, in_channels)``.
softmax (bool): Whether to softmax the classification score.
post_process (bool): Whether to do post processing the
inference results. It will convert the output to a list.
Returns:
Tensor | list: The inference results.
- If no post processing, the output is a tensor with shape
``(num_samples, num_classes)``.
            - If post processing, the output is a multi-dimensional list of
float and the dimensions are ``(num_samples, num_classes)``.
"""
x = self.pre_logits(x)
cls_score = self.fc(x)
if softmax:
pred = (
F.softmax(cls_score, dim=1) if cls_score is not None else None)
else:
pred = cls_score
if post_process:
return self.post_process(pred)
else:
return pred
def forward_train(self, x, gt_label, **kwargs):
x = self.pre_logits(x)
cls_score = self.fc(x)
        cat_score = torch.cat((x, cls_score), dim=1)  # concatenate the backbone feature with the fc scores
losses = self.loss(cat_score, gt_label, **kwargs)
return losses
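A quick standalone check of the inference path of these heads (shapes only; it assumes the heads above are importable in a working mmclassification environment and that in_channels=2048 matches the backbone feature):
import torch

head = FcLinearClsHead(num_classes=9691, in_channels=2048)  # or FcBnnLinearClsHead
head.eval()
with torch.no_grad():
    feats = (torch.randn(4, 2048), )  # tuple of stage features, as produced by the neck
    pred = head.simple_test(feats, softmax=True, post_process=False)
print(pred.shape)                     # torch.Size([4, 9691])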
I tried to port the BNNeck part of your code in reid-strong-baseline/modeling/baseline.py into mmclassification. Since I am working on fine-grained image classification, I use mmclassification's focal loss to handle the class imbalance.
The focal loss code from mmclassification is shown at the top of this post.
During training I keep the official default focal loss settings, i.e. gamma=2.0 and alpha=0.25. Besides that, the batch size is 64 and the number of classes is 9691.
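In config terms, these settings correspond roughly to the following (a sketch; only gamma, alpha, the batch size of 64 and the 9691 classes come from the description above, while in_channels=2048 and the other values are assumptions following standard mmclassification configs, with backbone/neck inherited from a _base_ config):
model = dict(
    head=dict(
        type='FcBnnLinearClsHead',  # or 'FcLinearClsHead' for the no-BNNeck run
        num_classes=9691,
        in_channels=2048,
        loss=dict(type='FocalLoss', gamma=2.0, alpha=0.25, loss_weight=1.0)))
data = dict(samples_per_gpu=64, workers_per_gpu=2)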
I found that, with ResNet-50 as the backbone, if I add the BNNeck and feed the features produced by the BNNeck's fc layer as pred into the sigmoid_focal_loss() function of the focal loss code above, training produces the following results:
The code with the BNNeck (my modification of the corresponding mmclassification head) is the FcBnnLinearClsHead shown above.
If I do not add the BNNeck and directly feed the output features of ResNet-50's fc layer as pred into the same sigmoid_focal_loss() function, training produces the following results:
The code without the BNNeck (again a modification of the corresponding mmclassification head) is the FcLinearClsHead shown above.
I carefully checked the ported BNNeck code and could not find a problem with it, so my question is: why is the focal loss trained with the BNNeck over 1000, such a huge difference compared with the version without the BNNeck? My conda environment is as follows:
Could you advise what is causing this, and how to properly combine the BNNeck with focal loss?