Commit 259bf29

Add files via upload
1 parent e39fbb7 commit 259bf29

12 files changed (+1203, -0 lines)

4. mtmc/config.py

+12
@@ -0,0 +1,12 @@
# Tracking
# iou_thr 0.25 is better than 0
dist_thr = 0.837
min_len = 5

# Path
data_path = '../../dataset/AIC21_Track3/test/S06/'
weight_path = '../2. feat_ext/outputs/resnext_8/resnext_17.t7'
img_w = 320
img_h = 320
pad_color = (0, 0, 0)
num_ide_class = 184
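
The values above are read as plain module attributes by the other scripts in this step (for example, config.num_ide_class in nets/estimator.py). As a rough illustration of how img_w, img_h and pad_color would typically be used to letterbox a detection crop before feature extraction, here is a minimal sketch; the OpenCV call and the dummy crop are assumptions for illustration, not code from this commit.

import numpy as np
import cv2  # used here purely for illustration

import config

# Letterbox a (dummy) detection crop to the configured network input size
patch = np.zeros((160, 240, 3), dtype=np.uint8)  # stand-in for a cropped vehicle image
scale = min(config.img_w / patch.shape[1], config.img_h / patch.shape[0])
resized = cv2.resize(patch, (int(patch.shape[1] * scale), int(patch.shape[0] * scale)))
canvas = np.full((config.img_h, config.img_w, 3), config.pad_color, dtype=np.uint8)
canvas[:resized.shape[0], :resized.shape[1]] = resized  # remainder stays pad_color
print(canvas.shape, config.dist_thr, config.weight_path)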

4. mtmc/mtmc.py

+213
@@ -0,0 +1,213 @@
import copy
import pickle
import numpy as np
from utils import utils
import scipy.optimize as opt

# Read mtsc results
result_path = '../outputs/2. mtsc/mask_rcnn_0.2/det_small_mtsc_v2_res8_fut3_del3_post123'
save_path = '../outputs/3. mtmc/mask_rcnn_0.2/det_small_mtsc_v2_res8_fut3_del3_post123_final'
# result_path = '../outputs/2. mtsc/fairmot_affine_hsv_0.3/det_mtsc_v2_res8_fut3_del3_post123'
# save_path = '../outputs/3. mtmc/fairmot_affine_hsv_0.3/det_mtsc_v2_res8_fut3_del3_post123'
with open(result_path + '.pickle', 'rb') as f:
    mtsc_results = pickle.load(f)


# Measure distance between two trajectories
def measure_distance(a_track, b_track):
    # Rearrange trajectories: group boxes of track 'a' by camera, drop string markers
    a_track_per_cam_no_str = {}
    for box in a_track:
        if type(box) is not str:
            if box[1] not in a_track_per_cam_no_str.keys():
                a_track_per_cam_no_str[box[1]] = []
            a_track_per_cam_no_str[box[1]].append(box)
    b_track_no_str = [box for box in b_track if type(box) is not str]

    dist = []
    for cam in a_track_per_cam_no_str.keys():
        # Find best object scores
        a_track_obj_scores = [box[14] for box in a_track_per_cam_no_str[cam]]
        a_track_best_obj_scores = sorted(a_track_obj_scores)[-max(-5, round(len(a_track_obj_scores) * 0.2)):]
        b_track_obj_scores = [box[14] for box in b_track_no_str]
        b_track_best_obj_scores = sorted(b_track_obj_scores)[-max(-5, round(len(b_track_obj_scores) * 0.2)):]

        for a_track_best_obj_score in a_track_best_obj_scores:
            # Get box and feature
            a_feat = a_track_per_cam_no_str[cam][a_track_obj_scores.index(a_track_best_obj_score)][15]
            for b_track_best_obj_score in b_track_best_obj_scores:
                # Get box and feature, measure Euclidean distance between appearance features
                b_feat = b_track_no_str[b_track_obj_scores.index(b_track_best_obj_score)][15]
                dist.append(np.sqrt(np.sum((a_feat - b_feat) ** 2)))

    return np.min(dist)


# Generate pairwise distance matrix
def gen_dist_mat(a_mtmc, b_mtsc):
    # Create empty connection and distance matrices
    con_mat = np.zeros((len(a_mtmc), len(b_mtsc)))
    dist_mat = np.ones((len(a_mtmc), len(b_mtsc))) * 1000

    # Overlapping camera pairs (there are no overlapping cameras)
    overlap_cam_pairs = []

    a_1 = []
    for a_track in a_mtmc:
        if a_track[-1] == 'to_next_cam':
            a_1.append(copy.deepcopy(a_track))
    # Allowed frame gap: largest gap between consecutive exit/entry frames, with a 1.5x margin
    a_1_diff = []
    a_1 = sorted(a_1, key=lambda track: track[-2][2])
    for i in range(len(a_1) - 1):
        a_1_diff.append(a_1[i + 1][-2][2] - a_1[i][-2][2])
    a_1_max_diff = np.max(a_1_diff) * 1.5

    b_1 = []
    for b_track in b_mtsc:
        if b_track[-1] == 'to_previous_cam':
            b_1.append(copy.deepcopy(b_track))
    b_1_diff = []
    b_1 = sorted(b_1, key=lambda track: track[-2][2])
    for i in range(len(b_1) - 1):
        b_1_diff.append(b_1[i + 1][-2][2] - b_1[i][-2][2])
    b_1_max_diff = np.max(b_1_diff) * 1.5

    # Post process the distance matrix with the prior constraints
    for idx, a_track in enumerate(a_mtmc):
        # Get minimum and maximum frame numbers
        a_f_min = np.min([box[2] for box in a_track if type(box) is not str])
        a_f_max = np.max([box[2] for box in a_track if type(box) is not str])

        for jdx, b_track in enumerate(b_mtsc):
            # Get minimum and maximum frame numbers
            b_f_min = np.min([box[2] for box in b_track if type(box) is not str])
            b_f_max = np.max([box[2] for box in b_track if type(box) is not str])

            # Disconnect if the connection is not available
            if a_track[-1] == 'to_next_cam' and b_track[0] == 'from_previous_cam':
                min_f_num_diff = utils.get_min_f_num_diff(a_track, b_track, 1)
                if a_f_max + min_f_num_diff < b_f_min < a_f_max + min_f_num_diff + a_1_max_diff:
                    dist_mat[idx, jdx] = measure_distance(a_track, b_track)
                    con_mat[idx, jdx] = 1
            elif a_track[0] == 'from_next_cam' and b_track[-1] == 'to_previous_cam':
                min_f_num_diff = utils.get_min_f_num_diff(a_track, b_track, -1)
                if b_f_max + min_f_num_diff + b_1_max_diff > a_f_min > b_f_max + min_f_num_diff:
                    dist_mat[idx, jdx] = measure_distance(a_track, b_track)
                    con_mat[idx, jdx] = -1

    # Post process dist mat: drop pairs above the appearance distance threshold
    for idx in range(dist_mat.shape[0]):
        for jdx in range(dist_mat.shape[1]):
            dist_mat[idx, jdx] = dist_mat[idx, jdx] if dist_mat[idx, jdx] <= 1.175 else 1000

    return dist_mat, con_mat


def hungarian():
    # Set merge order
    print('Start MTMC Hungarian\n')
    merge_order = ['c041', 'c042', 'c043', 'c044', 'c045', 'c046']

    # Start mtmc
    a_mtmc, result = copy.deepcopy(mtsc_results['S06'][merge_order[0]]), []
    for c_idx in range(1, len(merge_order)):
        # Get current mtsc results
        print('S06_%s starts' % merge_order[c_idx])
        b_mtsc = copy.deepcopy(mtsc_results['S06'][merge_order[c_idx]])

        # Generate distance matrix between trajectories
        print('Distance matrix pair: %d x %d' % (len(a_mtmc), len(b_mtsc)))
        dist_mat, con_mat = gen_dist_mat(a_mtmc, b_mtsc)
        print('Num connections: %d / %d\n' % (np.sum(con_mat != 0), len(a_mtmc) * len(b_mtsc)))

        # Hungarian algorithm
        row_ind, col_ind = opt.linear_sum_assignment(dist_mat)
        row_ind, col_ind = list(row_ind), list(col_ind)

        # Check distance between connections
        con_row_ind, con_col_ind = [], []
        for r_idx in range(len(row_ind)):
            if dist_mat[row_ind[r_idx], col_ind[r_idx]] < 1000:
                # Merge trajectories 'a' and 'b'
                if con_mat[row_ind[r_idx], col_ind[r_idx]] == 1:
                    a_mtmc[row_ind[r_idx]] = copy.deepcopy(a_mtmc[row_ind[r_idx]]) \
                                             + copy.deepcopy(b_mtsc[col_ind[r_idx]])
                elif con_mat[row_ind[r_idx], col_ind[r_idx]] == -1:
                    a_mtmc[row_ind[r_idx]] = copy.deepcopy(b_mtsc[col_ind[r_idx]]) \
                                             + copy.deepcopy(a_mtmc[row_ind[r_idx]])

                # Record
                con_row_ind.append(row_ind[r_idx])
                con_col_ind.append(col_ind[r_idx])

        # Finish trajectories that were not matched in this round
        fin_idx = [r for r in range(len(a_mtmc)) if r not in con_row_ind]
        for idx, f_idx in enumerate(fin_idx):
            result.append(copy.deepcopy(a_mtmc.pop(f_idx - idx)))

        # Start new trajectories from unmatched 'b' tracks
        for c in range(len(b_mtsc)):
            if c not in con_col_ind:
                a_mtmc.append(copy.deepcopy(b_mtsc[c]))

    # Final merge
    result += copy.deepcopy(a_mtmc)

    # # Post process (do not post process; recall becomes too low)
    # result_post = []
    # for track in result:
    #     cams = list(set([box[1] for box in track if type(box) is not str]))
    #     if 2 <= len(cams):
    #         result_post.append(track)

    return result


def map_obj_id(result):
    result_new_id = copy.deepcopy(result)
    for t_idx, track in enumerate(result):
        for b_idx, box in enumerate(track):
            if type(box) is not str:
                result_new_id[t_idx][b_idx][3] = t_idx
    print('Num ID: %d' % len(result_new_id))

    return result_new_id


def write_txt(result):
    # Open txt file, write boxes, close
    num_box = 0
    mtmc_txt = open(save_path + '.txt', 'w')
    for track in result:
        for box in track:
            if type(box) is not str:
                if 0.1 <= box[14]:
                    # Decode
                    left, top, w, h, img_w, img_h = box[4], box[5], box[6], box[7], box[8], box[9]

                    # Expand the box by 20%
                    new_w, new_h = w * 1.2, h * 1.2
                    # new_w, new_h = w, h

                    # Calculate new left and top
                    c_x, c_y = left + w / 2, top + h / 2
                    new_left, new_top = c_x - new_w / 2, c_y - new_h / 2
                    new_right, new_bot = new_left + new_w, new_top + new_h

                    # Clip to image size
                    new_left, new_top = max(0, new_left), max(0, new_top)
                    new_right, new_bot = min(img_w, new_right), min(img_h, new_bot)
                    new_w, new_h = new_right - new_left, new_bot - new_top

                    # Write
                    mtmc_txt.write('%d %d %d %d %d %d %d %d %d\n'
                                   % (int(box[1][1:]), box[3], box[2], new_left, new_top, new_w, new_h, 0, 0))
                    num_box += 1
    mtmc_txt.close()
    print('Num Box: %d' % num_box)


if __name__ == "__main__":
    result = hungarian()
    result = map_obj_id(result)
    write_txt(result)
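
The merge above hinges on the sentinel value 1000: gen_dist_mat assigns it to every pair ruled out by the frame-gap constraints or the 1.175 appearance threshold, linear_sum_assignment then returns a minimum-cost one-to-one assignment over the whole matrix, and any assignment whose cost is still 1000 is simply left unmerged. A self-contained sketch of that pattern on a toy matrix (the numbers are invented for illustration):

import numpy as np
import scipy.optimize as opt

# Toy appearance-distance matrix: rows are already-merged tracks ('a'), columns are new tracks ('b');
# 1000 marks pairs ruled out by the constraints or the distance threshold.
dist_mat = np.array([[0.42, 1000.0, 0.90],
                     [1000.0, 0.65, 1000.0],
                     [1000.0, 1000.0, 1000.0]])

row_ind, col_ind = opt.linear_sum_assignment(dist_mat)
for r, c in zip(row_ind, col_ind):
    if dist_mat[r, c] < 1000:
        print('merge a[%d] with b[%d] (dist %.2f)' % (r, c, dist_mat[r, c]))
    else:
        print('a[%d] stays unmatched' % r)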

4. mtmc/nets/estimator.py

+35
@@ -0,0 +1,35 @@
import torch.nn as nn

import config
from nets.resnext import resnext50_32x4d


class Estimator(nn.Module):
    def __init__(self):
        super(Estimator, self).__init__()
        self.act = nn.ReLU(inplace=True)

        # Construct backbone network (ResNeXt-50), global average pooling, dropout
        self.ext = resnext50_32x4d()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.drop = nn.Dropout(0.5)

        # BNNeck
        self.bnn = nn.BatchNorm2d(2048)
        nn.init.constant_(self.bnn.weight, 1)
        nn.init.constant_(self.bnn.bias, 0)
        self.bnn.bias.requires_grad_(False)

        # IDE (identity classification) head
        self.fc_ide = nn.Linear(2048, config.num_ide_class, bias=False)

    def forward(self, patch):
        # Extract appearance feature
        feat_tri = self.avg_pool(self.ext(patch))

        # BNNeck
        feat_infer = self.bnn(self.drop(feat_tri))

        # IDE
        feat_ide = feat_infer.view(feat_infer.size(0), -1)
        ide = self.fc_ide(feat_ide)

        return feat_tri, feat_infer, ide
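
For a quick sanity check of the shapes Estimator returns, a dummy forward pass can be run as below. The 320x320 input size follows img_w and img_h in config.py, the batch size of two is arbitrary, and constructing the backbone downloads the pretrained ResNeXt checkpoint; this is only a sketch, not part of the commit.

import torch

import config
from nets.estimator import Estimator

model = Estimator().eval()
with torch.no_grad():
    patch = torch.randn(2, 3, config.img_h, config.img_w)  # dummy batch of two crops
    feat_tri, feat_infer, ide = model(patch)

print(feat_tri.shape)    # (2, 2048, 1, 1): pooled backbone feature (triplet branch)
print(feat_infer.shape)  # (2, 2048, 1, 1): BNNeck feature used at inference
print(ide.shape)         # (2, config.num_ide_class): identity logits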

4. mtmc/nets/resnext.py

+133
@@ -0,0 +1,133 @@
import torch.nn as nn
from torch.hub import load_state_dict_from_url


def conv1x1(in_planes, out_planes, stride=1):
    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)


def conv3x3(in_planes, out_planes, stride=1, groups=1):
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, groups=groups, bias=False)


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, base_width=64):
        super(Bottleneck, self).__init__()

        # Set parameters
        self.stride = stride
        width = int(planes * (base_width / 64.)) * groups

        # Convolutions
        self.conv1 = conv1x1(inplanes, width)
        self.bn1 = nn.BatchNorm2d(width)
        self.conv2 = conv3x3(width, width, stride, groups)
        self.bn2 = nn.BatchNorm2d(width)
        self.conv3 = conv1x1(width, planes * self.expansion)
        self.bn3 = nn.BatchNorm2d(planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)

        # Others
        self.downsample = downsample

    def forward(self, x):
        # Convolutions
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)
        out = self.conv3(out)
        out = self.bn3(out)

        # Skip connection, final activation
        identity = self.downsample(x) if self.downsample is not None else x
        out = self.relu(out + identity)

        return out


class ResNet(nn.Module):
    def __init__(self, block, layers, groups=1, width_per_group=64):
        super(ResNet, self).__init__()

        self.dilation = 1
        self.inplanes = 64

        self.groups = groups
        self.base_width = width_per_group
        self.relu = nn.ReLU(inplace=True)

        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(self.inplanes)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
        for m in self.modules():
            if isinstance(m, Bottleneck):
                nn.init.constant_(m.bn3.weight, 0)

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None

        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                nn.BatchNorm2d(planes * block.expansion),
            )

        # First layer
        layers = [block(self.inplanes, planes, stride, downsample, self.groups, self.base_width)]

        # Other layers
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes, groups=self.groups, base_width=self.base_width))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        return x


def resnext50_32x4d(**kwargs):
    kwargs['groups'] = 32
    kwargs['width_per_group'] = 4

    # Model
    model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)

    # Get pretrained state dictionary and drop the classifier weights
    pretrained = load_state_dict_from_url('https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth')
    pretrained.pop('fc.weight')
    pretrained.pop('fc.bias')

    # Update and load
    model_state_dict = model.state_dict()
    model_state_dict.update(pretrained)
    model.load_state_dict(model_state_dict)

    return model
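
Because the classifier weights are popped from the pretrained state dict, the returned model ends at the last convolutional stage and outputs a stride-32 feature map rather than class logits. A minimal shape check, assuming the 320x320 input size from config.py:

import torch

from nets.resnext import resnext50_32x4d

backbone = resnext50_32x4d().eval()  # downloads the ImageNet checkpoint on first use
with torch.no_grad():
    out = backbone(torch.randn(1, 3, 320, 320))
print(out.shape)  # torch.Size([1, 2048, 10, 10]): stride-32 feature map, no pooling or classifier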
