# https://github.com/meidachen/STPLS3D/blob/main/HAIS/data/prepare_data_inst_instance_stpls3d.py
import glob
import json
import math
import os
import random

import numpy as np
import pandas as pd
import torch


def splitPointCloud(cloud, size=50.0, stride=50):
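    """Tile the cloud in the x-y plane into size x size blocks placed stride apart.

    With the default size == stride the blocks tile the cloud edge to edge
    (points exactly on a boundary fall into both neighbors); a smaller stride
    would produce overlapping blocks.
    """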
    limitMax = np.amax(cloud[:, 0:3], axis=0)
    width = int(np.ceil((limitMax[0] - size) / stride)) + 1
    depth = int(np.ceil((limitMax[1] - size) / stride)) + 1
    cells = [(x * stride, y * stride) for x in range(width) for y in range(depth)]
    blocks = []
    for (x, y) in cells:
        xcond = (cloud[:, 0] <= x + size) & (cloud[:, 0] >= x)
        ycond = (cloud[:, 1] <= y + size) & (cloud[:, 1] >= y)
        cond = xcond & ycond
        block = cloud[cond, :]
        blocks.append(block)
    return blocks


def getFiles(files, fileSplit):
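    """Return the paths in files whose numeric filename prefix is in fileSplit."""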
    res = []
    for filePath in files:
        name = os.path.basename(filePath)
        num = name[:2] if name[:2].isdigit() else name[:1]
        if int(num) in fileSplit:
            res.append(filePath)
    return res


def dataAug(file, semanticKeep):
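    """Rotate the cloud by a random angle around the z-axis and keep only the
    points whose semantic label (column 6) is in semanticKeep."""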
    points = pd.read_csv(file, header=None).values
    angle = random.randint(1, 359)
    angleRadians = math.radians(angle)
    rotationMatrix = np.array([[math.cos(angleRadians), -math.sin(angleRadians), 0],
                               [math.sin(angleRadians), math.cos(angleRadians), 0],
                               [0, 0, 1]])
    points[:, :3] = points[:, :3].dot(rotationMatrix)
    pointsKept = points[np.isin(points[:, 6], semanticKeep)]
    return pointsKept


def preparePthFiles(files, split, outPutFolder, AugTimes=0):
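    """Cut each .txt scene into 50 x 50 blocks and save them as .pth files.

    Each input row is expected to be (x, y, z, r, g, b, semantic, instance).
    For the train and val splits every kept block is saved as a
    (coords, colors, sem_labels, instance_labels) tuple; for the test split
    only (coords, colors) is saved. Each scene is additionally processed
    AugTimes times with random-rotation augmentation.
    """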
    # save the coordinates so that we can merge the data back into a single
    # scene after segmentation, for visualization
    outJsonPath = os.path.join(outPutFolder, 'coordShift.json')
    coordShift = {}
    # increase the z range of a block if it is smaller than this, to work
    # around an issue where spconv may crash during voxelization
    zThreshold = 6

    # map the relevant semantic classes 0-14 to themselves and all other
    # labels to the ignore value -100
    remapper = np.ones(150) * (-100)
    for i, x in enumerate([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]):
        remapper[x] = i
    # map instances to -100 based on their semantic label; the leading -100
    # replaces class 0 (ground), so ground points carry no instance label
    # (swap a class for -100 here to ignore it for instance segmentation)
    remapper_disableInstanceBySemantic = np.ones(150) * (-100)
    for i, x in enumerate([-100, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]):
        remapper_disableInstanceBySemantic[x] = i

    # only augment data for these classes
    semanticKeep = [0, 2, 3, 7, 8, 9, 12, 13]

    counter = 0
    for file in files:

        for AugTime in range(AugTimes + 1):
            if AugTime == 0:
                points = pd.read_csv(file, header=None).values
            else:
                points = dataAug(file, semanticKeep)
            # str.strip('.txt') would remove a *set* of characters, not the
            # extension, so use os.path.splitext to drop it
            name = os.path.splitext(os.path.basename(file))[0] + '_%d' % AugTime

            if split != 'test':
                coordShift['globalShift'] = list(points[:, :3].min(0))
            points[:, :3] = points[:, :3] - points[:, :3].min(0)

            blocks = splitPointCloud(points, size=50, stride=50)
            for blockNum, block in enumerate(blocks):
                if len(block) > 10000:
                    outFilePath = os.path.join(outPutFolder,
                                               name + str(blockNum) + '_inst_nostuff.pth')
                    zRange = block[:, 2].max(0) - block[:, 2].min(0)
                    if zRange < zThreshold:
                        # pad the block with a single ignored point so that
                        # its z range reaches zThreshold
                        paddingPoint = [
                            block[:, 0].mean(0), block[:, 1].mean(0),
                            block[:, 2].max(0) + (zThreshold - zRange),
                            block[:, 3].mean(0), block[:, 4].mean(0),
                            block[:, 5].mean(0), -100, -100
                        ]
                        block = np.append(block, [paddingPoint], axis=0)
                        print('range z is smaller than threshold')
                        print(name + str(blockNum) + '_inst_nostuff')
                    if split != 'test':
                        outFileName = name + str(blockNum) + '_inst_nostuff'
                        coordShift[outFileName] = list(block[:, :3].mean(0))
                    # center coordinates per block; scale colors from [0, 255] to [-1, 1]
                    coords = np.ascontiguousarray(block[:, :3] - block[:, :3].mean(0))
                    colors = np.ascontiguousarray(block[:, 3:6]) / 127.5 - 1

                    coords = np.float32(coords)
                    colors = np.float32(colors)
                    if split != 'test':
                        sem_labels = np.ascontiguousarray(block[:, -2]).astype(np.int32)
                        sem_labels = remapper[sem_labels]

                        instance_labels = np.ascontiguousarray(block[:, -1]).astype(np.float32)

                        # drop instance labels for points whose semantic class
                        # is disabled in remapper_disableInstanceBySemantic
                        disableInstanceBySemantic_labels = np.ascontiguousarray(
                            block[:, -2]).astype(np.int32)
                        disableInstanceBySemantic_labels = remapper_disableInstanceBySemantic[
                            disableInstanceBySemantic_labels]
                        instance_labels = np.where(
                            disableInstanceBySemantic_labels == -100, -100, instance_labels)

                        # remap instance ids to start from 0; [1:] drops the
                        # leading -100 (assumed present) from the unique values
                        uniqueInstances = (np.unique(instance_labels))[1:].astype(np.int32)
                        remapper_instance = np.ones(50000) * (-100)
                        for i, j in enumerate(uniqueInstances):
                            remapper_instance[j] = i

                        instance_labels = remapper_instance[instance_labels.astype(np.int32)]

                        uniqueSemantics = (np.unique(sem_labels))[1:].astype(np.int32)

                        # skip sparse training blocks: too few instances, or
                        # too few instances relative to the semantic classes
                        if split == 'train' and (
                                len(uniqueInstances) < 10
                                or len(uniqueSemantics) >= len(uniqueInstances) - 2):
                            print('unique instance: %d' % len(uniqueInstances))
                            print('unique semantic: %d' % len(uniqueSemantics))
                            print()
                            counter += 1
                        else:
                            torch.save((coords, colors, sem_labels, instance_labels),
                                       outFilePath)
                    else:
                        torch.save((coords, colors), outFilePath)
    print('Total skipped files: %d' % counter)
    with open(outJsonPath, 'w') as f:
        json.dump(coordShift, f)


if __name__ == '__main__':
    data_folder = 'Synthetic_v3_InstanceSegmentation'
    filesOri = sorted(glob.glob(data_folder + '/*.txt'))

    trainSplit = [1, 2, 3, 4, 6, 7, 8, 9, 11, 12, 13, 14, 16, 17, 18, 19, 21, 22, 23, 24]
    trainFiles = getFiles(filesOri, trainSplit)
    split = 'train'
    trainOutDir = split
    os.makedirs(trainOutDir, exist_ok=True)
    preparePthFiles(trainFiles, split, trainOutDir, AugTimes=6)

    valSplit = [5, 10, 15, 20, 25]
    split = 'val'
    valFiles = getFiles(filesOri, valSplit)
    valOutDir = split
    os.makedirs(valOutDir, exist_ok=True)
    preparePthFiles(valFiles, split, valOutDir)