Commit 9f0ee8e

update dataset
1 parent 365a9d8 commit 9f0ee8e

6 files changed: 250 additions, 6 deletions

.gitignore

Lines changed: 4 additions & 0 deletions

@@ -76,3 +76,7 @@ dataset/s3dis/preprocess
 dataset/s3dis/val_gt
 dataset/s3dis/preprocess_sample
 dataset/s3dis/Stanford3dDataset_v1.2
+
+dataset/stpls3d/train
+dataset/stpls3d/val
+dataset/stpls3d/Synthetic_v3_InstanceSegmentation

configs/softgroup_stpls3d.yaml

Lines changed: 2 additions & 2 deletions

@@ -35,7 +35,7 @@ model:
 data:
   train:
     type: 'stpls3d'
-    data_root: 'dataset/Synthetic_v3_InstanceSegmentation'
+    data_root: 'dataset/stpls3d'
     prefix: 'train'
     suffix: '_inst_nostuff.pth'
     training: True
@@ -47,7 +47,7 @@ data:
     min_npoint: 5000
   test:
     type: 'stpls3d'
-    data_root: 'dataset/Synthetic_v3_InstanceSegmentation'
+    data_root: 'dataset/stpls3d'
     prefix: 'val'
     suffix: '_inst_nostuff.pth'
     training: False
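
As a side note (not part of the diff), here is a minimal sketch of how the updated data_root is assumed to combine with prefix and suffix when the dataloader looks up preprocessed blocks; the <data_root>/<prefix>/*<suffix> join is an assumption, based on the dataset/stpls3d/train and dataset/stpls3d/val folders added to .gitignore above:

# Illustrative sketch only; assumes files are looked up as <data_root>/<prefix>/*<suffix>.
import glob
import os.path as osp

data_root, prefix, suffix = 'dataset/stpls3d', 'train', '_inst_nostuff.pth'
pattern = osp.join(data_root, prefix, '*' + suffix)
print(pattern)  # dataset/stpls3d/train/*_inst_nostuff.pth
print('%d preprocessed training blocks found' % len(glob.glob(pattern)))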

configs/softgroup_stpls3d_backbone.yaml

Lines changed: 4 additions & 4 deletions

@@ -32,7 +32,7 @@ model:
 data:
   train:
     type: 'stpls3d'
-    data_root: 'dataset/Synthetic_v3_InstanceSegmentation'
+    data_root: 'dataset/stpls3d'
     prefix: 'train'
     suffix: '_inst_nostuff.pth'
     training: True
@@ -44,7 +44,7 @@ data:
     min_npoint: 5000
   test:
     type: 'stpls3d'
-    data_root: 'dataset/Synthetic_v3_InstanceSegmentation'
+    data_root: 'dataset/stpls3d'
     prefix: 'val'
     suffix: '_inst_nostuff.pth'
     training: False
@@ -56,15 +56,15 @@ data:

 dataloader:
   train:
-    batch_size: 12
+    batch_size: 4
     num_workers: 4
   test:
     batch_size: 1
     num_workers: 1

 optimizer:
   type: 'Adam'
-  lr: 0.002  # TODO change to 4 gpu
+  lr: 0.004

 save_cfg:
   semantic: True

dataset/stpls3d/prepare_data.sh

Lines changed: 3 additions & 0 deletions

@@ -0,0 +1,3 @@
#!/bin/bash
echo Preprocess data
python prepare_data_inst_instance_stpls3d.py
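
Usage note (a sketch, not part of the commit): the script is assumed to be run from dataset/stpls3d/ with the raw Synthetic_v3_InstanceSegmentation point clouds extracted next to it, since prepare_data_inst_instance_stpls3d.py below reads Synthetic_v3_InstanceSegmentation/*.txt relative to the working directory and writes its train/ and val/ output folders there. A small pre-flight check under those assumptions:

# Assumes the working directory is dataset/stpls3d/ and the raw scenes are already extracted.
import glob
import os

raw_dir = 'Synthetic_v3_InstanceSegmentation'
scenes = sorted(glob.glob(os.path.join(raw_dir, '*.txt')))
if not scenes:
    raise SystemExit('No raw .txt scenes found in %s; extract STPLS3D there first.' % raw_dir)
print('Found %d raw scenes; preprocessing will write blocks to ./train and ./val.' % len(scenes))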
dataset/stpls3d/prepare_data_inst_instance_stpls3d.py

Lines changed: 170 additions & 0 deletions

@@ -0,0 +1,170 @@
# https://github.com/meidachen/STPLS3D/blob/main/HAIS/data/prepare_data_inst_instance_stpls3d.py
import glob
import json
import math
import os
import random

import numpy as np
import pandas as pd
import torch


def splitPointCloud(cloud, size=50.0, stride=50):
    limitMax = np.amax(cloud[:, 0:3], axis=0)
    width = int(np.ceil((limitMax[0] - size) / stride)) + 1
    depth = int(np.ceil((limitMax[1] - size) / stride)) + 1
    cells = [(x * stride, y * stride) for x in range(width) for y in range(depth)]
    blocks = []
    for (x, y) in cells:
        xcond = (cloud[:, 0] <= x + size) & (cloud[:, 0] >= x)
        ycond = (cloud[:, 1] <= y + size) & (cloud[:, 1] >= y)
        cond = xcond & ycond
        block = cloud[cond, :]
        blocks.append(block)
    return blocks


def getFiles(files, fileSplit):
    res = []
    for filePath in files:
        name = os.path.basename(filePath)
        num = name[:2] if name[:2].isdigit() else name[:1]
        if int(num) in fileSplit:
            res.append(filePath)
    return res


def dataAug(file, semanticKeep):
    points = pd.read_csv(file, header=None).values
    angle = random.randint(1, 359)
    angleRadians = math.radians(angle)
    rotationMatrix = np.array([[math.cos(angleRadians), -math.sin(angleRadians), 0],
                               [math.sin(angleRadians),
                                math.cos(angleRadians), 0], [0, 0, 1]])
    points[:, :3] = points[:, :3].dot(rotationMatrix)
    pointsKept = points[np.in1d(points[:, 6], semanticKeep)]
    return pointsKept


def preparePthFiles(files, split, outPutFolder, AugTimes=0):
    # save the coordinates so that we can merge the data to a single scene
    # after segmentation for visualization
    outJsonPath = os.path.join(outPutFolder, 'coordShift.json')
    coordShift = {}
    # used to increase z range if it is smaller than this,
    # to overcome the issue where spconv may crash during voxelization.
    zThreshold = 6

    # Map relevant classes to {1,...,14}, and ignored classes to -100
    remapper = np.ones(150) * (-100)
    for i, x in enumerate([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]):
        remapper[x] = i
    # Map instance to -100 based on selected semantic
    # (change a semantic to -100 if you want to ignore it for instance)
    remapper_disableInstanceBySemantic = np.ones(150) * (-100)
    for i, x in enumerate([-100, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]):
        remapper_disableInstanceBySemantic[x] = i

    # only augment data for these classes
    semanticKeep = [0, 2, 3, 7, 8, 9, 12, 13]

    counter = 0
    for file in files:

        for AugTime in range(AugTimes + 1):
            if AugTime == 0:
                points = pd.read_csv(file, header=None).values
            else:
                points = dataAug(file, semanticKeep)
            name = os.path.basename(file).strip('.txt') + '_%d' % AugTime

            if split != 'test':
                coordShift['globalShift'] = list(points[:, :3].min(0))
            points[:, :3] = points[:, :3] - points[:, :3].min(0)

            blocks = splitPointCloud(points, size=50, stride=50)
            for blockNum, block in enumerate(blocks):
                if (len(block) > 10000):
                    outFilePath = os.path.join(outPutFolder,
                                               name + str(blockNum) + '_inst_nostuff.pth')
                    if (block[:, 2].max(0) - block[:, 2].min(0) < zThreshold):
                        block = np.append(
                            block, [[
                                block[:, 0].mean(0), block[:, 1].mean(0), block[:, 2].max(0) +
                                (zThreshold -
                                 (block[:, 2].max(0) - block[:, 2].min(0))), block[:, 3].mean(0),
                                block[:, 4].mean(0), block[:, 5].mean(0), -100, -100
                            ]],
                            axis=0)
                        print('range z is smaller than threshold ')
                        print(name + str(blockNum) + '_inst_nostuff')
                    if split != 'test':
                        outFileName = name + str(blockNum) + '_inst_nostuff'
                        coordShift[outFileName] = list(block[:, :3].mean(0))
                    coords = np.ascontiguousarray(block[:, :3] - block[:, :3].mean(0))

                    # coords = block[:, :3]
                    colors = np.ascontiguousarray(block[:, 3:6]) / 127.5 - 1

                    coords = np.float32(coords)
                    colors = np.float32(colors)
                    if split != 'test':
                        sem_labels = np.ascontiguousarray(block[:, -2])
                        sem_labels = sem_labels.astype(np.int32)
                        sem_labels = remapper[np.array(sem_labels)]

                        instance_labels = np.ascontiguousarray(block[:, -1])
                        instance_labels = instance_labels.astype(np.float32)

                        disableInstanceBySemantic_labels = np.ascontiguousarray(block[:, -2])
                        disableInstanceBySemantic_labels = disableInstanceBySemantic_labels.astype(
                            np.int32)
                        disableInstanceBySemantic_labels = remapper_disableInstanceBySemantic[
                            np.array(disableInstanceBySemantic_labels)]
                        instance_labels = np.where(disableInstanceBySemantic_labels == -100, -100,
                                                   instance_labels)

                        # map instance from 0.
                        # [1:] because there are -100
                        uniqueInstances = (np.unique(instance_labels))[1:].astype(np.int32)
                        remapper_instance = np.ones(50000) * (-100)
                        for i, j in enumerate(uniqueInstances):
                            remapper_instance[j] = i

                        instance_labels = remapper_instance[instance_labels.astype(np.int32)]

                        uniqueSemantics = (np.unique(sem_labels))[1:].astype(np.int32)

                        if split == 'train' and (len(uniqueInstances) < 10 or
                                                 (len(uniqueSemantics) >=
                                                  (len(uniqueInstances) - 2))):
                            print('unique instance: %d' % len(uniqueInstances))
                            print('unique semantic: %d' % len(uniqueSemantics))
                            print()
                            counter += 1
                        else:
                            torch.save((coords, colors, sem_labels, instance_labels), outFilePath)
                    else:
                        torch.save((coords, colors), outFilePath)
    print('Total skipped file: %d' % counter)
    json.dump(coordShift, open(outJsonPath, 'w'))


if __name__ == '__main__':
    data_folder = 'Synthetic_v3_InstanceSegmentation'
    filesOri = sorted(glob.glob(data_folder + '/*.txt'))

    trainSplit = [1, 2, 3, 4, 6, 7, 8, 9, 11, 12, 13, 14, 16, 17, 18, 19, 21, 22, 23, 24]
    trainFiles = getFiles(filesOri, trainSplit)
    split = 'train'
    trainOutDir = split
    os.makedirs(trainOutDir, exist_ok=True)
    preparePthFiles(trainFiles, split, trainOutDir, AugTimes=6)

    valSplit = [5, 10, 15, 20, 25]
    split = 'val'
    valFiles = getFiles(filesOri, valSplit)
    valOutDir = split
    os.makedirs(valOutDir, exist_ok=True)
    preparePthFiles(valFiles, split, valOutDir)
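
To sanity-check the output, each saved train/val block can be loaded back with torch.load and unpacked into the 4-tuple written by preparePthFiles above; a minimal sketch, assuming the script has already been run and ./train is populated:

# Loads one preprocessed block; the tuple layout follows the torch.save call above.
import glob

import torch

block_files = sorted(glob.glob('train/*_inst_nostuff.pth'))
coords, colors, sem_labels, instance_labels = torch.load(block_files[0])
print(coords.shape, colors.shape)  # both (N, 3), float32
print(sem_labels.min(), sem_labels.max())  # -100 for ignored points, otherwise 0..14
# assuming the block contains at least one labeled instance:
print(int(instance_labels.max()) + 1, 'instances in this block')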

Lines changed: 67 additions & 0 deletions

@@ -0,0 +1,67 @@
import glob
import math
import os

import numpy as np
import torch

data_folder = os.path.join(
    os.path.dirname(os.getcwd()), 'dataset', 'Synthetic_v3_InstanceSegmentation', 'train')
files = sorted(glob.glob(data_folder + '/*.pth'))
numclass = 15
semanticIDs = []
for i in range(numclass):
    semanticIDs.append(i)

class_numpoint_mean_dict = {}
class_radius_mean = {}
for semanticID in semanticIDs:
    class_numpoint_mean_dict[semanticID] = []
    class_radius_mean[semanticID] = []
num_points_semantic = np.array([0 for i in range(numclass)])

for file in files:
    coords, colors, sem_labels, instance_labels = torch.load(file)
    points = np.concatenate(
        [coords, colors, sem_labels[:, None].astype(int), instance_labels[:, None].astype(int)],
        axis=1)
    for semanticID in semanticIDs:
        singleSemantic = points[np.where(points[:, 6] == semanticID)]
        uniqueInstances, counts = np.unique(singleSemantic[:, 7], return_counts=True)
        for count in counts:
            class_numpoint_mean_dict[semanticID].append(count)
        allRadius = []
        for uniqueInstance in uniqueInstances:
            eachInstance = singleSemantic[np.where(singleSemantic[:, 7] == uniqueInstance)]
            radius = (np.max(eachInstance, axis=0) - np.min(eachInstance, axis=0)) / 2
            radius = math.sqrt(radius[0]**2 + radius[1]**2 + radius[2]**2)
            class_radius_mean[semanticID].append(radius)

    uniqueSemantic, semanticCount = np.unique(points[:, 6], return_counts=True)
    uniqueSemanticCount = np.array([0 for i in range(numclass)])
    uniqueSemantic = uniqueSemantic.astype(int)
    indexOf100 = np.where(uniqueSemantic == -100)
    semanticCount = np.delete(semanticCount, indexOf100)
    uniqueSemantic = np.delete(uniqueSemantic, indexOf100)
    uniqueSemanticCount[uniqueSemantic] = semanticCount
    num_points_semantic += uniqueSemanticCount

class_numpoint_mean_list = []
class_radius_mean_list = []
for semanticID in semanticIDs:
    class_numpoint_mean_list.append(
        sum(class_numpoint_mean_dict[semanticID]) * 1.0 / len(class_numpoint_mean_dict[semanticID]))
    class_radius_mean_list.append(
        sum(class_radius_mean[semanticID]) / len(class_radius_mean[semanticID]))

print('Use the printed list in hierarchical_aggregation.cpp for class_numpoint_mean_dict: ')
print([1.0] + [float('{0:0.0f}'.format(i)) for i in class_numpoint_mean_list][1:], sep=',')
print('Use the printed list in hierarchical_aggregation.cu for class_radius_mean: ')
print([1.0] + [float('{0:0.2f}'.format(i)) for i in class_radius_mean_list][1:], sep='')

# make ground to 1, then make building to 1
maxSemantic = np.max(num_points_semantic)
num_points_semantic = maxSemantic / num_points_semantic
num_points_semantic = num_points_semantic / num_points_semantic[1]
print('Use the printed list in hais_run_stpls3d.yaml for class_weight')
print([1.0, 1.0] + [float('{0:0.2f}'.format(i)) for i in num_points_semantic][2:], sep='')
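
The class_weight logic above boils down to inverse point-frequency weights rescaled so that the two most frequent classes (ground and building, the first two entries) both end up at 1.0; a tiny worked example with made-up counts, just to show the arithmetic:

# Made-up per-class point counts, most frequent first (stand-ins for ground, building, ...).
import numpy as np

num_points_semantic = np.array([1000., 800., 100., 50.])
weights = np.max(num_points_semantic) / num_points_semantic  # inverse frequency, largest class -> 1.0
weights = weights / weights[1]                               # rescale so index 1 (building) -> 1.0
print(weights)  # [ 0.8  1.   8.  16. ]
# The script then prints [1.0, 1.0] + weights[2:], forcing ground and building to exactly 1.0.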
