-
Notifications
You must be signed in to change notification settings - Fork 0
/
0_2_sampling_hybrid_min500.py
executable file
·134 lines (112 loc) · 5.65 KB
/
0_2_sampling_hybrid_min500.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import os
import random
import shutil
import numpy as np
import pickle
# Sampling bounds: minImgs is the minimum number of images every dataset is
# brought up to. NOTE(review): maxImgs is not referenced anywhere in the
# visible part of this script - confirm whether an upper cap was intended.
minImgs, maxImgs = 500, 1200

# Name of the per-dataset output folder; it encodes the minimum in its name.
newFolderName = 'jpgs_min{}'.format(minImgs)

# Root directory that all dataset directories below are relative to.
database_base_path = '/home/jean-pierre/scratch/'

# Absolute paths of every dataset directory that gets sampled.
paths = [
    database_base_path + relativeDir
    for relativeDir in (
        '20201117_rotated/1pi2/labeled_20201117_1pi2_c3',
        '20201117_rotated/2pi4/labeled_20201117_2pi4_c1',
        '20201117_rotated/3SamsungGalaxyA5/labeled_20201117_3SamsungGalaxyA4_OC',
        '20201117_rotated/4XiaomiRedmi4X/labeled_20201117_4XiaomiRedmi4X_OC',
        '20201119_rotated/1XiaomiRedmi4X/labeled_20201119_1XiaomiRedmi4X_OC',
        '20201119_rotated/2pi2/labeled_20201119_2pi2_c3',
        '20201119_rotated/3pi4/labeled_20201119_3pi4_c3',
        '20201119_rotated/4SamsungGalaxyA5_sampled/labeled_20201119_4SamsungGalaxyA_OC_sampled',
        '20201126_rotated/3XiaomiRedmi2_sampled/labeled_20201126_3XiaomiRedmi2_OC2_sampled',
        '20201126_rotated/4pi4_c4_sampled/labeled_20201126_4pi4_c4_sampled',
        '20201126_rotated/4pi4_c5/labeled_20201126_4pi4_c5',
        '20201127_rotated/2pi4/labeled_20201127_2pi4_c2',
        '20201203_rotated/1pi4_sampled/labeled_20201203_1pi4_c20_sampled',
        '20201203_rotated/3pi2/labeled_20201203_3pi2_c4',
        '20201203_rotated/4SamsungGalaxyA5/labeled_20201203_4SamsungGalaxyA5_OC',
        'unilyon_and_others/20071123_0756_Ain1/labeled_20071123_0756_Ain1',
        'unilyon_and_others/20071123_0956_Ain2/labeled_20071123_0956_Ain2',
        'unilyon_and_others/20191125_Allier1/labeled_20191125_Allier1',
        'unilyon_and_others/20191223_Allier2/labeled_20191223_Allier2',
        'unilyon_and_others/randomWoodImages/labeled_2022_randomWoodImages',
    )
]
# NOTE(review): presumably an earlier hard-coded version of the per-dataset
# counts (one value per entry of `paths`); the counts are computed from disk
# below instead.
#samplesPerDataset = [1429,601,1076,478,344,2478,2146,191,18,138,1046,1034,157,2340,1236,116,81,176,134,9]

# Name of the sub-folder, inside every dataset directory, whose images are counted.
originalToCount = 'jpgs'

# Filled with one (dataset path, number of '.jpg' files) tuple per dataset,
# in the same order as `paths`.
samplesPerDataset = []
for path in paths:
    jpgsFolder = os.path.join(path, originalToCount)
    # List the directory once and count from that single listing.
    files = os.listdir(jpgsFolder)
    # Only names ending in '.jpg' (case-sensitive) count as images.
    count = sum(1 for filee in files if filee.endswith('.jpg'))
    samplesPerDataset.append((path, count))
# Target number of images per dataset: datasets holding fewer than minImgs
# images are raised to minImgs, all others keep their own count.
samplesPerDatasetAdjusted = [
    (datasetPath, max(jpgCount, minImgs))
    for datasetPath, jpgCount in samplesPerDataset
]
print(samplesPerDatasetAdjusted)
# For every dataset: pick the target number of jpgs (duplicating images at
# random when the dataset has too few), record the selection in a pickle
# inside the dataset directory, and copy each selected jpg plus its matching
# .txt file into freshly created output folders.
def _recreateFolder(folderPath):
    """Create folderPath as an empty directory, replacing an existing one."""
    try:
        os.mkdir(folderPath)
    except FileExistsError:
        # Only "already exists" triggers the wipe; any other mkdir failure
        # (permissions, missing parent, ...) propagates unchanged.
        print('folder already exists')
        shutil.rmtree(folderPath)
        os.mkdir(folderPath)


for datasetPath, targetCount in samplesPerDatasetAdjusted:
    # All '.jpg' files available in this dataset's source image folder.
    jpgList = [
        filee
        for filee in os.listdir(os.path.join(datasetPath, originalToCount))
        if filee.endswith('.jpg')
    ]
    availableCount = len(jpgList)

    # The selection is made before any folder is touched, so a failure here
    # leaves the output of a previous run intact.
    if availableCount < targetCount:
        if not jpgList:
            raise ValueError(
                'no .jpg files found in '
                + os.path.join(datasetPath, originalToCount)
                + '; cannot sample ' + str(targetCount) + ' images'
            )
        # Too few images: keep every image once and top up with random
        # duplicates. Duplicates are drawn from the original files only;
        # a new list is built so the pool does not grow while sampling.
        duplicates = [
            random.choice(jpgList)
            for _ in range(targetCount - availableCount)
        ]
        selectedJpgList = jpgList + duplicates
    elif availableCount == targetCount:
        # Exactly enough images: take all of them.
        selectedJpgList = list(jpgList)
    else:
        # More images than requested: random subset without duplicates.
        # Converted to a plain list so the pickled object has the same type
        # in every branch.
        selectedJpgList = np.random.choice(
            jpgList, targetCount, replace=False
        ).tolist()
    selectedJpgList.sort()

    # Output folder for the images, and its sibling for the .txt files
    # (same name with 'jpgs' swapped for 'txts', e.g. 'jpgs_min500' ->
    # 'txts_min500'). Both are emptied if they already exist.
    newPathName = os.path.join(datasetPath, newFolderName)
    newTxtPathName = os.path.join(
        datasetPath, newFolderName.replace('jpgs', 'txts', 1)
    )
    _recreateFolder(newPathName)
    _recreateFolder(newTxtPathName)

    # Store the selection inside the dataset directory it belongs to, one
    # pickle per dataset; the context manager closes the file.
    picklePath = os.path.join(datasetPath, newFolderName + '_selected.pickle')
    with open(picklePath, 'wb') as output:
        pickle.dump(selectedJpgList, output)
    print(len(selectedJpgList))

    # Copy each selected jpg and its .txt companion. Copies are named
    # '<stem>-<n>.jpg' / '<stem>-<n>.txt', where n is the first number not
    # yet used for that stem, so duplicated images get distinct names.
    for selectedJpg in selectedJpgList:
        # Every entry ends in '.jpg' (filtered above), so strip that suffix.
        stem = selectedJpg[:-len('.jpg')]
        orJpgPath = os.path.join(datasetPath, originalToCount, selectedJpg)
        # The companion .txt is read from the dataset's 'txts' folder
        # (presumably the annotation for the image - confirm).
        orTxtPath = os.path.join(datasetPath, 'txts', stem + '.txt')

        count = 1
        while True:
            outName = stem + '-' + str(count)
            outJpgPath = os.path.join(newPathName, outName + '.jpg')
            if not os.path.exists(outJpgPath):
                break
            count = count + 1

        shutil.copy(orJpgPath, outJpgPath)
        shutil.copy(orTxtPath, os.path.join(newTxtPathName, outName + '.txt'))