
Commit 20b1bf4

stefan committed: update

1 parent b5a9ebc · commit 20b1bf4

File tree

9 files changed (+163, -260 lines)


Collector.py

Lines changed: 118 additions & 36 deletions
@@ -1,4 +1,3 @@
-
 import os
 import constant
 import common
@@ -17,6 +16,7 @@ class Collector(QThread):
     def __init__(self, ui):
         QThread.__init__(self)
         self.map = {}
+        self.duplicate_db = DuplicateDB(ui)
         self.ui = ui


@@ -32,28 +32,21 @@ def clear(self):
         self.map = {}


-    def add_dir(self, path, recursive, cmd, duplicate_db, skipExisting):
+    def add_dir(self, path, recursive, cmd, skipExisting):
         self.argPath = path
         self.argRecursive = recursive
         self.argCmd = cmd
         self.argSkipExisting = skipExisting
-        self.arg_duplicate_db = duplicate_db
-        self.worker = self.add_dir_wrapper
-        if constant.USE_THREADS:
-            self.start()
-        else:
-            self.worker()
+        self.worker = self.add_dir_worker
+        self.exec()


     def find_extern_duplicates(self, srcDir, recursive, simulate):
         self.argSrcDir = srcDir
         self.argRecursive = recursive
         self.arg_simulate = simulate
-        self.worker = self.find_extern_duplicates_wrapper
-        if constant.USE_THREADS:
-            self.start()
-        else:
-            self.worker()
+        self.worker = self.find_extern_duplicates_worker
+        self.exec()


     def find_hash(self, hash):
@@ -83,19 +76,19 @@ def run(self):
         self.worker()


-    def skip_dir(self, path):
+    def is_skip_dir(self, path):
         return os.path.isfile(os.path.join(path, constant.NOHASHFILE))


-    def add_dir_wrapper(self):
-        self.add_dir_impl(self.argPath, self.argRecursive, self.argSkipExisting, self.argCmd, self.arg_duplicate_db)
+    def add_dir_worker(self):
+        self.ui.reset()
+        self.add_dir_impl(self.argPath, self.argRecursive, self.argSkipExisting, self.argCmd)
         self.ui.stats()


-
-    def add_dir_impl2(self, path, recursive, skipExisting, cmd, duplicate_db):
+    def add_dir_impl2(self, path, recursive, skipExisting, cmd):
         dir = os.path.normpath(path)
-        if self.skip_dir(dir):
+        if self.is_skip_dir(dir):
             self.ui.info("Skipping dir: %s" % dir)
             self.ui.inc_dir_skipped()
             return
@@ -106,7 +99,7 @@ def add_dir_impl2(self, path, recursive, skipExisting, cmd, duplicate_db):
             pass

         if cmd is CollectorCmd.scan:
-            db.scan(duplicate_db, skipExisting)
+            db.scan(self.duplicate_db, skipExisting)
             self.ui.inc_dir_scanned()
             db.save()
         elif cmd is CollectorCmd.verify:
@@ -116,17 +109,17 @@ def add_dir_impl2(self, path, recursive, skipExisting, cmd, duplicate_db):
             dirList = []
             dirList.extend(common.get_dir_list_absolute(path, False))
             for dir in dirList:
-                self.add_dir_impl2(dir, recursive, skipExisting, cmd, duplicate_db)
+                self.add_dir_impl2(dir, recursive, skipExisting, cmd)
                 if self.ui.is_abort():
                     return


-    def add_dir_impl(self, path, recursive, skipExisting, cmd, duplicate_db):
-        duplicate_db.reset()
+    def add_dir_impl(self, path, recursive, skipExisting, cmd):
+        self.duplicate_db.reset()
         self.ui.info("Loading HashDB %sfrom: %s" % ('recursively ' if recursive else '', path))
-        self.add_dir_impl2(path, recursive, skipExisting, cmd, duplicate_db)
+        self.add_dir_impl2(path, recursive, skipExisting, cmd)
         self.ui.debug("Finished loading %d HashDB." % (len(self.map)))
-        duplicate_db.show_duplicates()
+        self.duplicate_db.show_duplicates()


     def remove_hash(self, path, hash):
@@ -137,6 +130,15 @@ def remove_hash(self, path, hash):
             db.remove(hash)


+    def remove_file(self, filepath):
+        path = os.path.dirname(filepath)
+        filename = os.path.basename(filepath)
+        db = self.map.get(path)
+        if db:
+            db.remove_filename(filename)
+        else:
+            self.ui.debug("remove_file: HashDB not found: %s" % path)
+
     def save_hashes(self, forceSave = False):
         self.ui.info("Start saving HashDB")
         for path, db in self.map.items():
@@ -145,23 +147,18 @@ def save_hashes(self, forceSave = False):
             pass


-    def find_extern_duplicates_wrapper(self):
+    def find_extern_duplicates_worker(self):
+        self.ui.reset()
         self.find_extern_duplicates_impl(self.argSrcDir, self.argRecursive, self.arg_simulate)
+        self.ui.stats()


     def find_extern_duplicates_impl(self, srcDir, recursive, simulate):
-
-        if None == srcDir:
-            self.ui.error("No src dir set")
-            return
-
         self.ui.info("Duplicates found in %s:" % srcDir)
         srcDirList = [srcDir]
         if recursive:
             srcDirList.extend(common.get_dir_list_absolute(srcDir, recursive))

-        cntDuplicates = 0
-
         for curSrcDir in srcDirList:
             fileList = common.get_file_list(curSrcDir)

@@ -170,7 +167,92 @@ def find_extern_duplicates_impl(self, srcDir, recursive, simulate):
                 hash = common.get_hash_from_file(srcFilepath, self.ui)
                 found_file = self.find_hash(hash)
                 if None != found_file:
-                    cntDuplicates += 1
-                    self.ui.info(srcFilepath)
+                    self.ui.inc_file_duplicates()
+                    self.ui.file(srcFilepath)
+
+        #self.ui.info("Finished finding duplicates. %d files" % (cntDuplicates))
+
+
+
+    def find_duplicates_in_hashDB_impl(self):
+        self.ui.info("Start finding duplicates in HashDB...")
+        self.duplicate_db.reset()
+        for path, db in self.map.items():
+            for hash, name in db.map.items():
+                filepath = os.path.normpath(os.path.join(db.path, name))
+                self.duplicate_db.add_hash(hash, filepath)
+                self.ui.inc_file_processed()
+        self.ui.debug("Finished finding duplicates")
+
+
+    def find_and_show_duplicates_in_hashDB_worker(self):
+        self.find_duplicates_in_hashDB_impl()
+        self.duplicate_db.show_duplicates(self.arg_path)
+        self.ui.stats()
+
+
+    def find_and_show_duplicates_in_hashDB(self, path):
+        self.arg_path = path
+        self.worker = self.find_and_show_duplicates_in_hashDB_worker
+        self.exec()
+
+
+    def exec(self):
+        if constant.USE_THREADS:
+            self.start()
+        else:
+            self.worker()
+
+
+    def move_duplicates_with_master_dir(self, master_path, dest_dir, move_flat, simulate):
+        self.arg_master_path = master_path
+        self.arg_dest_dir = dest_dir
+        self.arg_move_flat = move_flat
+        self.arg_simulate = simulate
+        self.worker = self.move_duplicates_with_master_dir_worker
+        self.exec()
+
+
+    def move_duplicates_with_master_dir_worker(self):
+        filenames = self.duplicate_db.get_list_with_files_to_move_keep_master_path(self.arg_master_path)
+        self.move_files(filenames, self.arg_move_flat, self.arg_dest_dir, self.arg_simulate)
+
+
+    def move_files(self, filenames, move_flat, dest_dir, is_simulation):
+        self.ui.reset()
+        for filename in filenames:
+            path = common.create_duplicate_dest_path(filename, dest_dir, move_flat)
+            common.move_file(filename, path, False, is_simulation, self.ui)
+            if not is_simulation:
+                self.remove_file(filename)
+        if not is_simulation:
+            self.save_hashes()
+        self.ui.stats()
+
+
+    def find_and_show_duplicates_in_folder(self, path):
+        self.arg_path = path
+        self.worker = self.find_and_show_duplicates_in_folder_worker
+        self.exec()
+
+
+    def find_and_show_duplicates_in_folder_worker(self):
+        self.ui.reset()
+        self.find_and_show_duplicates_in_folder_impl(self.arg_path)
+        self.duplicate_db.show_duplicates(None)
+        self.ui.stats()
+
+
+    def find_and_show_duplicates_in_folder_impl(self, path):
+        self.duplicate_db.reset()
+        files = common.get_file_list(path)
+        self.ui.info("Scannning %d files for duplicates in %s" % (len(files), path))
+        for item in files:
+            if self.ui.is_abort():
+                return
+            self.ui.info("Hashing: %s" % item)
+            filepath = os.path.normpath(os.path.join(path, item))
+            hash = common.get_hash_from_file(filepath, self.ui)
+            self.duplicate_db.add_hash(hash, filepath)
+        self.ui.info("Finished finding duplicates.")

-        self.ui.info("Finished finding duplicates. %d files" % (cntDuplicates))

DuplicateDB.py

Lines changed: 6 additions & 23 deletions
@@ -58,34 +58,17 @@ def show_duplicates(self, path = None):
        # self.ui.info("No duplicates found.")


-    def move_duplicates_with_master_dir_impl(self, master_path, duplicate_path, simulate, collector):
+    def get_list_with_files_to_move_keep_master_path(self, master_path):
         self.ui.info("Start moving duplicates...")
+        all_files_to_move = []
         for hash, files in self.map.items():
             if len(files) > 1:
-                files_to_move = []
-                master_file = None
                 for filename in files:
                     p = os.path.dirname(filename)
-                    if p == master_path:
-                        #self.ui.info("Found master: %s" % filename)
-                        master_file = filename
+                    if p == master_path: # found master file to keep
                         files_to_move = list(files)
                         files_to_move.remove(filename)
+                        all_files_to_move.extend(files_to_move)
                         break
-
-                for src_path in files_to_move:
-                    try:
-                        path = "." + os.path.splitdrive(src_path)[1]
-                        path = os.path.normpath(path)
-                        dest_path = os.path.join(duplicate_path, path)
-                        self.ui.debug("Move %s to %s - Master: %s" % (src_path, dest_path, master_file))
-                        common.move_file(src_path, dest_path, False, simulate, self.ui)
-                        if not simulate:
-                            dir_name = os.path.dirname(src_path)
-                            collector.remove_hash(dir_name, hash)
-                    except Exception as e:
-                        self.ui.error("Error moving: %s, %s" % (str(e), src_path))
-                        pass
-
-        collector.save_hashes()
-        #self.ui.info("Finished moving %d duplicates. Errors: %d" % (cnt_moved, cnt_error))
+        return all_files_to_move
+
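Moving duplicates is now split across two classes: DuplicateDB only computes which files should be moved, keeping the copy whose directory equals master_path, while Collector.move_files (above) performs the actual moves and hash-DB cleanup. A small self-contained sketch of the selection step, with a plain dict standing in for DuplicateDB.map (hash → list of file paths) and invented example paths:

import os

def files_to_move_keep_master(dup_map, master_path):
    # Same selection rule as get_list_with_files_to_move_keep_master_path:
    # for each hash with more than one file, keep the copy under master_path
    # and mark every other copy for moving.
    all_files_to_move = []
    for _hash, files in dup_map.items():
        if len(files) > 1:
            for filename in files:
                if os.path.dirname(filename) == master_path:
                    files_to_move = list(files)
                    files_to_move.remove(filename)      # keep the master copy
                    all_files_to_move.extend(files_to_move)
                    break
    return all_files_to_move

# Invented example data:
dups = {
    "abc123": ["/master/a.jpg", "/other/a_copy.jpg"],
    "def456": ["/other/b.jpg"],                   # unique file, never moved
    "789aaa": ["/other/c.jpg", "/backup/c.jpg"],  # no copy under /master
}
print(files_to_move_keep_master(dups, "/master"))   # ['/other/a_copy.jpg']

As in the commit, hash groups that contain no file under master_path are left untouched rather than falling back to an arbitrary master copy.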

HashDb.py

Lines changed: 6 additions & 0 deletions
@@ -148,3 +148,9 @@ def find_filename(self, filename):
     def find_hash(self, hash):
         value = self.map.get(hash)
         return value
+
+
+    def remove_filename(self, filename):
+        hash = self.find_filename(filename)
+        if hash:
+            self.remove(hash)
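remove_filename is what Collector.remove_file calls after resolving the per-directory HashDb from its map. A hypothetical stub illustrating how the pieces fit together; only the call sites of find_filename and remove are visible in this diff, so their bodies below are assumptions:

import os

class HashDbStub:
    # Assumed shape: one HashDb per directory, mapping hash -> filename,
    # matching the "for hash, name in db.map.items()" loop in Collector.
    def __init__(self, path):
        self.path = path
        self.map = {}                       # hash -> filename

    def find_filename(self, filename):
        # reverse lookup (assumed): return the hash stored for this filename
        for h, name in self.map.items():
            if name == filename:
                return h
        return None

    def remove(self, hash):
        self.map.pop(hash, None)

    def remove_filename(self, filename):    # the method added by this commit
        hash = self.find_filename(filename)
        if hash:
            self.remove(hash)

# Collector.remove_file style usage with an invented path:
dbs = {os.path.normpath("/photos"): HashDbStub("/photos")}
db = dbs.get(os.path.dirname(os.path.normpath("/photos/img.jpg")))
if db:
    db.remove_filename("img.jpg")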

Logger.py

Lines changed: 6 additions & 1 deletion
@@ -57,6 +57,7 @@ def reset(self):
         self.cnt_hash_duplicates = 0
         self.cnt_file_duplicates = 0
         self.cnt_missing_file = 0
+        self.cnt_file_processed = 0

     def m(self, msg, cnt):
         if cnt > 0:
@@ -77,9 +78,13 @@ def stats(self):
         str += self.m("Skipped dir", self.cnt_dir_skipped)
         str += self.m("Hash duplicates", self.cnt_hash_duplicates)
         str += self.m("File duplicates", self.cnt_file_duplicates)
-        str += self.m("File missing", self.cnt_missing_file)
+        str += self.m("Files missing", self.cnt_missing_file)
+        str += self.m("Files processed", self.cnt_file_processed)
         self.info(str)

+    def inc_file_processed(self):
+        self.cnt_file_processed += 1
+
     def inc_missing_file(self):
         self.cnt_missing_file += 1


common.py

Lines changed: 17 additions & 17 deletions
@@ -62,34 +62,34 @@ def create_new_filename(filepath):
     return newfilepath


-def create_duplicate_dest_path(srcPath, duplicateDir, flat):
+def create_duplicate_dest_path(src_path, dest_dir, flat):
     if flat:
-        destPath = os.path.join(duplicateDir, os.path.basename(srcPath))
+        dest_path = os.path.join(dest_dir, os.path.basename(src_path))
     else:
-        destPath = '.' + os.path.splitdrive(srcPath)[1]
-        destPath = os.path.join(duplicateDir, destPath)
-    return os.path.normpath(destPath)
+        dest_path = '.' + os.path.splitdrive(src_path)[1]
+        dest_path = os.path.join(dest_dir, dest_path)
+    return os.path.normpath(dest_path)


-def move_file(srcPath, destPath, overwrite, simulate, ui):
-    ui.info("Move %s ==> %s" % (srcPath, destPath))
+def move_file(src_path, dest_path, overwrite, simulate, ui):
+    ui.info("Move %s ==> %s" % (src_path, dest_path))
     if not simulate:
         try:
-            destDir = os.path.dirname(destPath)
-            if not os.path.exists(destDir):
-                os.makedirs(destDir, exist_ok=True)
-
-            if not overwrite and os.path.isfile(destPath):
-                destPath = create_new_filename(destPath)
-                ui.info("Renamed moved file to %s" % destPath)
-                shutil.move(srcPath, destPath)
+            dest_path = os.path.dirname(dest_path)
+            if not os.path.exists(dest_path):
+                os.makedirs(dest_path, exist_ok=True)
+
+            if not overwrite and os.path.isfile(dest_path):
+                dest_path = create_new_filename(dest_path)
+                ui.info("Renamed moved file to %s" % dest_path)
+                shutil.move(src_path, dest_path)
                 ui.inc_moved_renamed()
             else:
-                shutil.move(srcPath, destPath)
+                shutil.move(src_path, dest_path)
                 ui.inc_moved()
         except:
             ui.error("Failed to move %s to %s" %
-                     (srcPath, destPath))
+                     (src_path, dest_path))
             ui.inc_error()
             pass

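Both branches of the renamed create_duplicate_dest_path are fully visible above: a flat move drops every duplicate directly into dest_dir, while a non-flat move recreates the source's drive-relative path underneath dest_dir. A quick usage sketch with invented Windows-style paths (os.path.splitdrive only strips a drive letter on Windows, so the non-flat result in the comments applies there):

import os

def create_duplicate_dest_path(src_path, dest_dir, flat):
    # copied from the diff above
    if flat:
        dest_path = os.path.join(dest_dir, os.path.basename(src_path))
    else:
        dest_path = '.' + os.path.splitdrive(src_path)[1]
        dest_path = os.path.join(dest_dir, dest_path)
    return os.path.normpath(dest_path)

# On Windows these print:
#   D:\dups\a.jpg          (flat)
#   D:\dups\photos\a.jpg   (non-flat: drive letter stripped, path preserved)
print(create_duplicate_dest_path(r"C:\photos\a.jpg", r"D:\dups", True))
print(create_duplicate_dest_path(r"C:\photos\a.jpg", r"D:\dups", False))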

constant.py

Lines changed: 1 addition & 1 deletion
@@ -1,3 +1,3 @@
 HASHFILE = "__hashes.txt"
 NOHASHFILE = "__nohashes.txt"
-USE_THREADS = True
+USE_THREADS = False
