1
-
2
1
import os
3
2
import constant
4
3
import common
@@ -17,6 +16,7 @@ class Collector(QThread):
17
16
def __init__(self, ui):
    """Set up the collector thread with an empty HashDB registry.

    ui -- UI/reporting facade used for progress counters, stats and messages.
    """
    QThread.__init__(self)
    self.ui = ui
    # Maps a directory path to its HashDB instance.
    self.map = {}
    # Shared duplicate tracker; workers reset it per operation.
    self.duplicate_db = DuplicateDB(ui)
21
21
22
22
@@ -32,28 +32,21 @@ def clear(self):
32
32
self .map = {}
33
33
34
34
35
- def add_dir (self , path , recursive , cmd , duplicate_db , skipExisting ):
35
def add_dir(self, path, recursive, cmd, skipExisting):
    """Start processing *path* (scan/verify per *cmd*) via the worker.

    Arguments are stashed on the instance because QThread.run() takes no
    parameters; the actual work happens in add_dir_worker.
    """
    self.argPath = path
    self.argRecursive = recursive
    self.argCmd = cmd
    self.argSkipExisting = skipExisting
    self.worker = self.add_dir_worker
    self.exec()
46
42
47
43
48
44
def find_extern_duplicates(self, srcDir, recursive, simulate):
    """Kick off a search for files under *srcDir* already known to the HashDBs."""
    self.argSrcDir = srcDir
    self.argRecursive = recursive
    self.arg_simulate = simulate
    self.worker = self.find_extern_duplicates_worker
    self.exec()
57
50
58
51
59
52
def find_hash (self , hash ):
@@ -83,19 +76,19 @@ def run(self):
83
76
self .worker ()
84
77
85
78
86
- def skip_dir (self , path ):
79
def is_skip_dir(self, path):
    """Return True when *path* holds the marker file that opts it out of hashing."""
    marker = os.path.join(path, constant.NOHASHFILE)
    return os.path.isfile(marker)
88
81
89
82
90
- def add_dir_wrapper (self ):
91
- self .add_dir_impl (self .argPath , self .argRecursive , self .argSkipExisting , self .argCmd , self .arg_duplicate_db )
83
def add_dir_worker(self):
    """Worker entry point for add_dir(): reset counters, run, report stats."""
    self.ui.reset()
    self.add_dir_impl(self.argPath, self.argRecursive,
                      self.argSkipExisting, self.argCmd)
    self.ui.stats()
93
87
94
88
95
-
96
- def add_dir_impl2 (self , path , recursive , skipExisting , cmd , duplicate_db ):
89
+ def add_dir_impl2 (self , path , recursive , skipExisting , cmd ):
97
90
dir = os .path .normpath (path )
98
- if self .skip_dir (dir ):
91
+ if self .is_skip_dir (dir ):
99
92
self .ui .info ("Skipping dir: %s" % dir )
100
93
self .ui .inc_dir_skipped ()
101
94
return
@@ -106,7 +99,7 @@ def add_dir_impl2(self, path, recursive, skipExisting, cmd, duplicate_db):
106
99
pass
107
100
108
101
if cmd is CollectorCmd .scan :
109
- db .scan (duplicate_db , skipExisting )
102
+ db .scan (self . duplicate_db , skipExisting )
110
103
self .ui .inc_dir_scanned ()
111
104
db .save ()
112
105
elif cmd is CollectorCmd .verify :
@@ -116,17 +109,17 @@ def add_dir_impl2(self, path, recursive, skipExisting, cmd, duplicate_db):
116
109
dirList = []
117
110
dirList .extend (common .get_dir_list_absolute (path , False ))
118
111
for dir in dirList :
119
- self .add_dir_impl2 (dir , recursive , skipExisting , cmd , duplicate_db )
112
+ self .add_dir_impl2 (dir , recursive , skipExisting , cmd )
120
113
if self .ui .is_abort ():
121
114
return
122
115
123
116
124
- def add_dir_impl (self , path , recursive , skipExisting , cmd , duplicate_db ):
125
- duplicate_db .reset ()
117
def add_dir_impl(self, path, recursive, skipExisting, cmd):
    """Load/scan the HashDBs below *path*, then report the duplicates found."""
    self.duplicate_db.reset()
    mode = 'recursively ' if recursive else ''
    self.ui.info("Loading HashDB %sfrom: %s" % (mode, path))
    self.add_dir_impl2(path, recursive, skipExisting, cmd)
    self.ui.debug("Finished loading %d HashDB." % len(self.map))
    self.duplicate_db.show_duplicates()
130
123
131
124
132
125
def remove_hash (self , path , hash ):
@@ -137,6 +130,15 @@ def remove_hash(self, path, hash):
137
130
db .remove (hash )
138
131
139
132
133
def remove_file(self, filepath):
    """Drop *filepath* from the HashDB of its parent directory, if one is loaded."""
    path = os.path.dirname(filepath)
    db = self.map.get(path)
    if db:
        db.remove_filename(os.path.basename(filepath))
    else:
        self.ui.debug("remove_file: HashDB not found: %s" % path)
141
+
140
142
def save_hashes (self , forceSave = False ):
141
143
self .ui .info ("Start saving HashDB" )
142
144
for path , db in self .map .items ():
@@ -145,23 +147,18 @@ def save_hashes(self, forceSave = False):
145
147
pass
146
148
147
149
148
- def find_extern_duplicates_wrapper (self ):
150
def find_extern_duplicates_worker(self):
    """Worker entry point for find_extern_duplicates()."""
    self.ui.reset()
    self.find_extern_duplicates_impl(self.argSrcDir, self.argRecursive,
                                     self.arg_simulate)
    self.ui.stats()
150
154
151
155
152
156
def find_extern_duplicates_impl (self , srcDir , recursive , simulate ):
153
-
154
- if None == srcDir :
155
- self .ui .error ("No src dir set" )
156
- return
157
-
158
157
self .ui .info ("Duplicates found in %s:" % srcDir )
159
158
srcDirList = [srcDir ]
160
159
if recursive :
161
160
srcDirList .extend (common .get_dir_list_absolute (srcDir , recursive ))
162
161
163
- cntDuplicates = 0
164
-
165
162
for curSrcDir in srcDirList :
166
163
fileList = common .get_file_list (curSrcDir )
167
164
@@ -170,7 +167,92 @@ def find_extern_duplicates_impl(self, srcDir, recursive, simulate):
170
167
hash = common .get_hash_from_file (srcFilepath , self .ui )
171
168
found_file = self .find_hash (hash )
172
169
if None != found_file :
173
- cntDuplicates += 1
174
- self .ui .info (srcFilepath )
170
+ self .ui .inc_file_duplicates ()
171
+ self .ui .file (srcFilepath )
172
+
173
+ #self.ui.info("Finished finding duplicates. %d files" % (cntDuplicates))
174
+
175
+
176
+
177
def find_duplicates_in_hashDB_impl(self):
    """Feed every (hash, file) pair from all loaded HashDBs into duplicate_db."""
    self.ui.info("Start finding duplicates in HashDB...")
    self.duplicate_db.reset()
    for _path, db in self.map.items():
        for file_hash, name in db.map.items():
            filepath = os.path.normpath(os.path.join(db.path, name))
            self.duplicate_db.add_hash(file_hash, filepath)
            # NOTE(review): counter assumed to be per file entry -- confirm.
            self.ui.inc_file_processed()
    self.ui.debug("Finished finding duplicates")
186
+
187
+
188
def find_and_show_duplicates_in_hashDB_worker(self):
    """Worker: collect duplicates from the loaded HashDBs and display them."""
    self.find_duplicates_in_hashDB_impl()
    self.duplicate_db.show_duplicates(self.arg_path)
    self.ui.stats()
192
+
193
+
194
def find_and_show_duplicates_in_hashDB(self, path):
    """Start the HashDB duplicate search; *path* is forwarded to show_duplicates()."""
    self.arg_path = path
    self.worker = self.find_and_show_duplicates_in_hashDB_worker
    self.exec()
198
+
199
+
200
def exec(self):
    """Run self.worker, either on this QThread or synchronously.

    NOTE(review): this name shadows both the Python builtin ``exec`` and
    QThread.exec() (the Qt event loop) -- confirm no caller needs the Qt
    method before renaming.
    """
    if constant.USE_THREADS:
        self.start()  # QThread.run() invokes self.worker on the thread
    else:
        self.worker()
205
+
206
+
207
def move_duplicates_with_master_dir(self, master_path, dest_dir, move_flat, simulate):
    """Start moving duplicates into *dest_dir*, keeping the copy under *master_path*."""
    self.arg_master_path = master_path
    self.arg_dest_dir = dest_dir
    self.arg_move_flat = move_flat
    self.arg_simulate = simulate
    self.worker = self.move_duplicates_with_master_dir_worker
    self.exec()
214
+
215
+
216
def move_duplicates_with_master_dir_worker(self):
    """Worker: resolve which duplicates to move, then move them."""
    victims = self.duplicate_db.get_list_with_files_to_move_keep_master_path(
        self.arg_master_path)
    self.move_files(victims, self.arg_move_flat, self.arg_dest_dir,
                    self.arg_simulate)
219
+
220
+
221
def move_files(self, filenames, move_flat, dest_dir, is_simulation):
    """Move *filenames* into *dest_dir* and keep the HashDBs in sync.

    In simulation mode nothing is removed from the HashDBs and nothing is
    saved; the moves themselves are delegated to common.move_file.
    """
    self.ui.reset()
    for filename in filenames:
        target = common.create_duplicate_dest_path(filename, dest_dir, move_flat)
        common.move_file(filename, target, False, is_simulation, self.ui)
        if not is_simulation:
            self.remove_file(filename)
    # Persist the updated HashDBs once, after all moves.
    if not is_simulation:
        self.save_hashes()
    self.ui.stats()
231
+
232
+
233
def find_and_show_duplicates_in_folder(self, path):
    """Start scanning the single folder *path* for duplicate files."""
    self.arg_path = path
    self.worker = self.find_and_show_duplicates_in_folder_worker
    self.exec()
237
+
238
+
239
def find_and_show_duplicates_in_folder_worker(self):
    """Worker: hash the folder, then display all duplicates found."""
    self.ui.reset()
    self.find_and_show_duplicates_in_folder_impl(self.arg_path)
    self.duplicate_db.show_duplicates(None)
    self.ui.stats()
244
+
245
+
246
def find_and_show_duplicates_in_folder_impl(self, path):
    """Hash every file directly inside *path* and record it in duplicate_db.

    Not recursive; honors a UI abort request between files.
    """
    self.duplicate_db.reset()
    files = common.get_file_list(path)
    # Fixed typo in the user-facing progress message ("Scannning" -> "Scanning").
    self.ui.info("Scanning %d files for duplicates in %s" % (len(files), path))
    for item in files:
        if self.ui.is_abort():
            return
        self.ui.info("Hashing: %s" % item)
        filepath = os.path.normpath(os.path.join(path, item))
        file_hash = common.get_hash_from_file(filepath, self.ui)
        self.duplicate_db.add_hash(file_hash, filepath)
    self.ui.info("Finished finding duplicates.")
175
258
176
- self .ui .info ("Finished finding duplicates. %d files" % (cntDuplicates ))
0 commit comments