-
Notifications
You must be signed in to change notification settings - Fork 14
/
Copy pathpyspace.py
80 lines (74 loc) · 3.47 KB
/
pyspace.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#!/usr/bin/env python
#A simple Python script to remove duplicate files...Coded by MCoury
import hashlib, os, optparse, sys
# Compute the MD5 checksum of a given file:
def md5(f, chunk_size=65536):
    """Return the hexadecimal MD5 digest of the file at path ``f``.

    The file is opened in binary mode and hashed piece by piece, so a large
    file is never held in memory all at once.

    f          -- path of the file to hash.
    chunk_size -- number of bytes read per step (default 65536); must be
                  positive. The digest does not depend on this value.

    Raises ValueError if ``chunk_size`` is not positive. Errors raised while
    opening or reading the file are not caught here.
    """
    # A size of 0 would make the first read return b"" and silently produce
    # the digest of an empty file, so reject it up front.
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive number of bytes")
    digest = hashlib.md5()
    # Use a separate name for the handle instead of rebinding the path argument.
    with open(f, 'rb') as stream:
        # iter() with a sentinel keeps reading until read() returns b"" (EOF).
        for chunk in iter(lambda: stream.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()
# Main routine: find duplicate files and delete them after confirmation:
def rm_dup(path, exps):
    """Find files with identical content under ``path`` and delete the extras.

    Every regular file below ``path`` (subdirectories included) is grouped by
    its MD5 digest combined with its size. Files whose path ends with one of
    the ignored suffixes are taken out of their group, so they are neither
    deleted nor used as the surviving copy. In each remaining group the last
    file found is kept; the others are listed and, once the user confirms at
    the prompt, deleted.

    path -- directory to scan. If it is not an existing directory a message
            is printed and nothing else happens.
    exps -- suffixes to ignore, joined with "-" (e.g. ".py-.doc"), or a false
            value (None, "") to ignore nothing.

    Exits through sys.exit() when the answer at the prompt is not "y"/"yes".
    Relies on the md5() function defined in this file.
    """
    if not os.path.isdir(path):  # make sure the given directory exists
        print('specified directory does not exist!')
        return

    # Drop empty fragments (from input such as ".py-"): every path ends with
    # "", so an empty suffix would silently ignore all files.
    suffixes = tuple(s for s in exps.split("-") if s) if exps else ()

    print('Working...')
    print()

    # Group file paths by content signature (digest + size).
    groups = {}
    for root, _dirs, files in os.walk(path):  # os.walk descends into subdirectories too
        for name in files:
            file_path = os.path.join(root, name)
            # A symlink hashes like its target; deleting the target as the
            # "duplicate" would leave a dangling link, so links are skipped.
            if os.path.islink(file_path):
                continue
            try:
                signature = str(md5(file_path)) + str(os.path.getsize(file_path))
            except OSError as err:
                # Unreadable or vanished file: report it and keep scanning.
                print('Skipping {}: {}'.format(file_path, err), file=sys.stderr)
                continue
            groups.setdefault(signature, []).append(file_path)

    # Split each group into candidates and ignored files. New lists are built
    # instead of removing from a list while iterating over it, which skips
    # entries.
    candidates = []   # one list of paths per group, ignored files removed
    ignore_list = []  # ignored files that really had a duplicate
    for members in groups.values():
        had_duplicates = len(members) > 1
        remaining = []
        for member in members:
            if member.endswith(suffixes):  # False for every path when suffixes is empty
                if had_duplicates:
                    ignore_list.append(member)
            else:
                remaining.append(member)
        candidates.append(remaining)

    # Decide once what gets deleted so that the list shown to the user and
    # the files actually removed can never differ. The last file of each
    # group is the copy that is kept.
    to_delete = []
    for remaining in candidates:
        to_delete.extend(remaining[:-1])

    print("Done! Following files will be deleted:\n")
    for doomed in to_delete:
        print(doomed)
    if input("\nEnter (y)es to confirm operation or anything else to abort: ").lower() not in ("y", "yes"):
        sys.exit("Operation cancelled by user. Exiting...")
    print("Deleting...")
    c = 0
    for doomed in to_delete:
        try:
            os.remove(doomed)
        except OSError as err:
            # One failure (permissions, file already gone) should not abort
            # the rest of the run; the file is simply not counted as deleted.
            print('Could not delete {}: {}'.format(doomed, err), file=sys.stderr)
        else:
            c += 1
    if ignore_list:
        print('Done! Found {} duplicate files, deleted {}, and ignored {} on user\'s request...'.format(c+len(ignore_list),c,len(ignore_list)))
    else:
        print('Done! Found and deleted {} files...'.format(c))
if __name__=='__main__':
    # Banner printed on every run.
    print(' ##########A simple Python script to remove duplicate files#########')
    print(' # Coded by monrocoury #')
    print(' # The script relies on the fact #')
    print(' # that if 2 files have the same md5checksum #')
    print(' # they most likely have the same content #')
    print(' ###################################################################')
    # Command line: -p <directory to scan>, -e <suffixes to ignore joined by "-">.
    parser = optparse.OptionParser("usage: python %prog -p <target path> -e <file extensions to ignore separated by ->")
    parser.add_option("-p", dest="target_path", type="string", help="provide target path")
    parser.add_option("-e", dest="ext2ignore", type="string", help="(optional) provide file extensions to ignore separated by - eg: -e .py-.doc")
    (options, args) = parser.parse_args()
    p = options.target_path
    e = options.ext2ignore
    # -p is required: without it p is None, and the os.path.isdir(p) check
    # at the start of rm_dup would raise TypeError instead of a usable message.
    if not p:
        parser.error("a target path is required: -p <target path>")
    rm_dup(p, e)