-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtestFreq.py
48 lines (36 loc) · 1.34 KB
/
testFreq.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#!/usr/bin/python
import os, sys, time
'''
Script that reads the new tabix file and counts the variants that previously had an allele frequency but have no allele frequency in the AF325 dataset.
Those variants are also written to the results file.
'''
# 0-based columns compared on every line. Judging by the counter names and the
# summary messages, column 19 presumably holds the previous (CG50 / Wellderly)
# allele frequency and column 20 the new (AF325) one -- confirm against the
# tabix file layout.
PREVIOUS_AF_COLUMN = 19
NEW_AF_COLUMN = 20
MISSING_VALUE = 'N/A'


def count_af_changes(infile, outres, progress_every=10000):
    """Classify every line of *infile* by comparing its two AF columns.

    infile         -- iterable of tab-separated text lines.
    outres         -- writable file-like object; every line whose new AF is
                      'N/A' while its previous AF is not gets copied to it
                      unchanged.
    progress_every -- print a progress line each time this many lines have
                      been processed.

    Returns the tuple
    (total_var, missing_AF325, missing_CG50, different_count).

    Blank lines are skipped and not counted (they used to abort the run with
    an IndexError). A non-blank line with fewer than 21 columns still raises
    IndexError.
    """
    total_var = 0
    missing_AF325 = 0
    missing_CG50 = 0
    different_count = 0
    for lin in infile:
        stripped = lin.strip()
        if not stripped:
            continue
        total_var += 1
        if total_var % progress_every == 0:
            # A space now separates the count from the timestamp.
            print(str(total_var) + " " + time.asctime())
        line = stripped.split("\t")
        previous_af = line[PREVIOUS_AF_COLUMN]
        new_af = line[NEW_AF_COLUMN]
        if new_af == MISSING_VALUE and previous_af != MISSING_VALUE:
            missing_AF325 += 1
            outres.write(lin)
        elif previous_af == MISSING_VALUE and new_af != MISSING_VALUE:
            missing_CG50 += 1
        elif previous_af != new_af:
            different_count += 1
    return total_var, missing_AF325, missing_CG50, different_count


def summary_lines(total_var, missing_AF325, missing_CG50, different_count):
    """Return the four summary lines as a list of strings.

    Fractions use true division (the original integer division always
    yielded 0 under Python 2) and are reported as 0.0 when no line was read,
    instead of raising ZeroDivisionError.
    """
    def fraction(count):
        if not total_var:
            return 0.0
        return float(count) / total_var

    return [
        "Total number of variants " + str(total_var),
        "Variants previously found in wellderly missing in the new dataset "
        + str(missing_AF325) + " " + str(fraction(missing_AF325)),
        "New variants found in the new dataset "
        + str(missing_CG50) + " " + str(fraction(missing_CG50)),
        "Variants that have a different AF "
        + str(different_count) + " " + str(fraction(different_count)),
    ]


def main(argv):
    """Run the comparison: argv[1] is the input file, argv[2] the results file."""
    if len(argv) < 3:
        sys.exit("usage: " + str(argv[0] if argv else "testFreq.py")
                 + " <allele_file> <result_file>")
    allele_file = str(argv[1])
    resfilename = str(argv[2])
    # The context managers close both files even if a line fails to parse;
    # the results file was previously never closed at all.
    with open(allele_file, 'r') as infile, open(resfilename, 'w') as outres:
        counts = count_af_changes(infile, outres)
    for text in summary_lines(*counts):
        print(text)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))