-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathba1b.py
53 lines (39 loc) · 1.15 KB
/
ba1b.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import numpy as np
def kmer_find(t, k):
    """Print and return the set of most frequent k-mers in ``t``.

    Args:
        t: The text to scan.
        k: Length of the substrings (k-mers) to count; must be >= 1.

    Returns:
        The set of k-mers that occur the maximum number of times in ``t``
        (overlapping occurrences count). The set is empty when ``t`` is
        shorter than ``k``. The same set is also printed, as before.

    Raises:
        ValueError: If ``k`` is less than 1.
    """
    if k < 1:
        raise ValueError("k must be a positive integer")

    # Count every window of length k in a single pass. The last window
    # starts at len(t) - k, hence the "+ 1" in the range bound.
    counts = {}
    for i in range(len(t) - k + 1):
        kmer = t[i:i + k]
        counts[kmer] = counts.get(kmer, 0) + 1

    # default=0 keeps the "text shorter than k" case from raising.
    maxf = max(counts.values(), default=0)
    mostk = {kmer for kmer, seen in counts.items() if seen == maxf}

    print(mostk)
    return mostk
def fasterMostFrequentsKMers(text, k, frequency=-1):
    """Return the k-mers of ``text`` that occur at least ``frequency`` times.

    Args:
        text: The text to scan. It is used verbatim, so callers must strip
            line terminators before passing file content.
        k: Length of the substrings (k-mers) to count; must be >= 1.
        frequency: Minimum number of (possibly overlapping) occurrences a
            k-mer needs to be reported. A negative value (the default)
            selects the highest count found, i.e. the most frequent k-mers.

    Returns:
        A list of qualifying k-mers in order of first occurrence in
        ``text``; empty when ``text`` is shorter than ``k``.

    Raises:
        ValueError: If ``k`` is less than 1.
    """
    if k < 1:
        raise ValueError("k must be a positive integer")

    # The last window starts at len(text) - k, hence the "+ 1".
    frequencyArray = {}
    for i in range(len(text) - k + 1):
        kmer = text[i:i + k]
        frequencyArray[kmer] = frequencyArray.get(kmer, 0) + 1

    # No window fits: nothing to report (and max() of nothing would raise).
    if not frequencyArray:
        return []

    if frequency < 0:
        frequency = max(frequencyArray.values())

    return [kmer for kmer, seen in frequencyArray.items() if seen >= frequency]


def main():
    """Read the text and k from rosalind_ba1b.txt and print the top k-mers."""
    with open("rosalind_ba1b.txt", "r") as f:
        # readline() keeps the line terminator; strip it so it cannot end up
        # inside a k-mer.
        text = f.readline().strip()
        k = int(f.readline())

    for kmer in fasterMostFrequentsKMers(text, k):
        print(kmer)


if __name__ == "__main__":
    main()