Skip to content

Commit 0ebcc86

Browse files
committed
modify cub calculator
1 parent 2e8204f commit 0ebcc86

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

48 files changed

+393
-13302
lines changed

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
.idea/
2+
resource/
3+
cube_tool.egg-info/
4+
build/

.idea/.gitignore

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.idea/inspectionProfiles/profiles_settings.xml

Lines changed: 6 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.idea/mCAI.iml

Lines changed: 8 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.idea/misc.xml

Lines changed: 4 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.idea/modules.xml

Lines changed: 8 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.idea/vcs.xml

Lines changed: 6 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
# CAFE:Codon Adaptation Facile Estimation
2-
### CAFE can calculate the mCAI(modified Codon Adaptation Index) value, and optimize gene sequences to increase expression.
1+
# CUBE:Codon Usage Bias Ensemble
2+
### CUBE can calculate the mCAI(modified Codon Adaptation Index) value, and optimize gene sequences to increase expression.
33
##### Created By: Yingying Dong
4-
##### Email: dyyvgug@163.com
4+
##### Email: dyyvgug@gmail.com
55

66
  Before using, please make sure that Python 3.x is installed on your computer, then download the repository to your local machine.
77

cal_cub.py

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
#!/usr/bin/python
2+
# -*- coding:utf-8 -*-
3+
# Author: Yingying Dong.
4+
5+
import os
6+
import sys
7+
import platform
8+
import argparse
9+
from scipy import stats
10+
import codonw
11+
12+
13+
# Command-line interface for the CUB calculator.
# NOTE: arguments are parsed at module level, so importing this file also
# parses sys.argv; the ``__main__`` guard below reads the resulting ``args``.
parser = argparse.ArgumentParser(description='Calculate CUB indices.', prog='CUB', usage='%(prog)s [options]')
parser.add_argument('-spe', nargs='?', required=True, type=str, default='Caenorhabditis_elegans',
                    help='The Latin name of the species, separated by an underscore, for example: Caenorhabditis_elegans')
parser.add_argument('-i', nargs='?', required=True, type=argparse.FileType('r'),
                    help='The FASTA file of the gene sequences that you want to calculate CUB values')
parser.add_argument('-o', nargs='?', type=str, default='cub.txt',
                    help='The file name of output CUB value.The default file name is \'cub.txt\'')
# ``type=list or str`` evaluated to ``list``, which split a value such as
# "CAI" into ['C', 'A', 'I']. ``nargs='+'`` collects one or more plain
# strings into a list instead, matching the shape of the default.
parser.add_argument('-cub', nargs='+', type=str, default=["CAI", "CBI"],
                    help='The CUB indices you want to calculate, you can input one or more indices separated by spaces, such as: -cub CAI ENC')
args = parser.parse_args()
21+
22+
23+
# Positional arguments passed to ``CodonSeq.cbi`` / ``CodonSeq.fop`` for each
# species, keyed by the space-separated Latin name.
# NOTE(review): the integers presumably select a built-in reference set of the
# codonw package -- confirm against the codonw documentation.
_CBI_ARGS = {
    'Escherichia coli': (),
    'Bacillus subtilis': (1,),
    'Saccharomyces cerevisiae': (2,),
}
_FOP_ARGS = {
    'Escherichia coli': (),
    'Bacillus subtilis': (1,),
    'Dictyostelium discoideum': (2,),
    'Aspergillus nidulans': (3,),
    'Saccharomyces cerevisiae': (4,),
    'Drosophila melanogaster': (5,),
    'Caenorhabditis elegans': (6,),
    'Neurospora crassa': (7,),
}


def _load_codon_weights(path):
    """Read a tab-separated ``codon<TAB>weight`` file into a dict of floats."""
    codon_weight = {}
    with open(path, 'r') as weight_file:
        for line in weight_file:
            fields = line.strip().split('\t')
            if fields == ['']:
                # Blank (e.g. trailing) lines carry no weight entry.
                continue
            codon_weight[fields[0]] = float(fields[1])
    return codon_weight


def _iter_fasta_records(text):
    """Yield ``(header, upper-cased sequence)`` for each FASTA record in *text*.

    Records without any sequence line are skipped.
    """
    header = ''
    chunks = []
    for line in text.split('\n'):
        if line.startswith('>'):
            if chunks:
                yield header, ''.join(chunks).upper()
            header = line.strip().replace('>', '')
            chunks = []
        else:
            piece = line.strip()
            if piece:
                chunks.append(piece)
    # The last record has no following header line to flush it.
    if chunks:
        yield header, ''.join(chunks).upper()


def _weighted_cai(dna, codon_weight):
    """Return the geometric mean of the weights of the codons found in *dna*.

    Codons missing from *codon_weight* are ignored; "NA" is returned when no
    codon of the sequence has a weight.
    """
    weights = [
        codon_weight[dna[pos:pos + 3]]
        for pos in range(0, len(dna), 3)
        if dna[pos:pos + 3] in codon_weight
    ]
    if not weights:
        return "NA"
    return stats.gmean(weights)


def cal_cub(dataSource, species, output, indices):
    """Calculate codon-usage-bias indices for every gene of a FASTA text.

    Args:
        dataSource: FASTA-formatted text (the content, not a file name).
        species: Latin species name separated by an underscore; it is also the
            name of the weight file looked up under ``resource/weight/``.
        output: Path of the tab-separated result file to write.
        indices: Names of the indices to compute. They are matched
            case-insensitively; the header row keeps the spelling given here.

    Returns:
        None. One row per gene is written to *output*. If no weight file
        exists for *species*, a message is printed and nothing is written.

    Raises:
        AttributeError: if an index name is not "cai", "gc3s", "cbi" or "fop"
            and is not an attribute of ``codonw.CodonSeq``.
    """
    # os.path.join keeps the lookup portable instead of special-casing
    # Windows and Linux (any other platform left the path undefined).
    weight_path = os.path.join('resource', 'weight', species)
    if not os.path.exists(weight_path):
        print('\tThe calculation of this species is not supported, and the species that supports calculation are mentioned in the \'supported_species.txt\'.\n\t If you have the genome and GFF annotation files of the species, you can generate weight from the cal_RSCU.py and cal_weight.R file, and then use the script to calculate the mCAI value')
        return

    codon_weight = _load_codon_weights(weight_path)
    index_names = [name.lower() for name in indices]
    # The command line uses underscores while the reference tables use spaces.
    species_key = species.replace('_', ' ')
    # A CodonSeq is only needed for indices other than the weight-based CAI.
    needs_codonw = any(name != 'cai' for name in index_names)

    with open(output, 'w') as result:
        result.write('gene_id\t' + '\t'.join(indices) + '\n')
        for header, dna in _iter_fasta_records(dataSource):
            cseq = codonw.CodonSeq(dna) if needs_codonw else None
            values = []
            for name in index_names:
                if name == 'cai':
                    values.append(_weighted_cai(dna, codon_weight))
                elif name == 'gc3s':
                    values.append(cseq.silent_base_usage())
                elif name == 'cbi':
                    if species_key in _CBI_ARGS:
                        values.append(cseq.cbi(*_CBI_ARGS[species_key]))
                    else:
                        values.append("NA")
                elif name == 'fop':
                    if species_key in _FOP_ARGS:
                        values.append(cseq.fop(*_FOP_ARGS[species_key]))
                    else:
                        values.append("NA")
                else:
                    # Any other index is looked up on the CodonSeq object by
                    # name and called when it is a method, so the computed
                    # value (not the method object) ends up in the output.
                    value = getattr(cseq, name)
                    values.append(value() if callable(value) else value)
            # The gene id is written exactly once per row.
            result.write('{}\t'.format(header))
            result.write('\t'.join(str(value) for value in values))
            result.write('\n')
117+
118+
119+
if __name__ == '__main__':
    # Script entry point: read the whole FASTA input given with -i and hand
    # it, together with the other parsed options, to cal_cub.
    fasta_text = args.i.read()
    cal_cub(dataSource=fasta_text, species=args.spe, output=args.o, indices=args.cub)

mCAI.py renamed to cal_mCAI.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,4 +66,4 @@ def cal_mcai(file, species, out):
6666

6767

6868
if __name__ == '__main__':
69-
cal_mcai(args.inp, args.spe, args.o)
69+
cal_mcai(args.inp, args.spe, args.o)

0 commit comments

Comments
 (0)