3
3
4
4
# =================================================================================
5
5
# Author: Yingying Dong. Email: [email protected] .This script is used to calculate
6
- # modified CAI(mCAI) value .
6
+ # CUB indices .
7
7
# =================================================================================
8
8
9
9
import os
16
16
os .system ('pip install scipy' )
17
17
from scipy import stats
18
18
import get_weight as rs
19
+ import codonw
19
20
20
21
21
- parser = argparse .ArgumentParser (description = 'Calculate mCAI .' , prog = 'mCAI ' , usage = '%(prog)s [options]' )
22
+ parser = argparse .ArgumentParser (description = 'Calculate CUB with customized species .' , prog = 'CUB_comp ' , usage = '%(prog)s [options]' )
22
23
parser .add_argument ('-spe' , nargs = '?' , type = str , help = 'The Latin name of the species, separated by an underscore, for example: Caenorhabditis_elegans' )
23
24
parser .add_argument ('-inp' , nargs = '?' , required = True , type = str , help = 'The FASTA file of gene sequences that you want to calculate the mCAI value' )
24
25
parser .add_argument ('-genome' , nargs = '?' , type = str , help = 'The FASTA file of the species genome' )
25
26
parser .add_argument ('-gff' , nargs = '?' , type = str , help = 'The annotation file GFF3 format of the species' )
26
- parser .add_argument ('-o' , nargs = '?' , type = str , default = 'mCAI.txt' ,
27
- help = 'The file name of output mCAI value.The default file name is \' mCAI.txt\' ' )
27
+ parser .add_argument ('-o' , nargs = '?' , type = str , default = 'cub.txt' ,
28
+ help = 'The file name of output mCAI value.The default file name is \' cub.txt\' ' )
29
+ parser .add_argument ('-cub' ,nargs = '?' , type = list or str , default = ["CAI" ,"CBI" ],
30
+ help = 'The CUB indices you want to calculate, you can input one or more indices, such as ["CAI","ENC"]' )
28
31
args = parser .parse_args ()
29
32
30
33
31
- def cal_mcai (file , species , out ):
32
- CAI_file = open (out , 'w' )
34
+ def cal_mcai (dataSource , species , indices , out ):
35
+ result = open (out , 'w+ ' )
33
36
weight_table = []
34
37
for line in species :
35
38
weight_table .append (line .strip ().split ('\t ' ))
36
39
codon_weight = {}
37
40
for i in weight_table :
38
41
codon_weight [i [0 ]] = float (i [1 ])
39
42
40
- weight_list = []
41
- CAI_file .write ('gene_id\t mCAI_value\n ' )
42
- f = open (file , 'r' )
43
- f2 = f .read ()
44
- #print(type(f2))
45
- f2 += '>'
46
- f3 = f2 .split ('\n ' )
47
-
48
43
dna = ''
49
44
header = ''
45
+ weight_list = []
46
+ result .write ("gene_id\t " )
47
+ result .write ("\t " .join (indices ))
48
+ result .write ("\n " )
49
+ indices = [i .lower () for i in indices ]
50
+
51
+ dataSource += '\n >'
52
+ f = dataSource .split ('\n ' )
50
53
51
- for line in f3 :
54
+ for line in f :
52
55
if line .startswith ('>' ) and dna == '' :
53
56
header = line .strip ().replace ('>' , '' )
54
57
elif not line .startswith ('>' ):
@@ -58,14 +61,56 @@ def cal_mcai(file, species, out):
58
61
codon = dna [j :j + 3 ]
59
62
if codon in codon_weight :
60
63
weight_list .append (codon_weight [codon ])
64
+ # print(type(dna))
61
65
CAI = stats .gmean (weight_list )
62
- CAI_file .write ('{}\t {}\n ' .format (header .replace ('>' , '' ), CAI ))
66
+ index_list = []
67
+ cseq = codonw .CodonSeq (dna )
68
+ for i in indices :
69
+ if i == "cai" :
70
+ index_list .append (CAI )
71
+ elif i == "gc3s" :
72
+ index_list .append (cseq .silent_base_usage ())
73
+ elif i == "cbi" :
74
+ if species == "Escherichia_coli" :
75
+ index_list .append (cseq .cbi ())
76
+ elif species == "Bacillus_subtilis" :
77
+ index_list .append (cseq .cbi (1 ))
78
+ elif species == "Saccharomyces_cerevisiae_S288C" :
79
+ index_list .append (cseq .cbi (2 ))
80
+ else :
81
+ index_list .append ("NA" )
82
+ elif i == "fop" :
83
+ if species == "Escherichia_coli" :
84
+ index_list .append (cseq .fop ())
85
+ elif species == "Bacillus_subtilis" :
86
+ index_list .append (cseq .fop (1 ))
87
+ elif species == "Dictyostelium_discoideum" :
88
+ index_list .append (cseq .fop (2 ))
89
+ elif species == "Aspergillus_nidulans" :
90
+ index_list .append (cseq .fop (3 ))
91
+ elif species == "Saccharomyces_cerevisiae_S288C" :
92
+ index_list .append (cseq .fop (4 ))
93
+ elif species == "Drosophila_melanogaster" :
94
+ index_list .append (cseq .fop (5 ))
95
+ elif species == "Caenorhabditis_elegans" :
96
+ index_list .append (cseq .fop (6 ))
97
+ elif species == "Neurospora_crassa" :
98
+ index_list .append (cseq .fop (7 ))
99
+ else :
100
+ index_list .append ("NA" )
101
+ else :
102
+ index_list .append (getattr (cseq , i ))
103
+ index_list = [str (num ) for num in index_list ]
104
+
105
+ result .write ('{}\t ' .format (header ))
106
+ result .write ("\t " .join (index_list ))
107
+ result .write ('\n ' )
63
108
header = line .strip ().replace ('>' , '' )
64
- dna = ''
109
+ dna = ""
65
110
weight_list = []
66
111
67
- f .close ()
68
- CAI_file .close ()
112
+ dataSource .close ()
113
+ result .close ()
69
114
70
115
71
116
if __name__ == '__main__' :
0 commit comments