Skip to content

Commit ad83246

Browse files
committed
modify
1 parent 79950a3 commit ad83246

16 files changed

+360
-418
lines changed

CAFE.zip

-23.7 MB
Binary file not shown.

cal_mCAI.py

Lines changed: 0 additions & 69 deletions
This file was deleted.

cub.txt

Lines changed: 0 additions & 6 deletions
This file was deleted.

cal_cub.py renamed to cube.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
#!/usr/bin/python
22
# -*- coding:utf-8 -*-
33
# Author: Yingying Dong.
4+
# =================================================================================
5+
# Author: Yingying Dong. Email: [email protected]. This script is used to calculate
6+
# CUB indices.
7+
# =================================================================================
48

59
import os
610
import sys

mCAI_comp.py renamed to cube_comp.py

Lines changed: 64 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
# =================================================================================
55
# Author: Yingying Dong. Email: [email protected]. This script is used to calculate
6-
# modified CAI(mCAI) value.
6+
# CUB indices.
77
# =================================================================================
88

99
import os
@@ -16,39 +16,42 @@
1616
os.system('pip install scipy')
1717
from scipy import stats
1818
import get_weight as rs
19+
import codonw
1920

2021

21-
parser = argparse.ArgumentParser(description='Calculate mCAI.', prog='mCAI', usage='%(prog)s [options]')
22+
parser = argparse.ArgumentParser(description='Calculate CUB with customized species.', prog='CUB_comp', usage='%(prog)s [options]')
2223
parser.add_argument('-spe', nargs='?', type=str, help='The Latin name of the species, separated by an underscore, for example: Caenorhabditis_elegans')
2324
parser.add_argument('-inp', nargs='?', required=True, type=str, help='The FASTA file of gene sequences that you want to calculate the mCAI value')
2425
parser.add_argument('-genome', nargs='?', type=str, help='The FASTA file of the species genome')
2526
parser.add_argument('-gff', nargs='?', type=str, help='The annotation file GFF3 format of the species')
26-
parser.add_argument('-o', nargs='?', type=str, default='mCAI.txt',
27-
help='The file name of output mCAI value.The default file name is \'mCAI.txt\'')
27+
parser.add_argument('-o', nargs='?', type=str, default='cub.txt',
28+
help='The file name of output mCAI value.The default file name is \'cub.txt\'')
29+
parser.add_argument('-cub',nargs='?', type=list or str, default=["CAI","CBI"],
30+
help='The CUB indices you want to calculate, you can input one or more indices, such as ["CAI","ENC"]')
2831
args = parser.parse_args()
2932

3033

31-
def cal_mcai(file, species, out):
32-
CAI_file = open(out, 'w')
34+
def cal_mcai(dataSource, species, indices, out):
35+
result = open(out, 'w+')
3336
weight_table = []
3437
for line in species:
3538
weight_table.append(line.strip().split('\t'))
3639
codon_weight = {}
3740
for i in weight_table:
3841
codon_weight[i[0]] = float(i[1])
3942

40-
weight_list = []
41-
CAI_file.write('gene_id\tmCAI_value\n')
42-
f = open(file, 'r')
43-
f2 = f.read()
44-
#print(type(f2))
45-
f2 += '>'
46-
f3 = f2.split('\n')
47-
4843
dna = ''
4944
header = ''
45+
weight_list = []
46+
result.write("gene_id\t")
47+
result.write("\t".join(indices))
48+
result.write("\n")
49+
indices = [i.lower() for i in indices]
50+
51+
dataSource += '\n>'
52+
f = dataSource.split('\n')
5053

51-
for line in f3:
54+
for line in f:
5255
if line.startswith('>') and dna == '':
5356
header = line.strip().replace('>', '')
5457
elif not line.startswith('>'):
@@ -58,14 +61,56 @@ def cal_mcai(file, species, out):
5861
codon = dna[j:j + 3]
5962
if codon in codon_weight:
6063
weight_list.append(codon_weight[codon])
64+
# print(type(dna))
6165
CAI = stats.gmean(weight_list)
62-
CAI_file.write('{}\t{}\n'.format(header.replace('>', ''), CAI))
66+
index_list = []
67+
cseq = codonw.CodonSeq(dna)
68+
for i in indices:
69+
if i == "cai":
70+
index_list.append(CAI)
71+
elif i == "gc3s":
72+
index_list.append(cseq.silent_base_usage())
73+
elif i == "cbi":
74+
if species == "Escherichia_coli":
75+
index_list.append(cseq.cbi())
76+
elif species == "Bacillus_subtilis":
77+
index_list.append(cseq.cbi(1))
78+
elif species == "Saccharomyces_cerevisiae_S288C":
79+
index_list.append(cseq.cbi(2))
80+
else:
81+
index_list.append("NA")
82+
elif i == "fop":
83+
if species == "Escherichia_coli":
84+
index_list.append(cseq.fop())
85+
elif species == "Bacillus_subtilis":
86+
index_list.append(cseq.fop(1))
87+
elif species == "Dictyostelium_discoideum":
88+
index_list.append(cseq.fop(2))
89+
elif species == "Aspergillus_nidulans":
90+
index_list.append(cseq.fop(3))
91+
elif species == "Saccharomyces_cerevisiae_S288C":
92+
index_list.append(cseq.fop(4))
93+
elif species == "Drosophila_melanogaster":
94+
index_list.append(cseq.fop(5))
95+
elif species == "Caenorhabditis_elegans":
96+
index_list.append(cseq.fop(6))
97+
elif species == "Neurospora_crassa":
98+
index_list.append(cseq.fop(7))
99+
else:
100+
index_list.append("NA")
101+
else:
102+
index_list.append(getattr(cseq, i))
103+
index_list = [str(num) for num in index_list]
104+
105+
result.write('{}\t'.format(header))
106+
result.write("\t".join(index_list))
107+
result.write('\n')
63108
header = line.strip().replace('>', '')
64-
dna = ''
109+
dna = ""
65110
weight_list = []
66111

67-
f.close()
68-
CAI_file.close()
112+
dataSource.close()
113+
result.close()
69114

70115

71116
if __name__ == '__main__':

cube_tool.egg-info/PKG-INFO

Lines changed: 0 additions & 8 deletions
This file was deleted.

cube_tool.egg-info/SOURCES.txt

Lines changed: 0 additions & 8 deletions
This file was deleted.

cube_tool.egg-info/dependency_links.txt

Lines changed: 0 additions & 1 deletion
This file was deleted.

cube_tool.egg-info/requires.txt

Lines changed: 0 additions & 1 deletion
This file was deleted.

cube_tool.egg-info/top_level.txt

Lines changed: 0 additions & 1 deletion
This file was deleted.

0 commit comments

Comments
 (0)