dyyvgug
diff --git a/‎.gitignore
Lines changed: 4 additions & 0 deletions b/‎.gitignore
Lines changed: 4 additions & 0 deletions
diff --git a/‎.idea/.gitignore
Lines changed: 3 additions & 0 deletions b/‎.idea/.gitignore
Lines changed: 3 additions & 0 deletions
diff --git a/‎.idea/inspectionProfiles/profiles_settings.xml
Lines changed: 6 additions & 0 deletions b/‎.idea/inspectionProfiles/profiles_settings.xml
Lines changed: 6 additions & 0 deletions
diff --git a/‎.idea/mCAI.iml
Lines changed: 8 additions & 0 deletions b/‎.idea/mCAI.iml
Lines changed: 8 additions & 0 deletions
diff --git a/‎.idea/misc.xml
Lines changed: 4 additions & 0 deletions b/‎.idea/misc.xml
Lines changed: 4 additions & 0 deletions
diff --git a/‎.idea/modules.xml
Lines changed: 8 additions & 0 deletions b/‎.idea/modules.xml
Lines changed: 8 additions & 0 deletions
diff --git a/‎.idea/vcs.xml
Lines changed: 6 additions & 0 deletions b/‎.idea/vcs.xml
Lines changed: 6 additions & 0 deletions
diff --git a/‎README.md
Lines changed: 3 additions & 3 deletions b/‎README.md
Lines changed: 3 additions & 3 deletions
diff --git a/‎cal_cub.py
Lines changed: 120 additions & 0 deletions b/‎cal_cub.py
Lines changed: 120 additions & 0 deletions
diff --git a/‎mCAI.py renamed to ‎cal_mCAI.py
Lines changed: 1 addition & 1 deletion b/‎mCAI.py renamed to ‎cal_mCAI.py
Lines changed: 1 addition & 1 deletion
@@ -0,0 +1,4 @@
+.idea/
+resource/
+cube_tool.egg-info/
+build/
@@ -1,7 +1,7 @@
-# CAFE：Codon Adaptation Facile Estimation
-### CAFE can calculate the mCAI(modified Codon Adaptation Index) value, and optimize gene sequences to increase expression.
+# CUBE: Codon Usage Bias Ensemble
+### CUBE can calculate the mCAI (modified Codon Adaptation Index) value and optimize gene sequences to increase expression.
 ##### Created By: Yingying Dong
-##### Email: dyyvgug@163.com
+##### Email: dyyvgug@gmail.com
 
 &#8195;&#8195;Before using, please make sure that python 3.X has been installed on your computer. When using, download the repository to the local.
 
 
@@ -0,0 +1,120 @@
+#!/usr/bin/python
+# -*- coding:utf-8 -*-
+# Author: Yingying Dong.
+
+import os
+import sys
+import platform
+import argparse
+from scipy import stats
+import codonw
+
+
+# Command-line interface: species name, input FASTA, output file and the list
+# of codon-usage-bias (CUB) indices to report.
+parser = argparse.ArgumentParser(description='Calculate CUB indices.', prog='CUB', usage='%(prog)s [options]')
+parser.add_argument('-spe', nargs='?', required=True, type=str, default='Caenorhabditis_elegans', help='The Latin name of the species, separated by an underscore, for example: Caenorhabditis_elegans')
+parser.add_argument('-i', nargs='?', required=True, type=argparse.FileType('r'), help='The FASTA file of the gene sequences that you want to calculate CUB values')
+parser.add_argument('-o', nargs='?', type=str, default='cub.txt',
+                    help='The file name of output CUB value.The default file name is \'cub.txt\'')
+# nargs='+' collects one or more space-separated names into a list of strings.
+# The earlier type=list or str evaluated to plain list, which split a single
+# name such as CAI into the characters C, A, I.
+parser.add_argument('-cub', nargs='+', type=str, default=["CAI", "CBI"],
+                    help='The CUB indices you want to calculate, you can input one or more indices separated by spaces, such as: -cub CAI ENC')
+args = parser.parse_args()
+
+
+# Positional arguments passed to cseq.cbi() for each species the original
+# if/elif chain handled. An empty tuple keeps the call argument-free.
+# Keys use the underscore-separated form that the '-spe' option takes.
+_CBI_ARGS = {
+    'Escherichia_coli': (),
+    'Bacillus_subtilis': (1,),
+    'Saccharomyces_cerevisiae': (2,),
+}
+
+# Same mapping for cseq.fop().
+_FOP_ARGS = {
+    'Escherichia_coli': (),
+    'Bacillus_subtilis': (1,),
+    'Dictyostelium_discoideum': (2,),
+    'Aspergillus_nidulans': (3,),
+    'Saccharomyces_cerevisiae': (4,),
+    'Drosophila_melanogaster': (5,),
+    'Caenorhabditis_elegans': (6,),
+    'Neurospora_crassa': (7,),
+}
+
+
+def _read_codon_weights(path):
+    """Return a {codon: weight} dict read from a tab-separated weight file."""
+    codon_weight = {}
+    with open(path, 'r') as weight_file:
+        for line in weight_file:
+            fields = line.strip().split('\t')
+            # Blank or truncated lines carry no weight; skip them instead of
+            # failing with an IndexError.
+            if len(fields) >= 2:
+                codon_weight[fields[0]] = float(fields[1])
+    return codon_weight
+
+
+def _iter_fasta(text):
+    """Yield (header, upper-cased sequence) for every non-empty FASTA record."""
+    header = ''
+    chunks = []
+    for line in text.split('\n'):
+        if line.startswith('>'):
+            sequence = ''.join(chunks)
+            if sequence:
+                yield header, sequence
+            header = line.strip().replace('>', '')
+            chunks = []
+        else:
+            # Upper-case every line as it is read, so the last line of a
+            # record is normalised as well.
+            chunks.append(line.strip().upper())
+    sequence = ''.join(chunks)
+    if sequence:
+        yield header, sequence
+
+
+def cal_cub(dataSource, species, output, indices):
+    """Write one row of codon-usage-bias indices per FASTA record to a file.
+
+    The codon weights are read from ``./resource/weight/<species>``. If that
+    file does not exist, a hint is printed and nothing is written.
+
+    :param dataSource: FASTA-formatted text (the file contents, not a path).
+    :param species: species name; it selects the weight file and, with spaces
+        or underscores between the words, the CBI/FOP branch to use.
+    :param output: path of the tab-separated result file (overwritten).
+    :param indices: index names, matched case-insensitively. ``cai`` is the
+        geometric mean of the weights of the codons found in the weight file;
+        ``gc3s``, ``cbi`` and ``fop`` are taken from ``codonw.CodonSeq``; any
+        other name is looked up as an attribute of the ``CodonSeq`` object.
+    :return: None.
+    """
+    # os.path.join keeps the lookup portable; no per-OS branch is needed.
+    weight_path = os.path.join('.', 'resource', 'weight', species)
+    if not os.path.exists(weight_path):
+        print('\tThe calculation of this species is not supported, and the species that supports calculation are mentioned in the \'supported_species.txt\'.\n\t If you have the genome and GFF annotation files of the species, you can generate weight from the cal_RSCU.py and cal_weight.R file, and then use the script to calculate the mCAI value')
+        return
+
+    codon_weight = _read_codon_weights(weight_path)
+    # '-spe' is documented as underscore-separated; normalise so that a name
+    # given with spaces selects the same CBI/FOP entry.
+    species_key = species.replace(' ', '_')
+    wanted = [name.lower() for name in indices]
+
+    with open(output, 'w+') as result:
+        result.write("gene_id\t" + "\t".join(indices) + "\n")
+
+        for header, dna in _iter_fasta(dataSource):
+            weights = [codon_weight[dna[j:j + 3]]
+                       for j in range(0, len(dna), 3)
+                       if dna[j:j + 3] in codon_weight]
+            # A geometric mean of nothing is undefined; report it like the
+            # other unavailable values.
+            cai = stats.gmean(weights) if weights else "NA"
+
+            cseq = codonw.CodonSeq(dna)
+            values = []
+            for name in wanted:
+                if name == "cai":
+                    values.append(cai)
+                elif name == "gc3s":
+                    values.append(cseq.silent_base_usage())
+                elif name == "cbi":
+                    if species_key in _CBI_ARGS:
+                        values.append(cseq.cbi(*_CBI_ARGS[species_key]))
+                    else:
+                        values.append("NA")
+                elif name == "fop":
+                    if species_key in _FOP_ARGS:
+                        values.append(cseq.fop(*_FOP_ARGS[species_key]))
+                    else:
+                        values.append("NA")
+                else:
+                    # NOTE(review): the other CodonSeq indices look like
+                    # methods (cbi/fop/silent_base_usage are called above), so
+                    # call the attribute when it is callable - confirm against
+                    # the codonw API.
+                    attribute = getattr(cseq, name)
+                    values.append(attribute() if callable(attribute) else attribute)
+
+            # The gene id is written exactly once per row.
+            result.write('{}\t'.format(header))
+            result.write("\t".join(str(value) for value in values))
+            result.write('\n')
+
+
+if __name__ == '__main__':
+    # argparse.FileType opened the FASTA file while parsing the arguments;
+    # close it as soon as its contents have been read.
+    with args.i as fasta_file:
+        fasta_text = fasta_file.read()
+    cal_cub(dataSource=fasta_text, species=args.spe, output=args.o, indices=args.cub)
@@ -66,4 +66,4 @@ def cal_mcai(file, species, out):
 
 
 if __name__ == '__main__':
-    cal_mcai(args.inp, args.spe, args.o)
+    cal_mcai(args.inp, args.spe, args.o)
-Original file line number
+Diff line change
@@ @@ -0,0 +1,4 @@ @@
 +.idea/
 +resource/
 +cube_tool.egg-info/
 +build/
Original file line number	Diff line number	Diff line change
`@@ -66,4 +66,4 @@ def cal_mcai(file, species, out):`
`66`	`66`
`67`	`67`
`68`	`68`	`if __name__ == '__main__':`
`69`		`- cal_mcai(args.inp, args.spe, args.o)`
	`69`	`+ cal_mcai(args.inp, args.spe, args.o)`