dyyvgug
diff --git a/‎CAFE.zip
-23.7 MB b/‎CAFE.zip
-23.7 MB
diff --git a/‎cal_mCAI.py
Lines changed: 0 additions & 69 deletions b/‎cal_mCAI.py
Lines changed: 0 additions & 69 deletions
diff --git a/‎cub.txt
Lines changed: 0 additions & 6 deletions b/‎cub.txt
Lines changed: 0 additions & 6 deletions
diff --git a/‎cal_cub.py renamed to ‎cube.py
Lines changed: 4 additions & 0 deletions b/‎cal_cub.py renamed to ‎cube.py
Lines changed: 4 additions & 0 deletions
diff --git a/‎mCAI_comp.py renamed to ‎cube_comp.py
Lines changed: 64 additions & 19 deletions b/‎mCAI_comp.py renamed to ‎cube_comp.py
Lines changed: 64 additions & 19 deletions
diff --git a/‎cube_tool.egg-info/PKG-INFO
Lines changed: 0 additions & 8 deletions b/‎cube_tool.egg-info/PKG-INFO
Lines changed: 0 additions & 8 deletions
diff --git a/‎cube_tool.egg-info/SOURCES.txt
Lines changed: 0 additions & 8 deletions b/‎cube_tool.egg-info/SOURCES.txt
Lines changed: 0 additions & 8 deletions
diff --git a/‎cube_tool.egg-info/dependency_links.txt
Lines changed: 0 additions & 1 deletion b/‎cube_tool.egg-info/dependency_links.txt
Lines changed: 0 additions & 1 deletion
diff --git a/‎cube_tool.egg-info/requires.txt
Lines changed: 0 additions & 1 deletion b/‎cube_tool.egg-info/requires.txt
Lines changed: 0 additions & 1 deletion
diff --git a/‎cube_tool.egg-info/top_level.txt
Lines changed: 0 additions & 1 deletion b/‎cube_tool.egg-info/top_level.txt
Lines changed: 0 additions & 1 deletion
@@ -1,6 +1,10 @@
 #!/usr/bin/python
 # -*- coding:utf-8 -*-
 # Author: Yingying Dong.
+# =================================================================================
+# Author: Yingying Dong. Email: [email protected]. This script is used to calculate
+#  codon usage bias (CUB) indices.
+# =================================================================================
 
 import os
 import sys
 
@@ -3,7 +3,7 @@
 
 # =================================================================================
 # Author: Yingying Dong. Email: [email protected]. This script is used to calculate
-#  modified CAI(mCAI) value.
+#  codon usage bias (CUB) indices.
 # =================================================================================
 
 import os
@@ -16,39 +16,42 @@
     os.system('pip install scipy')
     from scipy import stats
 import get_weight as rs
+import codonw
 
 
-parser = argparse.ArgumentParser(description='Calculate mCAI.', prog='mCAI', usage='%(prog)s [options]')
+parser = argparse.ArgumentParser(description='Calculate CUB with customized species.', prog='CUB_comp', usage='%(prog)s [options]')
 parser.add_argument('-spe', nargs='?', type=str, help='The Latin name of the species, separated by an underscore, for example: Caenorhabditis_elegans')
 parser.add_argument('-inp', nargs='?', required=True, type=str, help='The FASTA file of gene sequences that you want to calculate the mCAI value')
 parser.add_argument('-genome', nargs='?', type=str, help='The FASTA file of the species genome')
 parser.add_argument('-gff', nargs='?', type=str, help='The annotation file GFF3 format of the species')
-parser.add_argument('-o', nargs='?', type=str, default='mCAI.txt',
-                    help='The file name of output mCAI value.The default file name is \'mCAI.txt\'')
+parser.add_argument('-o', nargs='?', type=str, default='cub.txt',
+                    help='The file name of output mCAI value.The default file name is \'cub.txt\'')
+parser.add_argument('-cub',nargs='?', type=list or str, default=["CAI","CBI"],
+                    help='The CUB indices you want to calculate, you can input one or more indices, such as ["CAI","ENC"]')
 args = parser.parse_args()
 
 
-def cal_mcai(file, species, out):
-    CAI_file = open(out, 'w')
+def cal_mcai(dataSource, species, indices, out):
+    result = open(out, 'w+')
     weight_table = []
     for line in species:
         weight_table.append(line.strip().split('\t'))
     codon_weight = {}
     for i in weight_table:
         codon_weight[i[0]] = float(i[1])
 
-    weight_list = []
-    CAI_file.write('gene_id\tmCAI_value\n')
-    f = open(file, 'r')
-    f2 = f.read()
-    #print(type(f2))
-    f2 += '>'
-    f3 = f2.split('\n')
-
     dna = ''
     header = ''
+    weight_list = []
+    result.write("gene_id\t")
+    result.write("\t".join(indices))
+    result.write("\n")
+    indices = [i.lower() for i in indices]
+
+    dataSource += '\n>'
+    f = dataSource.split('\n')
 
-    for line in f3:
+    for line in f:
         if line.startswith('>') and dna == '':
             header = line.strip().replace('>', '')
         elif not line.startswith('>'):
@@ -58,14 +61,56 @@ def cal_mcai(file, species, out):
                 codon = dna[j:j + 3]
                 if codon in codon_weight:
                     weight_list.append(codon_weight[codon])
+            # print(type(dna))
             CAI = stats.gmean(weight_list)
-            CAI_file.write('{}\t{}\n'.format(header.replace('>', ''), CAI))
+            index_list = []
+            cseq = codonw.CodonSeq(dna)
+            for i in indices:
+                if i == "cai":
+                    index_list.append(CAI)
+                elif i == "gc3s":
+                    index_list.append(cseq.silent_base_usage())
+                elif i == "cbi":
+                    if species == "Escherichia_coli":
+                        index_list.append(cseq.cbi())
+                    elif species == "Bacillus_subtilis":
+                        index_list.append(cseq.cbi(1))
+                    elif species == "Saccharomyces_cerevisiae_S288C":
+                        index_list.append(cseq.cbi(2))
+                    else:
+                        index_list.append("NA")
+                elif i == "fop":
+                    if species == "Escherichia_coli":
+                        index_list.append(cseq.fop())
+                    elif species == "Bacillus_subtilis":
+                        index_list.append(cseq.fop(1))
+                    elif species == "Dictyostelium_discoideum":
+                        index_list.append(cseq.fop(2))
+                    elif species == "Aspergillus_nidulans":
+                        index_list.append(cseq.fop(3))
+                    elif species == "Saccharomyces_cerevisiae_S288C":
+                        index_list.append(cseq.fop(4))
+                    elif species == "Drosophila_melanogaster":
+                        index_list.append(cseq.fop(5))
+                    elif species == "Caenorhabditis_elegans":
+                        index_list.append(cseq.fop(6))
+                    elif species == "Neurospora_crassa":
+                        index_list.append(cseq.fop(7))
+                    else:
+                        index_list.append("NA")
+                else:
+                    index_list.append(getattr(cseq, i))
+            index_list = [str(num) for num in index_list]
+
+            result.write('{}\t'.format(header))
+            result.write("\t".join(index_list))
+            result.write('\n')
             header = line.strip().replace('>', '')
-            dna = ''
+            dna = ""
             weight_list = []
 
-    f.close()
-    CAI_file.close()
+    dataSource.close()
+    result.close()
 
 
 if __name__ == '__main__':