-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.sh
70 lines (50 loc) · 2.01 KB
/
main.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
## 1) Test variability of nuclear genes in BOLD data
cd /media/inter/mkapun/projects/TETTRIS_barcoding/data/BOLD_TestRun
mkdir Genes && cd Genes
## split genes in separate files
awk '/^>/ {split($0,a,"|"); gsub("\r", "", a[3]); file=a[3]".fasta"} { print > file }' /media/inter/mkapun/projects/TETTRIS_barcoding/data/BOLD_TestRun/bold_fasta.fas
echo '''
library(DECIPHER)
library(ape)
library(pegas)
library(PopGenome)
library(knitr)
filepath <- "/media/inter/mkapun/projects/TETTRIS_barcoding/data/BOLD_TestRun/Genes"
predict_files <- list.files(filepath, pattern=".fasta")
ID=gsub(".fasta","",predict_files)
OUT=list("ID"=c(),"N"=c(),"Length"=c(),"NucDiv"=c(),"Theta"=c())
# Get vector of predict.txt files
predict_files <- list.files(filepath, pattern="predict.txt")
# Get vector of full paths for predict.txt files
predict_full <- file.path(filepath, predict_files)
for (i in ID){
OUT[["ID"]]<-c(OUT[["ID"]],i)
# Multiple alignment with DECIPHER
fas1 <- paste0("/media/inter/mkapun/projects/TETTRIS_barcoding/data/BOLD_TestRun/Genes/",i,".fasta")
seqs <- readDNAStringSet(fas1)
seqs <- RemoveGaps(seqs, "all")
seqs <- OrientNucleotides(seqs)
aligned <- AlignSeqs(seqs)
# BrowseSeqs(aligned, highlight=0)
Length=length(myalign[[1]])
OUT[["Length"]]<-c(OUT[["Length"]],Length)
## convert to ape/Pegas
myalign <- as.DNAbin(aligned)
base.freq(myalign)
seg.sites(myalign)
## Nucleotide Diversity
N<-round(nuc.div(myalign),3)
OUT[["NucDiv"]]<-c(OUT[["NucDiv"]],N)
## Segregating Sites (Theta)
s <- length(seg.sites(myalign))
n <- length(myalign)
T<-round(theta.s(s,n)/Length,3)
OUT[["Theta"]]<-c(OUT[["Theta"]],T)
OUT[["N"]]<-c(OUT[["N"]],n)
}
## Make table
sink("/media/inter/mkapun/projects/TETTRIS_barcoding/data/BOLD_TestRun/Genes.stats")
print(kable(as.data.frame(OUT)))
sink()
''' >/media/inter/mkapun/projects/TETTRIS_barcoding/data/BOLD_TestRun/Genes.r
Rscript /media/inter/mkapun/projects/TETTRIS_barcoding/data/BOLD_TestRun/Genes.r