-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
10 changed files
with
194 additions
and
180 deletions.
There are no files selected for viewing
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
#!/bin/bash | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
#!/bin/bash | ||
#######################################
# Annotate the filtered GATK VCF with reference feature tracks.
#
# Runs `bcftools annotate` once per BED track found under
# "$exec_path/refseqMT" (HV, HP, HS, CDS, RNR, TRN, DLOOP). Every pass writes
# to $vcf_file_annot; between passes the result is copied back over $vcf_file
# so that the next pass stacks on top of the previous annotations. After the
# last pass the fully annotated VCF is $vcf_file_annot.
#
# Globals read:    WD, prefix, ID, exec_path, timestamp
# Globals written: gatk_folder, vcf_file, vcf_file_annot, reference_annot
# Calls:           custom_prints (defined elsewhere), bcftools, cp
# Outputs:         summary line on stdout, errors on stderr
# Returns:         0 on success, 1 as soon as a bcftools or cp step fails
#######################################
annotate_vcf() {

  custom_prints "Annotate variants with reference features context"

  ## In case you started from here you need
  gatk_folder="$WD/VariantCall/gatk_mutect2"
  vcf_file="$gatk_folder/$prefix.$ID.gatk.filt.vcf"
  vcf_file_annot="$gatk_folder/$prefix.$ID.gatk.filt.anno.vcf"

  # Annotate VCF with rCRS reference
  reference_annot="$exec_path/refseqMT"

  # One row per track: BED basename | INFO tag | Number | Type | Description.
  # NOTE(review): "Homoloplymer" looks like a typo for "Homopolymer"; it is
  # kept byte-for-byte because it ends up in the VCF header of the output.
  local -a tracks=(
    'HV|Hypervariable|1|String|Hypervariable'
    'HP|Homopolymer|0|Flag|Homoloplymer'
    'HS|Hotspot|0|Flag|Hotspot'
    'CDS|CDS|1|String|CDS'
    'RNR|RNR|1|String|rRNA'
    'TRN|TRN|1|String|tRNA'
    'DLOOP|DLOOP|0|Flag|DLOOP'
  )
  local last=$(( ${#tracks[@]} - 1 ))
  local i bed tag number type description

  for i in "${!tracks[@]}"; do
    IFS='|' read -r bed tag number type description <<< "${tracks[i]}"

    # Stop before the copy-back when bcftools fails: the redirect has already
    # truncated $vcf_file_annot, and copying it would destroy the input VCF.
    if ! bcftools annotate -a "$reference_annot/$bed.bed" "$vcf_file" \
      -c "CHROM,FROM,TO,$tag" \
      -h <(printf '##INFO=<ID=%s,Number=%s,Type=%s,Description="%s">\n' \
        "$tag" "$number" "$type" "$description") \
      > "$vcf_file_annot"; then
      echo "$timestamp [ERROR]: bcftools annotate failed on $reference_annot/$bed.bed" >&2
      return 1
    fi

    # Feed the result into the next pass; the last pass is not copied back.
    if (( i < last )); then
      cp -- "$vcf_file_annot" "$vcf_file" || return 1
    fi
  done

  # Report the file that holds every annotation, including the final track.
  echo "$timestamp [ATTENTION]: The annotated VCF is at" "$vcf_file_annot"
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
#!/bin/bash | ||
|
||
#######################################
# Filter the input reads by mean quality and length with chopper.
#
# Writes the surviving reads to "$WD/$prefix.filtQ<quality>.fastq" and, only
# when chopper succeeds, re-points the global `reads` at that file so later
# steps consume the filtered reads.
#
# Globals read:    WD, prefix, threads, min_mean_quality, min_length,
#                  max_length, reads, timestamp
# Globals written: chopper_output, reads
# Calls:           custom_prints (defined elsewhere), chopper
# Returns:         0 on success, 1 if chopper fails (`reads` is left untouched)
#######################################
filter_by_quality() {

  custom_prints "Filter reads by: Qual:$min_mean_quality MinLen: $min_length MaxLen: $max_length"

  ## Chopper output
  chopper_output="$WD/$prefix.filtQ$min_mean_quality.fastq"

  # Every expansion is quoted so that paths containing whitespace survive,
  # both as arguments and as the redirect target.
  if ! chopper --threads "$threads" -q "$min_mean_quality" \
    --minlength "$min_length" --maxlength "$max_length" \
    --input "$reads" > "$chopper_output"; then
    echo "$timestamp [ERROR]: chopper failed while filtering $reads" >&2
    return 1
  fi

  reads=$chopper_output
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
#!/bin/bash | ||
|
||
#######################################
# Classify the sample's haplogroup with haplogrep3, once per requested tree.
#
# $haplogrep_trees is a comma-separated list of tree identifiers. The list is
# first handed to `haplogrep3 install-tree`, then each tree is classified
# separately into "$WD/haplogroup/haplogrep3.<tree>.txt". Failures are
# reported but do not abort the remaining trees.
#
# Globals read:    WD, prefix, ID, exec_path, haplogrep_trees, top_hits,
#                  timestamp, color_red, no_color
# Globals written: gatk_folder, vcf_file, haplogroup_folder
# Calls:           custom_prints, create_wd (both defined elsewhere;
#                  create_wd presumably creates the directory — confirm),
#                  "$exec_path/haplogrep3"
# Outputs:         one [ATTENTION] line per report that was produced
#######################################
haplogroup_class() {

  custom_prints "Classify haplogroup"

  ## In case you are starting from here, you need these variables
  gatk_folder="$WD/VariantCall/gatk_mutect2"
  vcf_file="$gatk_folder/$prefix.$ID.gatk.filt.vcf"

  haplogroup_folder="$WD/haplogroup"
  create_wd "$haplogroup_folder"

  ## Install trees
  # Explicit if/else: with `a && b || c` the error branch would also run if
  # the success branch itself failed.
  if "$exec_path/haplogrep3" install-tree "$haplogrep_trees"; then
    echo " "
  else
    echo "${color_red} ERROR ${no_color} while downloading trees. Make sure .yaml has permissions and that you have internet"
  fi

  ## Classify
  local -a trees=()
  local tree report
  IFS="," read -r -a trees <<< "$haplogrep_trees"

  for tree in "${trees[@]}"; do
    report="$haplogroup_folder/haplogrep3.$tree.txt"
    if "$exec_path/haplogrep3" classify --tree="$tree" --in "$vcf_file" --hits "$top_hits" \
      --extend-report --out "$report"; then
      ## SUMMARY RESULTS
      # Announce each tree's own report; a single message after the loop
      # could only name the last tree.
      echo "$timestamp [ATTENTION]: The report with the top $top_hits closest haplogroups is at" "$report"
    else
      echo "${color_red} ERROR ${no_color} Are the trees downloaded?"
    fi
  done

}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
#!/bin/bash | ||
|
||
#######################################
# Map reads to the reference, isolate mitochondrial reads via a Flye
# assembly, and produce the final alignment used for variant calling.
#
# Steps:
#   1. Map $reads to $ref_genome and keep alignments passing $min_mapQ.
#   2. Dump the mapped reads to FASTQ and assemble them with Flye (--meta).
#   3. Map those reads back to the Flye assembly and pick the contig that
#      sorts last on column 3 of assembly_info.txt (coverage in Flye's
#      output) as the mitogenome.
#   4. Keep only reads aligned to that contig and remap them to $ref_genome
#      with a read group, which GATK needs.
#
# Globals read:    WD, prefix, ID, threads, min_mapQ, minimap2_opts,
#                  flye_preset, ref_genome, reads, timestamp
# Globals written: aln_file, MT_reads, flye_folder, contig_ID,
#                  consensus_mitogenome, num_mapped_reads
# Calls:           custom_prints (defined elsewhere), minimap2, samtools,
#                  flye, seqkit
# Outputs:         summary lines on stdout, errors on stderr
# Returns:         0 on success, 1 when no contig can be read from Flye output
#######################################
map_reads() {

  custom_prints "Mapping to reference"

  ## Map reads to reference
  ## GATK needs read groups. -R for that reason in Minimap2.

  ## Minimap2 output
  aln_file="$WD/$prefix.$ID.sorted.bam"
  # $minimap2_opts is deliberately left unquoted: it presumably carries
  # several options that must word-split — confirm against the caller.
  # -F2052 drops unmapped (4) and supplementary (2048) alignments.
  # shellcheck disable=SC2086
  minimap2 --secondary=no $minimap2_opts -g 1k "$ref_genome" "$reads" \
    | samtools view -@ "$threads" -b --min-MQ "$min_mapQ" -F2052 -T "$ref_genome" \
    | samtools sort -@ "$threads" > "$aln_file"

  ## Assemble with flye to remove possible NUMTs
  custom_prints "Assemble with MetaFlye to remove bad quality and some Numts "

  ## Output for first MT reads and assemble with flye
  MT_reads="$WD/$prefix.fastq"
  flye_folder="$WD/flye_for_numts"
  samtools fastq -@ "$threads" "$aln_file" > "$MT_reads"
  rm -- "$aln_file"
  # $flye_preset is left unquoted for the same word-splitting reason.
  # shellcheck disable=SC2086
  flye -t "$threads" --meta $flye_preset "$MT_reads" -o "$flye_folder"

  # Map to flye assembly
  local flye_aln="$flye_folder/aln_$prefix.sorted.bam"
  # shellcheck disable=SC2086
  minimap2 --secondary=no $minimap2_opts -k 25 "$flye_folder/assembly.fasta" "$MT_reads" \
    | samtools view --threads "$threads" -b --min-MQ "$min_mapQ" -F2052 \
    | samtools sort -@ "$threads" > "$flye_aln"
  samtools index -@ "$threads" "$flye_aln"

  custom_prints "Retrieve mitochondria and remap reads"
  # Retrieve the mitochondria in the flye assembly which is the one with the highest coverage.
  contig_ID=$(sort -n -k3 "$flye_folder/assembly_info.txt" | tail -n 1 | cut -f 1)
  if [[ -z "$contig_ID" ]]; then
    # Without a contig name every later step would run on an empty pattern.
    echo "$timestamp [ERROR]: no contig found in $flye_folder/assembly_info.txt" >&2
    return 1
  fi

  ## Save mitogenome flye consensus
  consensus_mitogenome="$WD/MT_genome.fasta"
  seqkit grep -p "$contig_ID" "$flye_folder/assembly.fasta" > "$consensus_mitogenome"

  ## Retrieve reads which mapped to the consensus_mitogenome
  local contig_bam="$flye_folder/aln_${prefix}_${contig_ID}.bam"
  local contig_sorted_bam="$flye_folder/aln_${prefix}_${contig_ID}.sorted.bam"
  samtools view -@ "$threads" -b -F2052 "$flye_aln" "$contig_ID" > "$contig_bam"
  samtools sort -@ "$threads" "$contig_bam" > "$contig_sorted_bam"
  samtools fastq -@ "$threads" "$contig_sorted_bam" > "$MT_reads"

  ## Removing unneeded files
  #rm "$contig_bam"
  #rm "$flye_aln"

  ## Final align file for variant calling
  # shellcheck disable=SC2086
  minimap2 --secondary=no -R '@RG\tID:samplename\tSM:samplename' $minimap2_opts "$ref_genome" "$MT_reads" \
    | samtools view -@ "$threads" -b -F2052 -T "$ref_genome" \
    | samtools sort -@ "$threads" > "$aln_file"
  samtools index -@ "$threads" "$aln_file"

  ## PRINT OUTPUT SUMMARY
  echo "$timestamp [ATTENTION]: Consensus mitogenome is at" "$consensus_mitogenome"
  echo "$timestamp [ATTENTION]: Mitochondrial reads are at: " "$MT_reads"
  echo "$timestamp [ATTENTION]: Mitochondrial reads mapped mitochondrial reference $(basename "$ref_genome") at: " "$aln_file"
  echo "$timestamp [ATTENTION]: Mitochondrial reads mapped mitochondrial consensus at: " "$contig_sorted_bam"
  num_mapped_reads=$(samtools view -c "$aln_file")
  # Report the count itself, not the path of the alignment file.
  echo "$timestamp Number of reads mapped: $num_mapped_reads"
}
Oops, something went wrong.