Skip to content

Commit

Permalink
some test for haplotype assembly
Browse files Browse the repository at this point in the history
  • Loading branch information
juanjo255 committed Oct 22, 2024
1 parent c23948f commit 32b0c78
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 12 deletions.
4 changes: 2 additions & 2 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
map_identity = float(args[4]) # minimun coverage per cluster
output = args[5] # file to write mt reads
min_num_clusters = int(args[6]) ## Minimum number of clusters to keep with the highest coverage
haplotype_asm= int(args[7]) ## If true perform selection of reads of haplotyping

# MAIN PROGRAM
clusters_list = run(paf_file, map_identity)
Expand All @@ -43,7 +44,6 @@
clusters_info["coverage_norm"] = (
clusters_info["coverage"] / clusters_info["repr_read_len"]
)

## Get kmer composition from representative reads from all the cluster passed
repr_reads = [i for i in clusters_info["id_longest_read"]]
kmer_profiles, ids = utils_rs.get_kmer_profiles(repr_reads, reads_file, 3)
Expand All @@ -58,7 +58,7 @@

## Clustering reads using K-means expecting 2 clusters ##
kmer_reduction_df["cluster_prediction"] = cluster_kmer_profiles(
kmer_reduction_df=kmer_reduction_df, n_clusters=2, max_iter=100
kmer_reduction_df=kmer_reduction_df, n_clusters=(len(kmer_reduction_df) if haplotype_asm else 2), max_iter=100
)

## Get the cluster of interest ##
Expand Down
11 changes: 8 additions & 3 deletions mitnanex_denovo.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ flye_mode='--nano-hq'
keepPercent=90
output_dir='mitnanex_results/'
miniasm_stage=7
haplotype_asm=0

## Help message
mitnanex_help() {
Expand Down Expand Up @@ -42,13 +43,14 @@ mitnanex_help() {
-q Min mapping quality (>=). This is for samtools. [-1].
-f Flye mode. [--nano-hq]
-k keepPercent. Percentage of reads to keep during filter with filtlong. [$keepPercent].
-S Miniasm stage. [$miniasm_stage]
-S Miniasm stage. [$miniasm_stage].
-y Haplotype assembly. [False].
* Help.
"
exit 1
}

while getopts 'i:t:p:m:M:w:c:x:r:s:q:f:k:dS:' opt; do
while getopts 'i:t:p:m:M:w:c:x:r:s:q:f:k:dS:y' opt; do
case $opt in
i)
input_file=$OPTARG
Expand Down Expand Up @@ -95,6 +97,9 @@ while getopts 'i:t:p:m:M:w:c:x:r:s:q:f:k:dS:' opt; do
S)
miniasm_stage=$OPTARG
;;
y)
haplotype_asm=1
;;
*)
mitnanex_help
;;
Expand Down Expand Up @@ -180,7 +185,7 @@ mt_reads_filt(){
echo " "
echo $timestamp': Clustering and discriminating potential mt reads with MITNANEX'
echo " "
python3 main.py $wd$prefix"_sample.sorted.fastq" $wd$prefix".paf" $coverage $map_identity $wd$prefix"_putative_mt_reads.fasta" $min_num_clusters
python3 main.py $wd$prefix"_sample.sorted.fastq" $wd$prefix".paf" $coverage $map_identity $wd$prefix"_putative_mt_reads.fasta" $min_num_clusters $haplotype_asm
}

first_assembly(){
Expand Down
15 changes: 8 additions & 7 deletions mitnanex_reference.sh
Original file line number Diff line number Diff line change
Expand Up @@ -252,14 +252,15 @@ haplogroup_class(){

annotate_vcf(){
#Annotate VFC with rCRS reference
vcf_file_annotated=$vcf_file
if [ -s $vcf_file ]; then
bcftools annotate -a $vcf_file/HV.bed.gz $vcf_file -c "CHROM,FROM,TO,Hypervariable" -h <(echo '##INFO=<ID=Hypervariable,Number=1,Type=String,Description="Hypervariable">') -o vcf_file_annotated
bcftools annotate -a $vcf_file/HP.bed.gz $vcf_file -c "CHROM,FROM,TO,Homopolymer" -h <(echo '##INFO=<ID=Homopolymer,Number=0,Type=Flag,Description="Homoloplymer">') -o vcf_file_annotated
bcftools annotate -a $vcf_file/HS.bed.gz $vcf_file -c "CHROM,FROM,TO,Hotspot" -h <(echo '##INFO=<ID=Hotspot,Number=0,Type=Flag,Description="Hotspot">') -o vcf_file_annotated
bcftools annotate -a $vcf_file/CDS.bed.gz $vcf_file -c "CHROM,FROM,TO,CDS" -h <(echo '##INFO=<ID=CDS,Number=1,Type=String,Description="CDS">') -o vcf_file_annotated
bcftools annotate -a $vcf_file/RNR.bed.gz $vcf_file -c "CHROM,FROM,TO,RNR" -h <(echo '##INFO=<ID=RNR,Number=1,Type=String,Description="rRNA">') -o vcf_file_annotated
bcftools annotate -a $vcf_file/TRN.bed.gz $vcf_file -c "CHROM,FROM,TO,TRN" -h <(echo '##INFO=<ID=TRN,Number=1,Type=String,Description="tRNA">') -o vcf_file_annotated
bcftools annotate -a $vcf_file/DLOOP.bed.gz $vcf_file -c "CHROM,FROM,TO,DLOOP" -h <(echo '##INFO=<ID=DLOOP,Number=0,Type=Flag,Description="DLOOP">') -o vcf_file_annotated
bcftools annotate -a $vcf_file/HV.bed.gz $vcf_file -c "CHROM,FROM,TO,Hypervariable" -h <(echo '##INFO=<ID=Hypervariable,Number=1,Type=String,Description="Hypervariable">') -o $vcf_file_annotated
bcftools annotate -a $vcf_file/HP.bed.gz $vcf_file -c "CHROM,FROM,TO,Homopolymer" -h <(echo '##INFO=<ID=Homopolymer,Number=0,Type=Flag,Description="Homoloplymer">') -o $vcf_file_annotated
bcftools annotate -a $vcf_file/HS.bed.gz $vcf_file -c "CHROM,FROM,TO,Hotspot" -h <(echo '##INFO=<ID=Hotspot,Number=0,Type=Flag,Description="Hotspot">') -o $vcf_file_annotated
bcftools annotate -a $vcf_file/CDS.bed.gz $vcf_file -c "CHROM,FROM,TO,CDS" -h <(echo '##INFO=<ID=CDS,Number=1,Type=String,Description="CDS">') -o $vcf_file_annotated
bcftools annotate -a $vcf_file/RNR.bed.gz $vcf_file -c "CHROM,FROM,TO,RNR" -h <(echo '##INFO=<ID=RNR,Number=1,Type=String,Description="rRNA">') -o $vcf_file_annotated
bcftools annotate -a $vcf_file/TRN.bed.gz $vcf_file -c "CHROM,FROM,TO,TRN" -h <(echo '##INFO=<ID=TRN,Number=1,Type=String,Description="tRNA">') -o $vcf_file_annotated
bcftools annotate -a $vcf_file/DLOOP.bed.gz $vcf_file -c "CHROM,FROM,TO,DLOOP" -h <(echo '##INFO=<ID=DLOOP,Number=0,Type=Flag,Description="DLOOP">') -o $vcf_file_annotated
fi
}

Expand Down

0 comments on commit 32b0c78

Please sign in to comment.