grid

#!/bin/bash

# Name of the selected subcommand ("single" or "multiplex"). It stays empty
# until the subcommand dispatcher further down recognises one.
package=""
# Absolute directory that holds this script, with symlinks resolved. The helper
# R scripts and the BLAST databases are looked up relative to it.
# ${BASH_SOURCE[0]} is quoted so an install path containing whitespace is not
# split into several words before readlink sees it.
GRID_DIR="$( cd "$( dirname "$(readlink -f "${BASH_SOURCE[0]}")" )" && pwd )"
# Directory grid was invoked from; relative paths given on the command line are
# resolved against it and default output directories are created inside it.
WDR=$(pwd)

# Global flags (-h, -v) that may precede the subcommand. The leading ':' in the
# optstring puts getopts in silent mode, so unknown flags are reported by the
# '?' arm below rather than by getopts itself.
while getopts ":hv" opt; do
  case "$opt" in
    h)
      cat <<'USAGE'
Usage:
    grid single <options>       GRiD using a single genome
    grid multiplex <options>    GRiD high throughput
    grid -v                     Version
    grid -h                     Display this help message
USAGE
      exit 0
      ;;
    v)
      printf '%s\n' "grid v1.3"
      exit 0
      ;;
    \?)
      printf '%s\n' "Invalid Option: -$OPTARG" >&2
      exit 1
      ;;
  esac
done
###########################################################################
# Invoked with no arguments at all: print the top-level usage and exit with an
# error status.
if (( $# == 0 )); then
  printf '%s\n' \
    "Usage:" \
    "    grid single <options>       GRiD using a single genome" \
    "    grid multiplex <options>    GRiD high throughput" \
    "    grid -v                     Version" \
    "    grid -h                     Display this help message"
  exit 1
fi
##########################################################################

# Drop whatever the global getopts pass consumed; the next word, if any, is the
# subcommand name.
shift "$(( OPTIND - 1 ))"

subcommand="$1"
# Print the option summary for "grid single" on stdout.
grid_usage_single() {
  cat <<'USAGE'
Usage:
    grid single <options>
    <options>
    -r      Samples directory (single end reads or SAM alignment files)
    -e      Sample filename extension (fq, fastq, fastq.gz, sam) [default fastq]
    -o      Output directory
    -g      Reference genome (fasta)
    -l      Path to file listing a subset of reads
            for analysis [default = analyze all samples in reads directory]
    -n INT  Number of threads [default 1]
    -h      Display this message
USAGE
}

# Print the option summary for "grid multiplex" on stdout.
grid_usage_multiplex() {
  cat <<'USAGE'
Usage:
    grid multiplex <options>
    <options>
    -r         Samples directory (single end reads)
    -e         Sample filename extension (fq, fastq, fastq.gz) [default fastq]
    -o         Output directory
    -d         GRiD database directory
    -c  FLOAT  Coverage cutoff (>= 0.2) [default 1]
    -p         Enable reassignment of ambiguous reads using Pathoscope2
    -t  INT    Theta prior for reads reassignment [default 0]. Requires the -p flag
    -l         Path to file listing a subset of reads
               for analysis [default = analyze all samples in reads directory]
    -m         merge output tables into a single matrix file
    -n  INT    Number of threads [default 1]
    -h         Display this message
USAGE
}

# Dispatch on the subcommand. Each arm records the subcommand in $package,
# parses its own options into the globals the pipeline reads later (READS_DIR,
# READS_EXT, OUTPUT_DIR, NUM_THREAD, LIST/LIS, plus REFGEN for "single" and
# DATABASE_DIR, COV_CUTOFF, THETA, PATHO, MERGE for "multiplex"), validates
# them, and exits with status 1 on any usage error. An unrecognised subcommand
# matches no arm and leaves $package untouched.
case "$subcommand" in
  # Parse options to the single sub command
  single)
    package=$1; shift  # Remove 'single' from the argument list
    LIST=false
    NUM_THREAD=1
    INT='^[0-9]+$'
    # Process package options
    while getopts ":r:e:o:g:l:n:h" opt; do
      case ${opt} in
        r ) READS_DIR=$OPTARG ;;
        e ) READS_EXT=$OPTARG ;;
        o ) OUTPUT_DIR=$OPTARG ;;
        g ) REFGEN=$OPTARG ;;
        l ) LIST=$OPTARG ;;
        n ) NUM_THREAD=$OPTARG ;;
        h )
          grid_usage_single
          exit 0
          ;;
        \? )
          echo "Invalid Option: -$OPTARG" 1>&2
          exit 1
          ;;
        : )
          echo "Invalid Option: -$OPTARG requires an argument" 1>&2
          exit 1
          ;;
        * )
          echo "Unimplemented option: -$OPTARG" >&2
          exit 1
          ;;
      esac
    done

    # "grid single" with nothing after it: show the help text and fail.
    if [ $# -eq 0 ]; then
      grid_usage_single
      exit 1
    fi

    if [ -z "$READS_DIR" ]; then
      echo "-r [option] is required"
      exit 1
    fi
    if [ -z "$READS_EXT" ]; then
      READS_EXT="fastq"
    fi
    case "$READS_EXT" in
      fq|fastq|fastq.gz|sam) ;;
      *)
        echo "Invalid filename extension. Recognized extensions are fq, fastq, fastq.gz, and sam"
        exit 1
        ;;
    esac
    if [ -z "$REFGEN" ]; then
      echo "-g [option] is required"
      exit 1
    fi
    if ! [[ $NUM_THREAD =~ $INT ]]; then
      echo "-n [option] requires an integer"
      exit 1
    fi
    if [ "$LIST" != "false" ]; then
      # Absolute path of the sample list, so it stays valid after later cd's.
      LIS=$(readlink -f "$LIST")
    fi
    # Create the default output directory only once every other option has been
    # accepted, so a usage error does not leave an empty directory behind.
    if [ -z "$OUTPUT_DIR" ]; then
      directory="output_single_$(date +%s)"
      mkdir "$WDR/$directory" || exit 1
      OUTPUT_DIR=$WDR/$directory
    fi

    shift $((OPTIND -1))
    ;;
###############
  multiplex)
    package=$1; shift  # Remove 'multiplex' from the argument list
    PATHO=false
    LIST=false
    COV_CUTOFF=1
    NUM_THREAD=1
    THETA=0
    INT='^[0-9]+$'
    FLOAT='^[0-9]+([.][0-9]+)?$'
    MERGE=false
    # Process package options
    while getopts ":r:e:o:d:c:t:l:n:pmh" opt; do
      case ${opt} in
        r ) READS_DIR=$OPTARG ;;
        e ) READS_EXT=$OPTARG ;;
        o ) OUTPUT_DIR=$OPTARG ;;
        d ) DATABASE_DIR=$OPTARG ;;
        c ) COV_CUTOFF=$OPTARG ;;
        n ) NUM_THREAD=$OPTARG ;;
        p ) PATHO=true ;;
        t ) THETA=$OPTARG ;;
        m ) MERGE=true ;;
        l ) LIST=$OPTARG ;;
        h )
          grid_usage_multiplex
          exit 0
          ;;
        \? )
          echo "Invalid Option: -$OPTARG" 1>&2
          exit 1
          ;;
        : )
          echo "Invalid Option: -$OPTARG requires an argument" 1>&2
          exit 1
          ;;
      esac
    done

    # "grid multiplex" with nothing after it: show the help text and fail.
    if [ $# -eq 0 ]; then
      grid_usage_multiplex
      exit 1
    fi

    if [ -z "$READS_DIR" ]; then
      echo "-r [option] is required"
      exit 1
    fi
    if [ -z "$READS_EXT" ]; then
      READS_EXT="fastq"
    fi
    case "$READS_EXT" in
      fq|fastq|fastq.gz) ;;
      *)
        echo "Invalid filename extension. Recognized extensions for the multiplex module are fq, fastq, fastq.gz"
        exit 1
        ;;
    esac
    if [ -z "$DATABASE_DIR" ]; then
      echo "-d [option] is required"
      exit 1
    fi
    # The numeric format is checked before the value is handed to bc.
    if ! [[ $COV_CUTOFF =~ $FLOAT ]]; then
      echo "-c [option] requires a numeric value"
      exit 1
    fi
    if (( $(bc <<< "$COV_CUTOFF < 0.2") )); then
      echo "required minimum coverage cutoff is 0.2"
      exit 1
    fi
    if ! [[ $THETA =~ $INT ]]; then
      echo "-t [option] requires an integer"
      exit 1
    fi
    if ! [[ $NUM_THREAD =~ $INT ]]; then
      echo "-n [option] requires an integer"
      exit 1
    fi
    if [ "$THETA" -gt 0 ] && [ "$PATHO" == "false" ]; then
      echo "-t [option] requires -p flag" 
      echo "Use grid multiplex -h for usage information "
      exit 1
    fi
    if [ "$LIST" != "false" ]; then
      # Absolute path of the sample list, so it stays valid after later cd's.
      LIS=$(readlink -f "$LIST")
    fi
    # Create the default output directory only once every other option has been
    # accepted, so a usage error does not leave an empty directory behind.
    if [ -z "$OUTPUT_DIR" ]; then
      directory="output_multiplex_$(date +%s)"
      mkdir "$WDR/$directory" || exit 1
      OUTPUT_DIR=$WDR/$directory
    fi

    shift $((OPTIND -1))
    ;;
esac
###################################
# Anything other than a recognised subcommand leaves $package unset or empty;
# reject it here before any pipeline work starts.
case "$package" in
  single|multiplex)
    ;;
  *)
    echo "Invalid option"
    exit 1
    ;;
esac

#############################################
############################################
# single option activated
############################################
############################################
if [ "$package" == "single" ]
  then
# Resolve every user-supplied location to an absolute path from the invocation
# directory, because the steps below change directory several times.
# Expansions are quoted so a path containing whitespace reaches readlink as a
# single argument, and a failed cd aborts instead of resolving relative paths
# against the wrong directory.
cd "$WDR" || exit 1
RDR=$(readlink -f "$READS_DIR")
ODR=$(readlink -f "$OUTPUT_DIR")
GENOME=$(readlink -f "$REFGEN")
echo "$package option activated"
echo "$WDR is present directory" 
echo "$RDR is the reads directory"
echo "$ODR is the output directory"
echo "$GENOME is the path to reference genome"  
 
#########
echo " ################ Checking for dependencies ########"
# External programs that must be reachable through PATH. The list is a
# whitespace-separated string and is deliberately left unquoted in the loop
# header so it splits into one word per tool.
requirements="parallel R bowtie2 seqtk samtools bedtools bamtools blastn pathoscope mosdepth"
for f in $requirements
do
  # type -P prints the path of the executable found on PATH, or nothing.
  toolCheck=$(type -P "$f")
  # Quoted so a tool path containing whitespace cannot break the test.
  if [ -z "$toolCheck" ]; then
    echo "ERROR: $f missing"
    echo "Check https://github.com/ohlab/GRiD for required packages"
    exit 1
  else
    echo "$f found"
  fi
done
echo "All required packages found"
echo " ################ Checking for required R libraries ########"
# check_R_libraries.R is addressed by relative name, so it must be run from the
# script directory; abort if that directory cannot be entered, otherwise a
# missing marker file would be reported as "R libraries ok".
cd "$GRID_DIR" || exit 1
Rscript check_R_libraries.R
# A non-empty .checkedR marks missing libraries. The file is presumably written
# by check_R_libraries.R (not visible here) -- TODO confirm.
if [ -s .checkedR ]; then
  echo "ERROR: The following R libraries are missing"
  cat .checkedR
  echo "Check https://github.com/ohlab/GRiD for required packages"
  rm -f .checkedR
  exit 1
else
  echo "R libraries ok"
  # -f: the marker may not exist at all when nothing was reported.
  rm -f .checkedR
fi
##############################

# Make sure the output directory exists and is empty before writing into it.
# If $ODR is empty (readlink could not resolve the requested path) or cannot be
# created, stop here: an unquoted, empty $ODR would make "ls -A" inspect the
# current directory and report a misleading "not Empty" error.
mkdir -p "$ODR" || exit 1

if [ -z "$(ls -A "$ODR")" ]; then
   echo "Output directory ok"
else
   echo "ERROR: Output directory not Empty"
   exit 1
fi

# Build the bowtie2 index of the reference and locate the dnaA and dif hits on
# it. Everything produced here is written into the output directory.
cd "$ODR" || exit 1
# File name of the reference without its directory; used as the index suffix.
ref_genome=${GENOME##*/}

echo "Creating bowtie index"
bowtie2-build -q "$GENOME" "bowtie_$ref_genome"

echo "done"
echo "Running BLAST to dnaA and dif databases"

# Timestamped scratch file names (relative to the output directory).
dnaa_id_var="dnaa_$(date +%s)"
dif_id_var="dif_$(date +%s)"
readsforgrid="reads_$(date +%s)"

# Use the thread count requested with -n instead of a fixed 60 threads.
blastn -query "$GENOME" -db "$GRID_DIR/blast_database/dnaa.fasta" -evalue 0.05 -num_threads "$NUM_THREAD" -max_target_seqs 1 -outfmt 6 -out "$dnaa_id_var" -word_size 11
blastn -query "$GENOME" -db "$GRID_DIR/blast_database/dif.fasta" -evalue 0.05 -num_threads "$NUM_THREAD" -max_target_seqs 1 -outfmt 6 -out "$dif_id_var" -word_size 11

# Tabular BLAST output (-outfmt 6): column 1 is the query sequence id, column 7
# the query start and column 12 the bit score. Keep the highest-scoring hit and
# read both fields from that single line rather than sorting the table twice.
dnaa_best=$(sort -nr -k12,12 "$dnaa_id_var" | head -1)
dnaa_fragment=$(printf '%s\n' "$dnaa_best" | cut -f1)
dnaa_start=$(printf '%s\n' "$dnaa_best" | cut -f7)

dif_best=$(sort -nr -k12,12 "$dif_id_var" | head -1)
dif_fragment=$(printf '%s\n' "$dif_best" | cut -f1)
dif_start=$(printf '%s\n' "$dif_best" | cut -f7)

echo "done"

# Write the sample names (file names minus the read extension) to a list in the
# output directory. Abort if the reads directory cannot be entered, otherwise
# the glob below would pick up files from whatever directory is current.
cd "$RDR" || exit 1
# Escape the dots of the extension (e.g. fastq.gz) so sed treats them literally.
ext_re=${READS_EXT//./\\.}
if [ "$LIST" == "false" ]; then
  ls *."$READS_EXT" | sed "s/\.${ext_re}\$//" > "$ODR/$readsforgrid.txt"
else
  sed "s/\.${ext_re}\$//" "$LIS" > "$ODR/$readsforgrid.txt"
fi

# Print the rows of the tab-separated coverage table $2 whose first column is
# exactly the contig name $1. The name is escaped and anchored, so "c1" does
# not also select "c10" and a '.' in a name only matches a literal dot.
grid_contig_rows() {
  local name_re
  name_re=$(printf '%s\n' "$1" | sed 's/[][\.*^$]/\\&/g')
  grep "^${name_re}"$'\t' "$2"
}

# Process every sample named in the read list. Each pass starts in the reads
# directory, works in the output directory and returns to the reads directory.
for i in `cat $ODR/$readsforgrid.txt`
do
echo $i

if [ "$READS_EXT" != "sam" ]; then
# Read input: align to the reference; --al also saves the aligned reads.
echo "Running bowtie"
bowtie2 -x $ODR/bowtie_$ref_genome -U $i.$READS_EXT --very-sensitive -S $ODR/$i.main.sam --al $ODR/$i.MAP.fastq -p $NUM_THREAD || exit 1
echo "done"
else
# SAM input: drop unmapped records (-F4), recover the mapped reads as FASTQ and
# write the filtered alignment back out as the main SAM file.
samtools view -@ $NUM_THREAD -h -bS -F4 $i.$READS_EXT > $ODR/$i.temp.bam || exit 1
bedtools bamtofastq -i $ODR/$i.temp.bam -fq $ODR/$i.MAP.fastq || exit 1
samtools view -@ $NUM_THREAD -h $ODR/$i.temp.bam > $ODR/$i.main.sam || exit 1
rm $ODR/$i.temp.bam
fi

cd $ODR
# subsample 85% of mapped reads twice (seeds 100 and 90) and re-align each subsample
seqtk sample -s100 $i.MAP.fastq 0.85 > $i.MAP.subsample.fastq || exit 1
bowtie2 -x bowtie_$ref_genome -U $i.MAP.subsample.fastq --very-sensitive -S $i.sub1.sam -p $NUM_THREAD || exit 1

seqtk sample -s90 $i.MAP.fastq 0.85 > $i.MAP.subsample2.fastq || exit 1
bowtie2 -x bowtie_$ref_genome -U $i.MAP.subsample2.fastq --very-sensitive -S $i.sub2.sam -p $NUM_THREAD || exit 1

rm $i.MAP*fastq


###########################################
# Turn each alignment of this sample (main, sub1, sub2) into a per-base
# coverage table with its contigs re-ordered.
for FILE in $i.*.sam
do
samtools view -@ $NUM_THREAD -bS $FILE > $FILE.bam
rm $FILE

## checking version of samtools
# stderr is parked on fd 3 and silenced while probing. An empty result selects
# the "sort - <prefix>" call form, otherwise "sort -o <file>" is used
# (presumably older samtools releases do not answer --version -- TODO confirm).
exec 3>&2
exec 2> /dev/null
samcheck=$(samtools --version | grep "samtools" | cut -f2 -d' ')
exec 2>&3
if [ -z $samcheck ]; then
samtools view -b -F 4 $FILE.bam | samtools sort -@ $NUM_THREAD - $FILE.sorted
else
samtools view -b -F 4 $FILE.bam | samtools sort -@ $NUM_THREAD -o $FILE.sorted.bam
fi
##############

rm $FILE.bam
echo "Running genomeCoverageBed"
genomeCoverageBed -ibam $FILE.sorted.bam -d >$FILE.txt
rm $FILE.sorted.bam

echo "done"
# Mean of column 3 per contig (column 1), sorted from highest to lowest.
awk -F"\t" '{obj1[$1]+=$3;++obj2[$1]}END{for (key in obj1) print key"\011"obj1[key]/obj2[key]}' $FILE.txt | sort -nr -k2,2 > $FILE.filtered_scaffold_coverage.txt

#split contigs to two temporary files
sed -n 2~2p $FILE.filtered_scaffold_coverage.txt | sort -n -k2,2 > $FILE.evenfile.txt #important to re-sort one of either files
sed -n 1~2p $FILE.filtered_scaffold_coverage.txt > $FILE.oddfile.txt

#merge both files to obtain re-organized scaffolds order
# (odd-ranked contigs in descending order, then even-ranked ones ascending)
cat $FILE.oddfile.txt $FILE.evenfile.txt | cut -f1 > $FILE.organized_scaffold.txt

# obtain the coverage file with reorganized scaffold
# Rows are selected by exact contig name; an unanchored pattern would copy the
# rows of every contig whose name merely contains this one.
for var in `cat $FILE.organized_scaffold.txt`
do
grid_contig_rows "$var" $FILE.txt >> $FILE.reorganized.txt
done
rm $FILE.organized_scaffold.txt
rm $FILE.oddfile.txt
rm $FILE.evenfile.txt
rm $FILE.filtered_scaffold_coverage.txt
rm $FILE.txt
done

# Average coverage = sum of column 3 divided by the number of non-empty rows.
base_cov=$(awk '{ sum+=$3} END {print sum}' $i.main.sam.reorganized.txt)
total_genome_length=$(grep -c "." $i.main.sam.reorganized.txt)
coverage=$(echo "scale=3; $base_cov/$total_genome_length" |bc )

# Line number, within the re-organized table, of the row that is the
# dnaa_start-th (resp. dif_start-th) row of the contig carrying the BLAST hit.
dnaa_position_temp=$(grep -wn "$dnaa_fragment" $i.main.sam.reorganized.txt | sed -n ''$dnaa_start'p;' | cut -f1 -d':')
dif_position_temp=$(grep -wn "$dif_fragment" $i.main.sam.reorganized.txt | sed -n ''$dif_start'p;' | cut -f1 -d':')

dnaa_position=$(echo "scale=3; $dnaa_position_temp + 1" |bc )
dif_position=$(echo "scale=3; $dif_position_temp + 1" |bc )

if (( $(bc <<< "$coverage < 0.2") )); then
echo " ###################################################"
echo " ##### WARNING: $i -- Genome coverage less than 0.2 #####"
echo " ###################################################"
fi

echo "Running GRiD"
Rscript $GRID_DIR/GRiD.R -i $i.main.sam.reorganized.txt -x $i.sub1.sam.reorganized.txt -y $i.sub2.sam.reorganized.txt -o $i.pdf -p $i.GRiD.txt -c $coverage -d $dnaa_position -f $dif_position
echo "done"
rm $i.sub*.sam.reorganized.txt
rm $i.main.sam.reorganized.txt

cd $RDR
done

# Remove the scratch files created for this run: both BLAST hit tables, the
# bowtie2 index files and the sample list. ${ODR:?} aborts with an error rather
# than building paths under "/" should the output directory variable be empty;
# the glob is kept outside the quotes so it still expands.
rm -- "${ODR:?}/$dnaa_id_var"
rm -- "${ODR:?}/$dif_id_var"
rm -- "${ODR:?}/bowtie_$ref_genome"*bt2
rm -- "${ODR:?}/$readsforgrid.txt"

else
######################################
#####################################
# multiplex option activated
#####################################
####################################
# Resolve every user-supplied location to an absolute path from the invocation
# directory, because the steps below change directory several times.
# Expansions are quoted so a path containing whitespace reaches readlink as a
# single argument, and a failed cd aborts instead of resolving relative paths
# against the wrong directory.
cd "$WDR" || exit 1
RDR=$(readlink -f "$READS_DIR")
ODR=$(readlink -f "$OUTPUT_DIR")
DBR=$(readlink -f "$DATABASE_DIR")
echo "$package option activated"
echo "$WDR is present directory"
echo "$RDR is the reads directory"
echo "$ODR is the output directory"
echo "$DBR is GRiD database directory"
echo "coverage cutoff is $COV_CUTOFF"

#################################
echo " ################ Checking for dependencies ########"
# External programs that must be reachable through PATH. The list is a
# whitespace-separated string and is deliberately left unquoted in the loop
# header so it splits into one word per tool.
requirements="parallel R bowtie2 seqtk samtools bedtools bamtools blastn pathoscope mosdepth"
for f in $requirements
do
  # type -P prints the path of the executable found on PATH, or nothing.
  toolCheck=$(type -P "$f")
  # Quoted so a tool path containing whitespace cannot break the test.
  if [ -z "$toolCheck" ]; then
    echo "ERROR: $f missing"
    echo "Check https://github.com/ohlab/GRiD for required packages"
    exit 1
  else
    echo "$f found"
  fi
done
echo "All required packages found"
echo " ################ Checking for required R libraries ########"
# check_R_libraries.R is addressed by relative name, so it must be run from the
# script directory; abort if that directory cannot be entered, otherwise a
# missing marker file would be reported as "R libraries ok".
cd "$GRID_DIR" || exit 1
Rscript check_R_libraries.R
# A non-empty .checkedR marks missing libraries. The file is presumably written
# by check_R_libraries.R (not visible here) -- TODO confirm.
if [ -s .checkedR ]; then
  echo "ERROR: The following R libraries are missing"
  cat .checkedR
  echo "Check https://github.com/ohlab/GRiD for required packages"
  rm -f .checkedR
  exit 1
else
  echo "R libraries ok"
  # -f: the marker may not exist at all when nothing was reported.
  rm -f .checkedR
fi
##############################
# Make sure the output directory exists and is empty before writing into it.
# If $ODR is empty (readlink could not resolve the requested path) or cannot be
# created, stop here: an unquoted, empty $ODR would make "ls -A" inspect the
# current directory and report a misleading "not Empty" error.
mkdir -p "$ODR" || exit 1

if [ -z "$(ls -A "$ODR")" ]; then
   echo "Output directory ok"
else
   echo "ERROR: Output directory not Empty"
   exit 1
fi

# Write the sample names (file names minus the read extension) to a list in the
# output directory. Abort if the reads directory cannot be entered, otherwise
# the glob below would pick up files from whatever directory is current.
cd "$RDR" || exit 1
# Timestamped name of the sample list file (without its .txt suffix).
readsforgrid="reads_$(date +%s)"
# Escape the dots of the extension (e.g. fastq.gz) so sed treats them literally.
ext_re=${READS_EXT//./\\.}
if [ "$LIST" == "false" ]; then
  ls *."$READS_EXT" | sed "s/\.${ext_re}\$//" > "$ODR/$readsforgrid.txt"
else
  sed "s/\.${ext_re}\$//" "$LIS" > "$ODR/$readsforgrid.txt"
fi


# Process every sample named in the read list. Each pass starts in the reads
# directory, works in the output directory and returns to the reads directory.
for i in `cat $ODR/$readsforgrid.txt`
do
#############################################################

# Align the sample against every bowtie2 index named in $DBR/bowtie.txt,
# producing one SAM file per index in the output directory.
for BOWTIE in `cat $DBR/bowtie.txt`
do
bowtie2 -x $DBR/$BOWTIE -U $i.$READS_EXT --very-sensitive -S $ODR/$i.$BOWTIE.main.sam -p $NUM_THREAD --quiet -a || exit 1
done

cd $ODR

# Collect the header lines of all per-index SAM files ...
for f in $i.*.main.sam
do
samtools view -H -S $f >> $i.headers.allSeq.sam || exit 1
done

# ... then the alignment records of all per-index SAM files ...
for f in $i.*.main.sam
do
samtools view -S $f >> $i.allSeq.no.headers.sam || exit 1
done
rm $i.*.main.sam

# ... and join them into one SAM file (all headers first, then all records).
cat $i.headers.allSeq.sam $i.allSeq.no.headers.sam > $i.allSeq.sam || exit 1

rm $i.headers.allSeq.sam
rm $i.allSeq.no.headers.sam

# With -p, run pathoscope ID on the combined alignment and continue from the
# updated_<sample>.allSeq.sam file read below; otherwise convert the combined
# SAM directly.
if [ "$PATHO" == "true" ]; then
pathoscope ID -alignFile $i.allSeq.sam -fileType sam -outDir . -thetaPrior $THETA
rm $i.allSeq.sam
rm *report.tsv

samtools view -@ $NUM_THREAD -bS updated_$i.allSeq.sam > $i.bam
rm updated_$i.allSeq.sam
else
samtools view -@ $NUM_THREAD -bS $i.allSeq.sam > $i.bam
rm $i.allSeq.sam
fi

## checking version of samtools
# stderr is parked on fd 3 and silenced while probing. An empty result selects
# the "sort - <prefix>" call form, otherwise "sort -o <file>" is used
# (presumably older samtools releases do not answer --version -- TODO confirm).
exec 3>&2
exec 2> /dev/null
samcheck=$(samtools --version | grep "samtools" | cut -f2 -d' ')
exec 2>&3
if [ -z $samcheck ]; then
samtools view -b -F 4 $i.bam | samtools sort -@ $NUM_THREAD - $i.sorted
else
samtools view -b -F 4 $i.bam | samtools sort -@ $NUM_THREAD -o $i.sorted.bam
fi
###########

samtools index $i.sorted.bam
rm $i.bam

# NOTE(review): mosdepth runs with a fixed "-t 3" rather than $NUM_THREAD --
# confirm that this is intentional.
MOSDEPTH_PRECISION=5 mosdepth -n -F 4 -t 3 $i $i.sorted.bam

# The R helper reads the mosdepth distribution and writes the genomes that
# satisfy the coverage cutoff (its logic is not visible here).
Rscript $GRID_DIR/GRiD_multiplex_cov_cutoff.R -i $i.mosdepth.global.dist.txt -o $i.passed_genomes.tmp.txt -x $COV_CUTOFF
rm $i.mosdepth.global.dist.txt

# Look the passing genomes up in database_misc.txt and keep column 1. For
# cutoffs below 1 only rows whose column 5 is below 90 are kept
# (meaning of column 5 is not visible here -- TODO confirm).
if (( $(bc <<< "$COV_CUTOFF < 1") )); then
grep -w -f $i.passed_genomes.tmp.txt $DBR/database_misc.txt | awk '$5 < 90' | cut -f1 > $i.passed_genomes.txt
else
grep -w -f $i.passed_genomes.tmp.txt $DBR/database_misc.txt | cut -f1 > $i.passed_genomes.txt
fi

rm $i.passed_genomes.tmp.txt

# At least one genome passed: extract its alignments and analyse each genome.
if [ -s $i.passed_genomes.txt ]
then
# Keep only alignments to the passing genomes, then split into one BAM per
# reference sequence.
xargs samtools view -hb $i.sorted.bam < $i.passed_genomes.txt > $i.filtered.bam
bamtools split -in $i.filtered.bam -reference

# Generate one worker script per genome. The heredoc delimiter is unquoted, so
# $i, $GENOME, $DBR, $NUM_THREAD and $GRID_DIR are expanded now, while the
# backslash-escaped \$... and \`...\` are written literally and evaluated only
# when the generated script runs. Every line up to EOT, including the lines
# starting with '#', is content of the generated script.
for GENOME in `cat $i.passed_genomes.txt`
do
cat <<EOT >> $GENOME.parallel.sh
#!/bin/bash
samtools view -H $i.filtered.REF_$GENOME.bam | grep -w "$GENOME" >> $i.$GENOME.temp.sam
samtools view $i.filtered.REF_$GENOME.bam >> $i.$GENOME.temp.sam
samtools view -@ $NUM_THREAD -bS $i.$GENOME.temp.sam > $i.$GENOME.bam
rm $i.$GENOME.temp.sam
genomeCoverageBed -ibam $i.$GENOME.bam -d > $i.$GENOME.txt
grep -w "$GENOME" $DBR/database_misc.txt | cut -f2 | sed 's/p,/p\n/g' > $i.$GENOME.chunk.txt
grep -w "$GENOME" $DBR/database_misc.txt | cut -f3 > $i.$GENOME.remove.txt
for mychunk in \`cat $i.$GENOME.chunk.txt\`
do
sed -n "\$mychunk" $i.$GENOME.txt |  sed "s/^/\$mychunk\_/g" >> $i.$GENOME.temp.txt #split and rename contigs
done
sed "\$(cat $i.$GENOME.remove.txt)" $i.$GENOME.temp.txt > $i.$GENOME.edited.txt
rm $i.$GENOME.txt
rm $i.$GENOME.temp.txt
rm $i.$GENOME.chunk.txt
rm $i.$GENOME.remove.txt
rm $i.filtered.REF_$GENOME.bam
rm $i.$GENOME.bam
awk -F"\t" '{obj1[\$1]+=\$3;++obj2[\$1]}END{for (key in obj1) print key"\011"obj1[key]/obj2[key]}' $i.$GENOME.edited.txt | sort -nr -k2,2 > $i.$GENOME.filtered_scaffold_coverage.txt
#split contigs to two temporary files
sed -n 2~2p $i.$GENOME.filtered_scaffold_coverage.txt | sort -n -k2,2 > $i.$GENOME.evenfile.txt #important to re-sort one of either files
sed -n 1~2p $i.$GENOME.filtered_scaffold_coverage.txt > $i.$GENOME.oddfile.txt
#merge both files to obtain re-organized scaffolds order
cat $i.$GENOME.oddfile.txt $i.$GENOME.evenfile.txt | cut -f1 > $i.$GENOME.organized_scaffold.txt
# obtain the coverage file with reorganized scaffold
for var in \`cat $i.$GENOME.organized_scaffold.txt\`
do
grep "\$var" $i.$GENOME.edited.txt >> $i.$GENOME.reorganized.txt
done
rm $i.$GENOME.organized_scaffold.txt
rm $i.$GENOME.oddfile.txt
rm $i.$GENOME.evenfile.txt
rm $i.$GENOME.filtered_scaffold_coverage.txt
rm $i.$GENOME.edited.txt
base_cov=\$(awk '{ sum+=\$3} END {print sum}' $i.$GENOME.reorganized.txt)
total_genome_length=\$(grep -c "." $i.$GENOME.reorganized.txt)
coverage=\$(echo "scale=3; \$base_cov/\$total_genome_length" |bc )
Rscript $GRID_DIR/GRiD_multiplex.R -i $i.$GENOME.reorganized.txt -p $i.$GENOME.GRiD.txt -c \$coverage
rm $i.$GENOME.reorganized.txt
EOT

echo "bash $GENOME.parallel.sh" >> parallel_commands
done

# Run the generated per-genome scripts through GNU parallel, then discard them.
cat parallel_commands | parallel '{}'
rm parallel_commands
rm *.parallel.sh

# Combine the per-genome result rows: keep rows whose column 5 exceeds the
# coverage cutoff and replace column 2 by 1 wherever it is greater than 10.
paste -d '\n' $i.*.GRiD.txt | awk '$5 > '$COV_CUTOFF'' | awk -F'\t' '{ $2 = ($2 > 10 ? 1 : $2) } 1' OFS='\t' > $i.GRiD.temp.txt
# Strip the "<sample>." prefix wherever it occurs in the table.
sed -i "s/$i\.//g" $i.GRiD.temp.txt

# Rows with column 2 above 3 are re-checked against column 4 of
# database_misc.txt (presumably the genome length -- TODO confirm); genomes
# whose value is below 4000000 are removed from the final table.
awk '$2 > 3' $i.GRiD.temp.txt | cut -f1 > $i.QC.txt
touch $i.removedGenomes.txt
for f in `cat $i.QC.txt`
do
len=$(grep -w "$f" $DBR/database_misc.txt | cut -f4)
if [[ $len -lt 4000000 ]] 
then
echo "$f" >> $i.removedGenomes.txt
else
echo "$f passed"
fi
done
grep -wvf $i.removedGenomes.txt $i.GRiD.temp.txt > $i.GRiD.txt

rm $i.removedGenomes.txt
rm $i.GRiD.temp.txt
rm $i.QC.txt

else
# No genome passed the cutoff: the result file holds only a placeholder line.
touch $i.GRiD.txt
echo "GRiD -- No genome above coverage cutoff $COV_CUTOFF --" >> $i.GRiD.txt
fi

# The 70 rows with the highest column 5 go into a separate table for the tree
# script below; ed then inserts the column header as the first line of both
# tables.
sort -k5,5 -nr $i.GRiD.txt | head -70 > $i.GRiD.limit.txt
printf '1\ni\nGenome\tGRiD\tGRiD_unrefined\tSpecies_heterogeneity\tCoverage\n.\nw\n' | ed -s $i.GRiD.txt
printf '1\ni\nGenome\tGRiD\tGRiD_unrefined\tSpecies_heterogeneity\tCoverage\n.\nw\n' | ed -s $i.GRiD.limit.txt
# Remove the per-genome result files and the remaining BAM files and indexes.
rm $i.*.GRiD.txt
rm $i.sorted.bam*
rm $i.*.bam
rm $i.passed_genomes.txt

# Run the tree script with stderr silenced (parked on fd 3, restored afterwards).
exec 3>&2
exec 2> /dev/null
Rscript $GRID_DIR/GRiD_multiplex_tree.R -i $i.GRiD.limit.txt -o $i.GRiD.pdf
rm $i.GRiD.limit.txt
exec 2>&3

cd $RDR
done
# The sample list is no longer needed once every sample has been processed.
rm $ODR/$readsforgrid.txt
fi
################## merge option ####################
# Optional final step (-m): combine the per-sample GRiD tables found in the
# output directory into one matrix file, merged_table.txt, with one row per
# genome and one column per sample. "run complete" is printed either way.
cd $ODR
if [ "$MERGE" == "true" ]; then
  # Every distinct first-column value over all result tables; lines containing
  # "GRiD" (such as the header line) are skipped.
  cat *GRiD.txt | grep -v "GRiD" | cut -f1 | sort | uniq > GRiD.genomes.txt
  # Sample names = result file names without the .GRiD.txt suffix.
  ls *.GRiD.txt | sed 's/\.GRiD.txt//g' > GRiD_list.txt

  for i in $(cat GRiD_list.txt); do
    # One row per genome: the genome name and this sample's column-2 value
    # (left empty when the genome is absent from the sample).
    for f in $(cat GRiD.genomes.txt); do
      paste <(awk '$1 == "'$f'" {print $1}' GRiD.genomes.txt ) <(awk '$1 == "'$f'" {print $2}' $i.GRiD.txt )  >> $i.temp.merge.txt
    done
    # Insert a header line carrying the sample name above the rows.
    printf '1\ni\nGenome\t'$i'\n.\nw\n' | ed -s $i.temp.merge.txt
  done

  # Reduce each per-sample table to its value column.
  for f in *.temp.merge.txt; do
    cut -f2 $f > $f.tmp
    rm $f
  done

  # Put a header above the genome column and glue all columns side by side.
  printf '1\ni\nGenome\n.\nw\n' | ed -s GRiD.genomes.txt
  paste -d'\t' GRiD.genomes.txt *.temp.merge.txt.tmp > merged_table.txt
  rm *.temp.merge.txt.tmp
  rm GRiD_list.txt
  rm GRiD.genomes.txt
fi
echo "run complete"