Skip to content

Commit

Permalink
Bug fixes
Browse files Browse the repository at this point in the history
- Added check if db directory is present.
- db files are automatically extracted if db directory is missing.
- Blast SCG search does not require write permissions in the
installation directory any more.
  • Loading branch information
cmks committed Sep 6, 2017
1 parent d60329f commit ceabdc0
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 16 deletions.
36 changes: 26 additions & 10 deletions DAS_Tool
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,15 @@ thisdir=$(pwd)

function display_version() {
echo
echo "DAS Tool version 1.1"
echo "DAS Tool version 1.1.0"
echo
exit 1
}


function display_help() {
echo " "
echo "DAS Tool version 1.1"
echo "DAS Tool version 1.1.0"
echo " "
echo "Usage: DAS_Tool -i methodA.scaffolds2bin,...,methodN.scaffolds2bin"
echo " -l methodA,...,methodN -c contigs.fa -o myOutput"
Expand Down Expand Up @@ -142,8 +142,7 @@ while [ "$1" != "" ]; do
--megabin_penalty ) shift
c_weight=$1
;;
--debug ) shift
debug="TRUE"
--debug ) debug="TRUE"
;;
-h | --help ) display_help
exit
Expand Down Expand Up @@ -206,14 +205,14 @@ fi
if [ ! -f "$contigs" ]
then
echo contig file not found: $contigs
echo stopping
echo Aborting
exit 1
fi

if [ ! -z "$proteins" ] && [ ! -f "$proteins" ]
then
echo proteins file not found: $proteins
echo stopping
echo Aborting
exit 1
fi

Expand All @@ -222,13 +221,25 @@ do
if [ ! -f $i ];
then
echo "scaffolds2bin file not found: $i "
echo Aborting
exit 1
fi
done

# Existence and permissions of output directory
if [ ! -d $(dirname $outputbasename) ]; then echo "Directory for output files does not exist: $(dirname $outputbasename). Aborting"; exit 1; fi
if [ ! -w $(dirname $outputbasename) ] ; then echo "Can't write in output folder: $(dirname $outputbasename). Permission denied. Aborting"; exit 1; fi
if [ ! -d $(dirname "$outputbasename") ]; then echo "Directory for output files does not exist: $(dirname $outputbasename). Aborting"; exit 1; fi
if [ ! -w $(dirname "$outputbasename") ] ; then echo "Can't write in output folder: $(dirname $outputbasename). Permission denied. Aborting"; exit 1; fi

# Existence of database directory.
# $DIR is assigned earlier in the script (not visible in this hunk); it is
# the directory in which db/ is expected and into which db.zip is unpacked.
if [ ! -d "$DIR/db" ]; then
  # Name the missing directory itself. Wrapping it in dirname would print
  # its parent, which does exist and makes the message misleading.
  echo "Database directory does not exist: $DIR/db."
  # No backslash before "/": inside double quotes it would be printed verbatim.
  echo "Trying to extract $DIR/db.zip"
  # Paths are quoted so a location containing spaces still works.
  unzip -o "$DIR/db.zip" -d "$DIR"
  # unzip's exit status is not inspected; success is judged solely by
  # whether the db directory exists afterwards.
  if [ ! -d "$DIR/db" ]; then
    echo "Extraction failed. Aborting."
    exit 1
  fi
fi


#
Expand All @@ -244,7 +255,7 @@ then
proteins=$outputbasename\_proteins.faa

echo "predicting genes using prodigal"
# grep ">" $contigs | perl -pe 's/>//g;' | shuf > $outputbasename\_dastoolthreadstmpheader
#grep ">" $contigs | perl -pe 's/>//g;' | shuf > $outputbasename\_dastoolthreadstmpheader
grep ">" $contigs | sed 's/>//g;' | shuf > $outputbasename\_dastoolthreadstmpheader
dastoolthreadstmpheader=$outputbasename\_dastoolthreadstmpheader
total_lines=$(wc -l <${dastoolthreadstmpheader})
Expand All @@ -264,7 +275,7 @@ then
elif [ ! -f $proteins ]
then
echo protein file not found: $proteins
echo stopping
echo Aborting
exit 1
fi

Expand Down Expand Up @@ -292,6 +303,11 @@ else
echo skipping single copy gene identification
fi

# Abort unless both single copy gene result files exist.
# $bscg, $ascg and $search_engine are assigned earlier in the script (not
# visible in this hunk).
# The expansions must be quoted: with an empty or unset variable the unquoted
# form collapses to `[ ! -f ]`, a two-argument test that evaluates to false,
# so a missing result would slip through unnoticed.
if [ ! -f "$bscg" ] || [ ! -f "$ascg" ]; then
  echo "single copy gene prediction using $search_engine failed. Aborting"
  exit 1
fi



#
Expand Down
13 changes: 7 additions & 6 deletions src/scg_blank_blast.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@
input_file = ARGV[1]
output_dir = File.dirname(input_file)

datab = ARGV[2]
puts "database name of all proteins is #{datab}"
datab_in = ARGV[2]
datab_out = "#{File.dirname ARGV[1]}" + "/" + "#{File.basename ARGV[2]}"
puts "database name of all proteins is #{datab_in}"

db_name = ARGV[3]
puts "database name of SCGs is #{db_name}"
Expand All @@ -28,8 +29,8 @@
threads = ARGV[5]

#build databases
full_db = system "makeblastdb -in #{datab} -dbtype prot"
abort "makeblastdb did not work for #{datab}, please check your input file" unless full_db
full_db = system "makeblastdb -in #{datab_in} -dbtype prot -out #{datab_out}"
abort "makeblastdb did not work for #{datab_in}, please check your input file" unless full_db

# find SCG candidates
puts "finding SCG candidates..."
Expand All @@ -49,9 +50,9 @@
# verify SCGs by blasting against all proteins of all genomes
puts "verifying selected SCGs..."
db_blast_out = File.join(output_dir,File.basename(input_file) + ".all.b6")
db_blast_ok = system "blastp -db #{datab} -query #{scg_candidates} -outfmt '6 qseqid sseqid pident length qlen slen evalue bitscore' -evalue 0.00001 -out #{db_blast_out} -max_target_seqs 1 -num_threads #{threads}"
db_blast_ok = system "blastp -db #{datab_out} -query #{scg_candidates} -outfmt '6 qseqid sseqid pident length qlen slen evalue bitscore' -evalue 0.00001 -out #{db_blast_out} -max_target_seqs 1 -num_threads #{threads}"
abort "verifying blast did not work" unless db_blast_ok
system "rm #{datab}.psq #{datab}.pin #{datab}.phr"
system "rm #{datab_out}.psq #{datab_out}.pin #{datab_out}.phr"
puts "starting annotations of single copy cogs..."

# Read db_lookup
Expand Down

0 comments on commit ceabdc0

Please sign in to comment.