Skip to content

Commit 4f1e9de

Browse files
authored
Merge pull request #122 from ncbi/dev
AMRFinderPlus release 3.11.14 This release addresses a few issues brought up on GitHub. We could not resolve the issues that we were unable to reproduce, but we are still working on them. Changes: - On failure no `-o` output file is created - #115 - AMRFinderPlus will now automatically decompress files ending in .gz with gunzip (relies on gunzip being in PATH) - #61 - AMRFinderPlus does not support Unicode, but it no longer specifically checks GFF files in order to prohibit Unicode characters - #119 - Add reporting of curl error messages - #120
2 parents 1961989 + d437940 commit 4f1e9de

File tree

11 files changed

+391
-220
lines changed

11 files changed

+391
-220
lines changed

Makefile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ release: clean
9292
make all
9393

9494
common.o: common.hpp common.inc
95+
curl_easy.o: curl_easy.hpp common.hpp common.inc
9596
gff.o: gff.hpp common.hpp common.inc
9697
# Header dependencies of alignment.o, in the same form as gff.o and curl_easy.o
alignment.o: alignment.hpp common.hpp common.inc
9798

@@ -106,7 +107,7 @@ amrfinder: $(amrfinderOBJS)
106107
$(CXX) -o $@ $(amrfinderOBJS) -pthread $(DBDIR)
107108

108109
amrfinder_update.o: common.hpp common.inc
109-
amrfinder_updateOBJS=amrfinder_update.o common.o
110+
amrfinder_updateOBJS=amrfinder_update.o common.o curl_easy.o
110111
amrfinder_update: $(amrfinder_updateOBJS)
111112
@if [ "$(TEST_UPDATE)" != "" ] ; \
112113
then \

amr_report.cpp

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -910,22 +910,6 @@ struct BlastAlignment : Alignment
910910
}
911911
string getMethod (const Locus &cds) const
912912
{ //IMPLY (refExactlyMatched () && ! mutation_all. get (), ! isMutationProt ())
913-
#if 0
914-
string method (fromHmm
915-
? "HMM"
916-
: refExactlyMatched ()
917-
? alleleReported ()
918-
? "ALLELE"
919-
: "EXACT" // PD-776
920-
: isMutationProt ()
921-
? "POINT"
922-
: partial ()
923-
? truncated (cds)
924-
? "PARTIAL_CONTIG_END" // PD-2267
925-
: "PARTIAL"
926-
: "BLAST"
927-
);
928-
#endif
929913
string method (fromHmm
930914
? "HMM"
931915
: isMutationProt ()

amrfinder.cpp

Lines changed: 61 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,15 @@
3030
* AMRFinder
3131
*
3232
* Dependencies: NCBI BLAST, HMMer
33+
* gunzip (optional)
3334
*
3435
* Release changes:
36+
* 05/06/2023 PD-4598 error messages in curl_easy.cpp
37+
* 3.11.14 05/05/2023 extern "C" { #include <curl.h> }
38+
* 3.11.13 05/04/2023 PD-4596 Prohibit only the ASCII characters between 0x00 and 0x1F in GFF files
39+
* 04/24/2023 PD-4583 Process files ending with ".gz", see https://github.com/ncbi/amr/issues/61, dependence on gunzip (optional)
40+
* 04/19/2023 On failure no empty output file (-o) is created
41+
* 3.11.12 04/13/2023 Application::makeKey()
3542
* PD-4548 fasta_check.cpp prohibits '\t' (not any '\'), and all restrictions are only for nucleotide sequences
3643
* 3.11.11 04/13/2023 PD-4566 --hmmer_bin
3744
* 3.11.10 04/12/2023 PD-4548 fasta_check.cpp prohibits ';', '.', '~' in the last position of a sequence identifier
@@ -470,6 +477,22 @@ struct ThisApplication : ShellApplication
470477
t. qc ();
471478
t. saveFile (tmp + "/" + tmpSuf);
472479
}
480+
481+
482+
483+
// Prepare an input file for reading, decompressing it first when its name ends in ".gz"
// Input: quotedFName: input file name in the quoted form used in shell commands (it is compared with a shellQuote() result and passed to unQuote())
//        suffix: file name inside the directory tmp that receives the decompressed copy
// Return: the shellQuote()'d path tmp/suffix if gunzip has been run, otherwise quotedFName unchanged
// Requires: gunzip in PATH, but only if a ".gz" file name is supplied
// NOTE(review): isRight (s, ".gz") is presumably an "ends with" test - confirm against common.hpp
string uncompress (const string &quotedFName,
484+
const string &suffix) const
485+
{
486+
const string res (shellQuote (tmp + "/" + suffix));  // Quoted destination of the decompressed copy
487+
ASSERT (quotedFName != res);  // The input must not be the destination itself: the redirection below writes to res
488+
const string s (unQuote (quotedFName));  // The suffix test is done on the unquoted name
489+
if (isRight (s, ".gz"))
490+
{
491+
exec ("gunzip -c " + quotedFName + " > " + res);  // "-c": write to stdout, which is redirected into res
492+
return res;
493+
}
494+
return quotedFName;  // Not a ".gz" name: the caller reads the original file
495+
}
473496

474497

475498

@@ -553,8 +576,11 @@ struct ThisApplication : ShellApplication
553576
}
554577

555578
if (! output. empty ())
579+
{
556580
try { OFStream f (output); }
557581
catch (...) { throw runtime_error ("Cannot create output file " + shellQuote (output)); }
582+
removeFile (output);
583+
}
558584

559585

560586
// For timing...
@@ -641,13 +667,9 @@ struct ThisApplication : ShellApplication
641667
if (! dbDir. items. empty () && dbDir. items. back () == "latest")
642668
{
643669
prog2dir ["amrfinder_update"] = execDir;
644-
string blast_bin_par;
645-
if (! blast_bin. empty ())
646-
blast_bin_par = " --blast_bin " + shellQuote (blast_bin);
647-
string hmmer_bin_par;
648-
if (! hmmer_bin. empty ())
649-
hmmer_bin_par = " --hmmer_bin " + shellQuote (hmmer_bin);
650-
exec (fullProg ("amrfinder_update") + " -d " + shellQuote (dbDir. getParent ()) + ifS (force_update, " --force_update") + blast_bin_par + hmmer_bin_par
670+
exec (fullProg ("amrfinder_update") + " -d " + shellQuote (dbDir. getParent ()) + ifS (force_update, " --force_update")
671+
+ makeKey ("blast_bin", blast_bin)
672+
+ makeKey ("hmmer_bin", hmmer_bin)
651673
+ ifS (quiet, " -q") + ifS (qc_on, " --debug") + " > " + logFName, logFName);
652674
}
653675
else
@@ -759,18 +781,27 @@ struct ThisApplication : ShellApplication
759781

760782
for (const string& include : includes)
761783
stderr << " - include " << include << '\n';
784+
}
785+
786+
787+
// Quoted names
788+
const string prot_flat = uncompress (prot, "prot_flat");
789+
const string dna_flat = uncompress (dna, "dna_flat");
790+
const string gff_flat = uncompress (gff, "gff_flat");
791+
762792

793+
{
763794
StringVector emptyFiles;
764-
if (! emptyArg (prot) && ! getFileSize (unQuote (prot))) emptyFiles << prot;
765-
if (! emptyArg (dna) && ! getFileSize (unQuote (dna))) emptyFiles << dna;
766-
if (! emptyArg (gff) && ! getFileSize (unQuote (gff))) emptyFiles << gff;
795+
if (! emptyArg (prot) && ! getFileSize (unQuote (prot_flat))) emptyFiles << prot;
796+
if (! emptyArg (dna) && ! getFileSize (unQuote (dna_flat))) emptyFiles << dna;
797+
if (! emptyArg (gff) && ! getFileSize (unQuote (gff_flat))) emptyFiles << gff;
767798
for (const string& emptyFile : emptyFiles)
768799
{
769800
const Warning warning (stderr);
770801
stderr << "Empty file: " << emptyFile;
771802
}
772803
}
773-
804+
774805

775806
// organism --> organism1
776807
string organism1;
@@ -853,7 +884,7 @@ struct ThisApplication : ShellApplication
853884
bool lcl = false;
854885
if (gffType == Gff::pgap && ! emptyArg (dna)) // PD-3347
855886
{
856-
LineInput f (unQuote (dna));
887+
LineInput f (unQuote (dna_flat));
857888
while (f. nextLine ())
858889
if (isLeft (f. line, ">"))
859890
{
@@ -863,6 +894,9 @@ struct ThisApplication : ShellApplication
863894
}
864895

865896

897+
const bool blastn = ! emptyArg (dna) && ! organism1. empty () && fileExists (db + "/AMR_DNA-" + organism1);
898+
899+
866900
// Create files for amr_report
867901
string amr_report_blastp;
868902
string amr_report_blastx;
@@ -881,20 +915,20 @@ struct ThisApplication : ShellApplication
881915
{
882916
string gff_prot_match;
883917
string gff_dna_match;
884-
if (getFileSize (unQuote (prot)))
918+
if (getFileSize (unQuote (prot_flat)))
885919
{
886920
findProg ("blastp");
887921
findProg ("hmmsearch");
888922

889-
string prot1 (prot); // Protein FASTA with no dashes in the sequences
923+
string prot1 (prot_flat); // Protein FASTA with no dashes in the sequences
890924
size_t nProt = 0;
891925
size_t protLen_max = 0;
892926
size_t protLen_total = 0;
893-
if (! fastaCheck (prot, true, qcS, logFName, nProt, protLen_max, protLen_total))
927+
if (! fastaCheck (prot_flat, true, qcS, logFName, nProt, protLen_max, protLen_total))
894928
{
895929
prot1 = shellQuote (tmp + "/prot");
896930
OFStream outF (unQuote (prot1));
897-
LineInput f (unQuote (prot));
931+
LineInput f (unQuote (prot_flat));
898932
while (f. nextLine ())
899933
{
900934
trimTrailing (f. line);
@@ -941,13 +975,13 @@ struct ThisApplication : ShellApplication
941975
string dnaPar;
942976
if (! emptyArg (dna))
943977
{
944-
dnaPar = " -dna " + dna;
978+
dnaPar = " -dna " + dna_flat;
945979
if (gffType == Gff::pseudomonasdb)
946980
gff_dna_match = " -gff_dna_match " + tmp + "/dna_match";
947981
}
948982
try
949983
{
950-
exec (fullProg ("gff_check") + gff + annotS + " -prot " + prot1 + dnaPar + gff_prot_match + gff_dna_match + qcS + " -log " + logFName, logFName);
984+
exec (fullProg ("gff_check") + gff_flat + annotS + " -prot " + prot1 + dnaPar + gff_prot_match + gff_dna_match + qcS + " -log " + logFName, logFName);
951985
}
952986
catch (...)
953987
{
@@ -1004,19 +1038,18 @@ struct ThisApplication : ShellApplication
10041038

10051039
amr_report_blastp = "-blastp " + tmp + "/blastp -hmmsearch " + tmp + "/hmmsearch -hmmdom " + tmp + "/dom";
10061040
if (! emptyArg (gff))
1007-
amr_report_blastp += " -gff " + gff + gff_prot_match + gff_dna_match + annotS;
1041+
amr_report_blastp += " -gff " + gff_flat + gff_prot_match + gff_dna_match + annotS;
10081042
}
10091043

10101044

10111045
if (! emptyArg (dna))
10121046
{
1013-
const bool blastn = ! organism1. empty () && fileExists (db + "/AMR_DNA-" + organism1);
1014-
if (getFileSize (unQuote (dna)))
1047+
if (getFileSize (unQuote (dna_flat)))
10151048
{
10161049
size_t nDna = 0;
10171050
size_t dnaLen_max = 0;
10181051
size_t dnaLen_total = 0;
1019-
EXEC_ASSERT (fastaCheck (dna, false, qcS, logFName, nDna, dnaLen_max, dnaLen_total));
1052+
EXEC_ASSERT (fastaCheck (dna_flat, false, qcS, logFName, nDna, dnaLen_max, dnaLen_total));
10201053
const string blastx (/*"tblastn"*/ dnaLen_max > 100000 ? "tblastn" : "blastx"); // PAR // SB-3643
10211054

10221055
stderr. section ("Running " + blastx);
@@ -1028,14 +1061,14 @@ struct ThisApplication : ShellApplication
10281061
const string blastx_par (blastp_par + " -word_size 3 -query_gencode " + to_string (gencode));
10291062
ASSERT (threads_max >= 1);
10301063
if (blastx == "blastx")
1031-
exec (fullProg ("blastx") + " -query " + dna + " -db " + tmp + "/db/AMRProt" + " "
1064+
exec (fullProg ("blastx") + " -query " + dna_flat + " -db " + tmp + "/db/AMRProt" + " "
10321065
+ blastx_par + " " BLAST_FMT " " + get_num_threads_param ("blastx", min (nDna, dnaLen_total / 10002))
10331066
+ " -out " + tmp + "/blastx > /dev/null 2> " + tmp + "/blastx-err", tmp + "/blastx-err");
10341067
else
10351068
{
10361069
ASSERT (blastx == "tblastn");
10371070
findProg ("makeblastdb");
1038-
exec (fullProg ("makeblastdb") + " -in " + dna + " -out " + tmp + "/nucl" + " -dbtype nucl -logfile " + tmp + "/makeblastdb.log", tmp + "/makeblastdb.log");
1071+
exec (fullProg ("makeblastdb") + " -in " + dna_flat + " -out " + tmp + "/nucl" + " -dbtype nucl -logfile " + tmp + "/makeblastdb.log", tmp + "/makeblastdb.log");
10391072
if (threads_max > 1)
10401073
{
10411074
createDirectory (tmp + "/AMRProt_chunk");
@@ -1061,7 +1094,7 @@ struct ThisApplication : ShellApplication
10611094
findProg ("blastn");
10621095
stderr. section ("Running blastn");
10631096
const Chronometer_OnePass cop ("blastn", cerr, false, qc_on && ! quiet);
1064-
exec (fullProg ("blastn") + " -query " + dna + " -db " + tmp + "/db/AMR_DNA-" + organism1 + " -evalue 1e-20 -dust no -max_target_seqs 10000 "
1097+
exec (fullProg ("blastn") + " -query " + dna_flat + " -db " + tmp + "/db/AMR_DNA-" + organism1 + " -evalue 1e-20 -dust no -max_target_seqs 10000 "
10651098
+ get_num_threads_param ("blastn", min (nDna, dnaLen_total / 2500000)) + " " BLAST_FMT " -out " + tmp + "/blastn > " + logFName + " 2> " + tmp + "/blastn-err", tmp + "/blastn-err");
10661099
}
10671100
}
@@ -1127,10 +1160,7 @@ struct ThisApplication : ShellApplication
11271160
+ ifS (suppress_common, " -suppress_prot " + tmp + "/suppress_prot")
11281161
+ nameS + qcS + " " + parm + " -log " + logFName + " > " + tmp + "/amr", logFName);
11291162
}
1130-
if ( ! emptyArg (dna)
1131-
&& ! organism1. empty ()
1132-
&& fileExists (db + "/AMR_DNA-" + organism1)
1133-
)
1163+
if (blastn)
11341164
{
11351165
const Chronometer_OnePass cop ("dna_mutation", cerr, false, qc_on && ! quiet);
11361166
const string mutation_allS (mutation_all. empty () ? "" : ("-mutation_all " + tmp + "/mutation_all.dna"));
@@ -1188,7 +1218,7 @@ struct ThisApplication : ShellApplication
11881218
if (! emptyArg (dna_out))
11891219
{
11901220
prepare_fasta_extract (StringVector {"Contig id", "Start", "Stop", "Strand", "Gene symbol", "Sequence name"}, "dna_out", false);
1191-
exec (fullProg ("fasta_extract") + dna + " " + tmp + "/dna_out" + qcS + " -log " + logFName + " > " + dna_out, logFName);
1221+
exec (fullProg ("fasta_extract") + dna_flat + " " + tmp + "/dna_out" + qcS + " -log " + logFName + " > " + dna_out, logFName);
11921222
}
11931223
if (! emptyArg (dnaFlank5_out))
11941224
{
@@ -1204,7 +1234,7 @@ struct ThisApplication : ShellApplication
12041234
t. saveHeader = false;
12051235
t. qc ();
12061236
t. saveFile (tmp + "/dnaFlank5_out");
1207-
exec (fullProg ("fasta_extract") + dna + " " + tmp + "/dnaFlank5_out" + qcS + " -log " + logFName + " > " + dnaFlank5_out, logFName);
1237+
exec (fullProg ("fasta_extract") + dna_flat + " " + tmp + "/dnaFlank5_out" + qcS + " -log " + logFName + " > " + dnaFlank5_out, logFName);
12081238
}
12091239

12101240

0 commit comments

Comments
 (0)