diff --git a/data_managers/data_manager_build_amrfinderplus/data_manager/data_manager_build_amrfinderplus.py b/data_managers/data_manager_build_amrfinderplus/data_manager/data_manager_build_amrfinderplus.py
index 683310b5da1..6c5a5458265 100644
--- a/data_managers/data_manager_build_amrfinderplus/data_manager/data_manager_build_amrfinderplus.py
+++ b/data_managers/data_manager_build_amrfinderplus/data_manager/data_manager_build_amrfinderplus.py
@@ -5,6 +5,7 @@
from ftplib import FTP
from io import BytesIO
from pathlib import Path
+from typing import Optional, Set
import pandas as pd
@@ -20,11 +21,13 @@ def __init__(
db_name="amrfinderplus-db",
amrfinderplus_version="latest",
date_version=None,
+ known_databases: Optional[Set[str]] = None,
):
self.data_table_name = amrfinderplus_database
self._db_name = db_name
self._amrfinderplus_version = amrfinderplus_version
self._amrfinderplus_date_version = date_version
+ self._known_databases = known_databases or set()
self.data_table_entry = None
self.amrfinderplus_table_list = None
@@ -49,6 +52,11 @@ def get_data_manager(self):
amrfinderplus_name = (
f"V{self._amrfinderplus_version}" f"-{self._amrfinderplus_date_version}"
)
+ # Galaxy data table values are unique row identifiers. If this value is
+ # already present, do not emit another row for the same database.
+ if amrfinderplus_value in self._known_databases:
+ self.amrfinderplus_table_list["data_tables"][self.data_table_name] = []
+ return self.amrfinderplus_table_list
data_info = dict(
value=amrfinderplus_value,
name=amrfinderplus_name,
@@ -79,9 +87,10 @@ def __init__(
date_version=None,
amrfinderplus_db_path=None,
test_mode=False,
+ known_databases: Optional[Set[str]] = None,
):
- super().__init__()
+ super().__init__(known_databases=known_databases)
self.json_file_path = json_file_path
self._output_dir = output_dir
self._ncbi_ftp_url = ncbi_url
@@ -98,6 +107,17 @@ def __init__(
self.test_mode = test_mode
self.amrfinderplus_db_path = amrfinderplus_db_path
+ @property
+ def amrfinderplus_value(self) -> str:
+ return (
+ f"amrfinderplus_V{self._amrfinderplus_version}"
+ f"_{self._amrfinderplus_date_version}"
+ )
+
+ @property
+ def is_known_database(self) -> bool:
+ return self.amrfinderplus_value in self._known_databases
+
@staticmethod
def subprocess_cmd(command, *args):
"""
@@ -108,14 +128,22 @@ def subprocess_cmd(command, *args):
"""
cmd = [command]
[cmd.append(i) for i in args]
- proc = sp.run(cmd, stdout=sp.PIPE, stderr=sp.PIPE)
+ proc = sp.run(cmd, stdout=sp.PIPE, stderr=sp.PIPE, text=True)
if proc.returncode != 0:
- print(f"Error type {proc.returncode} with : \n {proc}")
+ # cmd may hold non-str items (make_hmm_profile passes a pathlib.Path),
+ # so stringify each argument before joining; a bare ' '.join(cmd) would
+ # raise TypeError here and hide the real command failure.
+ raise RuntimeError(
+ f"Command failed with exit code {proc.returncode}: "
+ f"{' '.join(map(str, cmd))}\n"
+ f"stdout:\n{proc.stdout}\n"
+ f"stderr:\n{proc.stderr}"
+ )
def download_amrfinderplus_db(self):
"""
Download the amrfinderplus database from the ncbi ftp server
"""
+ # Avoid overwriting files or appending duplicate .loc rows when this
+ # database value is already registered in Galaxy.
+ if self.is_known_database:
+ return
self.amrfinderplus_db_path = f"{self._output_dir}/{self._db_name}"
os.makedirs(self.amrfinderplus_db_path)
@@ -170,6 +198,8 @@ def make_hmm_profile(self):
"""
Make the hmm profile using the AMR.LIB file previously download
"""
+ if self.is_known_database:
+ return
hmm_file = Path(f"{self.amrfinderplus_db_path}/AMR.LIB")
if Path.exists(hmm_file) and self.test_mode is False:
self.subprocess_cmd("hmmpress", "-f", hmm_file)
@@ -208,6 +238,8 @@ def make_blastdb(self):
"""
Index fasta file for blast
"""
+ if self.is_known_database:
+ return
self.extract_filelist_makeblast()
if self._amrfinderplus_version == "3.12":
nucl_file_db_list = [
@@ -301,6 +333,11 @@ def parse_arguments():
action="store_true",
help="option to test the script with an lighted database",
)
+ arg_parser.add_argument(
+ "--known_databases",
+ default="",
+ help="comma-separated list of installed amrfinderplus database values",
+ )
return arg_parser.parse_args()
@@ -311,6 +348,7 @@ def main():
date_version=all_args.db_date,
json_file_path=all_args.data_manager_json,
test_mode=all_args.test,
+ known_databases=set(filter(None, all_args.known_databases.split(","))),
)
amrfinderplus_download.read_json_input_file()
amrfinderplus_download.download_amrfinderplus_db()
diff --git a/data_managers/data_manager_build_amrfinderplus/data_manager/data_manager_build_amrfinderplus.xml b/data_managers/data_manager_build_amrfinderplus/data_manager/data_manager_build_amrfinderplus.xml
index 67b96f70d7e..9cfcb6f9e80 100644
--- a/data_managers/data_manager_build_amrfinderplus/data_manager/data_manager_build_amrfinderplus.xml
+++ b/data_managers/data_manager_build_amrfinderplus/data_manager/data_manager_build_amrfinderplus.xml
@@ -6,12 +6,21 @@
@@ -38,18 +47,38 @@ python '$__tool_directory__/data_manager_build_amrfinderplus.py'
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
+
@@ -66,7 +95,7 @@ python '$__tool_directory__/data_manager_build_amrfinderplus.py'
-
+
diff --git a/data_managers/data_manager_build_amrfinderplus/data_manager/macro.xml b/data_managers/data_manager_build_amrfinderplus/data_manager/macro.xml
index 8812c313b5d..8ae9b671f51 100644
--- a/data_managers/data_manager_build_amrfinderplus/data_manager/macro.xml
+++ b/data_managers/data_manager_build_amrfinderplus/data_manager/macro.xml
@@ -3,7 +3,7 @@
4.0.23
3.11.14
2.3.3
- 0
+ 1
21.05
diff --git a/data_managers/data_manager_build_amrfinderplus/data_manager_conf.xml b/data_managers/data_manager_build_amrfinderplus/data_manager_conf.xml
index 490abdd3a7b..3ff41174670 100644
--- a/data_managers/data_manager_build_amrfinderplus/data_manager_conf.xml
+++ b/data_managers/data_manager_build_amrfinderplus/data_manager_conf.xml
@@ -5,6 +5,7 @@
-
+
+
+
+
+
+
+
+
diff --git a/tools/amrfinderplus/macro.xml b/tools/amrfinderplus/macro.xml
index d512edc32cb..4588d8812b7 100644
--- a/tools/amrfinderplus/macro.xml
+++ b/tools/amrfinderplus/macro.xml
@@ -1,6 +1,6 @@
3.12.8
- 0
+ 1
21.05
diff --git a/tools/amrfinderplus/test-data/amrfinderplus_versioned.loc b/tools/amrfinderplus/test-data/amrfinderplus_versioned.loc
index a11d9c5d17b..1f6052f0892 100644
--- a/tools/amrfinderplus/test-data/amrfinderplus_versioned.loc
+++ b/tools/amrfinderplus/test-data/amrfinderplus_versioned.loc
@@ -4,4 +4,6 @@
# value, name, db_version, path
#
# for example
+# duplicate rows intentionally exercise the wrapper's unique_value filter
+amrfinderplus_V3.12_2024-05-02.2 V3.12-2024-05-02.2 3.12 ${__HERE__}/test-db
amrfinderplus_V3.12_2024-05-02.2 V3.12-2024-05-02.2 3.12 ${__HERE__}/test-db