diff --git a/data_managers/data_manager_build_amrfinderplus/data_manager/data_manager_build_amrfinderplus.py b/data_managers/data_manager_build_amrfinderplus/data_manager/data_manager_build_amrfinderplus.py index 683310b5da1..6c5a5458265 100644 --- a/data_managers/data_manager_build_amrfinderplus/data_manager/data_manager_build_amrfinderplus.py +++ b/data_managers/data_manager_build_amrfinderplus/data_manager/data_manager_build_amrfinderplus.py @@ -5,6 +5,7 @@ from ftplib import FTP from io import BytesIO from pathlib import Path +from typing import Optional, Set import pandas as pd @@ -20,11 +21,13 @@ def __init__( db_name="amrfinderplus-db", amrfinderplus_version="latest", date_version=None, + known_databases: Optional[Set[str]] = None, ): self.data_table_name = amrfinderplus_database self._db_name = db_name self._amrfinderplus_version = amrfinderplus_version self._amrfinderplus_date_version = date_version + self._known_databases = known_databases or set() self.data_table_entry = None self.amrfinderplus_table_list = None @@ -49,6 +52,11 @@ def get_data_manager(self): amrfinderplus_name = ( f"V{self._amrfinderplus_version}" f"-{self._amrfinderplus_date_version}" ) + # Galaxy data table values are unique row identifiers. If this value is + # already present, do not emit another row for the same database. 
+ if amrfinderplus_value in self._known_databases: + self.amrfinderplus_table_list["data_tables"][self.data_table_name] = [] + return self.amrfinderplus_table_list data_info = dict( value=amrfinderplus_value, name=amrfinderplus_name, @@ -79,9 +87,10 @@ def __init__( date_version=None, amrfinderplus_db_path=None, test_mode=False, + known_databases: Optional[Set[str]] = None, ): - super().__init__() + super().__init__(known_databases=known_databases) self.json_file_path = json_file_path self._output_dir = output_dir self._ncbi_ftp_url = ncbi_url @@ -98,6 +107,17 @@ def __init__( self.test_mode = test_mode self.amrfinderplus_db_path = amrfinderplus_db_path + @property + def amrfinderplus_value(self) -> str: + return ( + f"amrfinderplus_V{self._amrfinderplus_version}" + f"_{self._amrfinderplus_date_version}" + ) + + @property + def is_known_database(self) -> bool: + return self.amrfinderplus_value in self._known_databases + @staticmethod def subprocess_cmd(command, *args): """ @@ -108,14 +128,22 @@ def subprocess_cmd(command, *args): """ cmd = [command] [cmd.append(i) for i in args] - proc = sp.run(cmd, stdout=sp.PIPE, stderr=sp.PIPE) + proc = sp.run(cmd, stdout=sp.PIPE, stderr=sp.PIPE, text=True) if proc.returncode != 0: - print(f"Error type {proc.returncode} with : \n {proc}") + raise RuntimeError( + f"Command failed with exit code {proc.returncode}: {' '.join(map(str, cmd))}\n" + f"stdout:\n{proc.stdout}\n" + f"stderr:\n{proc.stderr}" + ) def download_amrfinderplus_db(self): """ Download the amrfinderplus database from the ncbi ftp server """ + # Avoid overwriting files or appending duplicate .loc rows when this + # database value is already registered in Galaxy. 
+ if self.is_known_database: + return self.amrfinderplus_db_path = f"{self._output_dir}/{self._db_name}" os.makedirs(self.amrfinderplus_db_path) @@ -170,6 +198,8 @@ def make_hmm_profile(self): """ Make the hmm profile using the AMR.LIB file previously download """ + if self.is_known_database: + return hmm_file = Path(f"{self.amrfinderplus_db_path}/AMR.LIB") if Path.exists(hmm_file) and self.test_mode is False: self.subprocess_cmd("hmmpress", "-f", hmm_file) @@ -208,6 +238,8 @@ def make_blastdb(self): """ Index fasta file for blast """ + if self.is_known_database: + return self.extract_filelist_makeblast() if self._amrfinderplus_version == "3.12": nucl_file_db_list = [ @@ -301,6 +333,11 @@ def parse_arguments(): action="store_true", help="option to test the script with an lighted database", ) + arg_parser.add_argument( + "--known_databases", + default="", + help="comma-separated list of installed amrfinderplus database values", + ) return arg_parser.parse_args() @@ -311,6 +348,7 @@ def main(): date_version=all_args.db_date, json_file_path=all_args.data_manager_json, test_mode=all_args.test, + known_databases=set(filter(None, all_args.known_databases.split(","))), ) amrfinderplus_download.read_json_input_file() amrfinderplus_download.download_amrfinderplus_db() diff --git a/data_managers/data_manager_build_amrfinderplus/data_manager/data_manager_build_amrfinderplus.xml b/data_managers/data_manager_build_amrfinderplus/data_manager/data_manager_build_amrfinderplus.xml index 67b96f70d7e..9cfcb6f9e80 100644 --- a/data_managers/data_manager_build_amrfinderplus/data_manager/data_manager_build_amrfinderplus.xml +++ b/data_managers/data_manager_build_amrfinderplus/data_manager/data_manager_build_amrfinderplus.xml @@ -6,12 +6,21 @@ @@ -38,18 +47,38 @@ python '$__tool_directory__/data_manager_build_amrfinderplus.py' - + + + + + + + + + + + + + + + + + + + + + - + @@ -66,7 +95,7 @@ python '$__tool_directory__/data_manager_build_amrfinderplus.py' - + diff --git 
a/data_managers/data_manager_build_amrfinderplus/data_manager/macro.xml b/data_managers/data_manager_build_amrfinderplus/data_manager/macro.xml index 8812c313b5d..8ae9b671f51 100644 --- a/data_managers/data_manager_build_amrfinderplus/data_manager/macro.xml +++ b/data_managers/data_manager_build_amrfinderplus/data_manager/macro.xml @@ -3,7 +3,7 @@ 4.0.23 3.11.14 2.3.3 - 0 + 1 21.05 diff --git a/data_managers/data_manager_build_amrfinderplus/data_manager_conf.xml b/data_managers/data_manager_build_amrfinderplus/data_manager_conf.xml index 490abdd3a7b..3ff41174670 100644 --- a/data_managers/data_manager_build_amrfinderplus/data_manager_conf.xml +++ b/data_managers/data_manager_build_amrfinderplus/data_manager_conf.xml @@ -5,6 +5,7 @@ + ${path} diff --git a/data_managers/data_manager_build_amrfinderplus/test-data/amrfinderplus_versioned.loc.test b/data_managers/data_manager_build_amrfinderplus/test-data/amrfinderplus_versioned.loc.test index 71304a68752..3982b1474a2 100644 --- a/data_managers/data_manager_build_amrfinderplus/test-data/amrfinderplus_versioned.loc.test +++ b/data_managers/data_manager_build_amrfinderplus/test-data/amrfinderplus_versioned.loc.test @@ -5,6 +5,4 @@ # # for example amrfinderplus_V3.12_2024-05-02.2 V3.12-2024-05-02.2 3.12 amrfinderplus-db -amrfinderplus_V3.12_2024-01-31.1 V3.12-2024-01-31.1 3.12 amrfinderplus-db amrfinderplus_V3.12_2024-05-02.2 V3.12-2024-05-02.2 3.12 /private/var/folders/7j/21czvpk170zcyjdbqrp0hmsh0000gn/T/tmp3d_yq23a/galaxy-dev/tool-data/amrfinderplus-db/amrfinderplus_V3.12_2024-05-02.2 -amrfinderplus_V3.12_2024-01-31.1 V3.12-2024-01-31.1 3.12 /private/var/folders/7j/21czvpk170zcyjdbqrp0hmsh0000gn/T/tmp3d_yq23a/galaxy-dev/tool-data/amrfinderplus-db/amrfinderplus_V3.12_2024-01-31.1 diff --git a/tools/amrfinderplus/amrfinderplus.xml b/tools/amrfinderplus/amrfinderplus.xml index d0d58ed1b4f..fdde43407bb 100644 --- a/tools/amrfinderplus/amrfinderplus.xml +++ b/tools/amrfinderplus/amrfinderplus.xml @@ -84,6 +84,7 @@ + @@ -230,7 
+231,20 @@ - + + +
+ + + + + +
+ + + + +
diff --git a/tools/amrfinderplus/macro.xml b/tools/amrfinderplus/macro.xml index d512edc32cb..4588d8812b7 100644 --- a/tools/amrfinderplus/macro.xml +++ b/tools/amrfinderplus/macro.xml @@ -1,6 +1,6 @@ 3.12.8 - 0 + 1 21.05 diff --git a/tools/amrfinderplus/test-data/amrfinderplus_versioned.loc b/tools/amrfinderplus/test-data/amrfinderplus_versioned.loc index a11d9c5d17b..1f6052f0892 100644 --- a/tools/amrfinderplus/test-data/amrfinderplus_versioned.loc +++ b/tools/amrfinderplus/test-data/amrfinderplus_versioned.loc @@ -4,4 +4,6 @@ # value, name, db_version, path # # for example +# duplicate rows intentionally exercise the wrapper's unique_value filter +amrfinderplus_V3.12_2024-05-02.2 V3.12-2024-05-02.2 3.12 ${__HERE__}/test-db amrfinderplus_V3.12_2024-05-02.2 V3.12-2024-05-02.2 3.12 ${__HERE__}/test-db