55from ftplib import FTP
66from io import BytesIO
77from pathlib import Path
8+ from typing import Optional , Set
89
910import pandas as pd
1011
@@ -20,11 +21,13 @@ def __init__(
2021 db_name = "amrfinderplus-db" ,
2122 amrfinderplus_version = "latest" ,
2223 date_version = None ,
24+ known_databases : Optional [Set [str ]] = None ,
2325 ):
2426 self .data_table_name = amrfinderplus_database
2527 self ._db_name = db_name
2628 self ._amrfinderplus_version = amrfinderplus_version
2729 self ._amrfinderplus_date_version = date_version
30+ self ._known_databases = known_databases or set ()
2831 self .data_table_entry = None
2932 self .amrfinderplus_table_list = None
3033
@@ -49,6 +52,11 @@ def get_data_manager(self):
4952 amrfinderplus_name = (
5053 f"V{ self ._amrfinderplus_version } " f"-{ self ._amrfinderplus_date_version } "
5154 )
55+ # Galaxy data table values are unique row identifiers. If this value is
56+ # already present, do not emit another row for the same database.
57+ if amrfinderplus_value in self ._known_databases :
58+ self .amrfinderplus_table_list ["data_tables" ][self .data_table_name ] = []
59+ return self .amrfinderplus_table_list
5260 data_info = dict (
5361 value = amrfinderplus_value ,
5462 name = amrfinderplus_name ,
@@ -79,9 +87,10 @@ def __init__(
7987 date_version = None ,
8088 amrfinderplus_db_path = None ,
8189 test_mode = False ,
90+ known_databases : Optional [Set [str ]] = None ,
8291 ):
8392
84- super ().__init__ ()
93+ super ().__init__ (known_databases = known_databases )
8594 self .json_file_path = json_file_path
8695 self ._output_dir = output_dir
8796 self ._ncbi_ftp_url = ncbi_url
@@ -98,6 +107,17 @@ def __init__(
98107 self .test_mode = test_mode
99108 self .amrfinderplus_db_path = amrfinderplus_db_path
100109
@property
def amrfinderplus_value(self) -> str:
    """Return the identifier string for this database release.

    The string has the form ``amrfinderplus_V<version>_<date_version>``,
    built from ``self._amrfinderplus_version`` and
    ``self._amrfinderplus_date_version``.
    """
    # The literal parts must not contain stray whitespace: this string is
    # compared for exact membership against the known database values, so
    # a trailing space would make every lookup miss.
    return (
        f"amrfinderplus_V{self._amrfinderplus_version}"
        f"_{self._amrfinderplus_date_version}"
    )
116+
@property
def is_known_database(self) -> bool:
    """Return True when this release's value is in the known databases.

    Checks ``self.amrfinderplus_value`` for membership in
    ``self._known_databases``.
    """
    return self.amrfinderplus_value in self._known_databases
120+
101121 @staticmethod
102122 def subprocess_cmd (command , * args ):
103123 """
@@ -116,6 +136,10 @@ def download_amrfinderplus_db(self):
116136 """
117137 Download the amrfinderplus database from the ncbi ftp server
118138 """
139+ # Avoid overwriting files or appending duplicate .loc rows when this
140+ # database value is already registered in Galaxy.
141+ if self .is_known_database :
142+ return
119143 self .amrfinderplus_db_path = f"{ self ._output_dir } /{ self ._db_name } "
120144 os .makedirs (self .amrfinderplus_db_path )
121145
@@ -170,6 +194,8 @@ def make_hmm_profile(self):
170194 """
171195 Make the hmm profile using the AMR.LIB file previously download
172196 """
197+ if self .is_known_database :
198+ return
173199 hmm_file = Path (f"{ self .amrfinderplus_db_path } /AMR.LIB" )
174200 if Path .exists (hmm_file ) and self .test_mode is False :
175201 self .subprocess_cmd ("hmmpress" , "-f" , hmm_file )
@@ -208,6 +234,8 @@ def make_blastdb(self):
208234 """
209235 Index fasta file for blast
210236 """
237+ if self .is_known_database :
238+ return
211239 self .extract_filelist_makeblast ()
212240 if self ._amrfinderplus_version == "3.12" :
213241 nucl_file_db_list = [
@@ -301,6 +329,11 @@ def parse_arguments():
301329 action = "store_true" ,
302330 help = "option to test the script with an lighted database" ,
303331 )
332+ arg_parser .add_argument (
333+ "--known_databases" ,
334+ default = "" ,
335+ help = "comma-separated list of installed amrfinderplus database values" ,
336+ )
304337 return arg_parser .parse_args ()
305338
306339
@@ -311,6 +344,7 @@ def main():
311344 date_version = all_args .db_date ,
312345 json_file_path = all_args .data_manager_json ,
313346 test_mode = all_args .test ,
347+ known_databases = set (filter (None , all_args .known_databases .split ("," ))),
314348 )
315349 amrfinderplus_download .read_json_input_file ()
316350 amrfinderplus_download .download_amrfinderplus_db ()
0 commit comments