Skip to content

Commit 86af6c8

Browse files
committed
AMRFinderPlus DM: Make reentrant
Installing a database with the AMRFinderPlus Data Manager when that database is already installed adds another entry with the same name to the .loc file. This makes no sense: if the database already exists, it should not be duplicated in the .loc file.
1 parent 314b6fc commit 86af6c8

3 files changed

Lines changed: 58 additions & 5 deletions

File tree

data_managers/data_manager_build_amrfinderplus/data_manager/data_manager_build_amrfinderplus.py

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from ftplib import FTP
66
from io import BytesIO
77
from pathlib import Path
8+
from typing import Optional, Set
89

910
import pandas as pd
1011

@@ -20,11 +21,13 @@ def __init__(
2021
db_name="amrfinderplus-db",
2122
amrfinderplus_version="latest",
2223
date_version=None,
24+
known_databases: Optional[Set[str]] = None,
2325
):
2426
self.data_table_name = amrfinderplus_database
2527
self._db_name = db_name
2628
self._amrfinderplus_version = amrfinderplus_version
2729
self._amrfinderplus_date_version = date_version
30+
self._known_databases = known_databases or set()
2831
self.data_table_entry = None
2932
self.amrfinderplus_table_list = None
3033

@@ -49,6 +52,11 @@ def get_data_manager(self):
4952
amrfinderplus_name = (
5053
f"V{self._amrfinderplus_version}" f"-{self._amrfinderplus_date_version}"
5154
)
55+
# Galaxy data table values are unique row identifiers. If this value is
56+
# already present, do not emit another row for the same database.
57+
if amrfinderplus_value in self._known_databases:
58+
self.amrfinderplus_table_list["data_tables"][self.data_table_name] = []
59+
return self.amrfinderplus_table_list
5260
data_info = dict(
5361
value=amrfinderplus_value,
5462
name=amrfinderplus_name,
@@ -79,9 +87,10 @@ def __init__(
7987
date_version=None,
8088
amrfinderplus_db_path=None,
8189
test_mode=False,
90+
known_databases: Optional[Set[str]] = None,
8291
):
8392

84-
super().__init__()
93+
super().__init__(known_databases=known_databases)
8594
self.json_file_path = json_file_path
8695
self._output_dir = output_dir
8796
self._ncbi_ftp_url = ncbi_url
@@ -98,6 +107,17 @@ def __init__(
98107
self.test_mode = test_mode
99108
self.amrfinderplus_db_path = amrfinderplus_db_path
100109

110+
@property
111+
def amrfinderplus_value(self) -> str:
112+
return (
113+
f"amrfinderplus_V{self._amrfinderplus_version}"
114+
f"_{self._amrfinderplus_date_version}"
115+
)
116+
117+
@property
118+
def is_known_database(self) -> bool:
119+
return self.amrfinderplus_value in self._known_databases
120+
101121
@staticmethod
102122
def subprocess_cmd(command, *args):
103123
"""
@@ -116,6 +136,10 @@ def download_amrfinderplus_db(self):
116136
"""
117137
Download the amrfinderplus database from the ncbi ftp server
118138
"""
139+
# Avoid overwriting files or appending duplicate .loc rows when this
140+
# database value is already registered in Galaxy.
141+
if self.is_known_database:
142+
return
119143
self.amrfinderplus_db_path = f"{self._output_dir}/{self._db_name}"
120144
os.makedirs(self.amrfinderplus_db_path)
121145

@@ -170,6 +194,8 @@ def make_hmm_profile(self):
170194
"""
171195
Make the hmm profile using the AMR.LIB file previously download
172196
"""
197+
if self.is_known_database:
198+
return
173199
hmm_file = Path(f"{self.amrfinderplus_db_path}/AMR.LIB")
174200
if Path.exists(hmm_file) and self.test_mode is False:
175201
self.subprocess_cmd("hmmpress", "-f", hmm_file)
@@ -208,6 +234,8 @@ def make_blastdb(self):
208234
"""
209235
Index fasta file for blast
210236
"""
237+
if self.is_known_database:
238+
return
211239
self.extract_filelist_makeblast()
212240
if self._amrfinderplus_version == "3.12":
213241
nucl_file_db_list = [
@@ -301,6 +329,11 @@ def parse_arguments():
301329
action="store_true",
302330
help="option to test the script with an lighted database",
303331
)
332+
arg_parser.add_argument(
333+
"--known_databases",
334+
default="",
335+
help="comma-separated list of installed amrfinderplus database values",
336+
)
304337
return arg_parser.parse_args()
305338

306339

@@ -311,6 +344,7 @@ def main():
311344
date_version=all_args.db_date,
312345
json_file_path=all_args.data_manager_json,
313346
test_mode=all_args.test,
347+
known_databases=set(filter(None, all_args.known_databases.split(","))),
314348
)
315349
amrfinderplus_download.read_json_input_file()
316350
amrfinderplus_download.download_amrfinderplus_db()

data_managers/data_manager_build_amrfinderplus/data_manager/data_manager_build_amrfinderplus.xml

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,21 @@
66
<expand macro="requirements"/>
77
<command detect_errors="exit_code">
88
<![CDATA[
9+
#set $data_table = $__app__.tool_data_tables.get('amrfinderplus_versioned_database')
10+
#if $data_table is not None and len($data_table.get_fields()) != 0
11+
## Pass installed values to the script so rerunning the data manager
12+
## does not append duplicate rows for an already-installed database.
13+
#set $known_databases = '--known_databases=' + ','.join([row[0] for row in $data_table.get_fields()])
14+
#else
15+
#set $known_databases = ''
16+
#end if
917
python '$__tool_directory__/data_manager_build_amrfinderplus.py'
1018
'$output_file'
1119
--db_version '$database_list.database_version_select'
1220
#if $database_list.database_version_select != 'latest':
1321
--db_date '$database_list.database_date_select'
1422
#end if
23+
$known_databases
1524
$test_data_manager
1625
]]></command>
1726
<inputs>
@@ -49,7 +58,19 @@ python '$__tool_directory__/data_manager_build_amrfinderplus.py'
4958
</assert_contents>
5059
</output>
5160
</test>
52-
<!-- Test_2 DB 3.12 2024-01-31.1 -->
61+
<!-- Test_2 already installed DB is not added again -->
62+
<test expect_num_outputs="1">
63+
<param name="test_data_manager" value="--known_databases amrfinderplus_V3.12_2024-05-02.2 --test"/>
64+
<output name="output_file">
65+
<assert_contents>
66+
<has_n_lines n="1"/>
67+
<has_text text="{&quot;data_tables&quot;"/>
68+
<has_text text="amrfinderplus_versioned_database"/>
69+
<has_text text='"amrfinderplus_versioned_database": []'/>
70+
</assert_contents>
71+
</output>
72+
</test>
73+
<!-- Test_3 DB 3.12 2024-01-31.1 -->
5374
<test expect_num_outputs="1">
5475
<param name="test_data_manager" value="--test"/>
5576
<conditional name="database_list">
@@ -66,7 +87,7 @@ python '$__tool_directory__/data_manager_build_amrfinderplus.py'
6687
</assert_contents>
6788
</output>
6889
</test>
69-
<!-- Test_3 DB 4.0 2025-07-16.1 -->
90+
<!-- Test_4 DB 4.0 2025-07-16.1 -->
7091
<test expect_num_outputs="1">
7192
<param name="test_data_manager" value="--test"/>
7293
<conditional name="database_list">

data_managers/data_manager_build_amrfinderplus/test-data/amrfinderplus_versioned.loc.test

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,4 @@
55
#
66
# for example
77
amrfinderplus_V3.12_2024-05-02.2 V3.12-2024-05-02.2 3.12 amrfinderplus-db
8-
amrfinderplus_V3.12_2024-01-31.1 V3.12-2024-01-31.1 3.12 amrfinderplus-db
98
amrfinderplus_V3.12_2024-05-02.2 V3.12-2024-05-02.2 3.12 /private/var/folders/7j/21czvpk170zcyjdbqrp0hmsh0000gn/T/tmp3d_yq23a/galaxy-dev/tool-data/amrfinderplus-db/amrfinderplus_V3.12_2024-05-02.2
10-
amrfinderplus_V3.12_2024-01-31.1 V3.12-2024-01-31.1 3.12 /private/var/folders/7j/21czvpk170zcyjdbqrp0hmsh0000gn/T/tmp3d_yq23a/galaxy-dev/tool-data/amrfinderplus-db/amrfinderplus_V3.12_2024-01-31.1

0 commit comments

Comments
 (0)