Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from ftplib import FTP
from io import BytesIO
from pathlib import Path
from typing import Optional, Set

import pandas as pd

Expand All @@ -20,11 +21,13 @@ def __init__(
db_name="amrfinderplus-db",
amrfinderplus_version="latest",
date_version=None,
known_databases: Optional[Set[str]] = None,
):
self.data_table_name = amrfinderplus_database
self._db_name = db_name
self._amrfinderplus_version = amrfinderplus_version
self._amrfinderplus_date_version = date_version
self._known_databases = known_databases or set()
self.data_table_entry = None
self.amrfinderplus_table_list = None

Expand All @@ -49,6 +52,11 @@ def get_data_manager(self):
amrfinderplus_name = (
f"V{self._amrfinderplus_version}" f"-{self._amrfinderplus_date_version}"
)
# Galaxy data table values are unique row identifiers. If this value is
# already present, do not emit another row for the same database.
if amrfinderplus_value in self._known_databases:
self.amrfinderplus_table_list["data_tables"][self.data_table_name] = []
return self.amrfinderplus_table_list
data_info = dict(
value=amrfinderplus_value,
name=amrfinderplus_name,
Expand Down Expand Up @@ -79,9 +87,10 @@ def __init__(
date_version=None,
amrfinderplus_db_path=None,
test_mode=False,
known_databases: Optional[Set[str]] = None,
):

super().__init__()
super().__init__(known_databases=known_databases)
self.json_file_path = json_file_path
self._output_dir = output_dir
self._ncbi_ftp_url = ncbi_url
Expand All @@ -98,6 +107,17 @@ def __init__(
self.test_mode = test_mode
self.amrfinderplus_db_path = amrfinderplus_db_path

# Identifier string for the configured database, assembled as
# "amrfinderplus_V<version>_<date_version>" from the instance's
# _amrfinderplus_version and _amrfinderplus_date_version attributes.
@property
def amrfinderplus_value(self) -> str:
return (
f"amrfinderplus_V{self._amrfinderplus_version}"
f"_{self._amrfinderplus_date_version}"
)

# True when this instance's amrfinderplus_value is a member of the
# _known_databases set stored by the constructor (an empty set when no
# known databases were supplied, so the result is then always False).
@property
def is_known_database(self) -> bool:
return self.amrfinderplus_value in self._known_databases

@staticmethod
def subprocess_cmd(command, *args):
"""
Expand All @@ -116,6 +136,10 @@ def download_amrfinderplus_db(self):
"""
Download the amrfinderplus database from the ncbi ftp server
"""
# Avoid overwriting files or appending duplicate .loc rows when this
# database value is already registered in Galaxy.
if self.is_known_database:
return
self.amrfinderplus_db_path = f"{self._output_dir}/{self._db_name}"
os.makedirs(self.amrfinderplus_db_path)

Expand Down Expand Up @@ -170,6 +194,8 @@ def make_hmm_profile(self):
"""
Make the hmm profile using the AMR.LIB file previously download
"""
if self.is_known_database:
return
hmm_file = Path(f"{self.amrfinderplus_db_path}/AMR.LIB")
if Path.exists(hmm_file) and self.test_mode is False:
self.subprocess_cmd("hmmpress", "-f", hmm_file)
Expand Down Expand Up @@ -208,6 +234,8 @@ def make_blastdb(self):
"""
Index fasta file for blast
"""
if self.is_known_database:
return
self.extract_filelist_makeblast()
if self._amrfinderplus_version == "3.12":
nucl_file_db_list = [
Expand Down Expand Up @@ -301,6 +329,11 @@ def parse_arguments():
action="store_true",
help="option to test the script with an lighted database",
)
arg_parser.add_argument(
"--known_databases",
default="",
help="comma-separated list of installed amrfinderplus database values",
)
return arg_parser.parse_args()


Expand All @@ -311,6 +344,7 @@ def main():
date_version=all_args.db_date,
json_file_path=all_args.data_manager_json,
test_mode=all_args.test,
known_databases=set(filter(None, all_args.known_databases.split(","))),
)
amrfinderplus_download.read_json_input_file()
amrfinderplus_download.download_amrfinderplus_db()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,21 @@
<expand macro="requirements"/>
<command detect_errors="exit_code">
<![CDATA[
#set $data_table = $__app__.tool_data_tables.get('amrfinderplus_versioned_database')
#if $data_table is not None and len($data_table.get_fields()) != 0
## Pass installed values to the script so rerunning the data manager
## does not append duplicate rows for an already-installed database.
#set $known_databases = '--known_databases=' + ','.join([row[0] for row in $data_table.get_fields()])
#else
#set $known_databases = ''
#end if
python '$__tool_directory__/data_manager_build_amrfinderplus.py'
'$output_file'
--db_version '$database_list.database_version_select'
#if $database_list.database_version_select != 'latest':
--db_date '$database_list.database_date_select'
#end if
$known_databases
$test_data_manager
]]></command>
<inputs>
Expand All @@ -38,9 +47,13 @@ python '$__tool_directory__/data_manager_build_amrfinderplus.py'
<data name="output_file" format="data_manager_json"/>
</outputs>
<tests>
<!-- Test_1 DB latest -->
<!-- Test_1 DB 3.12 2024-05-02.2 -->
<test expect_num_outputs="1">
<param name="test_data_manager" value="--test"/>
<conditional name="database_list">
<param name="database_version_select" value="3.12"/>
<param name="database_date_select" value="2024-05-02.2"/>
</conditional>
<output name="output_file">
<assert_contents>
<has_n_lines n="1"/>
Expand All @@ -49,37 +62,36 @@ python '$__tool_directory__/data_manager_build_amrfinderplus.py'
</assert_contents>
</output>
</test>
<!-- Test_2 DB 3.12 2024-01-31.1 -->
<!-- Test_2 already installed DB is not added again -->
<test expect_num_outputs="1">
<param name="test_data_manager" value="--test"/>
<param name="test_data_manager" value="--known_databases amrfinderplus_V3.12_2024-05-02.2 --test"/>
<conditional name="database_list">
<param name="database_version_select" value="3.12"/>
<param name="database_date_select" value="2024-01-31.1"/>
<param name="database_date_select" value="2024-05-02.2"/>
</conditional>
<output name="output_file">
<assert_contents>
<has_n_lines n="1"/>
<has_text text="{&quot;data_tables&quot;"/>
<has_text text="amrfinderplus_versioned_database"/>
<has_text text='"name": "V3.12-2024-01-31.1"'/>
<has_text text='"db_version": "3.12"'/>
<has_text text='"amrfinderplus_versioned_database": []'/>
</assert_contents>
</output>
</test>
<!-- Test_3 DB 4.0 2025-07-16.1 -->
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why can the DM not download the latest DB? Even if the tool is not wrapped yet, the download should work.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are right, that should still be tested. I'll add one.

<!-- Test_3 DB 3.12 2024-01-31.1 -->
<test expect_num_outputs="1">
<param name="test_data_manager" value="--test"/>
<conditional name="database_list">
<param name="database_version_select" value="4.0"/>
<param name="database_date_select" value="2025-07-16.1"/>
<param name="database_version_select" value="3.12"/>
<param name="database_date_select" value="2024-01-31.1"/>
</conditional>
<output name="output_file">
<assert_contents>
<has_n_lines n="1"/>
<has_text text="{&quot;data_tables&quot;"/>
<has_text text="amrfinderplus_versioned_database"/>
<has_text text='"name": "V4.0-2025-07-16.1"'/>
<has_text text='"db_version": "4.0"'/>
<has_text text='"name": "V3.12-2024-01-31.1"'/>
<has_text text='"db_version": "3.12"'/>
</assert_contents>
</output>
</test>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
<token name="@TOOL_VERSION@">4.0.23</token>
<token name="@PYTHON_VERSION@">3.11.14</token>
<token name="@PANDAS@">2.3.3</token>
<token name="@VERSION_SUFFIX@">0</token>
<token name="@VERSION_SUFFIX@">1</token>
<token name="@PROFILE@">21.05</token>
<xml name="requirements">
<requirements>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
<output>
<column name="value" />
<column name="name" />
<column name="db_version" />
<column name="path" output_ref="output_file">
<move type="directory" relativize_symlinks="True">
<source>${path}</source>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,4 @@
#
# for example
amrfinderplus_V3.12_2024-05-02.2 V3.12-2024-05-02.2 3.12 amrfinderplus-db
amrfinderplus_V3.12_2024-01-31.1 V3.12-2024-01-31.1 3.12 amrfinderplus-db
amrfinderplus_V3.12_2024-05-02.2 V3.12-2024-05-02.2 3.12 /private/var/folders/7j/21czvpk170zcyjdbqrp0hmsh0000gn/T/tmp3d_yq23a/galaxy-dev/tool-data/amrfinderplus-db/amrfinderplus_V3.12_2024-05-02.2
amrfinderplus_V3.12_2024-01-31.1 V3.12-2024-01-31.1 3.12 /private/var/folders/7j/21czvpk170zcyjdbqrp0hmsh0000gn/T/tmp3d_yq23a/galaxy-dev/tool-data/amrfinderplus-db/amrfinderplus_V3.12_2024-01-31.1
5 changes: 5 additions & 0 deletions tools/amrfinderplus/amrfinderplus.xml
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@
<param name="amrfinder_db_select" type="select" label="The amrfinderplus database">
<options from_data_table="amrfinderplus_versioned_database">
<filter type="static_value" value="3.12" column="db_version"/>
<filter type="unique_value" column="name"/>
Comment thread
jakobnissen marked this conversation as resolved.
<validator message="No amrfinderplus database is available" type="no_options"/>
</options>
</param>
Expand Down Expand Up @@ -230,6 +231,10 @@
<has_text text="TTGTCCAAAGCC"/>
</assert_contents>
</output>
<assert_command>
<has_text text="--database"/>
<not_has_text text="test-db,"/>
Comment thread
jakobnissen marked this conversation as resolved.
</assert_command>
</test>
<test expect_num_outputs="3"> <!-- TEST_2 nucleotide input and full options -->
<section name="input_option">
Expand Down
2 changes: 1 addition & 1 deletion tools/amrfinderplus/macro.xml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<macros>
<token name="@TOOL_VERSION@">3.12.8</token>
<token name="@VERSION_SUFFIX@">0</token>
<token name="@VERSION_SUFFIX@">1</token>
<token name="@PROFILE@">21.05</token>
<xml name="version_command">
<version_command><![CDATA[amrfinder --version]]></version_command>
Expand Down
1 change: 1 addition & 0 deletions tools/amrfinderplus/test-data/amrfinderplus_versioned.loc
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@
#
# for example
amrfinderplus_V3.12_2024-05-02.2 V3.12-2024-05-02.2 3.12 ${__HERE__}/test-db
amrfinderplus_V3.12_2024-05-02.2 V3.12-2024-05-02.2 3.12 ${__HERE__}/test-db
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should not happen with the DM update, right?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you need a duplicated row for the tool test, I think you only need the example in the loc.test file.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's right, but I want to keep the fixes on both the tool wrapper and the DB manager side. Therefore, the ability to ignore duplicate rows should also be tested for the AMRFinderPlus tool itself. I'll add an explicit test for this.

Loading