-
Notifications
You must be signed in to change notification settings - Fork 441
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
710 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
name: sourmash | ||
owner: iuc | ||
description: "Compute and compare MinHash signatures for DNA data sets." | ||
long_description: | | ||
Quickly search, compare, and analyze genomic and metagenomic data sets | ||
homepage_url: https://sourmash.readthedocs.io/en/latest/ | ||
remote_repository_url: https://github.com/sourmash-bio/sourmash/tree/latest/src/sourmash | ||
type: unrestricted | ||
categories: | ||
- Metagenomics | ||
auto_tool_repositories: | ||
name_template: "{{ tool_id }}" | ||
description_template: "Wrapper to sketch DNA, LCA classify and summarize : {{ tool_name }}" | ||
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
"""create DNA signatures""" | ||
|
||
usage = """ | ||
sourmash sketch dna data/*.fna.gz | ||
The 'sketch dna' command reads in DNA sequences and outputs DNA | ||
sketches. | ||
By default, 'sketch dna' uses the parameter string 'k=31,scaled=1000,noabund'. | ||
This creates sketches with a k-mer size of 31, a scaled factor of | ||
1000, and no abundance tracking of k-mers. You can specify one or | ||
more parameter strings of your own with -p, e.g. 'sourmash sketch dna | ||
-p k=31,noabund -p k=21,scaled=100,abund'. Note that a single `-p` parameter string can contain multiple ksize values, but only a single scaled value or abundance value, e.g. -p k=21,k=31,abund | ||
'sourmash sketch' takes input sequences in FASTA and FASTQ, | ||
uncompressed or gz/bz2 compressed. | ||
Please see the 'sketch' documentation for more details: | ||
https://sourmash.readthedocs.io/en/latest/sourmash-sketch.html | ||
""" | ||
|
||
import sourmash | ||
from sourmash.logging import notify, print_results, error | ||
|
||
from sourmash import command_sketch | ||
|
||
assert command_sketch.DEFAULTS["dna"] == "k=31,scaled=1000,noabund" | ||
|
||
|
||
def subparser(subparsers):
    """Register the 'dna' sketch subcommand and all of its options.

    Adds a parser named "dna" (aliases "rna", "nucleotide", "nt") to
    *subparsers*, using the module-level ``usage`` text, then declares the
    general options, the positional input filenames, and a
    "File handling options" argument group. Nothing is returned.
    """
    parser = subparsers.add_parser(
        "dna", usage=usage, aliases=["rna", "nucleotide", "nt"]
    )

    # General options, attached directly to the subcommand parser.
    parser.add_argument(
        "--license",
        type=str,
        default="CC0",
        help="signature license. Currently only CC0 is supported.",
    )
    parser.add_argument(
        "--check-sequence",
        action="store_true",
        help="complain if input sequence is invalid DNA",
    )
    parser.add_argument(
        "-p",
        "--param-string",
        action="append",
        default=[],
        help="signature parameters to use.",
    )

    # Zero or more input sequence files.
    parser.add_argument("filenames", nargs="*", help="file(s) of sequences")

    file_group = parser.add_argument_group("File handling options")
    file_group.add_argument(
        "-f",
        "--force",
        action="store_true",
        help="recompute signatures even if the file exists",
    )
    # --from-file is registered on the parser itself, not on the file group.
    parser.add_argument(
        "--from-file",
        help="a text file containing a list of sequence files to load",
    )
    file_group.add_argument(
        "-o", "--output", help="output computed signatures to this file"
    )
    # All three spellings store into the same 'merge' destination.
    file_group.add_argument(
        "--set-name",
        "--name",
        "--merge",
        dest="merge",
        metavar="FILE",
        type=str,
        default="",
        help=(
            "name the output sketch as specified; note, merges all input "
            "files while sketching"
        ),
    )
    file_group.add_argument(
        "--output-dir",
        "--outdir",
        help="output computed signatures to this directory",
    )

    # The remaining file-handling switches are plain on/off flags.
    boolean_flags = (
        (
            "--singleton",
            "compute a signature for each sequence record individually",
        ),
        (
            "--name-from-first",
            "name the signature generated from each file after the first "
            "record in the file",
        ),
        (
            "--randomize",
            "shuffle the list of input filenames randomly",
        ),
    )
    for flag, description in boolean_flags:
        file_group.add_argument(flag, action="store_true", help=description)
|
||
|
||
def main(args):
    """Run the DNA sketch command with the parsed *args* and return its result.

    The command implementation is imported when this function is called,
    not when the module is loaded.
    """
    from sourmash.command_sketch import dna

    return dna(args)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
<?xml version="1.0"?> | ||
<macros> | ||
<token name="@TOOL_VERSION@">4.8.11</token> | ||
<token name="@VERSION_SUFFIX@">0</token> | ||
<token name="@PROFILE@">23.2</token> | ||
|
||
<xml name="xrefs"> | ||
<xrefs> | ||
<xref type="bio.tools">sourmash</xref> | ||
</xrefs> | ||
</xml> | ||
<xml name="requirements">
    <requirements>
        <!--
            sourmash is a Python package distributed under its own name;
            the "bioconductor-" prefix is the naming scheme for
            R/Bioconductor packages, so "bioconductor-sourmash" cannot be
            resolved.
        -->
        <requirement type="package" version="@TOOL_VERSION@">sourmash</requirement>
    </requirements>
</xml>
<xml name="citations"> | ||
<citations> | ||
<citation type="doi">10.21105/joss.00027</citation> | ||
</citations> | ||
</xml> | ||
</macros> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
<tool id="sourmash" name="sourmash" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>Compute and compare MinHash signatures for DNA data sets</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="xrefs"/>
    <expand macro="requirements"/>

    <!--
        Calls the installed `sourmash sketch dna` command line directly.
        Notes on the command below:
          * the parameter string must look like 'k=21,k=31,scaled=1000,noabund';
          * singleton / name-from-first / randomize are value-less switches, so
            each boolean expands to either the bare flag or nothing;
          * the input dataset is passed as a positional sequence file
            (from-file expects a text file *listing* sequence files);
          * only an output file is requested, written straight to the Galaxy
            dataset path (an output file and an output directory are
            alternatives, not meant to be combined).
    -->
    <command detect_errors="exit_code"><![CDATA[
        #set $sketch_params = []
        #for $ksize in str($additional_options.k_mers).split(',')
            #if $ksize.strip()
                #silent $sketch_params.append('k=' + $ksize.strip())
            #end if
        #end for
        #if str($additional_options.scaled).strip()
            #silent $sketch_params.append('scaled=' + str($additional_options.scaled).strip())
        #end if
        #silent $sketch_params.append(str($additional_options.noabund))
        #set $param_string = ','.join($sketch_params)

        sourmash sketch dna
            --param-string '$param_string'
            $additional_options.singleton
            $additional_options.name_from_first
            $additional_options.randomize
            --output '$dna_sketch'
            '$input_seq'
    ]]></command>
    <inputs>
        <param name="input_seq" type="data" format="fasta,fasta.gz,fastqsanger,fastqsanger.gz" label="Input DNA Sequence"/>
        <section name="additional_options" title="Additional Options" expanded="true">
            <!-- Free text so several k-mer sizes can be given; restricted to digits and commas. -->
            <param name="k_mers" type="text" value="31" label="K-mer size" help="One or more k-mer sizes separated by ',' (e.g. 21,31). DEFAULT: 31">
                <validator type="regex" message="Enter one or more positive integers separated by commas, e.g. 21,31">^[0-9]+(,[0-9]+)*$</validator>
            </param>
            <param name="scaled" type="integer" value="1000" optional="true" label="Scaled factor" help="DEFAULT: 1000"/>
            <param name="noabund" type="select" label="Abundance tracking of k-mers" help="DEFAULT: noabund">
                <option value="noabund" selected="true">No abundance tracking of k-mer</option>
                <option value="abund">Abundance tracking of k-mer</option>
            </param>
            <param argument="--singleton" type="boolean" truevalue="--singleton" falsevalue="" checked="false" label="Compute a signature for each sequence record individually"/>
            <param argument="--name-from-first" type="boolean" truevalue="--name-from-first" falsevalue="" checked="false" label="Name the signature generated from each file after the first record in the file"/>
            <param argument="--randomize" type="boolean" truevalue="--randomize" falsevalue="" checked="false" label="Shuffle the list of input filenames randomly"/>
        </section>
    </inputs>
    <outputs>
        <!-- sourmash signature files are JSON documents. -->
        <data name="dna_sketch" format="json" label="${tool.name} on ${on_string}:"/>
    </outputs>
    <tests>
        <test expect_num_outputs="1">
            <param name="input_seq" value="GCA_903797575.1_PARATYPHIC668_genomic (1).fna"/>
            <section name="additional_options">
                <param name="k_mers" value="45"/>
                <param name="scaled" value="1040"/>
                <param name="noabund" value="noabund"/>
                <param name="singleton" value="true"/>
                <param name="name_from_first" value="true"/>
                <param name="randomize" value="true"/>
            </section>
            <output name="dna_sketch">
                <assert_contents>
                    <has_size value="7373" delta="1000"/>
                </assert_contents>
            </output>
        </test>
    </tests>

    <help><![CDATA[
Quickly search, compare, and analyze genomic and metagenomic data sets
    ]]></help>
    <expand macro="citations"/>
</tool>
Oops, something went wrong.