Skip to content

Commit fd43031

Browse files
authored
Merge pull request #32 from B-UMMI/dev
Add StrainXpress to short-read assembly workflow
2 parents 225fba6 + 0929429 commit fd43031

12 files changed

+97
-4
lines changed

README.md

+2
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,8 @@ To use LMAS the following options are available:
173173
(default auto)
174174
--skesa Boolean controlling the execution of the SKESA assembler.
175175
(default true)
176+
--strainxpress Boolean controlling the execution of the StrainXpress assembler.
177+
(default false)
176178
--unicycler Boolean controlling the execution of the Unicycler assembler.
177179
(default true)
178180
--velvetoptimiser Boolean controlling the execution of the VelvetOptimiser assembler.

conf/containers.config

+6
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@ process {
22
withName: REFORMAT {
33
container = "pcerqueira/bbtools:38.44"
44
}
5+
withName: REFORMAT_FASTQ {
6+
container = "pcerqueira/bbtools:38.44"
7+
}
58
withName: ABYSS {
69
container = "cimendes/abyss:2.3.1-1"
710
}
@@ -29,6 +32,9 @@ process {
2932
withName: SPADES {
3033
container = "cimendes/spades:3.15.3-1"
3134
}
35+
withName: STRAINXPRESS {
36+
container = "cimendes/strainxpress:11.08.2022"
37+
}
3238
withName: UNICYCLER {
3339
container = "cimendes/unicycler:0.4.9-1"
3440
}

conf/params.config

+3
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@ params {
4646
spades = true
4747
spadesKmerSize = 'auto'
4848

49+
//StrainXpress
50+
strainxpress = false
51+
4952
//SKESA
5053
skesa = true
5154

docker/strainXpress/Dockerfile

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# Image providing the StrainXpress assembler, cloned and installed under
# /NGStools/StrainXpress on top of a Miniconda base image.
FROM continuumio/miniconda:4.7.12

LABEL software="StrainXpress" \
      software.version="11.08.2022" \
      about.home="https://github.com/kangxiongbin/StrainXpress" \
      about.summary="De novo assembler based on overlap-layout-consensus to assemble high complexity metagenome sequencing data at strain resolution." \
      about.documentation="https://github.com/kangxiongbin/StrainXpress#readme" \
      about.license="https://github.com/kangxiongbin/StrainXpress/blob/main/LICENSE" \
      author="Inês Mendes <[email protected]>"

WORKDIR /NGStools

# System packages: build toolchain and Boost for compiling, plus git/wget/procps.
RUN apt-get --allow-releaseinfo-change update \
    && apt-get install procps git build-essential cmake wget libboost-all-dev -y

# Python runtime and libraries from bioconda.
# -y makes the non-interactive intent explicit instead of relying on how the
# confirmation prompt behaves without a TTY; the cache is cleaned in the same
# layer so it does not end up in the image.
RUN conda install -y -c bioconda python=3.6 scipy pandas minimap2 \
    && conda clean -afy

# NOTE(review): the clone is not pinned to a commit or tag, so rebuilding this
# image may pick up a different StrainXpress revision than the one the
# "11.08.2022" label refers to — consider pinning.
RUN git clone https://github.com/kangxiongbin/StrainXpress.git \
    && cd StrainXpress \
    && sh install.sh

# Expose the StrainXpress scripts on PATH.
ENV PATH="/NGStools/StrainXpress/scripts:$PATH"

lib/Helper.groovy

+2
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,8 @@ class Help {
103103
println " (default $params.spadesKmerSize)"
104104
println " --skesa Boolean controling the execution of the SKESA assembler."
105105
println " (default $params.skesa)"
106+
println " --strainxpress Boolean controling the execution of the StrainXpress assembler."
107+
println " (default $params.strainxpress)"
106108
println " --unicycler Boolean controling the execution of the Unicycler assembler."
107109
println " (default $params.unicycler)"
108110
println " --velvetoptimiser Boolean controling the execution of the VelvetOptimiser assembler."

modules/assembly/assembly.nf

+54
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,24 @@ process REFORMAT {
1919
"reformat.sh in=${fastq_pair[0]} in2=${fastq_pair[1]} out=${sample_id}_reads.fasta"
2020
}
2121

22+
/*
 * REFORMAT_FASTQ
 * Runs reformat.sh on the two files of a read pair and writes a single
 * <sample_id>_reads.fq file. Its output channel feeds the STRAINXPRESS process.
 *
 * input : tuple (sample_id, fastq_pair) - fastq_pair[0] and fastq_pair[1] are
 *         passed to reformat.sh as `in` and `in2`.
 * output: tuple (sample_id, <sample_id>_reads.fq)
 */
process REFORMAT_FASTQ {

    tag { sample_id }
    label 'process_assembly'

    // Skipped entirely unless the StrainXpress assembler is enabled.
    when:
    params.strainxpress

    input:
    tuple val(sample_id), path(fastq_pair)

    output:
    // `path` replaces the deprecated `file` qualifier, matching the input
    // declaration above and the other path-based outputs in this module.
    tuple val(sample_id), path('*_reads.fq')

    script:
    "reformat.sh in=${fastq_pair[0]} in2=${fastq_pair[1]} out=${sample_id}_reads.fq"
}
39+
2240
process ABYSS {
2341

2442
tag { sample_id }
@@ -334,6 +352,38 @@ process SPADES {
334352
"""
335353
}
336354

355+
/*
 * STRAINXPRESS
 * Assembles one sample with StrainXpress and publishes the result under
 * results/<sample_id>/assembly/strainxpress.
 *
 * input : tuple (sample_id, fastq_reads_single) - a single FASTQ file, handed
 *         to strainxpress.py through its -fq option.
 * output: assembly - tuple (sample_id, 'StrainXpress', <sample_id>_strainxpress.fasta)
 *         version  - hidden file(s) matching '.*version'
 *
 * If the assembler or the rename step fails, an empty FASTA is emitted and
 * "fail" is written to .status, so the task still produces its declared output.
 */
process STRAINXPRESS {

    tag { sample_id }
    label 'process_assembly'
    publishDir "results/$sample_id/assembly/strainxpress"

    // Skipped entirely unless the StrainXpress assembler is enabled.
    when:
    params.strainxpress

    input:
    tuple val(sample_id), path(fastq_reads_single)

    output:
    tuple val(sample_id), val('StrainXpress'), path('*_strainxpress.fasta'), emit: assembly
    path('.*version'), emit: version

    script:
    // The version file is written with an empty line: no version string is
    // captured from the tool here.
    //
    // The three steps of the first group are chained with `&&` on purpose.
    // Inside the left-hand side of `||` bash ignores errexit, so with plain
    // newline-separated commands a failing assembler run would fall through to
    // `echo pass`, the group would return 0 and the fallback branch would
    // never execute, leaving the task without its declared FASTA output.
    """
    echo '' > .${sample_id}_strainxpress_version
    {
        python3 /NGStools/StrainXpress/scripts/strainxpress.py -fq $fastq_reads_single -t $task.cpus &&
        mv all.contigs_*.fasta ${sample_id}_strainxpress.fasta &&
        echo pass > .status
    } || {
        echo fail > .status
        :> ${sample_id}_strainxpress.fasta
    }
    """

}
386+
337387
process UNICYCLER {
338388

339389
tag { sample_id }
@@ -419,6 +469,7 @@ workflow assembly_wf {
419469

420470
main:
421471
REFORMAT(IN_fastq_raw)
472+
REFORMAT_FASTQ(IN_fastq_raw)
422473
ABYSS(IN_fastq_raw, abyssKmerSize, abyssBloomSize)
423474
GATBMINIAPIPELINE(IN_fastq_raw, gatbKmerSize, GATB_error_correction, gatb_besst_iter)
424475
IDBA(REFORMAT.out)
@@ -428,6 +479,7 @@ workflow assembly_wf {
428479
MINIA(IN_fastq_raw, miniaKmerSize)
429480
SKESA(IN_fastq_raw)
430481
SPADES(IN_fastq_raw, spadesKmerSize)
482+
STRAINXPRESS(REFORMAT_FASTQ.out)
431483
UNICYCLER(IN_fastq_raw)
432484
VELVETOPTIMISER(IN_fastq_raw)
433485

@@ -440,6 +492,7 @@ workflow assembly_wf {
440492
MINIA.out.assembly,
441493
SKESA.out.assembly,
442494
SPADES.out.assembly,
495+
STRAINXPRESS.out.assembly,
443496
UNICYCLER.out.assembly,
444497
VELVETOPTIMISER.out.assembly)
445498
all_versions = ABYSS.out.version | mix(GATBMINIAPIPELINE.out.version,
@@ -450,6 +503,7 @@ workflow assembly_wf {
450503
MINIA.out.version,
451504
SKESA.out.version,
452505
SPADES.out.version,
506+
STRAINXPRESS.out.version,
453507
UNICYCLER.out.version,
454508
VELVETOPTIMISER.out.version) | collect
455509

pull_images.sh

+1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ eval $pull_command"cimendes/mhm2:v2.0.0-65-gaad446d-generic"
2525
eval $pull_command"cimendes/spades:3.15.3-1"
2626
eval $pull_command"cimendes/minia:3.2.6-1"
2727
eval $pull_command"cimendes/skesa:2.5.0-1"
28+
eval $pull_command"cimendes/strainxpress:11.08.2022"
2829
eval $pull_command"cimendes/unicycler:0.4.9-1"
2930
eval $pull_command"cimendes/velvetoptimiser:2.2.6-1"
3031
eval $pull_command"cimendes/minimap2:2.22-1"

templates/process_assembly_stats_global.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,8 @@
3939
ASSEMBLER_SKIP = {"ABySS":json.loads("$params.abyss"), "GATBMiniaPipeline": json.loads("$params.gatb_minia"),
4040
"MetaHipMer2": json.loads("$params.metahipmer2"), "MINIA": json.loads("$params.minia"), "MEGAHIT": json.loads("$params.megahit"),
4141
"metaSPAdes": json.loads("$params.metaspades"), "Unicycler": json.loads("$params.unicycler"), "SPAdes": json.loads("$params.spades"),
42-
"SKESA": json.loads("$params.skesa"), "VelvetOptimiser": json.loads("$params.velvetoptimiser"), "IDBA-UD": json.loads("$params.idba")}
42+
"SKESA": json.loads("$params.skesa"), "StrainXpress": json.loads("$params.strainxpress"),"VelvetOptimiser": json.loads("$params.velvetoptimiser"),
43+
"IDBA-UD": json.loads("$params.idba")}
4344
logger.debug("Running {} with parameters:".format(
4445
os.path.basename(__file__)))
4546
logger.debug("ASSEMBLY_STATS_GLOBAL_FILE: {}".format(

templates/process_assembly_stats_mapping.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,8 @@
3636
ASSEMBLER_SKIP = {"ABySS":json.loads("$params.abyss"), "GATBMiniaPipeline": json.loads("$params.gatb_minia"),
3737
"MetaHipMer2": json.loads("$params.metahipmer2"), "MINIA": json.loads("$params.minia"), "MEGAHIT": json.loads("$params.megahit"),
3838
"metaSPAdes": json.loads("$params.metaspades"), "Unicycler": json.loads("$params.unicycler"), "SPAdes": json.loads("$params.spades"),
39-
"SKESA": json.loads("$params.skesa"), "VelvetOptimiser": json.loads("$params.velvetoptimiser"), "IDBA-UD": json.loads("$params.idba")}
39+
"SKESA": json.loads("$params.skesa"), "StrainXpress": json.loads("$params.strainxpress"), "VelvetOptimiser": json.loads("$params.velvetoptimiser"),
40+
"IDBA-UD": json.loads("$params.idba")}
4041
logger.debug("Running {} with parameters:".format(
4142
os.path.basename(__file__)))
4243
logger.debug("ASSEMBLY_STATS_GLOBAL_FILE_JSON: {}".format(

templates/process_versions.py

+1
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
"Unicycler": "UNICYCLER",
4646
"SPAdes": "SPADES",
4747
"SKESA": "SKESA",
48+
"strainxpress": "StrainXpress",
4849
"VelvetOptimiser": "VELVETOPTIMISER",
4950
"IDBA": "IDBA"}
5051

templates/utils.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,10 @@
1515
'#fdbf6f', '#ff7f00', '#cab2d6', '#6a3d9a', '#ebdb75', '#b15928']
1616

1717
ASSEMBLER_NAMES = ["ABySS", "GATBMiniaPipeline", "MetaHipMer2", "MINIA", "MEGAHIT", "metaSPAdes", "Unicycler", "SPAdes",
18-
"SKESA", "VelvetOptimiser", "IDBA-UD"]
18+
"StrainXpress", "SKESA", "VelvetOptimiser", "IDBA-UD"]
1919

2020
ASSEMBLER_PROCESS_LIST = ["ABYSS", "GATBMINIAPIPELINE", "MINIA", "METAHIPMER2", "MEGAHIT", "METASPADES", "UNICYCLER", "SPADES",
21-
"SKESA", "VELVETOPTIMISER", "IDBA"]
21+
"STRAINXPRESS", "SKESA", "VELVETOPTIMISER", "IDBA"]
2222

2323

2424
def get_logger(filepath, level=logging.DEBUG):

test/data/tiny_reads.fq.gz

16.8 MB
Binary file not shown.

0 commit comments

Comments
 (0)