Skip to content

Commit

Permalink
Merge pull request #32 from B-UMMI/dev
Browse files Browse the repository at this point in the history
Add StrainXpress to short-read assembly workflow
  • Loading branch information
cimendes authored Sep 26, 2022
2 parents 225fba6 + 0929429 commit fd43031
Show file tree
Hide file tree
Showing 12 changed files with 97 additions and 4 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,8 @@ To use LMAS the following options are available:
(default auto)
--skesa Boolean controlling the execution of the SKESA assembler.
(default true)
--strainxpress Boolean controlling the execution of the StrainXpress assembler.
(default false)
--unicycler Boolean controlling the execution of the Unicycler assembler.
(default true)
--velvetoptimiser Boolean controlling the execution of the VelvetOptimiser assembler.
Expand Down
6 changes: 6 additions & 0 deletions conf/containers.config
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@ process {
withName: REFORMAT {
container = "pcerqueira/bbtools:38.44"
}
withName: REFORMAT_FASTQ {
container = "pcerqueira/bbtools:38.44"
}
withName: ABYSS {
container = "cimendes/abyss:2.3.1-1"
}
Expand Down Expand Up @@ -29,6 +32,9 @@ process {
withName: SPADES {
container = "cimendes/spades:3.15.3-1"
}
withName: STRAINXPRESS {
container = "cimendes/strainxpress:11.08.2022"
}
withName: UNICYCLER {
container = "cimendes/unicycler:0.4.9-1"
}
Expand Down
3 changes: 3 additions & 0 deletions conf/params.config
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ params {
spades = true
spadesKmerSize = 'auto'

//StrainXpress
strainxpress = false

//SKESA
skesa = true

Expand Down
22 changes: 22 additions & 0 deletions docker/strainXpress/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Image for the StrainXpress assembler: a miniconda base with the build
# toolchain, the Python/minimap2 runtime from conda, and StrainXpress itself
# built from its Git repository under /NGStools.
FROM continuumio/miniconda:4.7.12

LABEL software="StrainXpress" \
      software.version="11.08.2022" \
      about.home="https://github.com/kangxiongbin/StrainXpress" \
      about.summary="De novo assembler based on overlap-layout-consensus to assemble high complexity metagenome sequencing data at strain resolution." \
      about.documentation="https://github.com/kangxiongbin/StrainXpress#readme" \
      about.license="https://github.com/kangxiongbin/StrainXpress/blob/main/LICENSE" \
      author="Inês Mendes <[email protected]>"

WORKDIR /NGStools

# System packages needed to clone and compile StrainXpress.
# The package lists are removed in the same layer so they do not end up in the image.
RUN apt-get --allow-releaseinfo-change update \
    && apt-get install procps git build-essential cmake wget libboost-all-dev -y \
    && rm -rf /var/lib/apt/lists/*

# -y makes the install explicitly non-interactive instead of relying on how
# conda's confirmation prompt behaves without a terminal during `docker build`.
# The downloaded package cache is cleaned in the same layer to keep the image small.
RUN conda install -y -c bioconda python=3.6 scipy pandas minimap2 \
    && conda clean -ay

# NOTE(review): this clones the default branch head, so rebuilding the image
# can pick up a different StrainXpress revision than the one labelled above;
# consider checking out a fixed commit once the intended one is known.
RUN git clone https://github.com/kangxiongbin/StrainXpress.git \
    && cd StrainXpress \
    && sh install.sh

# Expose the StrainXpress scripts directory on PATH.
ENV PATH="/NGStools/StrainXpress/scripts:$PATH"
2 changes: 2 additions & 0 deletions lib/Helper.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,8 @@ class Help {
println " (default $params.spadesKmerSize)"
println " --skesa Boolean controling the execution of the SKESA assembler."
println " (default $params.skesa)"
println " --strainxpress Boolean controling the execution of the StrainXpress assembler."
println " (default $params.strainxpress)"
println " --unicycler Boolean controling the execution of the Unicycler assembler."
println " (default $params.unicycler)"
println " --velvetoptimiser Boolean controling the execution of the VelvetOptimiser assembler."
Expand Down
54 changes: 54 additions & 0 deletions modules/assembly/assembly.nf
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,24 @@ process REFORMAT {
"reformat.sh in=${fastq_pair[0]} in2=${fastq_pair[1]} out=${sample_id}_reads.fasta"
}

/*
 * REFORMAT_FASTQ
 * Runs reformat.sh on a pair of FASTQ files and writes a single
 * "<sample_id>_reads.fq" file. Only executed when params.strainxpress is set;
 * its output is what the workflow feeds to STRAINXPRESS.
 */
process REFORMAT_FASTQ {

    tag "$sample_id"
    label 'process_assembly'

    when:
    params.strainxpress

    input:
    // sample identifier plus the two read files (index 0 and index 1)
    tuple val(sample_id), path(fastq_pair)

    output:
    // the single .fq file produced by the command below
    tuple val(sample_id), file('*_reads.fq')

    script:
    def first_mate  = fastq_pair[0]
    def second_mate = fastq_pair[1]
    def merged_fq   = "${sample_id}_reads.fq"
    "reformat.sh in=${first_mate} in2=${second_mate} out=${merged_fq}"
}

process ABYSS {

tag { sample_id }
Expand Down Expand Up @@ -334,6 +352,38 @@ process SPADES {
"""
}

/*
 * STRAINXPRESS
 * Assembles one sample with StrainXpress and publishes the result under
 * results/<sample_id>/assembly/strainxpress. Only executed when
 * params.strainxpress is set.
 *
 * Input : tuple (sample_id, reads file). Despite its name, `fasta_reads_single`
 *         receives the "*_reads.fq" FASTQ file emitted by REFORMAT_FASTQ and is
 *         passed to strainxpress.py through its -fq option.
 * Output: assembly - tuple (sample_id, 'StrainXpress', <sample_id>_strainxpress.fasta)
 *         version  - hidden "*version" file (currently written empty)
 *
 * If the assembler or the rename step fails, an empty assembly file is
 * emitted and "fail" is written to .status, so the task still completes.
 */
process STRAINXPRESS {

    tag { sample_id }
    label 'process_assembly'
    publishDir "results/$sample_id/assembly/strainxpress"

    when:
    params.strainxpress

    input:
    tuple val(sample_id), path(fasta_reads_single)

    output:
    tuple val(sample_id), val('StrainXpress'), path('*_strainxpress.fasta'), emit: assembly
    path('.*version'), emit: version

    script:
    // The three steps are chained with '&&' on purpose: inside '{ ...; } || { ... }'
    // the group's exit status is that of its LAST command, so with plain
    // newlines a failing assembler or 'mv' would be hidden by the succeeding
    // 'echo pass', the fallback would never run, and the declared
    // *_strainxpress.fasta output would be missing.
    """
    echo '' > .${sample_id}_strainxpress_version
    {
        python3 /NGStools/StrainXpress/scripts/strainxpress.py -fq $fasta_reads_single -t $task.cpus &&
        mv all.contigs_*.fasta ${sample_id}_strainxpress.fasta &&
        echo pass > .status
    } || {
        echo fail > .status
        :> ${sample_id}_strainxpress.fasta
    }
    """

}

process UNICYCLER {

tag { sample_id }
Expand Down Expand Up @@ -419,6 +469,7 @@ workflow assembly_wf {

main:
REFORMAT(IN_fastq_raw)
REFORMAT_FASTQ(IN_fastq_raw)
ABYSS(IN_fastq_raw, abyssKmerSize, abyssBloomSize)
GATBMINIAPIPELINE(IN_fastq_raw, gatbKmerSize, GATB_error_correction, gatb_besst_iter)
IDBA(REFORMAT.out)
Expand All @@ -428,6 +479,7 @@ workflow assembly_wf {
MINIA(IN_fastq_raw, miniaKmerSize)
SKESA(IN_fastq_raw)
SPADES(IN_fastq_raw, spadesKmerSize)
STRAINXPRESS(REFORMAT_FASTQ.out)
UNICYCLER(IN_fastq_raw)
VELVETOPTIMISER(IN_fastq_raw)

Expand All @@ -440,6 +492,7 @@ workflow assembly_wf {
MINIA.out.assembly,
SKESA.out.assembly,
SPADES.out.assembly,
STRAINXPRESS.out.assembly,
UNICYCLER.out.assembly,
VELVETOPTIMISER.out.assembly)
all_versions = ABYSS.out.version | mix(GATBMINIAPIPELINE.out.version,
Expand All @@ -450,6 +503,7 @@ workflow assembly_wf {
MINIA.out.version,
SKESA.out.version,
SPADES.out.version,
STRAINXPRESS.out.version,
UNICYCLER.out.version,
VELVETOPTIMISER.out.version) | collect

Expand Down
1 change: 1 addition & 0 deletions pull_images.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ eval $pull_command"cimendes/mhm2:v2.0.0-65-gaad446d-generic"
eval $pull_command"cimendes/spades:3.15.3-1"
eval $pull_command"cimendes/minia:3.2.6-1"
eval $pull_command"cimendes/skesa:2.5.0-1"
eval $pull_command"cimendes/strainxpress:11.08.2022"
eval $pull_command"cimendes/unicycler:0.4.9-1"
eval $pull_command"cimendes/velvetoptimiser:2.2.6-1"
eval $pull_command"cimendes/minimap2:2.22-1"
3 changes: 2 additions & 1 deletion templates/process_assembly_stats_global.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@
ASSEMBLER_SKIP = {"ABySS":json.loads("$params.abyss"), "GATBMiniaPipeline": json.loads("$params.gatb_minia"),
"MetaHipMer2": json.loads("$params.metahipmer2"), "MINIA": json.loads("$params.minia"), "MEGAHIT": json.loads("$params.megahit"),
"metaSPAdes": json.loads("$params.metaspades"), "Unicycler": json.loads("$params.unicycler"), "SPAdes": json.loads("$params.spades"),
"SKESA": json.loads("$params.skesa"), "VelvetOptimiser": json.loads("$params.velvetoptimiser"), "IDBA-UD": json.loads("$params.idba")}
"SKESA": json.loads("$params.skesa"), "StrainXpress": json.loads("$params.strainxpress"),"VelvetOptimiser": json.loads("$params.velvetoptimiser"),
"IDBA-UD": json.loads("$params.idba")}
logger.debug("Running {} with parameters:".format(
os.path.basename(__file__)))
logger.debug("ASSEMBLY_STATS_GLOBAL_FILE: {}".format(
Expand Down
3 changes: 2 additions & 1 deletion templates/process_assembly_stats_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@
ASSEMBLER_SKIP = {"ABySS":json.loads("$params.abyss"), "GATBMiniaPipeline": json.loads("$params.gatb_minia"),
"MetaHipMer2": json.loads("$params.metahipmer2"), "MINIA": json.loads("$params.minia"), "MEGAHIT": json.loads("$params.megahit"),
"metaSPAdes": json.loads("$params.metaspades"), "Unicycler": json.loads("$params.unicycler"), "SPAdes": json.loads("$params.spades"),
"SKESA": json.loads("$params.skesa"), "VelvetOptimiser": json.loads("$params.velvetoptimiser"), "IDBA-UD": json.loads("$params.idba")}
"SKESA": json.loads("$params.skesa"), "StrainXpress": json.loads("$params.strainxpress"), "VelvetOptimiser": json.loads("$params.velvetoptimiser"),
"IDBA-UD": json.loads("$params.idba")}
logger.debug("Running {} with parameters:".format(
os.path.basename(__file__)))
logger.debug("ASSEMBLY_STATS_GLOBAL_FILE_JSON: {}".format(
Expand Down
1 change: 1 addition & 0 deletions templates/process_versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
"Unicycler": "UNICYCLER",
"SPAdes": "SPADES",
"SKESA": "SKESA",
"strainxpress": "StrainXpress",
"VelvetOptimiser": "VELVETOPTIMISER",
"IDBA": "IDBA"}

Expand Down
4 changes: 2 additions & 2 deletions templates/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@
'#fdbf6f', '#ff7f00', '#cab2d6', '#6a3d9a', '#ebdb75', '#b15928']

ASSEMBLER_NAMES = ["ABySS", "GATBMiniaPipeline", "MetaHipMer2", "MINIA", "MEGAHIT", "metaSPAdes", "Unicycler", "SPAdes",
"SKESA", "VelvetOptimiser", "IDBA-UD"]
"StrainXpress", "SKESA", "VelvetOptimiser", "IDBA-UD"]

ASSEMBLER_PROCESS_LIST = ["ABYSS", "GATBMINIAPIPELINE", "MINIA", "METAHIPMER2", "MEGAHIT", "METASPADES", "UNICYCLER", "SPADES",
"SKESA", "VELVETOPTIMISER", "IDBA"]
"STRAINXPRESS", "SKESA", "VELVETOPTIMISER", "IDBA"]


def get_logger(filepath, level=logging.DEBUG):
Expand Down
Binary file added test/data/tiny_reads.fq.gz
Binary file not shown.

0 comments on commit fd43031

Please sign in to comment.