@@ -123,6 +123,24 @@ data:
123123# See later in the "params" category for the parameters of each tool.
124124settings :
125125
126+ # Computational resources.
127+ # Next to this `config.yaml` file, we provide a system-independent `resources.yaml`, which
128+ # specifies all computational resources (time, memory, CPUs, etc) to use. This is mostly relevant
129+ # in cluster environments (such as when using slurm to submit individual jobs), as those systems
130+ # need to know in advance how much of each resource a job will need. However, we do not want to
131+ # clutter this config file here with all this information - this file here is meant to describe
132+ # the data and tool settings, but should not be concerned with "practical" aspects such as how to
133+ # run them. So instead, these are specified in the `resources.yaml`.
134+ # We search for this file in three places, in this order: First, in the path specified here.
135+ # Second, in the working directory (where you copy this `config.yaml` file to as well, and which
136+ # is provided to snakemake as `--directory`). Third, in the `config` directory within grenepipe,
137+ # which is where the default file lives.
138+ # We hence recommend setting up the `resources.yaml` by copying it to your working directory
139+ # (where you also copied this `config.yaml` to), and adapting it there as needed. However,
140+ # if you have multiple runs of grenepipe with the same resource requirements, you can instead
141+ # specify a path to a shared `resources.yaml` file here.
142+ resources-yaml : " "
143+
126144 # ----------------------------------------------------------------------
127145 # Basic Steps
128146 # ----------------------------------------------------------------------
@@ -423,7 +441,6 @@ params:
423441 # See adapterremoval manual: https://adapterremoval.readthedocs.io/en/latest/
424442 # and https://adapterremoval.readthedocs.io/en/latest/manpage.html
425443 adapterremoval :
426- threads : 4
427444
429446 # Extra parameters for single reads. Param `--gzip` is already set internally.
429446 se : " "
@@ -439,7 +456,6 @@ params:
439456 # Used only if settings:trimming-tool == cutadapt
440457 # See cutadapt manual: https://cutadapt.readthedocs.io/en/stable/guide.html#adapter-types
441458 cutadapt :
442- threads : 4
443459
444460 # Set the adapters and any extra parameters.
445461 # For example, adapters: "-a AGAGCACACGTCTGAACTCCAGTCAC -g AGATCGGAAGAGCACACGT -A AGAGCACACGTCTGAACTCCAGTCAC -G AGATCGGAAGAGCACACGT"
@@ -462,7 +478,6 @@ params:
462478 # Used only if settings:trimming-tool == fastp
463479 # See fastp manual: https://github.com/OpenGene/fastp
464480 fastp :
465- threads : 4
466481
467482 # Extra parameters for single reads.
468483 se : " "
@@ -490,7 +505,6 @@ params:
490505 # See skewer manual: https://github.com/relipmoc/skewer
491506 # By default, we internally already set the options `--format sanger --compress`
492507 skewer :
493- threads : 4
494508
495509 # Extra parameters for single reads.
496510 se : " --mode any"
@@ -506,7 +520,8 @@ params:
506520 # See trimmomatic manual: http://www.usadellab.org/cms/?page=trimmomatic
507521 # Download adapters here: https://github.com/usadellab/Trimmomatic/tree/main/adapters
508522 trimmomatic :
509- threads : 6
523+
524+ # Extra parameters for single reads.
510525 se :
511526 extra : " "
512527 trimmer :
@@ -521,6 +536,8 @@ params:
521536 - " TRAILING:3"
522537 - " SLIDINGWINDOW:4:15"
523538 - " MINLEN:36"
539+
540+ # Extra parameters for paired end reads.
524541 pe :
525542 extra : " "
526543 trimmer :
@@ -538,7 +555,6 @@ params:
538555 # Used only if settings:mapping-tool == bowtie2
539556 # See bowtie2 manual: http://bowtie-bio.sourceforge.net/bowtie2/index.shtml
540557 bowtie2 :
541- threads : 10
542558
543559 # Extra parameters. We internally already set `--rg` and `--rg-id`, using read group ("@RG")
544560 # tags "ID" and "SM", and potentially "PL".
@@ -554,7 +570,6 @@ params:
554570 # Used only if settings:mapping-tool == bwaaln
555571 # See bwa manual: http://bio-bwa.sourceforge.net/
556572 bwaaln :
557- threads : 10
558573
559574 # Extra parameters for bwa aln, which maps the reads and produces intermediate *.sai files.
560575 extra : " "
@@ -575,7 +590,6 @@ params:
575590 # Used only if settings:mapping-tool == bwamem
576591 # See bwa manual: http://bio-bwa.sourceforge.net/
577592 bwamem :
578- threads : 10
579593
580594 # Extra parameters for bwa mem.
581595 # We internally already set `-R` to use read group ("@RG") tags "ID" and "SM",
@@ -592,7 +606,6 @@ params:
592606 # Used only if settings:mapping-tool == bwamem2
593607 # See bwa manual: https://github.com/bwa-mem2/bwa-mem2
594608 bwamem2 :
595- threads : 10
596609
597610 # Extra parameters for bwa mem.
598611 # We internally already set `-R` to use read group ("@RG") tags "ID" and "SM",
@@ -615,7 +628,6 @@ params:
615628 # in order to streamline the process, and to make sure that all tools understand that all units
616629 # of a sample belong to the same sample.
617630 merge : " "
618- merge-threads : 4
619631
620632 # Extra parameters for samtools/view.
621633 # Used only if settings:filter-mapped-reads == true, in order to filter the mapped samples
@@ -702,22 +714,13 @@ params:
702714 # system-provided tmp dir is too small (which can happen on clusters).
703715 # Note that the Java memory options, such as `-Xmx10g` to increase the available memory within
704716 # the Java virtual machine are provided via the Snakemake memory management directly,
705- # and hence cannot be specified here. Instead, use the below `*-mem-mb` options,
706- # or, if you are running grenepipe via slurm, use the slurm job configuration.
717+ # and hence cannot be specified here. Instead, use the resources.yaml config file for this.
707718 # The SortVcf-java-opts option is used by bcftools when using contig-group-size > 0.
708719 MarkDuplicates-java-opts : " "
709720 CollectMultipleMetrics-java-opts : " "
710721 SortVcf-java-opts : " "
711722 MergeVcfs-java-opts : " "
712723
713- # Memory for the Java virtual machine for the picard programs.
714- # Unfortunately, Java does not automatically use the available memory, and instead needs
715- # to be told that it is allowed to do that. Specify the memory here as needed, in MB.
716- MarkDuplicates-mem-mb : 5000
717- CollectMultipleMetrics-mem-mb : 1024
718- SortVcf-mem-mb : 1024
719- MergeVcfs-mem-mb : 1024
720-
721724 # ----------------------------------------------------------------------
722725 # dedup
723726 # ----------------------------------------------------------------------
@@ -740,7 +743,6 @@ params:
740743 # Note that the bcftools filter step (if configured above via `settings: filter-variants`)
741744 # is configured below in the `bcftools-filter` setting, instead of here.
742745 bcftools :
743- threads : 8
744746
745747 # We offer two ways to run bcftools call: Combined on all samples at the same time,
746748 # or on each sample individually, merging the calls later.
@@ -779,8 +781,6 @@ params:
779781 extra : " "
780782
781783 # Settings for parallelization
782- threads : 8
783- compress-threads : 2
784784 chunksize : 100000
785785
786786 # ----------------------------------------------------------------------
@@ -803,10 +803,6 @@ params:
803803 # Others might work as well, depending on GATK BaseRecalibrator.
804804 platform : " "
805805
806- # Number of threads to use for the HaplotypeCaller. We recommend to keep this at 2,
807- # as GATK does not seem to do a great job of parallelizing anyway.
808- HaplotypeCaller-threads : 2
809-
810806 # By default, starting in grenepipe v0.14.0, we are using GATK GenomicsDBImport instead of
811807 # GATK CombineGVCFs to prepare the singular GVCF for GATK GenotypeGVCFs. However, for full
812808 # compatibility, we also offer to use the old way with CombineGVCFs here, by setting
@@ -829,21 +825,12 @@ params:
829825 # For some specific error cases, it might be necessary to adjust java settings for the tools.
830826 # Note that the Java memory options, such as `-Xmx10g` to increase the available memory within
831827 # the Java virtual machine are provided via the Snakemake memory management directly,
832- # and hence cannot be specified here. Instead, use the below `*-mem-mb` options,
833- # or, if you are running grenepipe via slurm, use the slurm job configuration.
828+ # and hence cannot be specified here. Instead, use the resources.yaml config file for this.
834829 HaplotypeCaller-java-opts : " "
835830 GenomicsDBImport-java-opts : " "
836831 CombineGVCFs-java-opts : " "
837832 GenotypeGVCFs-java-opts : " "
838833
839- # Memory for the Java virtual machine for the GATK programs.
840- # Unfortunately, Java does not automatically use the available memory, and instead needs
841- # to be told that it is allowed to do that. Specify the memory here as needed, in MB.
842- HaplotypeCaller-mem-mb : 1024
843- GenomicsDBImport-mem-mb : 1024
844- CombineGVCFs-mem-mb : 1024
845- GenotypeGVCFs-mem-mb : 1024
846-
847834 # ----------------------------------------------------------------------
848835 # GATK VariantFiltration
849836 # ----------------------------------------------------------------------
@@ -863,7 +850,6 @@ params:
863850 # We also offer extra settings that are used for both.
864851 extra : " "
865852 java-opts : " "
866- mem-mb : 1024
867853
868854 # ----------------------------------------------------------------------
869855 # GATK VariantRecalibrator + ApplyVQSR
@@ -948,13 +934,11 @@ params:
948934 variantrecalibrator-extra-SNP : " --max-gaussians 1"
949935 variantrecalibrator-extra-INDEL : " --max-gaussians 1"
950936 variantrecalibrator-java-opts : " "
951- variantrecalibrator-mem-mb : 1024
952937
953938 # Extra command line params, and optional Java runtime options to provide to GATK ApplyVQSR
954939 applyvqsr-extra-SNP : " --truth-sensitivity-filter-level 99.0"
955940 applyvqsr-extra-INDEL : " --truth-sensitivity-filter-level 99.0"
956941 applyvqsr-java-opts : " "
957- applyvqsr-mem-mb : 1024
958942
959943 # ----------------------------------------------------------------------
960944 # bcftools filter
@@ -1003,9 +987,6 @@ params:
1003987 # this local path is used, which is expected to contain a valid snpEff database.
1004988 custom-db-dir : " "
1005989
1006- # Memory (in MB) to be given to SnpEFF. Increase this if the command fails.
1007- mem : 4000
1008-
1009990 # Additional parameters for snpeff, see https://pcingola.github.io/SnpEff/se_commandline/
1010991 extra : " "
1011992
@@ -1112,8 +1093,7 @@ params:
11121093 bams : " processed"
11131094
11141095 # Additional parameters for qualimap, see http://qualimap.conesalab.org/
1115- extra : " --java-mem-size=10G"
1116- threads : 2
1096+ extra : " "
11171097
11181098 # ----------------------------------------------------------------------
11191099 # SeqKit
0 commit comments