@@ -39,12 +39,13 @@ default:
3939 # Fixed offset of the required memory, in MB. This accounts for basic memory
4040 # needed by the rule (independent of input sizes), and is needed for small files
4141 # where the scaling might be too little.
42- mem-offset : 1000
42+ mem-offset : 5000
4343
4444 # Scaling of the memory needed for the computation. That is, for each MB of combined
4545 # input file sizes, how many MB of memory are needed for the processing?
46- # The default of 1 should work in most cases, but can tightly be adapted as needed
47- # if rules run out of memory.
46+ # The default of 1 should work in most cases, as it simply gives memory equal to
47+ # the input file sizes. This can easily be an overestimate, and can be fine-tuned
48+ # as needed, for instance if rules run out of memory.
4849 mem-scaler : 1
4950
5051 # Maximum memory, in MB, defaulting to 1TB. This is provided as a safety mechanism,
@@ -57,17 +58,17 @@ default:
5758 # Fixed offset of the required runtime, in minutes. Similar to above.
5859 time-offset : 60
5960
60- # Scaling of the runtime, from MB to minutes. That is, how many minuts are needed
61- # for each additional MB of input files? The default of 0.1 for instance would give
62- # 100 minutes per GB of input data.
63- time-scaler : 0.1
61+ # Scaling of the runtime, from MB to minutes. That is, how many minutes are needed
62+ # for each additional MB of input files? The default of 0.01 for instance would give
63+ # an additional 10 minutes per GB of input data.
64+ time-scaler : 0.01
6465
6566 # Maximum runtime, in minutes. Defaults to one week. If you have large datasets
6667 # and your cluster allows longer wall times, adjust this accordingly.
6768 # Generally, set this to the maximum allowed wall time on your cluster.
6869 time-max : 10080
6970
70- # Default number of CPU cores per task/job. Most programs and scripts in
71+ # Default number of CPU cores (threads) per task/job. Most programs and scripts in
7172 # bioinformatics are unfortunately not parallelized well, so 1 is the default.
7273 cpus : 1
7374
@@ -96,21 +97,58 @@ default:
9697# for rules that we already know can benefit from different resources.
9798
9899# Read trimming tools can typically use multiple CPUs, so let's give them more!
100+ # They however are very fast, so we don't need as much scaling for time.
99101trim_reads_se :
100102 cpus : 4
103+ time-scaler : 0.005
101104trim_reads_pe :
102105 cpus : 4
106+ time-scaler : 0.005
103107trim_reads_pe_merged :
104108 cpus : 4
109+ time-scaler : 0.005
105110
106111# Even better for the mapping! More cores! Note that we internally
107- # might assign an additional cpu thread for sorting the bam files.
112+ # might assign an additional cpu thread for sorting the bam files.
113+ # We however set different scalers here based on empirical experience.
114+ # The memory needs mostly depend on the reference genome size.
108115map_reads :
109- cpus : 10
116+ cpus : 8
117+ time-scaler : 0.02
110118
111119# The usable cpu threads for the actual variant calling step depends on the tool.
112120# Freebayes and bcftools can make efficient use of threads, so if you are using those,
113121# increase the value here to, e.g., 10. However, the GATK HaplotypeCaller is
114122# notoriously bad and inefficient, and seems to not benefit from more than 2 threads.
115123call_variants :
124+ # GATK HaplotypeCaller
116125 cpus : 2
126+ time-scaler : 0.05
127+ mem-offset : 5000
128+ mem-scaler : 0
129+
130+ # Freebayes and bcftools. You can comment out the above and use these defaults
131+ # here if you are using either of those callers. Adjust as needed for your data.
132+ # cpus: 8
133+ # time-scaler: 0.01
134+
135+ # FastQC never seems to use more than 1GB, so let's cap it there.
136+ fastqc :
137+ mem-offset : 1000
138+ mem-scaler : 0
139+
140+ # Same for samtools statistics
141+ samtools_stats :
142+ mem-offset : 1000
143+ mem-scaler : 0
144+ samtools_flagstat :
145+ mem-offset : 1000
146+ mem-scaler : 0
147+
148+ # Multiqc takes a lot of files as input, but we want to run it independently
149+ # of their sizes, as it solely looks at logs etc, and not the actual file contents.
150+ multiqc :
151+ mem-offset : 5000
152+ mem-scaler : 0
153+ time-offset : 60
154+ time-scaler : 0
0 commit comments