|
18 | 18 | # ================================================================================================= |
19 | 19 |
|
20 | 20 | from snakemake.shell import shell |
| 21 | +import os |
21 | 22 |
|
22 | 23 | shell.executable("bash") |
23 | | - |
24 | | -log = snakemake.log_fmt_shell(stdout=True, stderr=True) |
| 24 | +log = snakemake.log_fmt_shell(stdout=True, stderr=True, append=True) |
25 | 25 |
|
26 | 26 | extra_params = snakemake.params.get("extra", "") |
27 | 27 | norm = snakemake.params.get("normalize", False) |
28 | 28 | assert norm in [True, False] |
29 | 29 |
|
def local_log(message):
    """
    Append a message to the first Snakemake log file of this job.

    The message is written to ``snakemake.log[0]`` in append mode, and a
    trailing newline is added if the message does not already end with one.
    The parent directory of the log file is created if it does not exist.

    If no log file is declared (``snakemake.log`` is empty), the call is a
    no-op, so this debugging aid never aborts the wrapper.

    Args:
        message: Text to append to the log file.
    """
    # Nothing to write to when no log file is declared; indexing an empty
    # sequence below would raise IndexError.
    if not snakemake.log:
        return
    log_path = snakemake.log[0]

    # For a bare file name (no directory component), dirname() returns "",
    # and os.makedirs("") raises FileNotFoundError. Only create the parent
    # directory when there actually is one.
    log_dir = os.path.dirname(log_path)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)

    if not message.endswith("\n"):
        message += "\n"
    with open(log_path, "a") as lf:
        lf.write(message)
| 41 | + |
30 | 42 | # Additions to the original wrapper made by LC: |
31 | 43 | # |
32 | 44 | # We separate by contig, to increase parallel runs on clusters. |
|
91 | 103 | chrom_length = int(fields[1]) |
92 | 104 | if chrom_name == contig: |
93 | 105 | regions = '<(echo "' + chrom_name + ":0-" + str(chrom_length) + '")' |
| 106 | +local_log(f"regions = {regions} (after chromosome)") |
94 | 107 |
|
95 | 108 | # If we are here, we must have found the contig in the fai file, |
96 | 109 | # otherwise that name would not have appeared in the "{contig}" wildcard of our snakemake rule - |
|
111 | 124 | "{regions}) -b {snakemake.input.regions} | " |
112 | 125 | r"sed 's/\t\([0-9]*\)\t\([0-9]*\)$/:\1-\2/')" |
113 | 126 | ).format(regions=regions, snakemake=snakemake) |
| 127 | + local_log(f"regions = {regions} (after intersection)") |
114 | 128 | else: |
115 | 129 | # If there are no regions yet, we have the case that a small contig group was provided. |
116 | 130 | # In this case, we just parse that file and turn its bed format into the freebayes |
117 | 131 | # regions format. |
118 | 132 | regions = ("<(cat {snakemake.input.regions} | sed 's/\\t/:/' | sed 's/\\t/-/')").format( |
119 | 133 | snakemake=snakemake |
120 | 134 | ) |
| 135 | + local_log(f"regions = {regions} (after small contig)") |
121 | 136 |
|
122 | | -if snakemake.threads == 1: |
| 137 | +# The single-threaded case has some issue with the region bash substitution... |
| 138 | +# Just deactivating this for now, and running the parallel case instead. |
| 139 | +# if snakemake.threads == 1: |
| 140 | +if False: |
123 | 141 | freebayes = "freebayes --region " + regions |
| 142 | + # freebayes = "freebayes --region <(" + regions + ")" |
124 | 143 | else: |
125 | 144 | # Ideally, we'd be using bamtools coverage and coverage_to_regions.py here, |
126 | 145 | # as suggested in the freebayes-parallel script, but this runs a long time and had some errors |
|
140 | 159 | "{chunks}) | " |
141 | 160 | r"sed 's/\t\([0-9]*\)\t\([0-9]*\)$/:\1-\2/')" |
142 | 161 | ).format(regions=regions, chunks=chunks) |
| 162 | + local_log(f"regions = {regions} (with threads)") |
143 | 163 | freebayes = ("freebayes-parallel {regions} {snakemake.threads}").format( |
144 | 164 | snakemake=snakemake, regions=regions |
145 | 165 | ) |
|
162 | 182 | pipe = "| bcftools sort -Ou - " + pipe |
163 | 183 |
|
164 | 184 | shell( |
165 | | - "({freebayes} {extra_params} -f {snakemake.input.ref}" |
| 185 | + "set -x; ({freebayes} {extra_params} -f {snakemake.input.ref}" |
166 | 186 | " --bam-list {snakemake.output.bamlist} {pipe} > {snakemake.output[0]}) {log}" |
167 | 187 | ) |
0 commit comments