Skip to content

Commit 8eb4f7f

Browse files
PuneetSSPuneet Sidhu
and
Puneet Sidhu
authored
Added tigmint steps to makefile (#70)
* Added tigmint steps to makefile * Updated targets in makefile * Update makefile targets and README.MD * Update test file * Update test file names * Correct output files names * Update tigmint-make Co-authored-by: Puneet Sidhu <[email protected]>
1 parent 8c11a12 commit 8eb4f7f

11 files changed

+62
-45
lines changed

README.md

+3-1
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,9 @@ tigmint-make tigmint-long draft=myassembly reads=myreads span=auto G=gsize dist=
187187
- When aligning long reads with Minimap2, use the `-y` option to include the barcode in the BX tag of the alignments.
188188
- When using long reads, the minimum spanning molecule thresholds (`span`) should be no greater than 1/4 of the sequence coverage. Setting the parameter `span=auto` allows the appropriate parameter value to be selected automatically (this setting requires the parameter `G` as well).
189189
- When using long reads, the edit distance threshold (`nm`) is automatically set to the cut length (`cut`) to compensate for the higher error rate and length. This parameter should be kept relatively high to include as many alignments as possible.
190-
190+
- Each Tigmint (and Tigmint-long) step can be run separately through specifying the target using `tigmint-make`. For example, the bwa index step for Tigmint with linked reads can be launched with `tigmint-make tigmint-index`
191+
- Tigmint steps\targets (for linked reads): `tigmint-index`, `tigmint-align`, `tigmint-molecule`, `tigmint-cut`
192+
- Tigmint-long steps\targets: `tigmint-long-estimate`, `tigmint-long-to-linked`, `tigmint-long-cut`
191193
# Using stLFR linked reads
192194

193195
To use stLFR linked reads with Tigmint, you will need to re-format the reads to have the barcode in a `BX:Z:` tag in the read header.

bin/tigmint-make

+45-30
Original file line numberDiff line numberDiff line change
@@ -146,41 +146,66 @@ draft_metrics: \
146146

147147
# Correct misassemblies using Tigmint
148148
tigmint: \
149-
$(draft).$(reads).as$(as).nm$(nm).molecule.size$(minsize).trim$(trim).window$(window).span$(span).breaktigs.fa \
149+
$(draft).$(reads).as$(as).nm$(nm).molecule.size$(minsize).dist$(dist).trim$(trim).window$(window).span$(span).breaktigs.fa \
150150
$(draft).tigmint.fa
151151

152+
# Index the linked reads
153+
tigmint-index: $(draft).fa.bwt
154+
155+
# Align the linked reads (using bwa mem)
156+
tigmint-align: $(draft).$(reads).sortbx.bam
157+
158+
# Perform the tigmint-molecule step for linked reads
159+
tigmint-molecule: $(draft).$(reads).as$(as).nm$(nm).molecule.size$(minsize).dist$(dist).bed
160+
161+
# Perform the final tigmint-cut step for linked reads
162+
tigmint-cut: \
163+
$(draft).$(reads).as$(as).nm$(nm).molecule.size$(minsize).dist$(dist).trim$(trim).window$(window).span$(span).breaktigs.fa \
164+
$(draft).tigmint.fa
165+
152166
# Correct misassemblies for long reads using Tigmint
153167
tigmint-long: \
154-
$(draft).$(reads).cut$(cut).molecule.size$(minsize).trim$(trim).window$(window).span$(span).breaktigs.fa \
168+
$(draft).$(reads).cut$(cut).molecule.size$(minsize).dist$(dist).trim$(trim).window$(window).span$(span).breaktigs.fa \
155169
$(draft).cut$(cut).tigmint.fa
156170

171+
# Estimate the tigmint parameters for long reads
172+
tigmint-long-estimate: $(reads).tigmint-long.params.tsv
173+
174+
# Covert the long reads to pseudo-linked reads, perform minimap2 alignment and the tigmint-molecule step
175+
tigmint-long-to-linked: $(draft).$(reads).cut$(cut).molecule.size$(minsize).dist$(dist).bed
176+
177+
# Perform the final tigmint-cut step for long reads
178+
tigmint-long-cut: \
179+
$(draft).$(reads).cut$(cut).molecule.size$(minsize).dist$(dist).trim$(trim).window$(window).span$(span).breaktigs.fa \
180+
$(draft).cut$(cut).tigmint.fa
181+
157182
# Calculate assembly metrics after Tigmint
158183
tigmint_metrics: \
159-
$(draft).$(reads).as$(as).nm$(nm).molecule.size$(minsize).trim$(trim).window$(window).span$(span).breaktigs.abyss-fac.tsv \
160-
$(draft).$(reads).as$(as).nm$(nm).molecule.size$(minsize).trim$(trim).window$(window).span$(span).breaktigs.scaftigs.abyss-fac.tsv \
161-
$(draft).$(reads).as$(as).nm$(nm).molecule.size$(minsize).trim$(trim).window$(window).span$(span).breaktigs.scaftigs.$(ref).samtobreak.tsv
184+
$(draft).$(reads).as$(as).nm$(nm).molecule.size$(minsize).dist$(dist).trim$(trim).window$(window).span$(span).breaktigs.abyss-fac.tsv \
185+
$(draft).$(reads).as$(as).nm$(nm).molecule.size$(minsize).dist$(dist).trim$(trim).window$(window).span$(span).breaktigs.scaftigs.abyss-fac.tsv \
186+
$(draft).$(reads).as$(as).nm$(nm).molecule.size$(minsize).dist$(dist).trim$(trim).window$(window).span$(span).breaktigs.scaftigs.$(ref).samtobreak.tsv
162187

163188
# Scaffold using ARCS
164189
arcs: \
165-
$(draft).$(reads).as$(as).nm$(nm).molecule.size$(minsize).trim$(trim).window$(window).span$(span).breaktigs.$(reads).c$c_e$e_r$r.arcs.a$a_l$l.links.fa \
190+
$(draft).$(reads).as$(as).nm$(nm).molecule.size$(minsize).dist$(dist).trim$(trim).window$(window).span$(span).breaktigs.$(reads).c$c_e$e_r$r.arcs.a$a_l$l.links.fa \
166191
$(draft).tigmint.arcs.fa
167192

168193
# Calculate assembly metrics after ARCS
169194
arcs_metrics: \
170-
$(draft).$(reads).as$(as).nm$(nm).molecule.size$(minsize).trim$(trim).window$(window).span$(span).breaktigs.$(reads).c$c_e$e_r$r.arcs.a$a_l$l.links.abyss-fac.tsv \
171-
$(draft).$(reads).as$(as).nm$(nm).molecule.size$(minsize).trim$(trim).window$(window).span$(span).breaktigs.$(reads).c$c_e$e_r$r.arcs.a$a_l$l.links.scaftigs.abyss-fac.tsv \
172-
$(draft).$(reads).as$(as).nm$(nm).molecule.size$(minsize).trim$(trim).window$(window).span$(span).breaktigs.$(reads).c$c_e$e_r$r.arcs.a$a_l$l.links.scaftigs.$(ref).samtobreak.tsv
195+
$(draft).$(reads).as$(as).nm$(nm).molecule.size$(minsize).dist$(dist).trim$(trim).window$(window).span$(span).breaktigs.$(reads).c$c_e$e_r$r.arcs.a$a_l$l.links.abyss-fac.tsv \
196+
$(draft).$(reads).as$(as).nm$(nm).molecule.size$(minsize).dist$(dist).trim$(trim).window$(window).span$(span).breaktigs.$(reads).c$c_e$e_r$r.arcs.a$a_l$l.links.scaftigs.abyss-fac.tsv \
197+
$(draft).$(reads).as$(as).nm$(nm).molecule.size$(minsize).dist$(dist).trim$(trim).window$(window).span$(span).breaktigs.$(reads).c$c_e$e_r$r.arcs.a$a_l$l.links.scaftigs.$(ref).samtobreak.tsv
173198

174199
# Symlink the Tigmint results
175-
$(draft).tigmint.fa: $(draft).$(reads).as$(as).nm$(nm).molecule.size$(minsize).trim$(trim).window$(window).span$(span).breaktigs.fa
200+
$(draft).tigmint.fa: $(draft).$(reads).as$(as).nm$(nm).molecule.size$(minsize).dist$(dist).trim$(trim).window$(window).span$(span).breaktigs.fa
176201
ln -sf $< $@
177202

178203
# Symlink the long read Tigmint results
179-
$(draft).cut$(cut).tigmint.fa: $(draft).$(reads).cut$(cut).molecule.size$(minsize).trim$(trim).window$(window).span$(span).breaktigs.fa
204+
$(draft).cut$(cut).tigmint.fa: $(draft).$(reads).cut$(cut).molecule.size$(minsize).dist$(dist).trim$(trim).window$(window).span$(span).breaktigs.fa
180205
ln -sf $< $@
181206

182207
# Symlink the ARCS results
183-
$(draft).tigmint.arcs.fa: $(draft).$(reads).as$(as).nm$(nm).molecule.size$(minsize).trim$(trim).window$(window).span$(span).breaktigs.$(reads).c$c_e$e_r$r.arcs.a$a_l$l.links.fa
208+
$(draft).tigmint.arcs.fa: $(draft).$(reads).as$(as).nm$(nm).molecule.size$(minsize).dist$(dist).trim$(trim).window$(window).span$(span).breaktigs.$(reads).c$c_e$e_r$r.arcs.a$a_l$l.links.fa
184209
ln -sf $< $@
185210

186211
# BWA
@@ -207,30 +232,14 @@ ifeq ($G, 0)
207232
endif
208233
endif
209234

210-
# Segment long reads from gzipped fasta file, optionally calculating tigmint-long parameters.
211-
$(reads).cut$(cut).fq.gz: $(longreads) check_span_g
212-
ifeq ($(span), auto)
213-
ifeq ($(dist), auto)
214-
$(gtime) $(gzip) -dc $(longreads) | $(bin)/long-to-linked -l$(cut) -m$(minsize) -g$G -s -d -o $(reads).tigmint-long.params.tsv | $(gzip) > $@
215-
else
216-
$(gtime) $(gzip) -dc $(longreads) | $(bin)/long-to-linked -l$(cut) -m$(minsize) -g$G -s -o $(reads).tigmint-long.params.tsv | $(gzip) > $@
217-
endif
218-
else
219-
ifeq ($(dist), auto)
220-
$(gtime) $(gzip) -dc $(longreads) | $(bin)/long-to-linked -l$(cut) -m$(minsize) -d -o $(reads).tigmint-long.params.tsv | $(gzip) > $@
221-
else
222-
$(gtime) $(gzip) -dc $(longreads) | $(bin)/long-to-linked -l$(cut) -m$(minsize) | $(gzip) > $@
223-
endif
224-
endif
225-
226235
# samtools
227236

228237
# Index a FASTA file.
229238
%.fa.fai: %.fa
230239
samtools faidx $<
231240

232241
# Create molecule extents BED
233-
%.as$(as).nm$(nm).molecule.size$(minsize).bed: %.sortbx.bam
242+
%.as$(as).nm$(nm).molecule.size$(minsize).dist$(dist).bed: %.sortbx.bam
234243
$(gtime) $(bin)/tigmint_molecule.py -a$(as) -n$(nm) -q$(mapq) -d$(dist) -s$(minsize) $< | sort -k1,1 -k2,2n -k3,3n >$@
235244

236245
# Estimate dist parameter for tigmint-long
@@ -240,7 +249,13 @@ $(reads).tigmint-long.params.tsv: $(longreads)
240249
$(bin)/tigmint_estimate_dist.py - -n $(dist_sample) -o $@'
241250

242251
# Create molecule extents BED using cut long reads
243-
$(draft).$(reads).cut$(cut).molecule.size$(minsize).bed: $(longreads) $(draft).fa $(reads).tigmint-long.params.tsv check_span_g
252+
$(draft).$(reads).cut$(cut).molecule.size$(minsize).dist$(dist).bed: $(longreads) $(draft).fa $(reads).tigmint-long.params.tsv
253+
ifeq ($(span), auto)
254+
ifeq ($G, 0)
255+
$(error Must set genome size parameter (G) to calculate span automatically)
256+
endif
257+
endif
258+
244259
ifeq ($(dist), auto)
245260
$(gtime) $(bin)/../src/long-to-linked-pe -l $(cut) -m$(minsize) -g$G -s -b $(reads).barcode-multiplicity.tsv --bx -t$t --fasta -f $(reads).tigmint-long.params.tsv $< | \
246261
minimap2 -y -t$t -x map-$(longmap) --secondary=no $(draft).fa - | \
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
test_contig.test_linkedreads.as0.65.nm5.molecule.size2000.trim0.window1000.span20.breaktigs.fa
1+
test_contig.test_linkedreads.as0.65.nm5.molecule.size2000.dist50000.trim0.window1000.span20.breaktigs.fa
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
test_contig_long.test_longreads.cut500.molecule.size2000.trim0.window1000.span10.breaktigs.fa
1+
test_contig_long.test_longreads.cut500.molecule.size2000.distauto.trim0.window1000.span10.breaktigs.fa

tests/tigmint_test.py

+12-12
Original file line numberDiff line numberDiff line change
@@ -66,12 +66,12 @@ def tigmint_pipeline():
6666
outfiles = ["test_contig.fa.amb", "test_contig.fa.ann", "test_contig.fa.bwt",
6767
"test_contig.fa.fai", "test_contig.fa.pac", "test_contig.fa.sa",
6868
"test_contig_long.cut500.tigmint.fa", "test_contig.tigmint.fa", "test_contig_long.fa.fai",
69-
"test_contig_long.test_longreads.cut500.molecule.size2000.bed",
70-
"test_contig_long.test_longreads.cut500.molecule.size2000.trim0.window1000.span10.breaktigs.fa",
71-
"test_contig_long.test_longreads.cut500.molecule.size2000.trim0.window1000.span10.breaktigs.fa.bed",
72-
"test_contig.test_linkedreads.as0.65.nm5.molecule.size2000.bed",
73-
"test_contig.test_linkedreads.as0.65.nm5.molecule.size2000.trim0.window1000.span20.breaktigs.fa",
74-
"test_contig.test_linkedreads.as0.65.nm5.molecule.size2000.trim0.window1000.span20.breaktigs.fa.bed",
69+
"test_contig_long.test_longreads.cut500.molecule.size2000.distauto.bed",
70+
"test_contig_long.test_longreads.cut500.molecule.size2000.distauto.trim0.window1000.span10.breaktigs.fa",
71+
"test_contig_long.test_longreads.cut500.molecule.size2000.distauto.trim0.window1000.span10.breaktigs.fa.bed",
72+
"test_contig.test_linkedreads.as0.65.nm5.molecule.size2000.dist50000.bed",
73+
"test_contig.test_linkedreads.as0.65.nm5.molecule.size2000.dist50000.trim0.window1000.span20.breaktigs.fa",
74+
"test_contig.test_linkedreads.as0.65.nm5.molecule.size2000.dist50000.trim0.window1000.span20.breaktigs.fa.bed",
7575
"test_contig.test_linkedreads.sortbx.bam",
7676
"test_longreads.cut500.fq.gz", "test_longreads.tigmint-long.span.txt",
7777
"test_longreads.tigmint-long.params.tsv"]
@@ -231,9 +231,9 @@ def test_pipeline(tigmint_pipeline):
231231
assert exp_alignment == obs_bam[i]
232232

233233
# Other readable files
234-
tigmint_outputs = ["test_contig.test_linkedreads.as0.65.nm5.molecule.size2000.bed",
235-
"test_contig.test_linkedreads.as0.65.nm5.molecule.size2000.trim0.window1000.span20.breaktigs.fa",
236-
"test_contig.test_linkedreads.as0.65.nm5.molecule.size2000.trim0.window1000.span20.breaktigs.fa.bed"]
234+
tigmint_outputs = ["test_contig.test_linkedreads.as0.65.nm5.molecule.size2000.dist50000.bed",
235+
"test_contig.test_linkedreads.as0.65.nm5.molecule.size2000.dist50000.trim0.window1000.span20.breaktigs.fa",
236+
"test_contig.test_linkedreads.as0.65.nm5.molecule.size2000.dist50000.trim0.window1000.span20.breaktigs.fa.bed"]
237237
for output in tigmint_outputs:
238238
expected_output = "expected_outputs/" + output
239239
with open(expected_output) as exp:
@@ -251,9 +251,9 @@ def test_pipeline(tigmint_pipeline):
251251

252252
# Other output files
253253
tigmint_long_outputs = ["test_longreads.tigmint-long.params.tsv",
254-
"test_contig_long.test_longreads.cut500.molecule.size2000.bed",
255-
"test_contig_long.test_longreads.cut500.molecule.size2000.trim0.window1000.span10.breaktigs.fa",
256-
"test_contig_long.test_longreads.cut500.molecule.size2000.trim0.window1000.span10.breaktigs.fa.bed"]
254+
"test_contig_long.test_longreads.cut500.molecule.size2000.distauto.bed",
255+
"test_contig_long.test_longreads.cut500.molecule.size2000.distauto.trim0.window1000.span10.breaktigs.fa",
256+
"test_contig_long.test_longreads.cut500.molecule.size2000.distauto.trim0.window1000.span10.breaktigs.fa.bed"]
257257
for output in tigmint_long_outputs:
258258
expected_output = "expected_outputs/" + output
259259
with open(expected_output) as exp:

0 commit comments

Comments
 (0)