Skip to content

Commit 5f88a3a

Browse files
committed
added keep, refactored mity call, normalise, util to use classes
1 parent 485e8cd commit 5f88a3a

File tree

10 files changed

+110
-655
lines changed

10 files changed

+110
-655
lines changed

Dockerfile.dev

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,26 +6,26 @@ ARG TAG
66
# Install freebayes dependencies
77
RUN apt-get update -yqq && \
88
apt-get install -yqq \
9-
make build-essential libssl-dev zlib1g-dev libbz2-dev \
10-
libreadline-dev libsqlite3-dev wget curl llvm libncurses5-dev libncursesw5-dev \
11-
xz-utils tk-dev libffi-dev liblzma-dev python3-openssl git tabix \
12-
freebayes && \
9+
make build-essential libssl-dev zlib1g-dev libbz2-dev \
10+
libreadline-dev libsqlite3-dev wget curl llvm libncurses5-dev libncursesw5-dev \
11+
xz-utils tk-dev libffi-dev liblzma-dev python3-openssl git tabix \
12+
freebayes && \
1313
apt-get clean
1414

1515
# Install gsort
1616
RUN \
1717
cd /usr/local/bin && \
1818
curl -s https://api.github.com/repos/brentp/gsort/releases/latest \
19-
| grep browser_download_url \
20-
| grep -i $(uname) \
21-
| cut -d '"' -f 4 \
22-
| wget -O gsort -qi - && \
19+
| grep browser_download_url \
20+
| grep -i $(uname) \
21+
| cut -d '"' -f 4 \
22+
| wget -O gsort -qi - && \
2323
chmod +x gsort
2424

2525
# Install mity from dev server (but first install the previous version to get the dependencies from pypi)
2626
# RUN pip install mitywgs==0.4.0
2727
# mitywgs 0.4.0 requires pyvcf which doesn't allow testing of python 3.10.6 so here I'm installing dependencies manually
28-
RUN pip install pandas xlsxwriter pyfastx scipy pysam
28+
RUN pip install pandas xlsxwriter pyfastx scipy pysam pyyaml
2929
RUN pip install -i https://test.pypi.org/simple/ mitywgs-test==${TAG}
3030

3131
WORKDIR /home

mitylib/call.py

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ def set_mity_cmd(self):
157157

158158
mity_cmd += " " + " ".join(self.files)
159159
mity_cmd += '"'
160-
mity_cmd = mity_cmd.replace("/", "\/")
160+
mity_cmd = mity_cmd.replace("/", "\\/")
161161

162162
logger.debug(mity_cmd)
163163

@@ -215,26 +215,32 @@ def run_checks(self):
215215

216216
def bam_has_rg(self, bam):
217217
"""
218-
Does the BAM or CRAM File have an @RG header? This is critical for mity
219-
to correctly call variants.
218+
Check whether a BAM or CRAM file contains a valid @RG header, which is critical for accurate variant calling with mity.
220219
221-
:param bam: str: path to bam or cram file
222-
:return: True/False
223-
>>> bam_has_RG('NA12878.alt_bwamem_GRCh38DH.20150718.CEU.low_coverage.chrM.bam')
220+
Parameters:
221+
- bam (str): Path to a BAM or CRAM file.
222+
223+
Returns:
224+
- bool: True if the file has a valid @RG header, False otherwise.
224225
"""
225226
r = pysam.AlignmentFile(bam, "rb")
226227
return len(r.header["RG"]) > 0
227228

228229
def bam_get_mt_contig(self, bam, as_string=False):
229230
"""
230-
get the mitochondrial contig name and length from a BAM file
231-
:param bam: path to a bam or cram file
232-
:return: a tuple of contig name as str and length as int
233-
234-
>>> bam_get_mt_contig('NA12878.alt_bwamem_GRCh38DH.20150718.CEU.low_coverage.chrM.bam', False)
235-
('chrM', 16569)
236-
>>> bam_get_mt_contig('NA12878.alt_bwamem_GRCh38DH.20150718.CEU.low_coverage.chrM.bam', True)
237-
'chrM:1-16569'
231+
Retrieve mitochondrial contig information from a BAM or CRAM file.
232+
233+
Parameters:
234+
- bam (str): Path to a BAM or CRAM file.
235+
236+
Keyword Arguments:
237+
- with_coordinates (bool): If True, the result includes the contig
238+
name with coordinates (e.g., 'chrM:1-16569'). If False, it returns a
239+
tuple containing the contig name (str) and its length (int).
240+
241+
Returns:
242+
- If with_coordinates is False, a tuple (contig_name, contig_length).
243+
- If with_coordinates is True, a string with coordinates.
238244
"""
239245
r = pysam.AlignmentFile(bam, "rb")
240246
chroms = [str(record.get("SN")) for record in r.header["SQ"]]

mitylib/commands.py

Lines changed: 21 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
55
66
Usage: See the online manual for details: http://github.com/KCCG/mity
7-
Authors: Clare Puttick, Mark Cowley
7+
Authors: Clare Puttick, Mark Cowley, Trent Zeng, Christian Fares
88
License: MIT
99
"""
1010
import argparse
@@ -13,13 +13,6 @@
1313
from mitylib import call, normalise, report, merge, util
1414
from ._version import __version__
1515

16-
__all__ = []
17-
18-
19-
def public(fn):
20-
__all__.append(fn.__name__)
21-
return fn
22-
2316

2417
usage = __doc__.split("\n\n\n", maxsplit=1)
2518
usage[-1] += "Version: " + __version__
@@ -34,8 +27,6 @@ def public(fn):
3427

3528
# call -------------------------------------------------------------------------
3629

37-
do_call = public(call.do_call)
38-
3930

4031
def _cmd_call(args):
4132
"""Call mitochondrial variants"""
@@ -157,7 +148,8 @@ def _cmd_call(args):
157148
action="store",
158149
type=str,
159150
default=None,
160-
help="A text file of BAM files to be processed. The path to each file should be on one row per Region of MT genome to call variants in. "
151+
help="A text file of BAM files to be processed. The path to each file should be"
152+
"on one row per Region of MT genome to call variants in. "
161153
"If unset will call variants in entire MT genome as specified in BAM header. "
162154
"Default: Entire MT genome. ",
163155
dest="region",
@@ -166,8 +158,6 @@ def _cmd_call(args):
166158

167159
# normalise --------------------------------------------------------------------
168160

169-
do_normalise = public(normalise.do_normalise)
170-
171161

172162
def _cmd_normalise(args):
173163
"""Normalise & FILTER mitochondrial variants"""
@@ -177,13 +167,14 @@ def _cmd_normalise(args):
177167
genome = util.MityUtil.select_reference_genome(args.reference, None)
178168
args.reference = util.MityUtil.select_reference_fasta(args.reference, None)
179169

180-
normalise.do_normalise(
170+
normalise.Normalise(
181171
args.debug,
182172
args.vcf,
183173
args.reference,
184174
genome,
185175
args.outfile,
186176
args.allsamples,
177+
args.keep,
187178
p=args.p,
188179
)
189180

@@ -206,6 +197,13 @@ def _cmd_normalise(args):
206197
required=False,
207198
help="PASS in the filter requires all samples to pass instead of just one",
208199
)
200+
P_normalise.add_argument(
201+
"-k",
202+
"--keep",
203+
action="store_true",
204+
required=False,
205+
help="Keep all intermediate files",
206+
)
209207
P_normalise.add_argument(
210208
"--p",
211209
action="store",
@@ -227,15 +225,13 @@ def _cmd_normalise(args):
227225

228226
# report -----------------------------------------------------------------------
229227

230-
do_report = public(report.do_report)
231-
232228

233229
def _cmd_report(args):
234230
"""Generate mity report"""
235231
logging.info("mity %s", __version__)
236232
logging.info("Generating mity report")
237-
report.do_report(
238-
args.debug, args.vcf, args.prefix, args.min_vaf, args.out_folder_path
233+
report.Report(
234+
args.debug, args.vcf, args.prefix, args.min_vaf, args.out_folder_path, args.keep
239235
)
240236

241237

@@ -265,13 +261,18 @@ def _cmd_report(args):
265261
P_report.add_argument(
266262
"vcf", action="append", nargs="+", help="mity vcf files to create a report from"
267263
)
264+
P_report.add_argument(
265+
"-k",
266+
"--keep",
267+
action="store_true",
268+
required=False,
269+
help="Keep all intermediate files",
270+
)
268271
P_report.set_defaults(func=_cmd_report)
269272

270273

271274
# merge -----------------------------------------------------------------------
272275

273-
do_merge = public(merge.do_merge)
274-
275276

276277
def _cmd_merge(args):
277278
"""Merging mity VCF with nuclear VCF"""

mitylib/mity

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,17 @@ License: MIT
99
"""
1010

1111
import logging
12-
from mitylib import commands
13-
from mitylib import util
1412
import sys
13+
from mitylib import commands
1514

1615
assert sys.version_info >= (3, 7, 4)
1716

17+
1818
def main():
19+
"""
20+
Entry function for mity.
21+
"""
1922
logging.basicConfig(level=logging.INFO, format="%(message)s")
20-
util.check_dependencies()
2123

2224
args = commands.parse_args()
2325

@@ -27,5 +29,5 @@ def main():
2729
args.func(args)
2830

2931

30-
if __name__ == '__main__':
32+
if __name__ == "__main__":
3133
main()

mitylib/normalise.py

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,13 @@
22

33
import subprocess
44
import logging
5-
from .util import MityUtil
6-
from scipy.stats import binom
5+
import os.path
76
from math import isinf
7+
from scipy.stats import binom
88
import numpy as np
9-
import configparser
109
import pysam
1110
import pysam.bcftools
12-
import os.path
11+
from mitylib.util import MityUtil
1312

1413

1514
# LOGGING
@@ -37,6 +36,7 @@ def __init__(
3736
genome,
3837
output_file=None,
3938
allsamples=False,
39+
keep=False,
4040
p=P_VAL,
4141
):
4242
self.debug = debug
@@ -45,6 +45,7 @@ def __init__(
4545
self.genome = genome
4646
self.output_file = output_file
4747
self.allsamples = allsamples
48+
self.keep = keep
4849
self.p = p
4950

5051
self.normalised_vcf_obj = None
@@ -88,9 +89,10 @@ def run(self):
8889
self.do_gsort()
8990
MityUtil.tabix(output_file)
9091

91-
# remove temporary files
92-
os.remove(self.filtered_vcf_name)
93-
os.remove(self.normalised_vcf_name)
92+
if not self.keep:
93+
# remove temporary files
94+
os.remove(self.filtered_vcf_name)
95+
os.remove(self.normalised_vcf_name)
9496

9597
def run_bcftools_norm(self):
9698
"""
@@ -210,25 +212,14 @@ def add_headers(self):
210212

211213
def mity_qual(self, AO, DP, p):
212214
"""
213-
Compute variant quality
214-
:param AO: (int) number of alternative reads
215-
:param DP: (int) total read depth
216-
:return: (float) phred-scaled quality score
217-
218-
>>> [mity_qual(x,10) for x in [1,2,3,4,5,6,7,8,9,10]]
219-
'[37.49, 60.22, 84.78, 110.97, 138.75, 168.16, 199.4, 232.92, 269.9, 296.89]'
220-
>>> [mity_qual(x,100) for x in [5,10,15,20,25,30,35,40,45,50]]
221-
'[71.87, 156.08, 251.23, 354.34, 463.9, 579.05, 699.21, 824.04, 953.32, 1086.94]'
222-
>>> [mity_qual(x,1000) for x in [5,10,15,20,25,30,35,40,45,50]]
223-
'[17.84, 50.97, 93.59, 142.89, 197.36, 256.02, 318.25, 383.57, 451.61, 522.1]'
224-
>>> [mity_qual(x,10000) for x in [5,10,15,20,25,30,35,40,45,50]]
225-
'[0.0, 0.05, 0.74, 3.56, 9.51, 18.73, 31.0, 46.04, 63.57, 83.37]'
226-
>>> mity_qual(118,118)
227-
'3211.76'
228-
>>> mity_qual(119,119)
229-
'3220'
230-
>>> mity_qual(10000,10000)
231-
'3220'
215+
Calculate the Phred-scaled quality score for a genetic variant.
216+
217+
Parameters:
218+
- AO (int): Number of alternative reads.
219+
- DP (int): Total read depth.
220+
221+
Returns:
222+
- float: Phred-scaled quality score.
232223
"""
233224
q = 0.0
234225
AO = int(AO)
@@ -248,6 +239,15 @@ def mity_qual(self, AO, DP, p):
248239
return q
249240

250241
def add_filter(self, variant):
242+
"""
243+
Adds filter to variant, and filters for samples.
244+
245+
Parameters:
246+
- variant (VariantRecord): Variant to add filters to
247+
248+
Returns:
249+
- VariantRecord: Variant with filters added
250+
"""
251251
# filter dictionary
252252
# each value represents the number of samples that pass each field
253253
# starts with all samples passing
@@ -391,5 +391,5 @@ def do_gsort(self):
391391
"""
392392
Run gsort.
393393
"""
394-
gsort_cmd = f"gsort {self.filtered_vcf_name} {self.genome} | bgzip -cf > {self.gsorted_name}"
394+
gsort_cmd = f"gsort {self.filtered_vcf_name} {self.genome} | bgzip -cf > {self.output_file}"
395395
subprocess.run(gsort_cmd, shell=True, check=False)

0 commit comments

Comments
 (0)