Skip to content

Commit 0294814

Browse files
committed
added some filter settings to CLI
1 parent a090ee9 commit 0294814

File tree

1 file changed

+29
-11
lines changed

1 file changed

+29
-11
lines changed

script/isovar-translate-variants.py

Lines changed: 29 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
import varcode
2222
import skbio
23+
import numpy as np
2324
from pysam import AlignmentFile
2425

2526
from isovar import gather_variant_reads, sequence_counts
@@ -39,13 +40,19 @@
3940
default=None)
4041

4142
parser.add_argument(
42-
"--min-count", type=int, default=3)
43+
"--min-read-count",
44+
type=int,
45+
default=3)
4346

4447
parser.add_argument(
45-
"--context-size",
46-
default=45,
48+
"--sequence-length",
49+
default=105,
4750
type=int)
4851

52+
parser.add_argument(
53+
"--max-sequences-per-variant",
54+
type=int,
55+
default=5)
4956

5057
if __name__ == "__main__":
5158
args = parser.parse_args()
@@ -54,31 +61,42 @@
5461
samfile = AlignmentFile(args.bam)
5562

5663
for variant in variants:
64+
print(variant)
5765
variant_reads = gather_variant_reads(
5866
samfile=samfile,
5967
chromosome="chr" + variant.contig,
6068
base1_location=variant.start,
6169
ref=variant.ref,
6270
alt=variant.alt)
63-
if len(variant_reads) < args.min_count:
71+
if len(variant_reads) < args.min_read_count:
6472
continue
73+
74+
# the number of context nucleotides on each side of the variant is
75+
# half of (desired sequence length - number of variant nucleotides), rounded up
76+
context_size = int(
77+
np.ceil((args.sequence_length - len(variant.alt)) / 2.0))
6578
sequence_count_info = sequence_counts(
66-
variant_reads, context_size=args.context_size)
67-
for ((prefix, suffix), count) in sorted(
79+
variant_reads,
80+
context_size=context_size)
81+
for i, ((prefix, suffix), count) in enumerate(sorted(
6882
sequence_count_info.full_read_counts.items(),
69-
key=lambda x: -x[1]):
70-
if count < args.min_count:
83+
key=lambda x: -x[1])):
84+
if i >= args.max_sequences_per_variant:
7185
break
7286

73-
variant = sequence_count_info.variant_nucleotides
87+
if count < args.min_read_count:
88+
break
89+
90+
variant_seq = sequence_count_info.variant_nucleotides
91+
7492
print("\t%s_%s_%s: %d" % (
7593
prefix,
76-
variant,
94+
variant_seq,
7795
suffix,
7896
count))
7997

8098
# translate in three reading frames:
81-
seq = "%s%s%s" % (prefix, variant, suffix)
99+
seq = "%s%s%s" % (prefix, variant_seq, suffix)
82100
for offset in range(3):
83101
dna = skbio.DNA(seq[offset:])
84102
print("\t\tframe=%d: %s" % (offset, dna.translate()))

0 commit comments

Comments
 (0)