Skip to content

Commit

Permalink
Fix --pick ac
Browse files Browse the repository at this point in the history
Some base/comp variants were being dropped
  • Loading branch information
ACEnglish committed Feb 13, 2025
1 parent 6051b02 commit e6afa7e
Show file tree
Hide file tree
Showing 13 changed files with 57 additions and 47 deletions.
4 changes: 2 additions & 2 deletions imgs/coverage.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
12 changes: 6 additions & 6 deletions imgs/pylint.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified repo_utils/answer_key/bench/bench12_gtcomp/fn.vcf.gz
Binary file not shown.
Binary file modified repo_utils/answer_key/bench/bench12_gtcomp/fn.vcf.gz.tbi
Binary file not shown.
Binary file modified repo_utils/answer_key/bench/bench12_gtcomp/fp.vcf.gz
Binary file not shown.
Binary file modified repo_utils/answer_key/bench/bench12_gtcomp/fp.vcf.gz.tbi
Binary file not shown.
Binary file modified repo_utils/answer_key/bench/bench_bnd/fn.vcf.gz
Binary file not shown.
Binary file modified repo_utils/answer_key/bench/bench_bnd_decomp/tp-base.vcf.gz
Binary file not shown.
Binary file modified repo_utils/answer_key/bench/bench_bnd_decomp/tp-base.vcf.gz.tbi
Binary file not shown.
Binary file modified repo_utils/answer_key/bench/bench_gtcomp_edgecase1/fn.vcf.gz
Binary file not shown.
Binary file not shown.
87 changes: 48 additions & 39 deletions truvari/bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -650,56 +650,65 @@ def pick_ac_matches(match_matrix):
match_matrix = np.ravel(match_matrix)
match_matrix.sort()
used_comp = Counter()
written_comp = defaultdict(bool)
used_base = Counter()
written_base = defaultdict(bool)

# Make a pass for TPs only
for match in match_matrix[::-1]:
# No more matches to find
if base_cnt == 0 and comp_cnt == 0:
break
if not match.state:
continue

b_key = str(match.base)
c_key = str(match.comp)
# This is a trick

base_is_used = used_base[b_key] >= match.base_gt_count
comp_is_used = used_comp[c_key] >= match.comp_gt_count
# Only write the comp (FP)
if base_cnt == 0 and not comp_is_used:
if base_is_used or comp_is_used:
match.state = False
match.multi = True
continue

# How often this variant has been used is updated
used_comp[c_key] += match.base_gt_count
used_base[b_key] += match.comp_gt_count

# Only write once
if not written_comp[c_key]:
written_comp[c_key] = True
to_process = copy.copy(match)
to_process.base = None
to_process.multi = to_process.state
to_process.state = False
comp_cnt -= 1
if used_comp[c_key] == 0: # Only write as F if it hasn't been a T
ret.append(to_process)
used_comp[c_key] = 9
# Only write the base (FN)
elif comp_cnt == 0 and not base_is_used:
ret.append(to_process)
if not written_base[b_key]:
written_base[b_key] = True
to_process = copy.copy(match)
to_process.comp = None
to_process.multi = to_process.state
to_process.state = False
base_cnt -= 1
if used_base[b_key] == 0: # Only write as F if it hasn't been a T
ret.append(to_process)
used_base[b_key] = 9
# Write both (any state)
elif not base_is_used and not comp_is_used:
ret.append(to_process)

# Make a pass for all remaining FP/FNs
for match in match_matrix[::-1]:
if match.state:
continue

b_key = str(match.base)
c_key = str(match.comp)

# Only write once
if not written_comp[c_key]:
written_comp[c_key] = True
to_process = copy.copy(match)
to_process.base = None
ret.append(to_process)

if not written_base[b_key]:
written_base[b_key] = True
to_process = copy.copy(match)
# Don't write twice
if used_base[b_key] != 0:
to_process.base = None
if used_comp[c_key] != 0:
to_process.comp = None

used_base[b_key] += match.comp_gt_count
used_comp[c_key] += match.base_gt_count
# All used up
if used_base[b_key] >= match.base_gt_count:
base_cnt -= 1
if used_comp[c_key] >= match.comp_gt_count:
comp_cnt -= 1

# Safety edge case check
if to_process.base is not None or to_process.comp is not None:
ret.append(to_process)
to_process.comp = None
ret.append(to_process)

if len(written_comp) >= comp_cnt and len(written_base) >= base_cnt:
break

return ret


Expand Down
1 change: 1 addition & 0 deletions truvari/region_vcf_iter.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,7 @@ def region_filter_fetch(vcf, tree, with_region=False):
chrom, vcf.filename)


# pylint: disable=too-many-statements
def region_filter_stream(vcf, tree, inside=True, with_region=False):
"""
Given a VariantRecord iter and defaultdict(IntervalTree),
Expand Down

0 comments on commit e6afa7e

Please sign in to comment.