Skip to content

Commit

Permalink
mosdepth missing values imputed
Browse files Browse the repository at this point in the history
Changes:
 - Mosdepth output has missing values in *.{region,global}.dist.txt
 - This change to the module fills any missing value with the value recorded at the next higher depth
 - e.g., if there is 100% at 80X and 80% at 100X, the value at 90X will be recorded as 80%
 - This may underestimate coverage slightly but it's not clear from MosDepth docs how it should be handled.
 - See brentp/mosdepth#190
  • Loading branch information
Adam Talbot committed Dec 7, 2022
1 parent 45c5408 commit 2d71020
Showing 1 changed file with 22 additions and 4 deletions.
26 changes: 22 additions & 4 deletions multiqc/modules/mosdepth/mosdepth.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,12 +290,30 @@ def parse_cov_dist(self):
return genstats, cumcov_dist_data, cov_dist_data, xmax, perchrom_avg_data

def genstats_cov_thresholds(self, genstats, genstats_headers, cumcov_dist_data, threshs, hidden_threshs):
def recursive_get_value(d: OrderedDict, t: int) -> int:
    """
    Return the value recorded for depth threshold ``t``, imputing it if missing.

    If ``t`` is not a key of ``d``, the value stored at the smallest depth
    greater than ``t`` is returned instead. This means the % at the threshold
    will be estimated slightly lower but prevents zero values.

    Args:
        d: Mapping of depth -> value for one sample.
        t: Depth threshold to look up.

    Returns:
        ``d[t]`` when present, otherwise the value at the next depth above
        ``t``, or ``0`` when no depth above ``t`` has been recorded.

    The name is kept for the existing caller; the lookup no longer needs to
    recurse because the fallback depth is always a key of ``d``.
    """
    # NOTE(review): the log message below treats the values as percentages, so
    # the ``int`` return annotation may be too narrow - confirm against the parser.
    if t in d:
        return d[t]

    # Smallest recorded depth strictly above t; a single pass, no sort needed.
    next_depth = min((depth for depth in d if depth > t), default=None)
    if next_depth is None:
        # No depths available greater than t
        log.debug(f"No values found for threshold {t}, assuming 0%")
        return 0
    return d[next_depth]

for s_name, d in cumcov_dist_data.items():
dist_subset = {t: data for t, data in d.items() if t in threshs}
for t in threshs:
if int(t) in dist_subset:
genstats[s_name][f"{t}_x_pc"] = dist_subset[t]
else:
try:
genstats[s_name][f"{t}_x_pc"] = recursive_get_value(d, int(t))
except KeyError:
# If value doesn't exist, use zero
genstats[s_name][f"{t}_x_pc"] = 0

for t in threshs:
Expand Down

0 comments on commit 2d71020

Please sign in to comment.