From ab353f845d13e0ef2904b58f30ce8b3476dde02e Mon Sep 17 00:00:00 2001 From: Sam W Date: Thu, 23 Jan 2025 12:29:31 +0000 Subject: [PATCH] Temporarily fix high --normalisation issue until maptide refactor (#157) --- artic/align_trim.py | 54 ++++++++++++++++++++++++++++++---------- artic/make_depth_mask.py | 11 ++++++-- artic/minion.py | 13 ++++++++-- artic/pipeline.py | 2 +- setup.cfg | 2 +- 5 files changed, 63 insertions(+), 19 deletions(-) diff --git a/artic/align_trim.py b/artic/align_trim.py index 882b68e1..4a77c1aa 100755 --- a/artic/align_trim.py +++ b/artic/align_trim.py @@ -39,14 +39,18 @@ def find_primer(bed, pos, direction, chrom, threshold=35): primer_distances = [ (abs(p["start"] - pos), p["start"] - pos, p) for p in bed - if (p["direction"] == direction) and (pos >= (p["start"] - threshold)) and chrom == p["chrom"] + if (p["direction"] == direction) + and (pos >= (p["start"] - threshold)) + and chrom == p["chrom"] ] else: primer_distances = [ (abs(p["end"] - pos), p["end"] - pos, p) for p in bed - if (p["direction"] == direction) and (pos <= (p["end"] + threshold)) and chrom == p["chrom"] + if (p["direction"] == direction) + and (pos <= (p["end"] + threshold)) + and chrom == p["chrom"] ] if not primer_distances: @@ -206,9 +210,21 @@ def handle_segment( # locate the nearest primers to this alignment segment # p1 = find_primer(bed, segment.reference_start, "+", segment.reference_name, args.primer_match_threshold) - p1 = find_primer(bed=bed, pos=segment.reference_start, direction="+", chrom=segment.reference_name, threshold=args.primer_match_threshold) + p1 = find_primer( + bed=bed, + pos=segment.reference_start, + direction="+", + chrom=segment.reference_name, + threshold=args.primer_match_threshold, + ) # p2 = find_primer(bed, segment.reference_end, "-", segment.reference_name, args.primer_match_threshold) - p2 = find_primer(bed=bed, pos=segment.reference_end, direction="-", chrom=segment.reference_name, threshold=args.primer_match_threshold) + p2 = 
find_primer( + bed=bed, + pos=segment.reference_end, + direction="-", + chrom=segment.reference_name, + threshold=args.primer_match_threshold, + ) if not p1 or not p2: if args.verbose: @@ -361,7 +377,9 @@ def generate_amplicons(bed: list): for chrom, amplicons_dict in amplicons.items(): for amplicon in amplicons_dict: if not all([x in amplicons_dict[amplicon] for x in ["p_start", "p_end"]]): - raise ValueError(f"Primer scheme for amplicon {amplicon} for reference {chrom} is incomplete") + raise ValueError( + f"Primer scheme for amplicon {amplicon} for reference {chrom} is incomplete" + ) # Check if primer runs accross reference start / end -> circular virus amplicons_dict[amplicon]["circular"] = ( @@ -408,7 +426,9 @@ def normalise(trimmed_segments: dict, normalise: int, bed: list, verbose: bool = (amplicons[chrom][amplicon]["length"],), normalise, dtype=int ) - amplicon_depth = np.zeros((amplicons[chrom][amplicon]["length"],), dtype=int) + amplicon_depth = np.zeros( + (amplicons[chrom][amplicon]["length"],), dtype=int + ) if not segments: if verbose: @@ -425,12 +445,16 @@ def normalise(trimmed_segments: dict, normalise: int, bed: list, verbose: bool = for segment in segments: test_depths = np.copy(amplicon_depth) - relative_start = segment.reference_start - amplicons[chrom][amplicon]["p_start"] + relative_start = ( + segment.reference_start - amplicons[chrom][amplicon]["p_start"] + ) if relative_start < 0: relative_start = 0 - relative_end = segment.reference_end - amplicons[chrom][amplicon]["p_start"] + relative_end = ( + segment.reference_end - amplicons[chrom][amplicon]["p_start"] + ) test_depths[relative_start:relative_end] += 1 @@ -442,7 +466,7 @@ def normalise(trimmed_segments: dict, normalise: int, bed: list, verbose: bool = output_segments.append(segment) mean_depths[(chrom, amplicon)] = np.mean(amplicon_depth) - + return output_segments, mean_depths @@ -519,7 +543,9 @@ def go(args): trimmed_segments[trimmed_segment.reference_name].setdefault(amplicon, []) 
if trimmed_segment: - trimmed_segments[trimmed_segment.reference_name][amplicon].append(trimmed_segment) + trimmed_segments[trimmed_segment.reference_name][amplicon].append( + trimmed_segment + ) # normalise if requested if args.normalise: @@ -536,10 +562,12 @@ def go(args): for output_segment in output_segments: outfile.write(output_segment) + else: - for amplicon, segments in trimmed_segments.items(): - for segment in segments: - outfile.write(segment) + for chrom, amplicon_dict in trimmed_segments.items(): + for amplicon, segments in amplicon_dict.items(): + for segment in segments: + outfile.write(segment) # close up the file handles infile.close() diff --git a/artic/make_depth_mask.py b/artic/make_depth_mask.py index 579eca18..6ec6788b 100755 --- a/artic/make_depth_mask.py +++ b/artic/make_depth_mask.py @@ -65,9 +65,13 @@ def collect_depths(bamfile, refName, minDepth, ignoreDeletions, warnRGcov): # generate the pileup for pileupcolumn in bamFile.pileup( - refName, start=0, stop=bamFile.get_reference_length(refName), max_depth=10000, truncate=False, min_base_quality=0 + refName, + start=0, + stop=bamFile.get_reference_length(refName), + max_depth=100000000, + truncate=False, + min_base_quality=0, ): - # process the pileup column for pileupread in pileupcolumn.pileups: @@ -78,13 +82,16 @@ def collect_depths(bamfile, refName, minDepth, ignoreDeletions, warnRGcov): # process the pileup read if pileupread.is_refskip: continue + if pileupread.is_del: if not ignoreDeletions: depths[pileupcolumn.pos] += 1 rgDepths[rg][pileupcolumn.pos] += 1 + elif not pileupread.is_del: depths[pileupcolumn.pos] += 1 rgDepths[rg][pileupcolumn.pos] += 1 + else: raise Exception("unhandled pileup read encountered") diff --git a/artic/minion.py b/artic/minion.py index 33b19728..c5cba46d 100755 --- a/artic/minion.py +++ b/artic/minion.py @@ -44,6 +44,15 @@ def run(parser, args): ) raise SystemExit(1) + if args.normalise > 10000: + print( + colored.red( + "--normalise appears to be an 
arbitrarily high value, if you wish to not normalise your BAM files set '--normalise 0' to disable normalisation entirely" + ), + file=sys.stderr, + ) + raise SystemExit(1) + if args.model_dir: model_path = args.model_dir else: @@ -169,7 +178,7 @@ def run(parser, args): cmds.append(f"samtools index {args.sample}.{p}.trimmed.rg.sorted.bam") cmds.append( - f"run_clair3.sh --enable_long_indel --chunk_size=10000 --haploid_precise --no_phasing_for_fa --bam_fn='{args.sample}.{p}.trimmed.rg.sorted.bam' --ref_fn='{ref}' --output='{args.sample}_rg_{p}' --threads='{args.threads}' --platform='ont' --model_path='{full_model_path}' --include_all_ctgs --min_coverage='{args.min_depth}'" + f"run_clair3.sh --enable_long_indel --chunk_size=10000 --haploid_precise --no_phasing_for_fa --bam_fn='{args.sample}.{p}.trimmed.rg.sorted.bam' --ref_fn='{ref}' --output='{args.sample}_rg_{p}' --threads='{args.threads}' --platform='ont' --model_path='{full_model_path}' --include_all_ctgs" ) cmds.append( @@ -199,7 +208,7 @@ def run(parser, args): # 9) get the depth of coverage for each readgroup, create a coverage mask and plots, and add failed variants to the coverage mask (artic_mask must be run before bcftools consensus) cmds.append( - f"artic_make_depth_mask --store-rg-depths {ref} {args.sample}.primertrimmed.rg.sorted.bam {args.sample}.coverage_mask.txt" + f"artic_make_depth_mask --depth {args.min_depth} --store-rg-depths {ref} {args.sample}.primertrimmed.rg.sorted.bam {args.sample}.coverage_mask.txt" ) cmds.append( diff --git a/artic/pipeline.py b/artic/pipeline.py index f1f012ea..55098f83 100755 --- a/artic/pipeline.py +++ b/artic/pipeline.py @@ -102,7 +102,7 @@ def init_pipeline_parser(): "--min-depth", type=int, default=20, - help="Minimum depth required to call a variant (default: %(default)d)", + help="Minimum coverage required for a position to be included in the consensus sequence (default: %(default)d)", ) parser_minion.add_argument( "--threads", diff --git a/setup.cfg b/setup.cfg 
index 5022515c..2f2a1c61 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = artic -version = 1.6.0 +version = 1.6.1 author = Nick Loman author_email = n.j.loman@bham.ac.uk maintainer = Sam Wilkinson