From ab353f845d13e0ef2904b58f30ce8b3476dde02e Mon Sep 17 00:00:00 2001 From: Sam W Date: Thu, 23 Jan 2025 12:29:31 +0000 Subject: [PATCH] Temporarily fix high --normalisation issue until maptide refactor (#157) --- artic/align_trim.py | 54 ++++++++++++++++++++++++++++++---------- artic/make_depth_mask.py | 11 ++++++-- artic/minion.py | 13 ++++++++-- artic/pipeline.py | 2 +- setup.cfg | 2 +- 5 files changed, 63 insertions(+), 19 deletions(-) diff --git a/artic/align_trim.py b/artic/align_trim.py index 882b68e1..4a77c1aa 100755 --- a/artic/align_trim.py +++ b/artic/align_trim.py @@ -39,14 +39,18 @@ def find_primer(bed, pos, direction, chrom, threshold=35): primer_distances = [ (abs(p["start"] - pos), p["start"] - pos, p) for p in bed - if (p["direction"] == direction) and (pos >= (p["start"] - threshold)) and chrom == p["chrom"] + if (p["direction"] == direction) + and (pos >= (p["start"] - threshold)) + and chrom == p["chrom"] ] else: primer_distances = [ (abs(p["end"] - pos), p["end"] - pos, p) for p in bed - if (p["direction"] == direction) and (pos <= (p["end"] + threshold)) and chrom == p["chrom"] + if (p["direction"] == direction) + and (pos <= (p["end"] + threshold)) + and chrom == p["chrom"] ] if not primer_distances: @@ -206,9 +210,21 @@ def handle_segment( # locate the nearest primers to this alignment segment # p1 = find_primer(bed, segment.reference_start, "+", segment.reference_name, args.primer_match_threshold) - p1 = find_primer(bed=bed, pos=segment.reference_start, direction="+", chrom=segment.reference_name, threshold=args.primer_match_threshold) + p1 = find_primer( + bed=bed, + pos=segment.reference_start, + direction="+", + chrom=segment.reference_name, + threshold=args.primer_match_threshold, + ) # p2 = find_primer(bed, segment.reference_end, "-", segment.reference_name, args.primer_match_threshold) - p2 = find_primer(bed=bed, pos=segment.reference_end, direction="-", chrom=segment.reference_name, threshold=args.primer_match_threshold) + p2 = 
find_primer( + bed=bed, + pos=segment.reference_end, + direction="-", + chrom=segment.reference_name, + threshold=args.primer_match_threshold, + ) if not p1 or not p2: if args.verbose: @@ -361,7 +377,9 @@ def generate_amplicons(bed: list): for chrom, amplicons_dict in amplicons.items(): for amplicon in amplicons_dict: if not all([x in amplicons_dict[amplicon] for x in ["p_start", "p_end"]]): - raise ValueError(f"Primer scheme for amplicon {amplicon} for reference {chrom} is incomplete") + raise ValueError( + f"Primer scheme for amplicon {amplicon} for reference {chrom} is incomplete" + ) # Check if primer runs accross reference start / end -> circular virus amplicons_dict[amplicon]["circular"] = ( @@ -408,7 +426,9 @@ def normalise(trimmed_segments: dict, normalise: int, bed: list, verbose: bool = (amplicons[chrom][amplicon]["length"],), normalise, dtype=int ) - amplicon_depth = np.zeros((amplicons[chrom][amplicon]["length"],), dtype=int) + amplicon_depth = np.zeros( + (amplicons[chrom][amplicon]["length"],), dtype=int + ) if not segments: if verbose: @@ -425,12 +445,16 @@ def normalise(trimmed_segments: dict, normalise: int, bed: list, verbose: bool = for segment in segments: test_depths = np.copy(amplicon_depth) - relative_start = segment.reference_start - amplicons[chrom][amplicon]["p_start"] + relative_start = ( + segment.reference_start - amplicons[chrom][amplicon]["p_start"] + ) if relative_start < 0: relative_start = 0 - relative_end = segment.reference_end - amplicons[chrom][amplicon]["p_start"] + relative_end = ( + segment.reference_end - amplicons[chrom][amplicon]["p_start"] + ) test_depths[relative_start:relative_end] += 1 @@ -442,7 +466,7 @@ def normalise(trimmed_segments: dict, normalise: int, bed: list, verbose: bool = output_segments.append(segment) mean_depths[(chrom, amplicon)] = np.mean(amplicon_depth) - + return output_segments, mean_depths @@ -519,7 +543,9 @@ def go(args): trimmed_segments[trimmed_segment.reference_name].setdefault(amplicon, []) 
if trimmed_segment: - trimmed_segments[trimmed_segment.reference_name][amplicon].append(trimmed_segment) + trimmed_segments[trimmed_segment.reference_name][amplicon].append( + trimmed_segment + ) # normalise if requested if args.normalise: @@ -536,10 +562,12 @@ def go(args): for output_segment in output_segments: outfile.write(output_segment) + else: - for amplicon, segments in trimmed_segments.items(): - for segment in segments: - outfile.write(segment) + for chrom, amplicon_dict in trimmed_segments.items(): + for amplicon, segments in amplicon_dict.items(): + for segment in segments: + outfile.write(segment) # close up the file handles infile.close() diff --git a/artic/make_depth_mask.py b/artic/make_depth_mask.py index 579eca18..6ec6788b 100755 --- a/artic/make_depth_mask.py +++ b/artic/make_depth_mask.py @@ -65,9 +65,13 @@ def collect_depths(bamfile, refName, minDepth, ignoreDeletions, warnRGcov): # generate the pileup for pileupcolumn in bamFile.pileup( - refName, start=0, stop=bamFile.get_reference_length(refName), max_depth=10000, truncate=False, min_base_quality=0 + refName, + start=0, + stop=bamFile.get_reference_length(refName), + max_depth=100000000, + truncate=False, + min_base_quality=0, ): - # process the pileup column for pileupread in pileupcolumn.pileups: @@ -78,13 +82,16 @@ def collect_depths(bamfile, refName, minDepth, ignoreDeletions, warnRGcov): # process the pileup read if pileupread.is_refskip: continue + if pileupread.is_del: if not ignoreDeletions: depths[pileupcolumn.pos] += 1 rgDepths[rg][pileupcolumn.pos] += 1 + elif not pileupread.is_del: depths[pileupcolumn.pos] += 1 rgDepths[rg][pileupcolumn.pos] += 1 + else: raise Exception("unhandled pileup read encountered") diff --git a/artic/minion.py b/artic/minion.py index 33b19728..c5cba46d 100755 --- a/artic/minion.py +++ b/artic/minion.py @@ -44,6 +44,15 @@ def run(parser, args): ) raise SystemExit(1) + if args.normalise > 10000: + print( + colored.red( + "--normalise appears to be an 
arbitrarily high value, if you wish to not normalise your BAM files set '--normalise 0' to disable normalisation entirely" + ), + file=sys.stderr, + ) + raise SystemExit(1) + if args.model_dir: model_path = args.model_dir else: @@ -169,7 +178,7 @@ def run(parser, args): cmds.append(f"samtools index {args.sample}.{p}.trimmed.rg.sorted.bam") cmds.append( - f"run_clair3.sh --enable_long_indel --chunk_size=10000 --haploid_precise --no_phasing_for_fa --bam_fn='{args.sample}.{p}.trimmed.rg.sorted.bam' --ref_fn='{ref}' --output='{args.sample}_rg_{p}' --threads='{args.threads}' --platform='ont' --model_path='{full_model_path}' --include_all_ctgs --min_coverage='{args.min_depth}'" + f"run_clair3.sh --enable_long_indel --chunk_size=10000 --haploid_precise --no_phasing_for_fa --bam_fn='{args.sample}.{p}.trimmed.rg.sorted.bam' --ref_fn='{ref}' --output='{args.sample}_rg_{p}' --threads='{args.threads}' --platform='ont' --model_path='{full_model_path}' --include_all_ctgs" ) cmds.append( @@ -199,7 +208,7 @@ def run(parser, args): # 9) get the depth of coverage for each readgroup, create a coverage mask and plots, and add failed variants to the coverage mask (artic_mask must be run before bcftools consensus) cmds.append( - f"artic_make_depth_mask --store-rg-depths {ref} {args.sample}.primertrimmed.rg.sorted.bam {args.sample}.coverage_mask.txt" + f"artic_make_depth_mask --depth {args.min_depth} --store-rg-depths {ref} {args.sample}.primertrimmed.rg.sorted.bam {args.sample}.coverage_mask.txt" ) cmds.append( diff --git a/artic/pipeline.py b/artic/pipeline.py index f1f012ea..55098f83 100755 --- a/artic/pipeline.py +++ b/artic/pipeline.py @@ -102,7 +102,7 @@ def init_pipeline_parser(): "--min-depth", type=int, default=20, - help="Minimum depth required to call a variant (default: %(default)d)", + help="Minimum coverage required for a position to be included in the consensus sequence (default: %(default)d)", ) parser_minion.add_argument( "--threads", diff --git a/setup.cfg b/setup.cfg 
index 5022515c..2f2a1c61 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = artic -version = 1.6.0 +version = 1.6.1 author = Nick Loman author_email = n.j.loman@bham.ac.uk maintainer = Sam Wilkinson