16
16
consumesQuery = [True , True , False , False , True , False , False , True ]
17
17
18
18
19
- def find_primer (bed , pos , direction , threshold = 20 ):
19
+ def find_primer (bed , pos , direction , chrom , threshold = 35 ):
20
20
"""Given a reference position and a direction of travel, walk out and find the nearest primer site.
21
21
22
22
Parameters
@@ -39,14 +39,14 @@ def find_primer(bed, pos, direction, threshold=20):
39
39
primer_distances = [
40
40
(abs (p ["start" ] - pos ), p ["start" ] - pos , p )
41
41
for p in bed
42
- if (p ["direction" ] == direction ) and (pos >= (p ["start" ] - threshold ))
42
+ if (p ["direction" ] == direction ) and (pos >= (p ["start" ] - threshold )) and chrom == p [ "chrom" ]
43
43
]
44
44
45
45
else :
46
46
primer_distances = [
47
47
(abs (p ["end" ] - pos ), p ["end" ] - pos , p )
48
48
for p in bed
49
- if (p ["direction" ] == direction ) and (pos <= (p ["end" ] + threshold ))
49
+ if (p ["direction" ] == direction ) and (pos <= (p ["end" ] + threshold )) and chrom == p [ "chrom" ]
50
50
]
51
51
52
52
if not primer_distances :
@@ -205,8 +205,10 @@ def handle_segment(
205
205
return False
206
206
207
207
# locate the nearest primers to this alignment segment
208
- p1 = find_primer (bed , segment .reference_start , "+" , args .primer_match_threshold )
209
- p2 = find_primer (bed , segment .reference_end , "-" , args .primer_match_threshold )
208
+ # p1 = find_primer(bed, segment.reference_start, "+", segment.reference_name, args.primer_match_threshold)
209
+ p1 = find_primer (bed = bed , pos = segment .reference_start , direction = "+" , chrom = segment .reference_name , threshold = args .primer_match_threshold )
210
+ # p2 = find_primer(bed, segment.reference_end, "-", segment.reference_name, args.primer_match_threshold)
211
+ p2 = find_primer (bed = bed , pos = segment .reference_end , direction = "-" , chrom = segment .reference_name , threshold = args .primer_match_threshold )
210
212
211
213
if not p1 or not p2 :
212
214
if args .verbose :
@@ -235,6 +237,7 @@ def handle_segment(
235
237
if args .report :
236
238
# update the report with this alignment segment + primer details
237
239
report = {
240
+ "chrom" : segment .reference_name ,
238
241
"QueryName" : segment .query_name ,
239
242
"ReferenceStart" : segment .reference_start ,
240
243
"ReferenceEnd" : segment .reference_end ,
@@ -342,32 +345,33 @@ def generate_amplicons(bed: list):
342
345
343
346
amplicon = primer ["Primer_ID" ].split ("_" )[1 ]
344
347
345
- amplicons .setdefault (amplicon , {})
348
+ amplicons .setdefault (primer ["chrom" ], {})
349
+ amplicons [primer ["chrom" ]].setdefault (amplicon , {})
346
350
347
351
if primer ["direction" ] == "+" :
348
- amplicons [amplicon ]["p_start" ] = primer ["start" ]
349
- amplicons [amplicon ]["start" ] = primer ["end" ] + 1
352
+ amplicons [primer [ "chrom" ]][ amplicon ]["p_start" ] = primer ["start" ]
353
+ amplicons [primer [ "chrom" ]][ amplicon ]["start" ] = primer ["end" ] + 1
350
354
351
355
elif primer ["direction" ] == "-" :
352
- amplicons [amplicon ]["p_end" ] = primer ["end" ]
353
- amplicons [amplicon ]["end" ] = primer ["start" ] - 1
356
+ amplicons [primer [ "chrom" ]][ amplicon ]["p_end" ] = primer ["end" ]
357
+ amplicons [primer [ "chrom" ]][ amplicon ]["end" ] = primer ["start" ] - 1
354
358
355
359
else :
356
360
raise ValueError ("Primer direction not recognised" )
361
+ for chrom , amplicons_dict in amplicons .items ():
362
+ for amplicon in amplicons_dict :
363
+ if not all ([x in amplicons_dict [amplicon ] for x in ["p_start" , "p_end" ]]):
364
+ raise ValueError (f"Primer scheme for amplicon { amplicon } for reference { chrom } is incomplete" )
365
+
366
+ # Check if primer runs across reference start / end -> circular virus
367
+ amplicons_dict [amplicon ]["circular" ] = (
368
+ amplicons_dict [amplicon ]["p_start" ] > amplicons_dict [amplicon ]["p_end" ]
369
+ )
357
370
358
- for amplicon in amplicons :
359
- if not all ([x in amplicons [amplicon ] for x in ["p_start" , "p_end" ]]):
360
- raise ValueError (f"Primer scheme for amplicon { amplicon } is incomplete" )
361
-
362
- # Check if primer runs accross reference start / end -> circular virus
363
- amplicons [amplicon ]["circular" ] = (
364
- amplicons [amplicon ]["p_start" ] > amplicons [amplicon ]["p_end" ]
365
- )
366
-
367
- # Calculate amplicon length considering that the "length" may be negative if the genome is circular
368
- amplicons [amplicon ]["length" ] = abs (
369
- amplicons [amplicon ]["p_end" ] - amplicons [amplicon ]["p_start" ]
370
- )
371
+ # Calculate amplicon length considering that the "length" may be negative if the genome is circular
372
+ amplicons_dict [amplicon ]["length" ] = abs (
373
+ amplicons_dict [amplicon ]["p_end" ] - amplicons_dict [amplicon ]["p_start" ]
374
+ )
371
375
372
376
return amplicons
373
377
@@ -392,51 +396,53 @@ def normalise(trimmed_segments: dict, normalise: int, bed: list, verbose: bool =
392
396
393
397
output_segments = []
394
398
395
- mean_depths = {x : 0 for x in amplicons }
399
+ # mean_depths = {x: {} for x in amplicons}
400
+ mean_depths = {}
396
401
397
- for amplicon , segments in trimmed_segments .items ():
398
- if amplicon not in amplicons :
399
- raise ValueError (f"Segment { amplicon } not found in primer scheme file" )
400
-
401
- desired_depth = np .full_like (
402
- (amplicons [amplicon ]["length" ],), normalise , dtype = int
403
- )
402
+ for chrom , amplicon_dict in trimmed_segments .items ():
403
+ for amplicon , segments in amplicon_dict .items ():
404
+ if amplicon not in amplicons [chrom ]:
405
+ raise ValueError (f"Segment { amplicon } not found in primer scheme file" )
404
406
405
- amplicon_depth = np .zeros ((amplicons [amplicon ]["length" ],), dtype = int )
407
+ desired_depth = np .full_like (
408
+ (amplicons [chrom ][amplicon ]["length" ],), normalise , dtype = int
409
+ )
406
410
407
- if not segments :
408
- if verbose :
409
- print (
410
- f"No segments assigned to amplicon { amplicon } , skipping" ,
411
- file = sys .stderr ,
412
- )
413
- continue
411
+ amplicon_depth = np .zeros ((amplicons [chrom ][amplicon ]["length" ],), dtype = int )
414
412
415
- random .shuffle (segments )
413
+ if not segments :
414
+ if verbose :
415
+ print (
416
+ f"No segments assigned to amplicon { amplicon } , skipping" ,
417
+ file = sys .stderr ,
418
+ )
419
+ continue
416
420
417
- distance = np . mean ( np . abs ( amplicon_depth - desired_depth ) )
421
+ random . shuffle ( segments )
418
422
419
- for segment in segments :
420
- test_depths = np .copy (amplicon_depth )
423
+ distance = np .mean (np .abs (amplicon_depth - desired_depth ))
421
424
422
- relative_start = segment .reference_start - amplicons [amplicon ]["p_start" ]
425
+ for segment in segments :
426
+ test_depths = np .copy (amplicon_depth )
423
427
424
- if relative_start < 0 :
425
- relative_start = 0
428
+ relative_start = segment .reference_start - amplicons [chrom ][amplicon ]["p_start" ]
426
429
427
- relative_end = segment .reference_end - amplicons [amplicon ]["p_start" ]
430
+ if relative_start < 0 :
431
+ relative_start = 0
428
432
429
- test_depths [ relative_start : relative_end ] += 1
433
+ relative_end = segment . reference_end - amplicons [ chrom ][ amplicon ][ "p_start" ]
430
434
431
- test_distance = np . mean ( np . abs ( test_depths - desired_depth ))
435
+ test_depths [ relative_start : relative_end ] += 1
432
436
433
- if test_distance < distance :
434
- amplicon_depth = test_depths
435
- distance = test_distance
436
- output_segments .append (segment )
437
+ test_distance = np .mean (np .abs (test_depths - desired_depth ))
437
438
438
- mean_depths [amplicon ] = np .mean (amplicon_depth )
439
+ if test_distance < distance :
440
+ amplicon_depth = test_depths
441
+ distance = test_distance
442
+ output_segments .append (segment )
439
443
444
+ mean_depths [(chrom , amplicon )] = np .mean (amplicon_depth )
445
+
440
446
return output_segments , mean_depths
441
447
442
448
@@ -449,6 +455,7 @@ def go(args):
449
455
if args .report :
450
456
reportfh = open (args .report , "w" )
451
457
report_headers = [
458
+ "chrom" ,
452
459
"QueryName" ,
453
460
"ReferenceStart" ,
454
461
"ReferenceEnd" ,
@@ -469,6 +476,7 @@ def go(args):
469
476
# open the primer scheme and get the pools
470
477
bed = read_bed_file (args .bedfile )
471
478
pools = set ([row ["PoolName" ] for row in bed ])
479
+ chroms = set ([row ["chrom" ] for row in bed ])
472
480
pools .add ("unmatched" )
473
481
474
482
# open the input SAM file and process read groups
@@ -484,7 +492,7 @@ def go(args):
484
492
# prepare the alignment outfile
485
493
outfile = pysam .AlignmentFile ("-" , "wh" , header = bam_header )
486
494
487
- trimmed_segments = {}
495
+ trimmed_segments = {x : {} for x in chroms }
488
496
489
497
# iterate over the alignment segments in the input SAM file
490
498
for segment in infile :
@@ -508,10 +516,10 @@ def go(args):
508
516
509
517
# unpack the trimming tuple since segment passed trimming
510
518
amplicon , trimmed_segment = trimming_tuple
511
- trimmed_segments .setdefault (amplicon , [])
519
+ trimmed_segments [ trimmed_segment . reference_name ] .setdefault (amplicon , [])
512
520
513
521
if trimmed_segment :
514
- trimmed_segments [amplicon ].append (trimmed_segment )
522
+ trimmed_segments [trimmed_segment . reference_name ][ amplicon ].append (trimmed_segment )
515
523
516
524
# normalise if requested
517
525
if args .normalise :
@@ -522,9 +530,9 @@ def go(args):
522
530
# write mean amplicon depths to file
523
531
if args .amp_depth_report :
524
532
with open (args .amp_depth_report , "w" ) as amp_depth_report_fh :
525
- amp_depth_report_fh .write ("amplicon \t mean_depth\n " )
526
- for amplicon , depth in mean_amp_depths .items ():
527
- amp_depth_report_fh .write (f"{ amplicon } \t { depth } \n " )
533
+ amp_depth_report_fh .write ("chrom \t amplicon \t mean_depth\n " )
534
+ for ( chrom , amplicon ) , depth in mean_amp_depths .items ():
535
+ amp_depth_report_fh .write (f"{ chrom } \t { amplicon } \t { depth } \n " )
528
536
529
537
for output_segment in output_segments :
530
538
outfile .write (output_segment )
@@ -554,7 +562,7 @@ def main():
554
562
parser .add_argument (
555
563
"--primer-match-threshold" ,
556
564
type = int ,
557
- default = 5 ,
565
+ default = 35 ,
558
566
help = "Fuzzy match primer positions within this threshold" ,
559
567
)
560
568
parser .add_argument ("--report" , type = str , help = "Output report to file" )
0 commit comments