Skip to content

Commit

Permalink
feat: remove pysam and bwa warmup hack
Browse files Browse the repository at this point in the history
  • Loading branch information
clintval committed Oct 4, 2024
1 parent c434b1d commit be29391
Showing 1 changed file with 21 additions and 20 deletions.
41 changes: 21 additions & 20 deletions prymer/offtarget/bwa.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,10 @@
from typing import cast

import pysam
from fgpyo import sam
from fgpyo import sequence
from fgpyo.sam import Cigar
from pysam import AlignedSegment
from pysam import AlignmentHeader

from prymer.api import coordmath
from prymer.util.executable_runner import ExecutableRunner
Expand Down Expand Up @@ -201,6 +201,7 @@ class BwaAlnInteractive(ExecutableRunner):
reverse_complement: reverse complement each query sequence before alignment.
include_alt_hits: if True include hits to references with names ending in _alt, otherwise
do not include them.
header: the SAM alignment header.
"""

def __init__(
Expand Down Expand Up @@ -284,20 +285,20 @@ def __init__(

super().__init__(command=command)

# HACK ALERT
# This is a hack. By trial and error, pysam.AlignmentFile() will block reading unless
# there's at least a few records due to htslib wanting to read a few records for format
# auto-detection. Lame. So a hundred queries are sent to the aligner to align enable the
# htslib auto-detection to complete, and for us to be able to read using pysam.
num_warmup: int = 100
for i in range(num_warmup):
query = Query(id=f"ignoreme:{i}", bases="A" * 100)
fastq_str = query.to_fastq(reverse_complement=self.reverse_complement)
self._subprocess.stdin.write(fastq_str)
# Send a sentinel record to be aligned so we know when bwa is done outputting a header.
self._subprocess.stdin.write(Query(id="ignore-me", bases="A").to_fastq())

self.__signal_bwa() # forces the input to be sent to the underlying process.
self._reader = sam.reader(path=self._subprocess.stdout, file_type=sam.SamFileType.SAM)
for _ in range(num_warmup):
next(self._reader)

header = []
for line in self._subprocess.stdout:
if line.startswith("@"):
header.append(line)
if line.startswith("ignore-me"):
break

self.header = AlignmentHeader.from_text("".join(header))


def __signal_bwa(self) -> None:
"""Signals BWA to process the queries"""
Expand Down Expand Up @@ -334,13 +335,17 @@ def map_all(self, queries: list[Query]) -> list[BwaResult]:
for query in queries:
fastq_str = query.to_fastq(reverse_complement=self.reverse_complement)
self._subprocess.stdin.write(fastq_str)
self.__signal_bwa() # forces the input to be sent to the underlying process.

# Force the input to be sent to the underlying process.
self.__signal_bwa()

# Read back the results
results: list[BwaResult] = []
for query in queries:
# get the next alignment and convert to a result
results.append(self._to_result(query=query, rec=next(self._reader)))
line: str = next(self._subprocess.stdout).strip()
alignment = AlignedSegment.fromstring(line, self.header)
results.append(self._to_result(query=query, rec=alignment))

return results

Expand Down Expand Up @@ -412,7 +417,3 @@ def to_hits(self, rec: AlignedSegment) -> list[BwaHit]:
hits = [hit for hit in hits if not hit.refname.endswith("_alt")]

return hits

def close(self) -> None:
self._reader.close()
super().close()

0 comments on commit be29391

Please sign in to comment.