Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/main' into cv_docs_upgrade
Browse files Browse the repository at this point in the history
  • Loading branch information
clintval committed Oct 4, 2024
2 parents d504f30 + a132a52 commit c0e418f
Show file tree
Hide file tree
Showing 16 changed files with 191 additions and 85 deletions.
9 changes: 0 additions & 9 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,6 @@ jobs:
matrix:
PYTHON_VERSION: ["3.11", "3.12"]
steps:
- uses: actions/checkout@v4
- name: Checkout fulcrumgenomics/bwa
uses: actions/checkout@v4
with:
repository: fulcrumgenomics/bwa
ref: interactive_aln
path: bwa
fetch-depth: 0

- name: Set up Python ${{ matrix.PYTHON_VERSION }}
uses: actions/setup-python@v5
with:
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/wheels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,11 @@ jobs:
python-version: ${{ matrix.python }}

- name: Build wheels
run: pip wheel -w wheelhouse .
run: pip wheel --no-deps -w wheelhouse .

- name: Upload wheels
uses: actions/upload-artifact@v4
with:
name: prymer-wheels-${{ matrix.python }}
path: ./wheelhouse/*.whl
path: ./wheelhouse/prymer*.whl
if-no-files-found: error
2 changes: 1 addition & 1 deletion prymer/api/picking.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@
from prymer.api.primer_pair import PrimerPair
from prymer.api.span import Span
from prymer.ntthal import NtThermoAlign
from prymer.offtarget.offtarget_detector import OffTargetDetector
from prymer.offtarget import OffTargetDetector


@dataclass(frozen=True, init=True, slots=True)
Expand Down
2 changes: 2 additions & 0 deletions prymer/offtarget/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from prymer.offtarget.bwa import BwaHit
from prymer.offtarget.bwa import BwaResult
from prymer.offtarget.bwa import Query
from prymer.offtarget.offtarget_detector import OffTargetDetector
from prymer.offtarget.offtarget_detector import OffTargetResult

__all__ = [
Expand All @@ -10,4 +11,5 @@
"BwaHit",
"BwaResult",
"OffTargetResult",
"OffTargetDetector",
]
15 changes: 6 additions & 9 deletions prymer/offtarget/bwa.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
>>> bwa.map_all(queries=[query])
[BwaResult(query=Query(id='NA', bases='AAAAAA'), hit_count=3968, hits=[])]
>>> bwa.close()
True
```
""" # noqa: E501
Expand Down Expand Up @@ -294,15 +295,11 @@ def __init__(

super().__init__(command=command)

# Send a sentinel record to be aligned so we know when bwa is done outputting a header.
self._subprocess.stdin.write(Query(id="ignore", bases="A").to_fastq())
self.__signal_bwa() # forces the input to be sent to the underlying process.

header = []
for line in self._subprocess.stdout:
if line.startswith("@"):
header.append(line)
if line.startswith("ignore"):
if line.startswith("@PG"):
break

self.header = AlignmentHeader.from_text("".join(header))
Expand Down Expand Up @@ -341,13 +338,16 @@ def map_all(self, queries: list[Query]) -> list[BwaResult]:
for query in queries:
fastq_str = query.to_fastq(reverse_complement=self.reverse_complement)
self._subprocess.stdin.write(fastq_str)
self.__signal_bwa() # forces the input to be sent to the underlying process.

# Force the input to be sent to the underlying process.
self.__signal_bwa()

# Read back the results
results: list[BwaResult] = []
for query in queries:
# get the next alignment and convert to a result
line: str = next(self._subprocess.stdout).strip()
assert not line.startswith("@"), f"SAM record must not start with '@'! {line}"
alignment = AlignedSegment.fromstring(line, self.header)
results.append(self._to_result(query=query, rec=alignment))

Expand Down Expand Up @@ -421,6 +421,3 @@ def to_hits(self, rec: AlignedSegment) -> list[BwaHit]:
hits = [hit for hit in hits if not hit.refname.endswith("_alt")]

return hits

def close(self) -> None:
super().close()
135 changes: 113 additions & 22 deletions prymer/offtarget/offtarget_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,11 +103,14 @@ class OffTargetResult:
"""Information obtained by running a single primer pair through the off-target detector.
Attributes:
primer_pair: the primer submitted
passes: True if the primer pair passes all checks, False otherwise
primer_pair: the primer pair submitted
passes: True if neither primer exceeds the specified maximum number of hits, and the primer
pair would generate an acceptable number of amplicons in the reference genome (i.e.
`min_primer_pair_hits <= num_amplicons <= max_primer_pair_hits`). False otherwise.
cached: True if this result is part of a cache, False otherwise. This is useful for testing
spans: the set of mappings of the primer pair to the genome or an empty list if mappings
were not retained
spans: The set of mappings of the primer pair to the genome (i.e., the region spanned by the
inferred amplicon). This list will be empty if the generating [[OffTargetDetector]] was
constructed with `keep_spans=False`.
left_primer_spans: the list of mappings for the left primer, independent of the pair
mappings, or an empty list
right_primer_spans: the list of mappings for the right primer, independent of the pair
Expand All @@ -126,12 +129,25 @@ class OffTargetDetector(AbstractContextManager):
"""A class for detecting off-target mappings of primers and primer pairs that uses a custom
version of "bwa aln" named "bwa-aln-interactive".
The off-target detection is faster and more sensitive than traditional isPCR and in addition can
correctly detect primers that are repetitive and contain many thousands or millions of mappings
to the genome.
`OffTargetDetector` uses a [custom, interactive
version](https://github.com/fulcrumgenomics/bwa-aln-interactive/) of `bwa aln` to perform
off-target detection. This approach is faster and more sensitive than traditional isPCR and in
addition can correctly detect primers that are repetitive and contain many thousands or millions
of mappings to the genome.
Note that while this class invokes BWA with multiple threads, it is not itself thread-safe.
Only one thread at a time should invoke methods on this class without external synchronization.
Off-target detection may be performed for individual primers and/or primer pairs.
To detect off-target hits for individual primers, use the `OffTargetDetector.filter()` method,
which will remove any primers that have more than the specified number of maximum hits against
the reference.
To detect off-target amplification of primer pairs, use the `OffTargetDetector.check_one()` or
`OffTargetDetector.check_all()` methods. These methods screen the individual primers in each
pair for off-target hits, and verify that the number of possible amplicons inferred from the
primers' alignments does not exceed the specified maximum number of primer pair hits.
"""

def __init__(
Expand All @@ -143,20 +159,40 @@ def __init__(
max_mismatches_in_three_prime_region: int,
max_mismatches: int,
max_amplicon_size: int,
min_primer_pair_hits: int = 1,
cache_results: bool = True,
threads: Optional[int] = None,
keep_spans: bool = True,
keep_primer_spans: bool = True,
executable: str | Path = "bwa-aln-interactive",
) -> None:
"""
Initialize an [[OffTargetDetector]].
This class accepts a large number of parameters to control the behavior of the off-target
detection. The parameters are used in the various aspects of off-target detection as
follows:
1. Alignment (off-target hit detection): `ref`, `executable`, `threads`,
`three_prime_region_length`, `max_mismatches_in_three_prime_region`, `max_mismatches`,
and `max_primer_hits`.
2. Filtering of individual primers: `max_primer_hits`.
3. Checking of primer pairs: `max_primer_hits`, `min_primer_pair_hits`,
`max_primer_pair_hits`, and `max_amplicon_size`.
Args:
ref: the reference genome fasta file (must be indexed with BWA)
max_primer_hits: the maximum number of hits an individual primer can have in the genome
before it is considered an invalid primer, and all primer pairs containing the
primer failed.
max_primer_pair_hits: the maximum number of amplicons a primer pair can make and be
considered passing
min_primer_pair_hits: The minimum number of amplicons a primer pair can make and be
considered passing. (In most cases, this is the number of amplicons a primer pair is
expected to generate.) The default is 1, which is appropriate when the primer pair
is being evaluated for off-target hits against the same reference genome from which
the primers were generated. If the primer pair was generated from a different
reference sequence, it may be appropriate to set this value to 0.
three_prime_region_length: the number of bases at the 3' end of the primer in which the
parameter max_mismatches_in_three_prime_region is evaluated
max_mismatches_in_three_prime_region: the maximum number of mismatches that are
Expand All @@ -167,10 +203,10 @@ def __init__(
max_amplicon_size: the maximum amplicon size to consider amplifiable
cache_results: if True, cache results for faster re-querying
threads: the number of threads to use when invoking bwa
keep_spans: if True, [[OffTargetResult]] objects will have amplicon spans
keep_spans: if True, [[OffTargetResult]] objects will be reported with amplicon spans
populated, otherwise not
keep_primer_spans: if True, [[OffTargetResult]] objects will have left and right
primer spans
keep_primer_spans: if True, [[OffTargetResult]] objects will be reported with left and
right primer spans
executable: string or Path representation of the `bwa` executable path
"""
self._primer_cache: dict[str, BwaResult] = {}
Expand All @@ -187,31 +223,68 @@ def __init__(
)
self._max_primer_hits = max_primer_hits
self._max_primer_pair_hits: int = max_primer_pair_hits
self._min_primer_pair_hits: int = min_primer_pair_hits
self._max_amplicon_size: int = max_amplicon_size
self._cache_results: bool = cache_results
self._keep_spans: bool = keep_spans
self._keep_primer_spans: bool = keep_primer_spans

def filter(self, primers: list[PrimerType]) -> list[PrimerType]:
"""Filters an iterable of Primers to return only those that have less than
`max_primer_hits` mappings to the genome."""
"""
Remove primers that have more than `max_primer_hits` mappings to the genome.
This method maps each primer to the specified reference with `bwa aln` to search for
off-target hits. Note that when the reference includes the sequence from which the primers
were designed, the on-target hit will be included in the hit count. `max_primer_hits` should
be set to at least 1 in this case.
Args:
primers: A list of primers to filter.
Returns:
The input primers, filtered to remove any primers with more than `max_primer_hits`
mappings to the genome.
"""
results: dict[str, BwaResult] = self.mappings_of(primers)
return [
primer for primer in primers if results[primer.bases].hit_count <= self._max_primer_hits
]

def check_one(self, primer_pair: PrimerPair) -> OffTargetResult:
"""Checks a PrimerPair for off-target sites in the genome at which it might amplify."""
"""
Check an individual primer pair for off-target amplification.
See `OffTargetDetector.check_all()` for details.
Args:
primer_pair: The primer pair to check.
Returns:
The results of the off-target detection.
See `OffTargetResult` for details regarding the attributes included in the result.
"""
result: dict[PrimerPair, OffTargetResult] = self.check_all([primer_pair])
return result[primer_pair]

def check_all(self, primer_pairs: list[PrimerPair]) -> dict[PrimerPair, OffTargetResult]:
"""Checks a collection of primer pairs for off-target sites, returning a dictionary of
`PrimerPair`s to `OffTargetResult`.
"""
Check a collection of primer pairs for off-target amplification.
This method maps each primer to the specified reference with `bwa aln` to search for
off-target hits. Possible amplicons are identified from the pairwise combinations of these
alignments, up to the specified `max_amplicon_size`.
Primer pairs are marked as passing if both of the following are true:
1. Each primer has no more than `max_primer_hits` alignments to the genome.
2. The size of the set of possible amplicons does not exceed `max_primer_pair_hits`, and is
at least `min_primer_pair_hits`.
Args:
primer_pairs: The primer pairs to check.
Returns:
a Map containing all given primer pairs as keys with the values being the result of
off-target checking.
A dictionary mapping each checked primer pair to its off-target detection results.
See `OffTargetResult` for details regarding the attributes included in each result.
"""

primer_pair_results: dict[PrimerPair, OffTargetResult] = {}
Expand Down Expand Up @@ -271,7 +344,7 @@ def _build_off_target_result(
amplicons = self._to_amplicons(p1.hits, p2.hits, self._max_amplicon_size)
result = OffTargetResult(
primer_pair=primer_pair,
passes=0 < len(amplicons) <= self._max_primer_pair_hits,
passes=self._min_primer_pair_hits <= len(amplicons) <= self._max_primer_pair_hits,
spans=amplicons if self._keep_spans else [],
left_primer_spans=(
[self._hit_to_span(h) for h in p1.hits] if self._keep_primer_spans else []
Expand All @@ -287,10 +360,27 @@ def _build_off_target_result(
return result

def mappings_of(self, primers: list[PrimerType]) -> dict[str, BwaResult]:
"""Function to take a set of primers and map any that are not cached, and return mappings
for all of them. Note: the genomics sequence of the returned mappings are on the opposite
strand of that of the strand of the primer. I.e. we map the complementary bases (reversed)
to that of the primer."""
"""
Map primers to the reference genome using `bwa aln`.
Alignment results may be optionally cached for efficiency. Set `cache_results` to `True`
when instantiating the [[OffTargetDetector]] to enable caching.
Any primers without cached results, or all primers when `cache_results` is `False`, will be
mapped to the reference genome using `bwa aln` and the specified alignment parameters.
**Note**: The reverse complement of each primer sequence is used for mapping. The `query`
sequence in each `BwaResult` will match the input primer sequence, as will the sequences
used as keys in the output dictionary. However, the coordinates reported in each `BwaHit`
associated with a result will correspond to the complementary sequence.
Args:
primers: A list of primers to map.
Returns:
A dictionary mapping each primer's sequence to its alignment results.
See `BwaResult` for details regarding the attributes included in each result.
"""

primers_to_map: list[PrimerType]
if not self._cache_results:
Expand Down Expand Up @@ -353,4 +443,5 @@ def __exit__(
traceback: Optional[TracebackType],
) -> None:
"""Gracefully terminates any running subprocesses."""
super().__exit__(exc_type, exc_value, traceback)
self.close()
4 changes: 4 additions & 0 deletions prymer/primer3/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@
from prymer.primer3.primer3_failure_reason import Primer3FailureReason
from prymer.primer3.primer3_input import Primer3Input
from prymer.primer3.primer3_input_tag import Primer3InputTag
from prymer.primer3.primer3_parameters import Primer3Parameters
from prymer.primer3.primer3_task import DesignLeftPrimersTask
from prymer.primer3.primer3_task import DesignPrimerPairsTask
from prymer.primer3.primer3_task import DesignRightPrimersTask
from prymer.primer3.primer3_weights import Primer3Weights

__all__ = [
"Primer3",
Expand All @@ -18,4 +20,6 @@
"DesignLeftPrimersTask",
"DesignPrimerPairsTask",
"DesignRightPrimersTask",
"Primer3Parameters",
"Primer3Weights",
]
2 changes: 1 addition & 1 deletion prymer/primer3/primer3.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
parameters and target region.
```python
>>> from prymer.primer3.primer3_parameters import Primer3Parameters
>>> from prymer.primer3 import Primer3Parameters
>>> from prymer.api import MinOptMax
>>> target = Span(refname="chr1", start=201, end=250, strand=Strand.POSITIVE)
>>> params = Primer3Parameters( \
Expand Down
4 changes: 3 additions & 1 deletion prymer/util/executable_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,14 @@
import os
import shutil
import subprocess
from contextlib import AbstractContextManager
from pathlib import Path
from types import TracebackType
from typing import Optional
from typing import Self


class ExecutableRunner:
class ExecutableRunner(AbstractContextManager):
"""
Base class for interaction with subprocess for all command-line tools. The base class supports
use of the context management protocol and performs basic validation of executable paths.
Expand Down Expand Up @@ -67,6 +68,7 @@ def __exit__(
traceback: Optional[TracebackType],
) -> None:
"""Gracefully terminates any running subprocesses."""
super().__exit__(exc_type, exc_value, traceback)
self.close()

@classmethod
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "prymer"
version = "2.0.1-dev"
version = "2.2.0"
description = "Python primer design library"
readme = "README.md"
authors = [
Expand Down
Loading

0 comments on commit c0e418f

Please sign in to comment.