Skip to content

Commit

Permalink
fix: Pad target region by max_amplicon_length (#23)
Browse files Browse the repository at this point in the history
Closes #16 
Closes #17 

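This PR fixes the padding of the target region so that it is consistent with the
original `fgprimer` implementation: the target is now padded on each side by
`max_amplicon_length` minus the target length, instead of being padded to a total
length of `max_amplicon_length`. (My bad 🙃)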


https://github.com/fulcrumgenomics/fgprimer/blob/6cf2542e927ced37dd0dce4c335de8dff07789c7/src/main/scala/com/fulcrumgenomics/primerdesign/primer3/Primer3.scala#L89-L93

Other changes
- Validate that the max amplicon length leaves sufficient space (at least the
minimum primer length) on each side of the target region for primers to be
designed; a `ValueError` is raised otherwise
- Renamed `region` to `design_region`, to better reflect the
target/design region distinction
- Renamed `_pad_target_region` to `_create_design_region`, for the same
reason, and clarified its documentation
- Added `min_primer_length` property to `Primer3Parameters`
(`max_primer_length` already existed)
  • Loading branch information
msto authored Sep 19, 2024
1 parent 45c3b42 commit dd844f5
Show file tree
Hide file tree
Showing 3 changed files with 93 additions and 31 deletions.
74 changes: 53 additions & 21 deletions prymer/primer3/primer3.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,8 @@
```python
>>> for failure in left_result.failures: \
print(failure)
Primer3Failure(reason=<Primer3FailureReason.HIGH_TM: 'high tm'>, count=171)
Primer3Failure(reason=<Primer3FailureReason.GC_CONTENT: 'GC content failed'>, count=26)
Primer3Failure(reason=<Primer3FailureReason.HIGH_TM: 'high tm'>, count=406)
Primer3Failure(reason=<Primer3FailureReason.GC_CONTENT: 'GC content failed'>, count=91)
```
Expand Down Expand Up @@ -372,12 +372,13 @@ def design_primers(self, design_input: Primer3Input) -> Primer3Result: # noqa:
f"terminated, return code {self._subprocess.returncode}"
)

region: Span = self._pad_target_region(
target=design_input.target,
design_region: Span = self._create_design_region(
target_region=design_input.target,
max_amplicon_length=design_input.params.max_amplicon_length,
min_primer_length=design_input.params.min_primer_length,
)

soft_masked, hard_masked = self.get_design_sequences(region)
soft_masked, hard_masked = self.get_design_sequences(design_region)
global_primer3_params = {
Primer3InputTag.PRIMER_FIRST_BASE_INDEX: 1,
Primer3InputTag.PRIMER_EXPLAIN_FLAG: 1,
Expand All @@ -386,7 +387,7 @@ def design_primers(self, design_input: Primer3Input) -> Primer3Result: # noqa:

assembled_primer3_tags = {
**global_primer3_params,
**design_input.to_input_tags(design_region=region),
**design_input.to_input_tags(design_region=design_region),
}

# Submit inputs to primer3
Expand Down Expand Up @@ -444,7 +445,7 @@ def primer3_error(message: str) -> None:
all_pair_results: list[PrimerPair] = Primer3._build_primer_pairs(
design_input=design_input,
design_results=primer3_results,
design_region=region,
design_region=design_region,
unmasked_design_seq=soft_masked,
)
return Primer3._assemble_primer_pairs(
Expand All @@ -457,7 +458,7 @@ def primer3_error(message: str) -> None:
all_single_results = Primer3._build_primers(
design_input=design_input,
design_results=primer3_results,
design_region=region,
design_region=design_region,
design_task=design_input.task,
unmasked_design_seq=soft_masked,
)
Expand Down Expand Up @@ -710,21 +711,52 @@ def _build_failures(
by_fail_count[Primer3FailureReason.LONG_DINUC] = num_dinuc_failures
return [Primer3Failure(reason, count) for reason, count in by_fail_count.most_common()]

def _pad_target_region(self, target: Span, max_amplicon_length: int) -> Span:
def _create_design_region(
self,
target_region: Span,
max_amplicon_length: int,
min_primer_length: int,
) -> Span:
"""
If the target region is smaller than the max amplicon length, pad to fit.
Construct a design region surrounding the target region.
The target region is padded on both sides by the maximum amplicon length, minus the length
of the target region itself.
If the target region cannot be padded by at least the minimum primer length on both sides,
a `ValueError` is raised.
Raises:
ValueError: If fewer than `min_primer_length` bases flank the target region on either side
within the design region, e.g. because the target is too large for the max amplicon length
or because the padding was clipped at a contig boundary.
When the max amplicon length is odd, the left side of the target region will be padded with
one more base than the right side.
"""
contig_length: int = self._dict[target.refname].length
padding_right: int = max(0, int((max_amplicon_length - target.length) / 2))
padding_left: int = max(0, max_amplicon_length - target.length - padding_right)

region: Span = replace(
target,
start=max(1, target.start - padding_left),
end=min(target.end + padding_right, contig_length),
# Pad the target region on both sides by the maximum amplicon length (minus the length of
# the target). This ensures that the design region covers the complete window of potentially
# valid primer pairs.
padding: int = max_amplicon_length - target_region.length

# Apply the padding, ensuring that we don't run out-of-bounds on the target contig.
contig_length: int = self._dict[target_region.refname].length
design_start: int = max(1, target_region.start - padding)
design_end: int = min(target_region.end + padding, contig_length)

# Validate that our design window includes sufficient space for a primer to be designed on
# each side of the target region.
# NOTE(review): this check passes as soon as each side has `min_primer_length` bases, i.e. when
# the max amplicon length exceeds the target length by `min_primer_length` (away from contig
# ends), whereas the message below asks for twice that — confirm which is intended.
left_design_window: int = target_region.start - design_start
right_design_window: int = design_end - target_region.end
if left_design_window < min_primer_length or right_design_window < min_primer_length:
raise ValueError(
f"Target region {target_region} exceeds the maximum size compatible with a "
f"maximum amplicon length of {max_amplicon_length} and a minimum primer length of "
f"{min_primer_length}. The maximum amplicon length should exceed the length of "
"the target region by at least twice the minimum primer length."
)

# Return the validated design region.
design_region: Span = replace(
target_region,
start=design_start,
end=design_end,
)

return region
return design_region
5 changes: 5 additions & 0 deletions prymer/primer3/primer3_parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,3 +140,8 @@ def max_amplicon_length(self) -> int:
def max_primer_length(self) -> int:
    """Return the maximum primer length, read from `primer_sizes.max` and converted to `int`."""
    longest = self.primer_sizes.max
    return int(longest)

@property
def min_primer_length(self) -> int:
    """Return the minimum primer length, read from `primer_sizes.min` and converted to `int`."""
    shortest = self.primer_sizes.min
    return int(shortest)
45 changes: 35 additions & 10 deletions tests/primer3/test_primer3.py
Original file line number Diff line number Diff line change
Expand Up @@ -558,23 +558,48 @@ def test_primer3_result_as_primer_pair_result_exception(


@pytest.mark.parametrize("max_amplicon_length", [100, 101])
def test_pad_target_region(max_amplicon_length: int, genome_ref: Path) -> None:
def test_create_design_region(max_amplicon_length: int, genome_ref: Path) -> None:
"""The design region should span the target padded by the max amplicon length minus its length."""
target = Span(refname="chr1", start=201, end=250, strand=Strand.POSITIVE)
target_region = Span(refname="chr1", start=201, end=250, strand=Strand.POSITIVE)

with Primer3(genome_fasta=genome_ref) as designer:
padded_region: Span = designer._pad_target_region(
target=target, max_amplicon_length=max_amplicon_length
design_region: Span = designer._create_design_region(
target_region=target_region,
max_amplicon_length=max_amplicon_length,
min_primer_length=10,
)

assert padded_region.length == max_amplicon_length
# Expected length = target length + 2 * (max_amplicon_length - target length) of padding.
# Presumably the padding is not clipped at a contig end for this fixture — TODO confirm.
assert design_region.length == 2 * max_amplicon_length - target_region.length


def test_pad_target_region_doesnt_pad(genome_ref: Path) -> None:
"""If the target region is larger than the max amplicon length, no padding should occur."""
target = Span(refname="chr1", start=201, end=250, strand=Strand.POSITIVE)
def test_create_design_region_raises_when_target_region_exceeds_max_amplicon_length(
genome_ref: Path,
) -> None:
"""
`_create_design_region()` should raise a ValueError when the target region is larger than the
max amplicon length.
"""
target_region = Span(refname="chr1", start=201, end=250, strand=Strand.POSITIVE)

with Primer3(genome_fasta=genome_ref) as designer:
padded_region: Span = designer._pad_target_region(target=target, max_amplicon_length=10)
# A max amplicon length of 10 is requested for a target spanning positions 201-250.
with pytest.raises(ValueError, match="exceeds the maximum size"):
designer._create_design_region(
target_region=target_region, max_amplicon_length=10, min_primer_length=10
)


assert padded_region == target
def test_create_design_region_raises_when_primers_would_not_fit_in_design_region(
    genome_ref: Path,
) -> None:
    """
    Check that `_create_design_region()` rejects a design region too tight for primers.

    The call uses a max amplicon length of 55 and a minimum primer length of 10 for a target
    spanning positions 201-250, and must raise a `ValueError` whose message matches
    "exceeds the maximum size".
    """
    target_span = Span(refname="chr1", start=201, end=250, strand=Strand.POSITIVE)
    expected_error = pytest.raises(ValueError, match="exceeds the maximum size")

    # The designer context is entered first, so the error is captured while it is still open.
    with Primer3(genome_fasta=genome_ref) as designer, expected_error:
        designer._create_design_region(
            target_region=target_span, max_amplicon_length=55, min_primer_length=10
        )

0 comments on commit dd844f5

Please sign in to comment.