Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add sus scrofa #1312 #1672

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions stdpopsim/catalog/SusScr/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
"""
Catalog definitions for SusScr (Ensembl ID='sus_scrofa')
"""

from . import species # noqa: F401
30 changes: 30 additions & 0 deletions stdpopsim/catalog/SusScr/genome_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# File autogenerated from Ensembl REST API. Do not edit.
# Assembly metadata for the SusScr catalog entry. Each entry under
# "chromosomes" maps a chromosome name to its "length" and a list of
# "synonyms" (alternative names; the list is empty for "MT").
data = {
    "assembly_accession": "GCA_000003025.6",
    "assembly_name": "Sscrofa11.1",
    "chromosomes": {
        "1": {"length": 274330532, "synonyms": ["chr1"]},
        "2": {"length": 151935994, "synonyms": ["chr2"]},
        "3": {"length": 132848913, "synonyms": ["chr3"]},
        "4": {"length": 130910915, "synonyms": ["chr4"]},
        "5": {"length": 104526007, "synonyms": ["chr5"]},
        "6": {"length": 170843587, "synonyms": ["chr6"]},
        "7": {"length": 121844099, "synonyms": ["chr7"]},
        "8": {"length": 138966237, "synonyms": ["chr8"]},
        "9": {"length": 139512083, "synonyms": ["chr9"]},
        "10": {"length": 69359453, "synonyms": ["chr10"]},
        "11": {"length": 79169978, "synonyms": ["chr11"]},
        "12": {"length": 61602749, "synonyms": ["chr12"]},
        "13": {"length": 208334590, "synonyms": ["chr13"]},
        "14": {"length": 141755446, "synonyms": ["chr14"]},
        "15": {"length": 140412725, "synonyms": ["chr15"]},
        "16": {"length": 79944280, "synonyms": ["chr16"]},
        "17": {"length": 63494081, "synonyms": ["chr17"]},
        "18": {"length": 55982971, "synonyms": ["chr18"]},
        "X": {"length": 125939595, "synonyms": ["chrX"]},
        "Y": {"length": 43547828, "synonyms": ["chrY"]},
        "MT": {"length": 16613, "synonyms": []},
    },
    "assembly_source": "ensembl",
    "assembly_build_version": "113",
}
144 changes: 144 additions & 0 deletions stdpopsim/catalog/SusScr/species.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
import stdpopsim

from . import genome_data

# Zhang et al. (2022), "Revisiting the Evolutionary History of Pigs via
# De Novo Mutation Rate Estimation in a Three-Generation Pedigree".
# Cited for both the mutation rate and the population size.
_ZhangEtAl = stdpopsim.Citation(
    author="Zhang et al.",
    year=2022,
    doi="https://doi.org/10.1016/j.gpb.2022.02.001",
    reasons={
        stdpopsim.CiteReason.MUT_RATE,  # on page 1042
        stdpopsim.CiteReason.POP_SIZE,  # on page 1048
    },
)

# Servanty et al. (2011): source of the wild boar generation time.
_ServantyEtAl = stdpopsim.Citation(
    author="Servanty et al.",
    year=2011,
    doi="https://doi.org/10.1111/j.1365-2664.2011.02017.x",
    reasons={
        stdpopsim.CiteReason.GEN_TIME,  # on page 835
    },
)

# Johnsson et al. (2021), "Genetic variation in recombination rate in the pig".
# Where the rates live in the supplementary material:
#   Additional file 4: Table S3 - sex-averaged values
#   Additional file 2: Table S1 - male-specific values
#   Additional file 3: Table S2 - female-specific values
_JohnssonEtAl = stdpopsim.Citation(
    author="Johnsson et al.",
    year=2021,
    doi="https://doi.org/10.1186/s12711-021-00643-0",
    reasons={stdpopsim.CiteReason.REC_RATE},
)

# Warr et al. (2020), "An improved pig reference genome sequence to enable
# pig genetics and genomics research". Cited for the assembly.
_WarrEtAl = stdpopsim.Citation(
    author="Warr et al.",
    year=2020,
    doi="https://doi.org/10.1093/gigascience/giaa051",
    reasons={stdpopsim.CiteReason.ASSEMBLY},
)

# Ploidy per contig: chromosomes 1-18 and X take the species-level value,
# while Y and MT are each assigned a single copy.
_ploidy = 2
_ploidy_contig = dict.fromkeys((str(number) for number in range(1, 19)), _ploidy)
_ploidy_contig.update({"X": _ploidy, "Y": 1, "MT": 1})

# De-novo mutation rate reported by Zhang et al. (2022) on page 1042.
# The same value is assigned to every contig (1-18, X, Y and MT).
_mutation_rate = 3.6e-9
_mutation_rate_contig = dict.fromkeys(
    [*map(str, range(1, 19)), "X", "Y", "MT"],
    _mutation_rate,
)

# Recombination rate
# Mean rate along each of chromosomes 1-18 (listed in that order), calculated
# from Johnsson et al. (2021), Additional file 4: Table S3.
_tmp = [
    5.335247608983185e-09,
    8.6057260999254e-09,
    9.751141303348547e-09,
    9.79460049114614e-09,
    1.1992632296111e-08,
    8.786173521918499e-09,
    1.0887772285689678e-08,
    8.923031414594611e-09,
    9.337506174170301e-09,
    1.654233205589542e-08,
    1.2145120574910172e-08,
    1.6905062453467094e-08,
    6.162796630328349e-09,
    8.721569375548237e-09,
    8.114468762954893e-09,
    1.1407464686635653e-08,
    1.3779949516098884e-08,
    1.3679923888648167e-08,
]
_recombination_rate_contig = {
    str(number): rate for number, rate in enumerate(_tmp, start=1)
}

# The X chromosome gets the length-weighted mean of the autosomal rates.
# Both sums must run while the mapping still holds only chromosomes 1-18,
# i.e. before the X, Y and MT entries are added below.
_total_length = sum(
    genome_data.data["chromosomes"][name]["length"]
    for name in _recombination_rate_contig
)
_weighted_recombination = sum(
    rate * genome_data.data["chromosomes"][name]["length"]
    for name, rate in _recombination_rate_contig.items()
)
_recombination_rate_autosome_mean = _weighted_recombination / _total_length

# Y and MT are given a recombination rate of zero.
_recombination_rate_contig.update(
    {
        "X": _recombination_rate_autosome_mean,
        "Y": 0.0,
        "MT": 0.0,
    }
)

# Assemble the genome from the autogenerated assembly data and the
# per-contig ploidy, recombination-rate and mutation-rate tables.
_genome = stdpopsim.Genome.from_data(
    genome_data=genome_data.data,
    recombination_rate=_recombination_rate_contig,
    mutation_rate=_mutation_rate_contig,
    ploidy=_ploidy_contig,
    citations=[
        _WarrEtAl,  # ASSEMBLY
        _JohnssonEtAl,  # REC_RATE
        _ZhangEtAl,  # MUT_RATE
    ],
)

# Species definition for the pig (Sus scrofa), followed by its registration
# in the stdpopsim catalog.
_species = stdpopsim.Species(
    id="SusScr",
    ensembl_id="sus_scrofa",
    name="Sus scrofa",
    common_name="Pig",
    genome=_genome,
    ploidy=_ploidy,
    # Servanty et al. (2011) on page 837 write:
    # "We calculated generation time as the inverse of the relative elasticity of
    # the population growth rate to a change in all recruitment parameters."
    # For comparison, Groenen et al. (2012, 10.1038/nature11622) used a best guess
    # of 5 years (supplement page 56) and this value is cited very often.
    # To improve upon this best guess, Zhang et al. (2022) assumed a generation time
    # of 3 years, as the age of parents at the first litter, but pigs have
    # multiple parities in their lifetime, so 3 years must be an underestimate.
    # Servanty et al. (2011) seems to be the most reliable source with some
    # concrete data, though from a lightly hunted population, so this estimate
    # is also likely a slight underestimate, but for the deep coalescent simulations
    # we also expect that there has been some predation in nature in the past.
    generation_time=3.6,
    # Zhang et al. (2022) on page 1045 write:
    # "... Ne of pigs (the maximum Ne was ~4x10^4; see Figure S6D) ..."
    # and on page 1048 write:
    # "The new estimated mutation rate also revealed a maximum Ne of
    # 2.7x10^5 in pigs, ~6 times larger than that estimated previously
    # [2,3] (Figure 3A, Figure S9)"
    population_size=270_000,
    # One citation per species-level parameter set above.
    citations=[
        _ZhangEtAl,  # POP_SIZE
        _ServantyEtAl,  # GEN_TIME
    ],
)

stdpopsim.register_species(_species)
133 changes: 133 additions & 0 deletions tests/test_SusScr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
import pytest

import stdpopsim
from tests import test_species


class TestSpeciesData(test_species.SpeciesTestBase):
    """Checks of the species-level attributes of the SusScr catalog entry.

    NOTE(review): presumably further generic checks are inherited from
    ``test_species.SpeciesTestBase``; that class is not visible here.
    """

    # Species object under test, looked up by its catalog id.
    species = stdpopsim.get_species("SusScr")

    def test_ensembl_id(self):
        assert self.species.ensembl_id == "sus_scrofa"

    def test_name(self):
        assert self.species.name == "Sus scrofa"

    def test_common_name(self):
        assert self.species.common_name == "Pig"

    def test_assembly_source(self):
        assert self.species.genome.assembly_source == "ensembl"

    def test_assembly_build_version(self):
        assert self.species.genome.assembly_build_version == "113"

    # QC Tests. These tests are performed by another contributor
    # independently referring to the citations provided in the
    # species definition, filling in the appropriate values
    # and deleting the pytest "skip" annotations.
    # Until then the expected values below are -1 placeholders.
    @pytest.mark.skip("Population size QC not done yet")
    def test_qc_population_size(self):
        assert self.species.population_size == -1

    @pytest.mark.skip("Generation time QC not done yet")
    def test_qc_generation_time(self):
        assert self.species.generation_time == -1


class TestGenomeData(test_species.GenomeTestBase):
    """Per-chromosome QC checks for the SusScr genome.

    Every test here is currently skipped with a "QC not done yet" reason,
    and the expected values are all -1, which appear to be placeholders to
    be replaced when the skip marks are removed.

    NOTE(review): presumably further generic checks are inherited from
    ``test_species.GenomeTestBase``; that class is not visible here.
    """

    # Genome object under test, taken from the registered species.
    genome = stdpopsim.get_species("SusScr").genome

    # One parametrized case per chromosome name: expected recombination rate.
    @pytest.mark.skip("Recombination rate QC not done yet")
    @pytest.mark.parametrize(
        ["name", "rate"],
        {
            "1": -1,
            "2": -1,
            "3": -1,
            "4": -1,
            "5": -1,
            "6": -1,
            "7": -1,
            "8": -1,
            "9": -1,
            "10": -1,
            "11": -1,
            "12": -1,
            "13": -1,
            "14": -1,
            "15": -1,
            "16": -1,
            "17": -1,
            "18": -1,
            "X": -1,
            "Y": -1,
            "MT": -1,
        }.items(),
    )
    def test_recombination_rate(self, name, rate):
        # Approximate comparison, since the rates are floating-point values.
        assert rate == pytest.approx(
            self.genome.get_chromosome(name).recombination_rate
        )

    # One parametrized case per chromosome name: expected mutation rate.
    @pytest.mark.skip("Mutation rate QC not done yet")
    @pytest.mark.parametrize(
        ["name", "rate"],
        {
            "1": -1,
            "2": -1,
            "3": -1,
            "4": -1,
            "5": -1,
            "6": -1,
            "7": -1,
            "8": -1,
            "9": -1,
            "10": -1,
            "11": -1,
            "12": -1,
            "13": -1,
            "14": -1,
            "15": -1,
            "16": -1,
            "17": -1,
            "18": -1,
            "X": -1,
            "Y": -1,
            "MT": -1,
        }.items(),
    )
    def test_mutation_rate(self, name, rate):
        assert rate == pytest.approx(self.genome.get_chromosome(name).mutation_rate)

    # One parametrized case per chromosome name: expected ploidy (exact match).
    @pytest.mark.skip("Ploidy QC not done yet")
    @pytest.mark.parametrize(
        ["name", "ploidy"],
        {
            "1": -1,
            "2": -1,
            "3": -1,
            "4": -1,
            "5": -1,
            "6": -1,
            "7": -1,
            "8": -1,
            "9": -1,
            "10": -1,
            "11": -1,
            "12": -1,
            "13": -1,
            "14": -1,
            "15": -1,
            "16": -1,
            "17": -1,
            "18": -1,
            "X": -1,
            "Y": -1,
            "MT": -1,
        }.items(),
    )
    def test_chromosome_ploidy(self, name, ploidy):
        assert ploidy == self.genome.get_chromosome(name).ploidy
Loading