-
Notifications
You must be signed in to change notification settings - Fork 93
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
add sus scrofa #1312 #1672
Open
AprilYUZhang
wants to merge
2
commits into
popsim-consortium:main
Choose a base branch
from
AprilYUZhang:Sus_scrofa
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
add sus scrofa #1312 #1672
Changes from all commits
Commits
Show all changes
2 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
""" | ||
Catalog definitions for SusScr (Ensembl ID='sus_scrofa') | ||
""" | ||
|
||
from . import species # noqa: F401 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
# File autogenerated from Ensembl REST API. Do not edit. | ||
data = { | ||
"assembly_accession": "GCA_000003025.6", | ||
"assembly_name": "Sscrofa11.1", | ||
"chromosomes": { | ||
"1": {"length": 274330532, "synonyms": ["chr1"]}, | ||
"2": {"length": 151935994, "synonyms": ["chr2"]}, | ||
"3": {"length": 132848913, "synonyms": ["chr3"]}, | ||
"4": {"length": 130910915, "synonyms": ["chr4"]}, | ||
"5": {"length": 104526007, "synonyms": ["chr5"]}, | ||
"6": {"length": 170843587, "synonyms": ["chr6"]}, | ||
"7": {"length": 121844099, "synonyms": ["chr7"]}, | ||
"8": {"length": 138966237, "synonyms": ["chr8"]}, | ||
"9": {"length": 139512083, "synonyms": ["chr9"]}, | ||
"10": {"length": 69359453, "synonyms": ["chr10"]}, | ||
"11": {"length": 79169978, "synonyms": ["chr11"]}, | ||
"12": {"length": 61602749, "synonyms": ["chr12"]}, | ||
"13": {"length": 208334590, "synonyms": ["chr13"]}, | ||
"14": {"length": 141755446, "synonyms": ["chr14"]}, | ||
"15": {"length": 140412725, "synonyms": ["chr15"]}, | ||
"16": {"length": 79944280, "synonyms": ["chr16"]}, | ||
"17": {"length": 63494081, "synonyms": ["chr17"]}, | ||
"18": {"length": 55982971, "synonyms": ["chr18"]}, | ||
"X": {"length": 125939595, "synonyms": ["chrX"]}, | ||
"Y": {"length": 43547828, "synonyms": ["chrY"]}, | ||
"MT": {"length": 16613, "synonyms": []}, | ||
}, | ||
"assembly_source": "ensembl", | ||
"assembly_build_version": "113", | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,144 @@ | ||
import stdpopsim | ||
|
||
from . import genome_data | ||
|
||
# Zhang et al. (2022): "Revisiting the Evolutionary History of Pigs via
# De Novo Mutation Rate Estimation in a Three-Generation Pedigree".
# Cited for both the mutation rate and the population size.
_ZhangEtAl = stdpopsim.Citation(
    author="Zhang et al.",
    year=2022,
    doi="https://doi.org/10.1016/j.gpb.2022.02.001",
    reasons={
        stdpopsim.CiteReason.POP_SIZE,  # on page 1048
        stdpopsim.CiteReason.MUT_RATE,  # on page 1042
    },
)
|
||
# Servanty et al. (2011): cited for the generation time in wild boar.
_ServantyEtAl = stdpopsim.Citation(
    author="Servanty et al.",
    year=2011,
    doi="https://doi.org/10.1111/j.1365-2664.2011.02017.x",
    reasons={stdpopsim.CiteReason.GEN_TIME},  # on page 835
)
|
||
# Johnsson et al. (2021): "Genetic variation in recombination rate in the pig".
# Cited for the recombination rates. Where the values live in the paper:
#   Additional file 4: Table S3 -- sex-averaged values
#   Additional file 2: Table S1 -- male-specific values
#   Additional file 3: Table S2 -- female-specific values
_JohnssonEtAl = stdpopsim.Citation(
    author="Johnsson et al.",
    year=2021,
    doi="https://doi.org/10.1186/s12711-021-00643-0",
    reasons={stdpopsim.CiteReason.REC_RATE},
)
|
||
# Warr et al. (2020): "An improved pig reference genome sequence to enable
# pig genetics and genomics research". Cited for the genome assembly.
_WarrEtAl = stdpopsim.Citation(
    author="Warr et al.",
    year=2020,
    doi="https://doi.org/10.1093/gigascience/giaa051",
    reasons={stdpopsim.CiteReason.ASSEMBLY},
)
|
||
# Ploidy
# Autosomes 1-18 and X use the species-wide ploidy; Y and MT are set to 1.
_ploidy = 2
_ploidy_contig = dict.fromkeys((str(number) for number in range(1, 19)), _ploidy)
_ploidy_contig.update({"X": _ploidy, "Y": 1, "MT": 1})
|
||
# De-novo mutation rate
# from Zhang et al. (2022) on page 1042.
# The same rate is applied to every contig: autosomes 1-18, X, Y and MT.
_mutation_rate = 3.6e-9
_mutation_rate_contig = {
    name: _mutation_rate for name in [*map(str, range(1, 19)), "X", "Y", "MT"]
}
|
||
# Recombination rate
# Mean rate along each autosome (listed in order, chromosomes 1 to 18),
# calculated from Johnsson et al. (2021), Additional file 4: Table S3.
_tmp = [
    5.335247608983185e-09,
    8.6057260999254e-09,
    9.751141303348547e-09,
    9.79460049114614e-09,
    1.1992632296111e-08,
    8.786173521918499e-09,
    1.0887772285689678e-08,
    8.923031414594611e-09,
    9.337506174170301e-09,
    1.654233205589542e-08,
    1.2145120574910172e-08,
    1.6905062453467094e-08,
    6.162796630328349e-09,
    8.721569375548237e-09,
    8.114468762954893e-09,
    1.1407464686635653e-08,
    1.3779949516098884e-08,
    1.3679923888648167e-08,
]
_autosome_names = [str(number) for number in range(1, 19)]
_autosome_lengths = [
    genome_data.data["chromosomes"][name]["length"] for name in _autosome_names
]
_recombination_rate_contig = dict(zip(_autosome_names, _tmp))

# The X chromosome gets the length-weighted mean of the autosomal rates:
# sum(rate * length) / sum(length) over chromosomes 1-18.
_total_length = sum(_autosome_lengths)
_weighted_recombination = sum(
    rate * length for rate, length in zip(_tmp, _autosome_lengths)
)
_recombination_rate_autosome_mean = _weighted_recombination / _total_length
_recombination_rate_contig["X"] = _recombination_rate_autosome_mean

# Y and MT are assigned a recombination rate of zero.
_recombination_rate_contig["Y"] = 0.0
_recombination_rate_contig["MT"] = 0.0
|
||
# Assemble the genome from the Ensembl-derived chromosome data plus the
# per-contig ploidy, mutation-rate and recombination-rate tables.
_genome = stdpopsim.Genome.from_data(
    genome_data=genome_data.data,
    recombination_rate=_recombination_rate_contig,
    mutation_rate=_mutation_rate_contig,
    ploidy=_ploidy_contig,
    citations=[
        _WarrEtAl,  # ASSEMBLY
        _JohnssonEtAl,  # REC_RATE
        _ZhangEtAl,  # MUT_RATE
    ],
)
|
||
_species = stdpopsim.Species(
    id="SusScr",
    name="Sus scrofa",
    common_name="Pig",
    ensembl_id="sus_scrofa",
    genome=_genome,
    ploidy=_ploidy,
    # Population size.
    # Zhang et al. (2022) on page 1045 write:
    # "... Ne of pigs (the maximum Ne was ~4x10^4; see Figure S6D) ..."
    # and on page 1048 write:
    # "The new estimated mutation rate also revealed a maximum Ne of
    # 2.7x10^5 in pigs, ~6 times larger than that estimated previously
    # [2,3] (Figure 3A, Figure S9)"
    # The value below is the 2.7x10^5 figure from page 1048.
    population_size=270_000,
    # Generation time.
    # Servanty et al. (2011) on page 837 write:
    # "We calculated generation time as the inverse of the relative elasticity
    # of the population growth rate to a change in all recruitment parameters."
    # NOTE(review): the _ServantyEtAl citation comment says page 835 while this
    # comment says page 837 -- confirm which page is correct.
    # For comparison, Groenen et al. (2012, 10.1038/nature11622) used a best
    # guess of 5 years (supplement page 56), and that value is cited very often.
    # To improve upon this best guess, Zhang et al. (2022) assumed a generation
    # time of 3 years, taken as the age of parents at the first litter. Pigs
    # have multiple parities in their lifetime, so 3 years must be an
    # underestimate.
    # Servanty et al. (2011) seems to be the most reliable source with some
    # concrete data, though from a lightly hunted population, so this estimate
    # is also likely a slight underestimate. For deep coalescent simulations,
    # however, we also expect that there has been some predation in nature in
    # the past.
    generation_time=3.6,
    citations=[
        _ZhangEtAl,  # POP_SIZE
        _ServantyEtAl,  # GEN_TIME
    ],
)


# Make the species available through the stdpopsim catalog.
stdpopsim.register_species(_species)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
import pytest | ||
|
||
import stdpopsim | ||
from tests import test_species | ||
|
||
|
||
class TestSpeciesData(test_species.SpeciesTestBase):
    """Checks on the species-level catalog entry for SusScr.

    The first group of tests pins the identifiers and assembly metadata.
    The QC tests at the end are placeholders: they are skipped and compare
    against -1 until an independent reviewer fills in the real values.
    """

    # Species under test, looked up once from the catalog by its id.
    species = stdpopsim.get_species("SusScr")

    def test_ensembl_id(self):
        """The Ensembl identifier is 'sus_scrofa'."""
        assert self.species.ensembl_id == "sus_scrofa"

    def test_name(self):
        """The scientific name is 'Sus scrofa'."""
        assert self.species.name == "Sus scrofa"

    def test_common_name(self):
        """The common name is 'Pig'."""
        assert self.species.common_name == "Pig"

    def test_assembly_source(self):
        """The assembly source recorded on the genome is 'ensembl'."""
        assert self.species.genome.assembly_source == "ensembl"

    def test_assembly_build_version(self):
        """The assembly build version is stored as the string '113'."""
        assert self.species.genome.assembly_build_version == "113"

    # QC Tests. These tests are performed by another contributor
    # independently referring to the citations provided in the
    # species definition, filling in the appropriate values
    # and deleting the pytest "skip" annotations.
    @pytest.mark.skip("Population size QC not done yet")
    def test_qc_population_size(self):
        """Placeholder: -1 is to be replaced by the QC'd population size."""
        assert self.species.population_size == -1

    @pytest.mark.skip("Generation time QC not done yet")
    def test_qc_generation_time(self):
        """Placeholder: -1 is to be replaced by the QC'd generation time."""
        assert self.species.generation_time == -1
|
||
|
||
class TestGenomeData(test_species.GenomeTestBase):
    """Per-chromosome QC checks on the SusScr genome.

    Every test here is currently skipped. Each table maps a chromosome name
    to a -1 placeholder, kept as one explicit entry per chromosome so the
    expected values can be filled in by hand.
    """

    # Genome under test, taken from the SusScr catalog entry.
    genome = stdpopsim.get_species("SusScr").genome

    @pytest.mark.skip("Recombination rate QC not done yet")
    @pytest.mark.parametrize(
        ["name", "rate"],
        {
            "1": -1,
            "2": -1,
            "3": -1,
            "4": -1,
            "5": -1,
            "6": -1,
            "7": -1,
            "8": -1,
            "9": -1,
            "10": -1,
            "11": -1,
            "12": -1,
            "13": -1,
            "14": -1,
            "15": -1,
            "16": -1,
            "17": -1,
            "18": -1,
            "X": -1,
            "Y": -1,
            "MT": -1,
        }.items(),
    )
    def test_recombination_rate(self, name, rate):
        """Compare the expected rate with the chromosome's recombination rate.

        The comparison is approximate (pytest.approx) because the values
        are floats.
        """
        assert rate == pytest.approx(
            self.genome.get_chromosome(name).recombination_rate
        )

    @pytest.mark.skip("Mutation rate QC not done yet")
    @pytest.mark.parametrize(
        ["name", "rate"],
        {
            "1": -1,
            "2": -1,
            "3": -1,
            "4": -1,
            "5": -1,
            "6": -1,
            "7": -1,
            "8": -1,
            "9": -1,
            "10": -1,
            "11": -1,
            "12": -1,
            "13": -1,
            "14": -1,
            "15": -1,
            "16": -1,
            "17": -1,
            "18": -1,
            "X": -1,
            "Y": -1,
            "MT": -1,
        }.items(),
    )
    def test_mutation_rate(self, name, rate):
        """Compare the expected rate with the chromosome's mutation rate.

        The comparison is approximate (pytest.approx) because the values
        are floats.
        """
        assert rate == pytest.approx(self.genome.get_chromosome(name).mutation_rate)

    @pytest.mark.skip("Ploidy QC not done yet")
    @pytest.mark.parametrize(
        ["name", "ploidy"],
        {
            "1": -1,
            "2": -1,
            "3": -1,
            "4": -1,
            "5": -1,
            "6": -1,
            "7": -1,
            "8": -1,
            "9": -1,
            "10": -1,
            "11": -1,
            "12": -1,
            "13": -1,
            "14": -1,
            "15": -1,
            "16": -1,
            "17": -1,
            "18": -1,
            "X": -1,
            "Y": -1,
            "MT": -1,
        }.items(),
    )
    def test_chromosome_ploidy(self, name, ploidy):
        """Compare the expected ploidy with the chromosome's ploidy (exact match)."""
        assert ploidy == self.genome.get_chromosome(name).ploidy
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add `_ServantyEtAl` citation for GEN_TIME