Skip to content

Commit

Permalink
Refactored the methods to use the helper method 'constrain_column_types'.
Browse files Browse the repository at this point in the history
  • Loading branch information
biomadeira committed Sep 19, 2017
1 parent eac69e0 commit c332ad4
Show file tree
Hide file tree
Showing 8 changed files with 18 additions and 69 deletions.
9 changes: 2 additions & 7 deletions prointvar/arpeggio.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
from prointvar.utils import lazy_file_remover
from prointvar.utils import row_selector
from prointvar.utils import string_split
from prointvar.utils import constrain_column_types
from prointvar.library import arpeggio_types
from prointvar.library import arpeggio_col_renames

Expand Down Expand Up @@ -106,13 +107,7 @@ def parse_arpeggio_from_file(inputfile, excluded=(), add_res_split=True,
pass

# enforce some specific column types
for col in table:
if col in arpeggio_types:
try:
table[col] = table[col].astype(arpeggio_types[col])
except ValueError:
# there are some NaNs in there
pass
table = constrain_column_types(table, arpeggio_types)

if table.empty:
raise ValueError('{} resulted in an empty DataFrame...'.format(inputfile))
Expand Down
9 changes: 2 additions & 7 deletions prointvar/dssp.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from prointvar.utils import get_rsa_class
from prointvar.utils import row_selector
from prointvar.utils import lazy_file_remover
from prointvar.utils import constrain_column_types
from prointvar.library import dssp_types

from prointvar.config import config
Expand Down Expand Up @@ -182,13 +183,7 @@ def parse_dssp_from_file(inputfile, excluded=(), add_full_chain=True, add_ss_red
pass

# enforce some specific column types
for col in table:
if col in dssp_types:
try:
table[col] = table[col].astype(dssp_types[col])
except ValueError:
# there are some NaNs in there
pass
table = constrain_column_types(table, dssp_types)

if table.empty:
raise ValueError('{} resulted in an empty DataFrame...'.format(inputfile))
Expand Down
9 changes: 2 additions & 7 deletions prointvar/hbplus.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

from prointvar.utils import row_selector
from prointvar.utils import lazy_file_remover
from prointvar.utils import constrain_column_types
from prointvar.library import hbplus_types

from prointvar.config import config
Expand Down Expand Up @@ -97,13 +98,7 @@ def fix_res_id(data, key):
pass

# enforce some specific column types
for col in table:
if col in hbplus_types:
try:
table[col] = table[col].astype(hbplus_types[col])
except ValueError:
# there are some NaNs in there
pass
table = constrain_column_types(table, hbplus_types)

if table.empty:
raise ValueError('{} resulted in an empty DataFrame...'.format(inputfile))
Expand Down
8 changes: 2 additions & 6 deletions prointvar/msas.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from Bio import AlignIO

from prointvar.fetchers import fetch_uniprot_id_from_name
from prointvar.utils import constrain_column_types

logger = logging.getLogger("prointvar")

Expand Down Expand Up @@ -92,12 +93,7 @@ def parse_msa_sequences_from_file(inputfile, excluded=(), get_uniprot_id=False,

# enforce some specific column types
msa_types = {key: str for key in list(table) if key != 'Start' and key != 'End'}
for col in table:
try:
table[col] = table[col].astype(msa_types[col])
except (ValueError, KeyError):
# there are some NaNs in there
pass
table = constrain_column_types(table, msa_types)

if table.empty:
raise ValueError('{} resulted in an empty DataFrame...'.format(inputfile))
Expand Down
17 changes: 3 additions & 14 deletions prointvar/pdbx.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from prointvar.utils import string_split
from prointvar.utils import get_new_pro_ids
from prointvar.utils import check_sequence
from prointvar.utils import constrain_column_types
from prointvar.library import mmcif_types
from prointvar.library import aa_default_atoms
from prointvar.library import aa_codes_3to1_extended
Expand Down Expand Up @@ -144,13 +145,7 @@ def parse_mmcif_atoms_from_file(inputfile, excluded=(), add_res_full=True,
logger.info("PDBx reset atom numbers...")

# enforce some specific column types
for col in table:
if col in mmcif_types:
try:
table[col] = table[col].astype(mmcif_types[col])
except ValueError:
# there are some NaNs in there
pass
table = constrain_column_types(table, mmcif_types)

if table.empty:
raise ValueError('{} resulted in an empty DataFrame...'.format(inputfile))
Expand Down Expand Up @@ -279,13 +274,7 @@ def parse_pdb_atoms_from_file(inputfile, excluded=(), add_contacts=False,
logger.info("PDBx reset atom numbers...")

# enforce some specific column types
for col in table:
if col in mmcif_types:
try:
table[col] = table[col].astype(mmcif_types[col])
except ValueError:
# there are some NaNs in there
pass
table = constrain_column_types(table, mmcif_types)

if table.empty:
raise ValueError('{} resulted in an empty DataFrame...'.format(inputfile))
Expand Down
9 changes: 2 additions & 7 deletions prointvar/sifts.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from collections import OrderedDict

from prointvar.utils import row_selector
from prointvar.utils import constrain_column_types
from prointvar.library import sifts_types

logger = logging.getLogger("prointvar")
Expand Down Expand Up @@ -210,13 +211,7 @@ def parse_sifts_residues_from_file(inputfile, excluded=(),
table = pd.DataFrame(rows)

# enforce some specific column types
for col in table:
if col in sifts_types:
try:
table[col] = table[col].astype(sifts_types[col])
except ValueError:
# there are some NaNs in there
pass
table = constrain_column_types(table, sifts_types)

for c in list(table):
if '_regionId' in c:
Expand Down
9 changes: 2 additions & 7 deletions prointvar/stamp.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

from prointvar.pdbx import PDBXwriter

from prointvar.utils import constrain_column_types
from prointvar.library import stamp_types

from prointvar.config import config
Expand Down Expand Up @@ -237,13 +238,7 @@ def parse_stamp_scan_scores_from_file(inputfile, excluded=()):
pass

# enforce some specific column types
for col in table:
if col in stamp_types:
try:
table[col] = table[col].astype(stamp_types[col])
except ValueError:
# there are some NaNs in there
pass
table = constrain_column_types(table, stamp_types)

if table.empty:
raise ValueError('{} resulted in an empty DataFrame...'.format(inputfile))
Expand Down
17 changes: 3 additions & 14 deletions prointvar/variants.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from prointvar.utils import merging_down_by_key
from prointvar.utils import flatten_nested_structure
from prointvar.utils import refactor_key_val_singletons
from prointvar.utils import constrain_column_types
from prointvar.library import uni_ens_var_types
from prointvar.library import update_ensembl_to_uniprot

Expand Down Expand Up @@ -67,13 +68,7 @@ def flatten_uniprot_variants_ebi(data, excluded=()):
pass

# enforce some specific column types
for col in table:
if col in uni_ens_var_types:
try:
table[col] = table[col].astype(uni_ens_var_types[col])
except ValueError:
# there are some NaNs in there
pass
table = constrain_column_types(table, uni_ens_var_types)

# split multi id rows
table = splitting_up_by_key(table, key='xrefs_id')
Expand Down Expand Up @@ -116,13 +111,7 @@ def flatten_ensembl_variants(data, excluded=(), synonymous=True):
pass

# enforce some specific column types
for col in table:
if col in uni_ens_var_types:
try:
table[col] = table[col].astype(uni_ens_var_types[col])
except ValueError:
# there are some NaNs in there
pass
table = constrain_column_types(table, uni_ens_var_types)

# split multi id rows
table = splitting_up_by_key(table, key='xrefs_id')
Expand Down

0 comments on commit c332ad4

Please sign in to comment.