diff --git a/prointvar/arpeggio.py b/prointvar/arpeggio.py index d4652b7..cd77db6 100644 --- a/prointvar/arpeggio.py +++ b/prointvar/arpeggio.py @@ -39,6 +39,7 @@ from prointvar.utils import lazy_file_remover from prointvar.utils import row_selector from prointvar.utils import string_split +from prointvar.utils import constrain_column_types from prointvar.library import arpeggio_types from prointvar.library import arpeggio_col_renames @@ -106,13 +107,7 @@ def parse_arpeggio_from_file(inputfile, excluded=(), add_res_split=True, pass # enforce some specific column types - for col in table: - if col in arpeggio_types: - try: - table[col] = table[col].astype(arpeggio_types[col]) - except ValueError: - # there are some NaNs in there - pass + table = constrain_column_types(table, arpeggio_types) if table.empty: raise ValueError('{} resulted in an empty DataFrame...'.format(inputfile)) diff --git a/prointvar/dssp.py b/prointvar/dssp.py index 975a3e2..eaf8e9f 100644 --- a/prointvar/dssp.py +++ b/prointvar/dssp.py @@ -24,6 +24,7 @@ from prointvar.utils import get_rsa_class from prointvar.utils import row_selector from prointvar.utils import lazy_file_remover +from prointvar.utils import constrain_column_types from prointvar.library import dssp_types from prointvar.config import config @@ -182,13 +183,7 @@ def parse_dssp_from_file(inputfile, excluded=(), add_full_chain=True, add_ss_red pass # enforce some specific column types - for col in table: - if col in dssp_types: - try: - table[col] = table[col].astype(dssp_types[col]) - except ValueError: - # there are some NaNs in there - pass + table = constrain_column_types(table, dssp_types) if table.empty: raise ValueError('{} resulted in an empty DataFrame...'.format(inputfile)) diff --git a/prointvar/hbplus.py b/prointvar/hbplus.py index ba2d901..ab38a3c 100644 --- a/prointvar/hbplus.py +++ b/prointvar/hbplus.py @@ -19,6 +19,7 @@ from prointvar.utils import row_selector from prointvar.utils import lazy_file_remover +from prointvar.utils import constrain_column_types from prointvar.library import hbplus_types from prointvar.config import config @@ -97,13 +98,7 @@ def fix_res_id(data, key): pass # enforce some specific column types - for col in table: - if col in hbplus_types: - try: - table[col] = table[col].astype(hbplus_types[col]) - except ValueError: - # there are some NaNs in there - pass + table = constrain_column_types(table, hbplus_types) if table.empty: raise ValueError('{} resulted in an empty DataFrame...'.format(inputfile)) diff --git a/prointvar/msas.py b/prointvar/msas.py index dbc7103..d0bc0d0 100644 --- a/prointvar/msas.py +++ b/prointvar/msas.py @@ -20,6 +20,7 @@ from Bio import AlignIO from prointvar.fetchers import fetch_uniprot_id_from_name +from prointvar.utils import constrain_column_types logger = logging.getLogger("prointvar") @@ -92,12 +93,7 @@ def parse_msa_sequences_from_file(inputfile, excluded=(), get_uniprot_id=False, # enforce some specific column types msa_types = {key: str for key in list(table) if key != 'Start' and key != 'End'} - for col in table: - try: - table[col] = table[col].astype(msa_types[col]) - except (ValueError, KeyError): - # there are some NaNs in there - pass + table = constrain_column_types(table, msa_types) if table.empty: raise ValueError('{} resulted in an empty DataFrame...'.format(inputfile)) diff --git a/prointvar/pdbx.py b/prointvar/pdbx.py index f044421..02e0b65 100644 --- a/prointvar/pdbx.py +++ b/prointvar/pdbx.py @@ -25,6 +25,7 @@ from prointvar.utils import string_split from prointvar.utils import get_new_pro_ids from prointvar.utils import check_sequence +from prointvar.utils import constrain_column_types from prointvar.library import mmcif_types from prointvar.library import aa_default_atoms from prointvar.library import aa_codes_3to1_extended @@ -144,13 +145,7 @@ def parse_mmcif_atoms_from_file(inputfile, excluded=(), add_res_full=True, logger.info("PDBx reset atom numbers...") # enforce some specific column types - for col in table: - if col in mmcif_types: - try: - table[col] = table[col].astype(mmcif_types[col]) - except ValueError: - # there are some NaNs in there - pass + table = constrain_column_types(table, mmcif_types) if table.empty: raise ValueError('{} resulted in an empty DataFrame...'.format(inputfile)) @@ -279,13 +274,7 @@ def parse_pdb_atoms_from_file(inputfile, excluded=(), add_contacts=False, logger.info("PDBx reset atom numbers...") # enforce some specific column types - for col in table: - if col in mmcif_types: - try: - table[col] = table[col].astype(mmcif_types[col]) - except ValueError: - # there are some NaNs in there - pass + table = constrain_column_types(table, mmcif_types) if table.empty: raise ValueError('{} resulted in an empty DataFrame...'.format(inputfile)) diff --git a/prointvar/sifts.py b/prointvar/sifts.py index 56d1820..91a6b20 100644 --- a/prointvar/sifts.py +++ b/prointvar/sifts.py @@ -18,6 +18,7 @@ from collections import OrderedDict from prointvar.utils import row_selector +from prointvar.utils import constrain_column_types from prointvar.library import sifts_types logger = logging.getLogger("prointvar") @@ -210,13 +211,7 @@ def parse_sifts_residues_from_file(inputfile, excluded=(), table = pd.DataFrame(rows) # enforce some specific column types - for col in table: - if col in sifts_types: - try: - table[col] = table[col].astype(sifts_types[col]) - except ValueError: - # there are some NaNs in there - pass + table = constrain_column_types(table, sifts_types) for c in list(table): if '_regionId' in c: diff --git a/prointvar/stamp.py b/prointvar/stamp.py index 21ee0c7..4a58432 100644 --- a/prointvar/stamp.py +++ b/prointvar/stamp.py @@ -22,6 +22,7 @@ from prointvar.pdbx import PDBXwriter +from prointvar.utils import constrain_column_types from prointvar.library import stamp_types from prointvar.config import config @@ -237,13 +238,7 @@ def parse_stamp_scan_scores_from_file(inputfile, excluded=()): pass # enforce some specific column types - for col in table: - if col in stamp_types: - try: - table[col] = table[col].astype(stamp_types[col]) - except ValueError: - # there are some NaNs in there - pass + table = constrain_column_types(table, stamp_types) if table.empty: raise ValueError('{} resulted in an empty DataFrame...'.format(inputfile)) diff --git a/prointvar/variants.py b/prointvar/variants.py index 1763d81..2440d22 100644 --- a/prointvar/variants.py +++ b/prointvar/variants.py @@ -28,6 +28,7 @@ from prointvar.utils import merging_down_by_key from prointvar.utils import flatten_nested_structure from prointvar.utils import refactor_key_val_singletons +from prointvar.utils import constrain_column_types from prointvar.library import uni_ens_var_types from prointvar.library import update_ensembl_to_uniprot @@ -67,13 +68,7 @@ def flatten_uniprot_variants_ebi(data, excluded=()): pass # enforce some specific column types - for col in table: - if col in uni_ens_var_types: - try: - table[col] = table[col].astype(uni_ens_var_types[col]) - except ValueError: - # there are some NaNs in there - pass + table = constrain_column_types(table, uni_ens_var_types) # split multi id rows table = splitting_up_by_key(table, key='xrefs_id') @@ -116,13 +111,7 @@ def flatten_ensembl_variants(data, excluded=(), synonymous=True): pass # enforce some specific column types - for col in table: - if col in uni_ens_var_types: - try: - table[col] = table[col].astype(uni_ens_var_types[col]) - except ValueError: - # there are some NaNs in there - pass + table = constrain_column_types(table, uni_ens_var_types) # split multi id rows table = splitting_up_by_key(table, key='xrefs_id')