From 7e4f556ffdc8580daea99bf971d945f33c36a210 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fa=CC=81bio=20Madeira?= Date: Tue, 19 Sep 2017 15:24:15 +0100 Subject: [PATCH] Refactored the methods to use the new helper method 'exclude_columns'. --- prointvar/arpeggio.py | 20 +++++--------------- prointvar/dssp.py | 10 +++------- prointvar/hbplus.py | 10 +++------- prointvar/msas.py | 9 ++------- prointvar/pdbx.py | 17 +++-------------- prointvar/stamp.py | 9 ++------- prointvar/variants.py | 19 +++++-------------- 7 files changed, 23 insertions(+), 71 deletions(-) diff --git a/prointvar/arpeggio.py b/prointvar/arpeggio.py index cd77db6..8575cad 100644 --- a/prointvar/arpeggio.py +++ b/prointvar/arpeggio.py @@ -40,6 +40,7 @@ from prointvar.utils import row_selector from prointvar.utils import string_split from prointvar.utils import constrain_column_types +from prointvar.utils import exclude_columns from prointvar.library import arpeggio_types from prointvar.library import arpeggio_col_renames @@ -98,13 +99,8 @@ def parse_arpeggio_from_file(inputfile, excluded=(), add_res_split=True, table = add_special_cont_types(inputfile, table) logger.info("Parsed special contact-types...") - if excluded is not None: - assert type(excluded) is tuple - try: - table = table.drop(list(excluded), axis=1) - except ValueError: - # most likely theses are not in there - pass + # excluding columns + table = exclude_columns(table, excluded=excluded) # enforce some specific column types table = constrain_column_types(table, arpeggio_types) @@ -179,14 +175,8 @@ def parse_arpeggio_spec_from_file(inputfile, excluded=(), add_res_split=True, if add_res_split: table = add_arpeggio_res_split(table) - if excluded is not None: - excluded = tuple([k for k in excluded if k in header]) - assert type(excluded) is tuple - try: - table = table.drop(list(excluded), axis=1) - except ValueError: - # most likely theses are not in there - pass + # excluding columns + table = exclude_columns(table, excluded=excluded) 
if table.empty: raise ValueError('{} resulted in an empty DataFrame...'.format(inputfile)) diff --git a/prointvar/dssp.py b/prointvar/dssp.py index eaf8e9f..ace42ea 100644 --- a/prointvar/dssp.py +++ b/prointvar/dssp.py @@ -25,6 +25,7 @@ from prointvar.utils import row_selector from prointvar.utils import lazy_file_remover from prointvar.utils import constrain_column_types +from prointvar.utils import exclude_columns from prointvar.library import dssp_types from prointvar.config import config @@ -174,13 +175,8 @@ def parse_dssp_from_file(inputfile, excluded=(), add_full_chain=True, add_ss_red table['LINE'] = table.index + 1 logger.info("DSSP reset residue number...") - if excluded is not None: - assert type(excluded) is tuple - try: - table = table.drop(list(excluded), axis=1) - except ValueError: - # most likely theses are not in there - pass + # excluding columns + table = exclude_columns(table, excluded=excluded) # enforce some specific column types table = constrain_column_types(table, dssp_types) diff --git a/prointvar/hbplus.py b/prointvar/hbplus.py index ab38a3c..f6a318a 100644 --- a/prointvar/hbplus.py +++ b/prointvar/hbplus.py @@ -20,6 +20,7 @@ from prointvar.utils import row_selector from prointvar.utils import lazy_file_remover from prointvar.utils import constrain_column_types +from prointvar.utils import exclude_columns from prointvar.library import hbplus_types from prointvar.config import config @@ -89,13 +90,8 @@ def fix_res_id(data, key): table.INSCODE_D[table.INSCODE_D == "-"] = "?" table.INSCODE_A[table.INSCODE_A == "-"] = "?" 
- if excluded is not None: - assert type(excluded) is tuple - try: - table = table.drop(list(excluded), axis=1) - except ValueError: - # most likely theses are not in there - pass + # excluding columns + table = exclude_columns(table, excluded=excluded) # enforce some specific column types table = constrain_column_types(table, hbplus_types) diff --git a/prointvar/msas.py b/prointvar/msas.py index d0bc0d0..5521b05 100644 --- a/prointvar/msas.py +++ b/prointvar/msas.py @@ -21,6 +21,7 @@ from prointvar.fetchers import fetch_uniprot_id_from_name from prointvar.utils import constrain_column_types +from prointvar.utils import exclude_columns logger = logging.getLogger("prointvar") @@ -83,13 +84,7 @@ def parse_msa_sequences_from_file(inputfile, excluded=(), get_uniprot_id=False, table = pd.DataFrame(rows) # excluding columns - if excluded is not None: - assert type(excluded) is tuple - try: - table = table.drop(list(excluded), axis=1) - except ValueError: - # most likely theses are not in there - pass + table = exclude_columns(table, excluded=excluded) # enforce some specific column types msa_types = {key: str for key in list(table) if key != 'Start' and key != 'End'} diff --git a/prointvar/pdbx.py b/prointvar/pdbx.py index 02e0b65..af5a580 100644 --- a/prointvar/pdbx.py +++ b/prointvar/pdbx.py @@ -26,6 +26,7 @@ from prointvar.utils import get_new_pro_ids from prointvar.utils import check_sequence from prointvar.utils import constrain_column_types +from prointvar.utils import exclude_columns from prointvar.library import mmcif_types from prointvar.library import aa_default_atoms from prointvar.library import aa_codes_3to1_extended @@ -95,13 +96,7 @@ def parse_mmcif_atoms_from_file(inputfile, excluded=(), add_res_full=True, keep_default_na=False) # excluding columns - if excluded is not None: - assert type(excluded) is tuple - try: - table = table.drop(list(excluded), axis=1) - except ValueError: - # most likely theses are not in there - pass + table = 
exclude_columns(table, excluded=excluded) # if only first model (>1 in NMR structures) if first_model: @@ -221,13 +216,7 @@ def parse_pdb_atoms_from_file(inputfile, excluded=(), add_contacts=False, compression=None, converters=all_str, keep_default_na=False) # excluding columns - if excluded is not None: - assert type(excluded) is tuple - try: - table = table.drop(list(excluded), axis=1) - except ValueError: - # most likely theses are not in there - pass + table = exclude_columns(table, excluded=excluded) # if only first model (>1 in NMR structures) if first_model: diff --git a/prointvar/stamp.py b/prointvar/stamp.py index 4a58432..d00b1f0 100644 --- a/prointvar/stamp.py +++ b/prointvar/stamp.py @@ -23,6 +23,7 @@ from prointvar.pdbx import PDBXwriter from prointvar.utils import constrain_column_types +from prointvar.utils import exclude_columns from prointvar.library import stamp_types from prointvar.config import config @@ -229,13 +230,7 @@ def parse_stamp_scan_scores_from_file(inputfile, excluded=()): keep_default_na=False) # excluding columns - if excluded is not None: - assert type(excluded) is tuple - try: - table = table.drop(list(excluded), axis=1) - except ValueError: - # most likely theses are not in there - pass + table = exclude_columns(table, excluded=excluded) # enforce some specific column types table = constrain_column_types(table, stamp_types) diff --git a/prointvar/variants.py b/prointvar/variants.py index 2440d22..f265ccd 100644 --- a/prointvar/variants.py +++ b/prointvar/variants.py @@ -29,6 +29,7 @@ from prointvar.utils import flatten_nested_structure from prointvar.utils import refactor_key_val_singletons from prointvar.utils import constrain_column_types +from prointvar.utils import exclude_columns from prointvar.library import uni_ens_var_types from prointvar.library import update_ensembl_to_uniprot @@ -59,13 +60,8 @@ def flatten_uniprot_variants_ebi(data, excluded=()): table = pd.DataFrame(var_rows) - if excluded is not None: - assert 
type(excluded) is tuple - try: - table = table.drop(list(excluded), axis=1) - except ValueError: - # most likely theses are not in there - pass + # excluding columns + table = exclude_columns(table, excluded=excluded) # enforce some specific column types table = constrain_column_types(table, uni_ens_var_types) @@ -102,13 +98,8 @@ def flatten_ensembl_variants(data, excluded=(), synonymous=True): # rename columns table.rename(columns=update_ensembl_to_uniprot, inplace=True) - if excluded is not None: - assert type(excluded) is tuple - try: - table = table.drop(list(excluded), axis=1) - except ValueError: - # most likely theses are not in there - pass + # excluding columns + table = exclude_columns(table, excluded=excluded) # enforce some specific column types table = constrain_column_types(table, uni_ens_var_types)