Skip to content

Commit

Permalink
Refactored the methods to use the new helper method 'exclude_columns'.
Browse files Browse the repository at this point in the history
  • Loading branch information
biomadeira committed Sep 19, 2017
1 parent c29be7d commit 7e4f556
Show file tree
Hide file tree
Showing 7 changed files with 23 additions and 71 deletions.
20 changes: 5 additions & 15 deletions prointvar/arpeggio.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
from prointvar.utils import row_selector
from prointvar.utils import string_split
from prointvar.utils import constrain_column_types
from prointvar.utils import exclude_columns
from prointvar.library import arpeggio_types
from prointvar.library import arpeggio_col_renames

Expand Down Expand Up @@ -98,13 +99,8 @@ def parse_arpeggio_from_file(inputfile, excluded=(), add_res_split=True,
table = add_special_cont_types(inputfile, table)
logger.info("Parsed special contact-types...")

if excluded is not None:
assert type(excluded) is tuple
try:
table = table.drop(list(excluded), axis=1)
except ValueError:
# most likely theses are not in there
pass
# excluding columns
table = exclude_columns(table, excluded=excluded)

# enforce some specific column types
table = constrain_column_types(table, arpeggio_types)
Expand Down Expand Up @@ -179,14 +175,8 @@ def parse_arpeggio_spec_from_file(inputfile, excluded=(), add_res_split=True,
if add_res_split:
table = add_arpeggio_res_split(table)

if excluded is not None:
excluded = tuple([k for k in excluded if k in header])
assert type(excluded) is tuple
try:
table = table.drop(list(excluded), axis=1)
except ValueError:
# most likely theses are not in there
pass
# excluding columns
table = exclude_columns(table, excluded=excluded)

if table.empty:
raise ValueError('{} resulted in an empty DataFrame...'.format(inputfile))
Expand Down
10 changes: 3 additions & 7 deletions prointvar/dssp.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from prointvar.utils import row_selector
from prointvar.utils import lazy_file_remover
from prointvar.utils import constrain_column_types
from prointvar.utils import exclude_columns
from prointvar.library import dssp_types

from prointvar.config import config
Expand Down Expand Up @@ -174,13 +175,8 @@ def parse_dssp_from_file(inputfile, excluded=(), add_full_chain=True, add_ss_red
table['LINE'] = table.index + 1
logger.info("DSSP reset residue number...")

if excluded is not None:
assert type(excluded) is tuple
try:
table = table.drop(list(excluded), axis=1)
except ValueError:
# most likely theses are not in there
pass
# excluding columns
table = exclude_columns(table, excluded=excluded)

# enforce some specific column types
table = constrain_column_types(table, dssp_types)
Expand Down
10 changes: 3 additions & 7 deletions prointvar/hbplus.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from prointvar.utils import row_selector
from prointvar.utils import lazy_file_remover
from prointvar.utils import constrain_column_types
from prointvar.utils import exclude_columns
from prointvar.library import hbplus_types

from prointvar.config import config
Expand Down Expand Up @@ -89,13 +90,8 @@ def fix_res_id(data, key):
table.INSCODE_D[table.INSCODE_D == "-"] = "?"
table.INSCODE_A[table.INSCODE_A == "-"] = "?"

if excluded is not None:
assert type(excluded) is tuple
try:
table = table.drop(list(excluded), axis=1)
except ValueError:
# most likely theses are not in there
pass
# excluding columns
table = exclude_columns(table, excluded=excluded)

# enforce some specific column types
table = constrain_column_types(table, hbplus_types)
Expand Down
9 changes: 2 additions & 7 deletions prointvar/msas.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

from prointvar.fetchers import fetch_uniprot_id_from_name
from prointvar.utils import constrain_column_types
from prointvar.utils import exclude_columns

logger = logging.getLogger("prointvar")

Expand Down Expand Up @@ -83,13 +84,7 @@ def parse_msa_sequences_from_file(inputfile, excluded=(), get_uniprot_id=False,
table = pd.DataFrame(rows)

# excluding columns
if excluded is not None:
assert type(excluded) is tuple
try:
table = table.drop(list(excluded), axis=1)
except ValueError:
# most likely theses are not in there
pass
table = exclude_columns(table, excluded=excluded)

# enforce some specific column types
msa_types = {key: str for key in list(table) if key != 'Start' and key != 'End'}
Expand Down
17 changes: 3 additions & 14 deletions prointvar/pdbx.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from prointvar.utils import get_new_pro_ids
from prointvar.utils import check_sequence
from prointvar.utils import constrain_column_types
from prointvar.utils import exclude_columns
from prointvar.library import mmcif_types
from prointvar.library import aa_default_atoms
from prointvar.library import aa_codes_3to1_extended
Expand Down Expand Up @@ -95,13 +96,7 @@ def parse_mmcif_atoms_from_file(inputfile, excluded=(), add_res_full=True,
keep_default_na=False)

# excluding columns
if excluded is not None:
assert type(excluded) is tuple
try:
table = table.drop(list(excluded), axis=1)
except ValueError:
# most likely theses are not in there
pass
table = exclude_columns(table, excluded=excluded)

# if only first model (>1 in NMR structures)
if first_model:
Expand Down Expand Up @@ -221,13 +216,7 @@ def parse_pdb_atoms_from_file(inputfile, excluded=(), add_contacts=False,
compression=None, converters=all_str, keep_default_na=False)

# excluding columns
if excluded is not None:
assert type(excluded) is tuple
try:
table = table.drop(list(excluded), axis=1)
except ValueError:
# most likely theses are not in there
pass
table = exclude_columns(table, excluded=excluded)

# if only first model (>1 in NMR structures)
if first_model:
Expand Down
9 changes: 2 additions & 7 deletions prointvar/stamp.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from prointvar.pdbx import PDBXwriter

from prointvar.utils import constrain_column_types
from prointvar.utils import exclude_columns
from prointvar.library import stamp_types

from prointvar.config import config
Expand Down Expand Up @@ -229,13 +230,7 @@ def parse_stamp_scan_scores_from_file(inputfile, excluded=()):
keep_default_na=False)

# excluding columns
if excluded is not None:
assert type(excluded) is tuple
try:
table = table.drop(list(excluded), axis=1)
except ValueError:
# most likely theses are not in there
pass
table = exclude_columns(table, excluded=excluded)

# enforce some specific column types
table = constrain_column_types(table, stamp_types)
Expand Down
19 changes: 5 additions & 14 deletions prointvar/variants.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from prointvar.utils import flatten_nested_structure
from prointvar.utils import refactor_key_val_singletons
from prointvar.utils import constrain_column_types
from prointvar.utils import exclude_columns
from prointvar.library import uni_ens_var_types
from prointvar.library import update_ensembl_to_uniprot

Expand Down Expand Up @@ -59,13 +60,8 @@ def flatten_uniprot_variants_ebi(data, excluded=()):

table = pd.DataFrame(var_rows)

if excluded is not None:
assert type(excluded) is tuple
try:
table = table.drop(list(excluded), axis=1)
except ValueError:
# most likely theses are not in there
pass
# excluding columns
table = exclude_columns(table, excluded=excluded)

# enforce some specific column types
table = constrain_column_types(table, uni_ens_var_types)
Expand Down Expand Up @@ -102,13 +98,8 @@ def flatten_ensembl_variants(data, excluded=(), synonymous=True):
# rename columns
table.rename(columns=update_ensembl_to_uniprot, inplace=True)

if excluded is not None:
assert type(excluded) is tuple
try:
table = table.drop(list(excluded), axis=1)
except ValueError:
# most likely theses are not in there
pass
# excluding columns
table = exclude_columns(table, excluded=excluded)

# enforce some specific column types
table = constrain_column_types(table, uni_ens_var_types)
Expand Down

0 comments on commit 7e4f556

Please sign in to comment.