From 7e4f556ffdc8580daea99bf971d945f33c36a210 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fa=CC=81bio=20Madeira?= <fbiomadeira@gmail.com>
Date: Tue, 19 Sep 2017 15:24:15 +0100
Subject: [PATCH] Refactored the methods to use the new helper method
 'exclude_columns'.

---
 prointvar/arpeggio.py | 20 +++++---------------
 prointvar/dssp.py     | 10 +++-------
 prointvar/hbplus.py   | 10 +++-------
 prointvar/msas.py     |  9 ++-------
 prointvar/pdbx.py     | 17 +++--------------
 prointvar/stamp.py    |  9 ++-------
 prointvar/variants.py | 19 +++++--------------
 7 files changed, 23 insertions(+), 71 deletions(-)

diff --git a/prointvar/arpeggio.py b/prointvar/arpeggio.py
index cd77db6..8575cad 100644
--- a/prointvar/arpeggio.py
+++ b/prointvar/arpeggio.py
@@ -40,6 +40,7 @@
 from prointvar.utils import row_selector
 from prointvar.utils import string_split
 from prointvar.utils import constrain_column_types
+from prointvar.utils import exclude_columns
 from prointvar.library import arpeggio_types
 from prointvar.library import arpeggio_col_renames
 
@@ -98,13 +99,8 @@ def parse_arpeggio_from_file(inputfile, excluded=(), add_res_split=True,
         table = add_special_cont_types(inputfile, table)
         logger.info("Parsed special contact-types...")
 
-    if excluded is not None:
-        assert type(excluded) is tuple
-        try:
-            table = table.drop(list(excluded), axis=1)
-        except ValueError:
-            # most likely theses are not in there
-            pass
+    # excluding columns
+    table = exclude_columns(table, excluded=excluded)
 
     # enforce some specific column types
     table = constrain_column_types(table, arpeggio_types)
@@ -179,14 +175,8 @@ def parse_arpeggio_spec_from_file(inputfile, excluded=(), add_res_split=True,
     if add_res_split:
         table = add_arpeggio_res_split(table)
 
-    if excluded is not None:
-        excluded = tuple([k for k in excluded if k in header])
-        assert type(excluded) is tuple
-        try:
-            table = table.drop(list(excluded), axis=1)
-        except ValueError:
-            # most likely theses are not in there
-            pass
+    # excluding columns
+    table = exclude_columns(table, excluded=excluded)
 
     if table.empty:
         raise ValueError('{} resulted in an empty DataFrame...'.format(inputfile))
diff --git a/prointvar/dssp.py b/prointvar/dssp.py
index eaf8e9f..ace42ea 100644
--- a/prointvar/dssp.py
+++ b/prointvar/dssp.py
@@ -25,6 +25,7 @@
 from prointvar.utils import row_selector
 from prointvar.utils import lazy_file_remover
 from prointvar.utils import constrain_column_types
+from prointvar.utils import exclude_columns
 from prointvar.library import dssp_types
 
 from prointvar.config import config
@@ -174,13 +175,8 @@ def parse_dssp_from_file(inputfile, excluded=(), add_full_chain=True, add_ss_red
         table['LINE'] = table.index + 1
         logger.info("DSSP reset residue number...")
 
-    if excluded is not None:
-        assert type(excluded) is tuple
-        try:
-            table = table.drop(list(excluded), axis=1)
-        except ValueError:
-            # most likely theses are not in there
-            pass
+    # excluding columns
+    table = exclude_columns(table, excluded=excluded)
 
     # enforce some specific column types
     table = constrain_column_types(table, dssp_types)
diff --git a/prointvar/hbplus.py b/prointvar/hbplus.py
index ab38a3c..f6a318a 100644
--- a/prointvar/hbplus.py
+++ b/prointvar/hbplus.py
@@ -20,6 +20,7 @@
 from prointvar.utils import row_selector
 from prointvar.utils import lazy_file_remover
 from prointvar.utils import constrain_column_types
+from prointvar.utils import exclude_columns
 from prointvar.library import hbplus_types
 
 from prointvar.config import config
@@ -89,13 +90,8 @@ def fix_res_id(data, key):
     table.INSCODE_D[table.INSCODE_D == "-"] = "?"
     table.INSCODE_A[table.INSCODE_A == "-"] = "?"
 
-    if excluded is not None:
-        assert type(excluded) is tuple
-        try:
-            table = table.drop(list(excluded), axis=1)
-        except ValueError:
-            # most likely theses are not in there
-            pass
+    # excluding columns
+    table = exclude_columns(table, excluded=excluded)
 
     # enforce some specific column types
     table = constrain_column_types(table, hbplus_types)
diff --git a/prointvar/msas.py b/prointvar/msas.py
index d0bc0d0..5521b05 100644
--- a/prointvar/msas.py
+++ b/prointvar/msas.py
@@ -21,6 +21,7 @@
 
 from prointvar.fetchers import fetch_uniprot_id_from_name
 from prointvar.utils import constrain_column_types
+from prointvar.utils import exclude_columns
 
 logger = logging.getLogger("prointvar")
 
@@ -83,13 +84,7 @@ def parse_msa_sequences_from_file(inputfile, excluded=(), get_uniprot_id=False,
     table = pd.DataFrame(rows)
 
     # excluding columns
-    if excluded is not None:
-        assert type(excluded) is tuple
-        try:
-            table = table.drop(list(excluded), axis=1)
-        except ValueError:
-            # most likely theses are not in there
-            pass
+    table = exclude_columns(table, excluded=excluded)
 
     # enforce some specific column types
     msa_types = {key: str for key in list(table) if key != 'Start' and key != 'End'}
diff --git a/prointvar/pdbx.py b/prointvar/pdbx.py
index 02e0b65..af5a580 100644
--- a/prointvar/pdbx.py
+++ b/prointvar/pdbx.py
@@ -26,6 +26,7 @@
 from prointvar.utils import get_new_pro_ids
 from prointvar.utils import check_sequence
 from prointvar.utils import constrain_column_types
+from prointvar.utils import exclude_columns
 from prointvar.library import mmcif_types
 from prointvar.library import aa_default_atoms
 from prointvar.library import aa_codes_3to1_extended
@@ -95,13 +96,7 @@ def parse_mmcif_atoms_from_file(inputfile, excluded=(), add_res_full=True,
                           keep_default_na=False)
 
     # excluding columns
-    if excluded is not None:
-        assert type(excluded) is tuple
-        try:
-            table = table.drop(list(excluded), axis=1)
-        except ValueError:
-            # most likely theses are not in there
-            pass
+    table = exclude_columns(table, excluded=excluded)
 
     # if only first model (>1 in NMR structures)
     if first_model:
@@ -221,13 +216,7 @@ def parse_pdb_atoms_from_file(inputfile, excluded=(), add_contacts=False,
                         compression=None, converters=all_str, keep_default_na=False)
 
     # excluding columns
-    if excluded is not None:
-        assert type(excluded) is tuple
-        try:
-            table = table.drop(list(excluded), axis=1)
-        except ValueError:
-            # most likely theses are not in there
-            pass
+    table = exclude_columns(table, excluded=excluded)
 
     # if only first model (>1 in NMR structures)
     if first_model:
diff --git a/prointvar/stamp.py b/prointvar/stamp.py
index 4a58432..d00b1f0 100644
--- a/prointvar/stamp.py
+++ b/prointvar/stamp.py
@@ -23,6 +23,7 @@
 from prointvar.pdbx import PDBXwriter
 
 from prointvar.utils import constrain_column_types
+from prointvar.utils import exclude_columns
 from prointvar.library import stamp_types
 
 from prointvar.config import config
@@ -229,13 +230,7 @@ def parse_stamp_scan_scores_from_file(inputfile, excluded=()):
                           keep_default_na=False)
 
     # excluding columns
-    if excluded is not None:
-        assert type(excluded) is tuple
-        try:
-            table = table.drop(list(excluded), axis=1)
-        except ValueError:
-            # most likely theses are not in there
-            pass
+    table = exclude_columns(table, excluded=excluded)
 
     # enforce some specific column types
     table = constrain_column_types(table, stamp_types)
diff --git a/prointvar/variants.py b/prointvar/variants.py
index 2440d22..f265ccd 100644
--- a/prointvar/variants.py
+++ b/prointvar/variants.py
@@ -29,6 +29,7 @@
 from prointvar.utils import flatten_nested_structure
 from prointvar.utils import refactor_key_val_singletons
 from prointvar.utils import constrain_column_types
+from prointvar.utils import exclude_columns
 from prointvar.library import uni_ens_var_types
 from prointvar.library import update_ensembl_to_uniprot
 
@@ -59,13 +60,8 @@ def flatten_uniprot_variants_ebi(data, excluded=()):
 
     table = pd.DataFrame(var_rows)
 
-    if excluded is not None:
-        assert type(excluded) is tuple
-        try:
-            table = table.drop(list(excluded), axis=1)
-        except ValueError:
-            # most likely theses are not in there
-            pass
+    # excluding columns
+    table = exclude_columns(table, excluded=excluded)
 
     # enforce some specific column types
     table = constrain_column_types(table, uni_ens_var_types)
@@ -102,13 +98,8 @@ def flatten_ensembl_variants(data, excluded=(), synonymous=True):
     # rename columns
     table.rename(columns=update_ensembl_to_uniprot, inplace=True)
 
-    if excluded is not None:
-        assert type(excluded) is tuple
-        try:
-            table = table.drop(list(excluded), axis=1)
-        except ValueError:
-            # most likely theses are not in there
-            pass
+    # excluding columns
+    table = exclude_columns(table, excluded=excluded)
 
     # enforce some specific column types
     table = constrain_column_types(table, uni_ens_var_types)