Skip to content

Commit

Permalink
Refactored the methods to use the new helper method 'exclude_columns'.
Browse files Browse the repository at this point in the history
  • Loading branch information
biomadeira committed Sep 19, 2017
1 parent c29be7d commit 7e4f556
Show file tree
Hide file tree
Showing 7 changed files with 23 additions and 71 deletions.
20 changes: 5 additions & 15 deletions prointvar/arpeggio.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
from prointvar.utils import row_selector
from prointvar.utils import string_split
from prointvar.utils import constrain_column_types
from prointvar.utils import exclude_columns
from prointvar.library import arpeggio_types
from prointvar.library import arpeggio_col_renames

Expand Down Expand Up @@ -98,13 +99,8 @@ def parse_arpeggio_from_file(inputfile, excluded=(), add_res_split=True,
table = add_special_cont_types(inputfile, table)
logger.info("Parsed special contact-types...")

if excluded is not None:
assert type(excluded) is tuple
try:
table = table.drop(list(excluded), axis=1)
except ValueError:
# most likely theses are not in there
pass
# excluding columns
table = exclude_columns(table, excluded=excluded)

# enforce some specific column types
table = constrain_column_types(table, arpeggio_types)
Expand Down Expand Up @@ -179,14 +175,8 @@ def parse_arpeggio_spec_from_file(inputfile, excluded=(), add_res_split=True,
if add_res_split:
table = add_arpeggio_res_split(table)

if excluded is not None:
excluded = tuple([k for k in excluded if k in header])
assert type(excluded) is tuple
try:
table = table.drop(list(excluded), axis=1)
except ValueError:
# most likely theses are not in there
pass
# excluding columns
table = exclude_columns(table, excluded=excluded)

if table.empty:
raise ValueError('{} resulted in an empty DataFrame...'.format(inputfile))
Expand Down
10 changes: 3 additions & 7 deletions prointvar/dssp.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from prointvar.utils import row_selector
from prointvar.utils import lazy_file_remover
from prointvar.utils import constrain_column_types
from prointvar.utils import exclude_columns
from prointvar.library import dssp_types

from prointvar.config import config
Expand Down Expand Up @@ -174,13 +175,8 @@ def parse_dssp_from_file(inputfile, excluded=(), add_full_chain=True, add_ss_red
table['LINE'] = table.index + 1
logger.info("DSSP reset residue number...")

if excluded is not None:
assert type(excluded) is tuple
try:
table = table.drop(list(excluded), axis=1)
except ValueError:
# most likely theses are not in there
pass
# excluding columns
table = exclude_columns(table, excluded=excluded)

# enforce some specific column types
table = constrain_column_types(table, dssp_types)
Expand Down
10 changes: 3 additions & 7 deletions prointvar/hbplus.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from prointvar.utils import row_selector
from prointvar.utils import lazy_file_remover
from prointvar.utils import constrain_column_types
from prointvar.utils import exclude_columns
from prointvar.library import hbplus_types

from prointvar.config import config
Expand Down Expand Up @@ -89,13 +90,8 @@ def fix_res_id(data, key):
table.INSCODE_D[table.INSCODE_D == "-"] = "?"
table.INSCODE_A[table.INSCODE_A == "-"] = "?"

if excluded is not None:
assert type(excluded) is tuple
try:
table = table.drop(list(excluded), axis=1)
except ValueError:
# most likely theses are not in there
pass
# excluding columns
table = exclude_columns(table, excluded=excluded)

# enforce some specific column types
table = constrain_column_types(table, hbplus_types)
Expand Down
9 changes: 2 additions & 7 deletions prointvar/msas.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

from prointvar.fetchers import fetch_uniprot_id_from_name
from prointvar.utils import constrain_column_types
from prointvar.utils import exclude_columns

logger = logging.getLogger("prointvar")

Expand Down Expand Up @@ -83,13 +84,7 @@ def parse_msa_sequences_from_file(inputfile, excluded=(), get_uniprot_id=False,
table = pd.DataFrame(rows)

# excluding columns
if excluded is not None:
assert type(excluded) is tuple
try:
table = table.drop(list(excluded), axis=1)
except ValueError:
# most likely theses are not in there
pass
table = exclude_columns(table, excluded=excluded)

# enforce some specific column types
msa_types = {key: str for key in list(table) if key != 'Start' and key != 'End'}
Expand Down
17 changes: 3 additions & 14 deletions prointvar/pdbx.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from prointvar.utils import get_new_pro_ids
from prointvar.utils import check_sequence
from prointvar.utils import constrain_column_types
from prointvar.utils import exclude_columns
from prointvar.library import mmcif_types
from prointvar.library import aa_default_atoms
from prointvar.library import aa_codes_3to1_extended
Expand Down Expand Up @@ -95,13 +96,7 @@ def parse_mmcif_atoms_from_file(inputfile, excluded=(), add_res_full=True,
keep_default_na=False)

# excluding columns
if excluded is not None:
assert type(excluded) is tuple
try:
table = table.drop(list(excluded), axis=1)
except ValueError:
# most likely theses are not in there
pass
table = exclude_columns(table, excluded=excluded)

# if only first model (>1 in NMR structures)
if first_model:
Expand Down Expand Up @@ -221,13 +216,7 @@ def parse_pdb_atoms_from_file(inputfile, excluded=(), add_contacts=False,
compression=None, converters=all_str, keep_default_na=False)

# excluding columns
if excluded is not None:
assert type(excluded) is tuple
try:
table = table.drop(list(excluded), axis=1)
except ValueError:
# most likely theses are not in there
pass
table = exclude_columns(table, excluded=excluded)

# if only first model (>1 in NMR structures)
if first_model:
Expand Down
9 changes: 2 additions & 7 deletions prointvar/stamp.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from prointvar.pdbx import PDBXwriter

from prointvar.utils import constrain_column_types
from prointvar.utils import exclude_columns
from prointvar.library import stamp_types

from prointvar.config import config
Expand Down Expand Up @@ -229,13 +230,7 @@ def parse_stamp_scan_scores_from_file(inputfile, excluded=()):
keep_default_na=False)

# excluding columns
if excluded is not None:
assert type(excluded) is tuple
try:
table = table.drop(list(excluded), axis=1)
except ValueError:
# most likely theses are not in there
pass
table = exclude_columns(table, excluded=excluded)

# enforce some specific column types
table = constrain_column_types(table, stamp_types)
Expand Down
19 changes: 5 additions & 14 deletions prointvar/variants.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from prointvar.utils import flatten_nested_structure
from prointvar.utils import refactor_key_val_singletons
from prointvar.utils import constrain_column_types
from prointvar.utils import exclude_columns
from prointvar.library import uni_ens_var_types
from prointvar.library import update_ensembl_to_uniprot

Expand Down Expand Up @@ -59,13 +60,8 @@ def flatten_uniprot_variants_ebi(data, excluded=()):

table = pd.DataFrame(var_rows)

if excluded is not None:
assert type(excluded) is tuple
try:
table = table.drop(list(excluded), axis=1)
except ValueError:
# most likely theses are not in there
pass
# excluding columns
table = exclude_columns(table, excluded=excluded)

# enforce some specific column types
table = constrain_column_types(table, uni_ens_var_types)
Expand Down Expand Up @@ -102,13 +98,8 @@ def flatten_ensembl_variants(data, excluded=(), synonymous=True):
# rename columns
table.rename(columns=update_ensembl_to_uniprot, inplace=True)

if excluded is not None:
assert type(excluded) is tuple
try:
table = table.drop(list(excluded), axis=1)
except ValueError:
# most likely theses are not in there
pass
# excluding columns
table = exclude_columns(table, excluded=excluded)

# enforce some specific column types
table = constrain_column_types(table, uni_ens_var_types)
Expand Down

0 comments on commit 7e4f556

Please sign in to comment.