Skip to content

Commit

Permalink
Simplified the parse_pdb_atoms function fix methods. Also improved…
Browse files Browse the repository at this point in the history
… their corresponding tests.
  • Loading branch information
biomadeira committed Oct 4, 2017
1 parent 9e03152 commit 259f03b
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 50 deletions.
37 changes: 16 additions & 21 deletions prointvar/pdbx.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,8 @@ def parse_pdb_atoms_from_file(inputfile, excluded=(), add_contacts=False,
dist=5, first_model=True, add_atom_altloc=False,
remove_altloc=False, remove_hydrogens=True,
reset_atom_id=True, add_new_pro_id=False,
remove_partial_res=True):
remove_partial_res=True, fix_label_alt_id=True,
fix_ins_code=True, fix_type_symbol=True):
"""
Parse PDB ATOM and HETATM lines.
Expand All @@ -166,6 +167,9 @@ def parse_pdb_atoms_from_file(inputfile, excluded=(), add_contacts=False,
:param reset_atom_id: boolean
:param add_new_pro_id: (boolean) used for chain_id mapping
:param remove_partial_res: (boolean) removes amino acids with missing atoms
:param fix_label_alt_id: boolean
:param fix_ins_code: boolean
:param fix_type_symbol: boolean
:return: returns a pandas DataFrame
"""

Expand Down Expand Up @@ -222,11 +226,14 @@ def parse_pdb_atoms_from_file(inputfile, excluded=(), add_contacts=False,
table = row_selector(table, key='pdbx_PDB_model_num', value='first')

# fixes the 'pdbx_PDB_ins_code'
table = fix_pdb_ins_code(table)
if fix_ins_code:
table = pdb_fix_pdb_ins_code(table)
# fixes the 'label_alt_id'
table = fix_label_alt_id(table)
if fix_label_alt_id:
table = pdb_fix_label_alt_id(table)
# fixes 'type_symbol' if missing
table = fix_type_symbol(table)
if fix_type_symbol:
table = pdb_fix_type_symbol(table)

# table modular extensions
if add_contacts:
Expand Down Expand Up @@ -541,7 +548,7 @@ def add_mmcif_res_full(data):
return table


def fix_pdb_ins_code(data):
def pdb_fix_pdb_ins_code(data):
"""
Utility that fixes the 'pdbx_PDB_ins_code' column to match what is
expected in the mmCIF format.
Expand All @@ -551,18 +558,12 @@ def fix_pdb_ins_code(data):
"""

table = data
ins_codes = []
for i in table.index:
value = table.loc[i, "pdbx_PDB_ins_code"]
if value == '' or value == ' ' or value == '?':
value = '?'
ins_codes.append(value)
table["pdbx_PDB_ins_code"] = ins_codes
table['pdbx_PDB_ins_code'] = table['pdbx_PDB_ins_code'].str.replace('| ', '?')
table['pdbx_PDB_ins_code'] = table['pdbx_PDB_ins_code'].fillna('?').astype(str)
return table


def fix_label_alt_id(data):
def pdb_fix_label_alt_id(data):
"""
Utility that fixes the 'label_alt_id' column to match what is
expected in the mmCIF format.
Expand All @@ -572,18 +573,12 @@ def fix_label_alt_id(data):
"""

table = data
alt_locs = []
for i in table.index:
value = table.loc[i, "label_alt_id"]
if value == '' or value == ' ' or value == '?':
value = '.'
alt_locs.append(value)
table["label_alt_id"] = alt_locs
table['label_alt_id'] = table['label_alt_id'].str.replace('""|" "|"?"', '.')
table['label_alt_id'] = table['label_alt_id'].fillna('.').astype(str)
return table


def fix_type_symbol(data):
def pdb_fix_type_symbol(data):
"""
Utility that fixes the 'type_symbol' column to match what is
expected in the mmCIF format - when missing in the Structure.
Expand Down
24 changes: 14 additions & 10 deletions tests/test_pdbx.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
add_mmcif_atom_altloc, residues_aggregation,
remove_multiple_altlocs, add_mmcif_new_pro_ids,
remove_partial_residues, get_coordinates, get_sequence,
fix_label_alt_id, fix_pdb_ins_code, fix_type_symbol,
pdb_fix_label_alt_id, pdb_fix_pdb_ins_code, pdb_fix_type_symbol,
get_real_residue_ranges)

from prointvar.config import config as c
Expand Down Expand Up @@ -72,9 +72,9 @@ def setUp(self):
self.remove_altloc = remove_multiple_altlocs
self.add_mmcif_new_pro_ids = add_mmcif_new_pro_ids
self.remove_partial_residues = remove_partial_residues
self.fix_label_alt_id = fix_label_alt_id
self.fix_pdb_ins_code = fix_pdb_ins_code
self.fix_type_symbol = fix_type_symbol
self.fix_label_alt_id = pdb_fix_label_alt_id
self.fix_pdb_ins_code = pdb_fix_pdb_ins_code
self.fix_type_symbol = pdb_fix_type_symbol
self.get_coordinates = get_coordinates
self.get_sequence = get_sequence
self.get_real_residue_ranges = get_real_residue_ranges
Expand Down Expand Up @@ -569,23 +569,27 @@ def test_remove_partial_res(self):
self.assertEqual(8, list(data.label_seq_id.tolist()).count('25'))

def test_fix_label_alt_id(self):
reader = self.reader(self.inputpdb2)
data = reader.atoms(format_type='pdb')
data = self.parser_pdb(self.inputpdb2, fix_label_alt_id=False)
self.assertEqual(1, data.loc[0, 'id'])
self.assertEqual('N', data.loc[0, 'label_atom_id'])
self.assertEqual('A', data.loc[0, 'label_alt_id'])
data = self.fix_label_alt_id(data)
self.assertEqual(1, data.loc[0, 'id'])
self.assertEqual('N', data.loc[0, 'label_atom_id'])
self.assertEqual('A', data.loc[0, 'label_alt_id'])

def test_fix_pdb_ins_code(self):
reader = self.reader(self.inputpdb2)
data = reader.atoms(format_type='pdb')
data = self.parser_pdb(self.inputpdb2, fix_ins_code=False)
self.assertEqual(1, data.loc[0, 'id'])
self.assertEqual('', data.loc[0, 'pdbx_PDB_ins_code'])
data = self.fix_pdb_ins_code(data)
self.assertEqual(1, data.loc[0, 'id'])
self.assertEqual('?', data.loc[0, 'pdbx_PDB_ins_code'])

def test_fix_type_symbol(self):
reader = self.reader(self.inputpdb2)
data = reader.atoms(format_type='pdb')
data = self.parser_pdb(self.inputpdb2, fix_type_symbol=False)
self.assertEqual(1, data.loc[0, 'id'])
self.assertEqual('', data.loc[0, 'type_symbol'])
data = self.fix_type_symbol(data)
self.assertEqual(1, data.loc[0, 'id'])
self.assertEqual('N', data.loc[0, 'type_symbol'])
Expand Down
38 changes: 19 additions & 19 deletions tests/testdata/pdbx/1ejg.pdb
Original file line number Diff line number Diff line change
Expand Up @@ -273,25 +273,25 @@ ORIGX3 0.000000 0.000000 1.000000 0.00000
SCALE1 0.024495 0.000000 0.000201 0.00000
SCALE2 0.000000 0.054060 0.000000 0.00000
SCALE3 0.000000 0.000000 0.044702 0.00000
ATOM 1 N ATHR A 1 16.885 14.078 3.427 0.82 4.48 N
ANISOU 1 N ATHR A 1 434 531 735 201 133 -28 N
ATOM 2 N BTHR A 1 17.553 14.234 4.214 0.18 5.51 N
ATOM 3 CA ATHR A 1 16.938 12.834 4.234 0.82 3.12 C
ANISOU 3 CA ATHR A 1 305 437 441 39 103 -65 C
ATOM 4 CA BTHR A 1 16.985 12.901 4.228 0.18 16.71 C
ATOM 5 C THR A 1 15.642 12.760 4.985 1.00 3.28 C
ANISOU 5 C THR A 1 338 431 477 -32 137 -43 C
ATOM 6 O THR A 1 15.150 13.818 5.439 1.00 5.91 O
ANISOU 6 O THR A 1 476 455 1311 -213 289 -42 O
ATOM 7 CB ATHR A 1 18.103 12.875 5.202 0.82 4.05 C
ANISOU 7 CB ATHR A 1 249 662 627 23 60 -110 C
ATOM 8 CB BTHR A 1 17.983 11.950 4.917 0.18 8.95 C
ATOM 9 OG1ATHR A 1 19.256 13.004 4.401 0.82 5.75 O
ANISOU 9 OG1ATHR A 1 294 1049 842 134 158 -125 O
ATOM 10 OG1BTHR A 1 17.792 10.543 4.939 0.18 9.94 O
ATOM 11 CG2ATHR A 1 18.173 11.582 6.055 0.82 5.30 C
ANISOU 11 CG2ATHR A 1 343 850 818 215 -11 -32 C
ATOM 12 CG2BTHR A 1 18.117 12.174 6.499 0.18 6.78 C
ATOM 1 N ATHR A 1 16.885 14.078 3.427 0.82 4.48
ANISOU 1 N ATHR A 1 434 531 735 201 133 -28
ATOM 2 N BTHR A 1 17.553 14.234 4.214 0.18 5.51
ATOM 3 CA ATHR A 1 16.938 12.834 4.234 0.82 3.12
ANISOU 3 CA ATHR A 1 305 437 441 39 103 -65
ATOM 4 CA BTHR A 1 16.985 12.901 4.228 0.18 16.71
ATOM 5 C THR A 1 15.642 12.760 4.985 1.00 3.28
ANISOU 5 C THR A 1 338 431 477 -32 137 -43
ATOM 6 O THR A 1 15.150 13.818 5.439 1.00 5.91
ANISOU 6 O THR A 1 476 455 1311 -213 289 -42
ATOM 7 CB ATHR A 1 18.103 12.875 5.202 0.82 4.05
ANISOU 7 CB ATHR A 1 249 662 627 23 60 -110
ATOM 8 CB BTHR A 1 17.983 11.950 4.917 0.18 8.95
ATOM 9 OG1ATHR A 1 19.256 13.004 4.401 0.82 5.75
ANISOU 9 OG1ATHR A 1 294 1049 842 134 158 -125
ATOM 10 OG1BTHR A 1 17.792 10.543 4.939 0.18 9.94
ATOM 11 CG2ATHR A 1 18.173 11.582 6.055 0.82 5.30
ANISOU 11 CG2ATHR A 1 343 850 818 215 -11 -32
ATOM 12 CG2BTHR A 1 18.117 12.174 6.499 0.18 6.78
ATOM 13 H1 ATHR A 1 17.705 14.139 2.840 0.82 6.68 H
ATOM 14 H1 BTHR A 1 18.510 14.188 3.898 0.18 7.89 H
ATOM 15 H2 ATHR A 1 16.879 14.894 4.049 0.82 6.68 H
Expand Down

0 comments on commit 259f03b

Please sign in to comment.