Skip to content

Commit 0f0502a

Browse files
committed
[nomenclature._GPCRdbDataFrame2conlabs] recover GAIN labels even if scheme is not display_generic_number, tests
1 parent 897e1ae commit 0f0502a

File tree

4 files changed

+35
-2
lines changed

4 files changed

+35
-2
lines changed

mdciao/filenames/filenames.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ def __init__(self):
101101
self.adrb2_human_xlsx = _path.join(self.nomenclature_path,"adrb2_human.xlsx")
102102
self.nomenclature_bib = _path.join(self.nomenclature_path,"nomenclature.bib")
103103
self.KLIFS_P31751_xlsx = _path.join(self.nomenclature_path, "KLIFS_P31751.xlsx")
104+
self.agrg1_human_xlsx = _path.join(self.nomenclature_path, "agrg1_human.xlsx")
104105

105106
#json
106107
self.GDP_json = _path.join(self.json_path,"GDP.json")

mdciao/nomenclature/nomenclature.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,14 @@ def _GPCRdbDataFrame2conlabs(tablefile,
106106
assert scheme in df.keys(), ValueError("'%s' isn't an available scheme.\nAvailable schemes are %s" % (
107107
scheme, [key for key in df.keys() if key in _GPCR_available_schemes + ["display_generic_number"]]))
108108
AA2conlab = {key: str(val) for key, val in df[["AAresSeq", scheme]].values}
109+
dgn_other = df[df[scheme].isna() & ~df["display_generic_number"].isna()][["AAresSeq", "display_generic_number"]]
110+
if len(dgn_other)>0:
111+
AA2dgn = {key: str(val) for key, val in df[["AAresSeq", "display_generic_number"]].values}
112+
for ii, key in enumerate(AA2conlab.keys()):
113+
if str(AA2conlab[key]) == "None" and AA2dgn.get(key,None) is not None:
114+
AA2conlab[key]=AA2dgn[key]
115+
#print(f"Some labels of the 'display_generic_number' column would be lost by choosing scheme='{scheme}'.")
116+
109117
# Locate definition lines and use their indices
110118
fragments = _defdict(list)
111119

@@ -1246,7 +1254,7 @@ def __init__(self, UniProt_name,
12461254
including the extension "xlsx", then the lookup will
12471255
fail. This is what the `format` parameter is for
12481256
write_to_disk : bool, default is False
1249-
Save an excel file with the nomenclature
1257+
Save an Excel file with the nomenclature
12501258
information
12511259
"""
12521260

@@ -1258,7 +1266,8 @@ def __init__(self, UniProt_name,
12581266
write_to_disk=write_to_disk
12591267
)
12601268
# Re-introduce the "." in the GPS label
1261-
self._dataframe = self._dataframe.replace("B.GPS-2","B.GPS.-2").replace("B.GPS-1","B.GPS.-1").replace("B.GPS+1","B.GPS.+1")
1269+
self._dataframe = self._dataframe.replace("B.GPS-2", "B.GPS.-2").replace("B.GPS-1", "B.GPS.-1").replace("B.GPS+1", "B.GPS.+1")
1270+
12621271
# Check for GPS in the middle of S14
12631272
if "B.S14" in self.dataframe.protein_segment.values:
12641273
assert self.dataframe[self.dataframe.protein_segment == "B.S14"].index.diff().fillna(1).unique() == 1, (
35.1 KB
Binary file not shown.

tests/test_nomenclature.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,29 @@ def test_wont_fail_if_found_online_and_write_to_disk(self):
156156
_np.testing.assert_array_equal(list(df.keys())[:3], nomenclature._GPCR_mandatory_fields)
157157
assert any([key in df.keys() for key in nomenclature._GPCR_mandatory_fields]) # at least one scheme
158158

159+
class Test_GPCRdbDataFrame2conlabs_aGPCR(unittest.TestCase):
    """Tests for labelling an adhesion GPCR from a local spreadsheet.

    The focus is on the GAIN-domain labels, which must be recovered from
    the 'display_generic_number' column even when another scheme (here
    "BW") is requested.
    """

    def setUp(self):
        self.file = test_filenames.agrg1_human_xlsx

    @staticmethod
    def _GAIN_conlabs(labeler):
        """Return the entries of ``labeler.AA2conlab`` that carry a GAIN label.

        The selection is done on the *label* (the dict value), not on the
        residue code (the dict key): keys are AAresSeq codes such as
        "D396", whose first letter is the amino-acid type.

        NOTE(review): assumes GAIN labels start with "A." or "B."
        (e.g. "B.GPS.-2") -- confirm against the spreadsheet.
        """
        return {key: val for key, val in labeler.AA2conlab.items()
                if str(val).startswith(("A.", "B."))}

    def test_works(self):
        # Only checks that instantiation from the local file doesn't raise
        nomenclature.LabelerGPCRdb(self.file)

    def test_works_scheme_BW(self):
        CL_generic = nomenclature.LabelerGPCRdb(self.file)
        CL_BW = nomenclature.LabelerGPCRdb(self.file, scheme="BW")
        self.assertDictEqual(CL_generic.fragments, CL_BW.fragments)

        # Comparing the full CL_generic.AA2conlab with CL_BW.AA2conlab would
        # fail, because some residues of the TM bundle have different labels
        # on display_generic_number and BW, e.g 1.32x32 vs 1.28 on D396
        # protein_segment AAresSeq display_generic_number BW Wootten Pin Wang Fungal GPCRdb(A) GPCRdb(B) GPCRdb(C) GPCRdb(F) GPCRdb(D) Oliveira BS
        # TM1 D396 1.32x32 1.28 1.32 1.32 1.25 1.30 1.28x28 1.32x32 1.32x32 1.25x25 1.30x30 108 I:-05
        # We thus only test the GAIN domain
        generic_GAIN_conlab = self._GAIN_conlabs(CL_generic)
        BW_GAIN_conlab = self._GAIN_conlabs(CL_BW)
        # Guard against a vacuous pass if the selection comes out empty
        self.assertTrue(generic_GAIN_conlab)
        self.assertDictEqual(generic_GAIN_conlab, BW_GAIN_conlab)
181+
159182

160183
class Test_GPCRdbDataFrame2conlabs(unittest.TestCase):
161184
def setUp(self):

0 commit comments

Comments
 (0)