Skip to content

Commit 47d62b2

Browse files
Merge pull request #36 from dominiquesydow/collapse-envpartners
Collapse envpartners with same residue and superfeature
2 parents 8e9c5b6 + e4ca76f commit 47d62b2

File tree

8 files changed

+277
-215
lines changed

8 files changed

+277
-215
lines changed

docs/tutorials/explore_plots.ipynb

Lines changed: 12 additions & 199 deletions
Large diffs are not rendered by default.

dynophores/core/envpartner.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,18 @@ def __init__(
4242
self.occurrences = occurrences
4343
self.distances = distances
4444

45+
@property
46+
def residue_id(self):
47+
"""
48+
Get the residue's ID (residue name - residue number - chain).
49+
50+
Returns
51+
-------
52+
str
53+
Residue's ID.
54+
"""
55+
return f"{self.residue_name}-{self.residue_number}-{self.chain}"
56+
4557
@property
4658
def n_frames(self):
4759
"""

dynophores/core/superfeature.py

Lines changed: 139 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -62,10 +62,39 @@ def envpartners_occurrences(self):
6262
-------
6363
pandas.DataFrame
6464
Occurrences (0=no, 1=yes) of an environmental partner (columns) in each frame (row).
65+
"""
66+
67+
return self._envpartners_occurrences(self._data(type="occurrences"))
68+
69+
@property
70+
def envpartners_occurrences_collapsed(self):
71+
"""
72+
Get the superfeature's environmental partners' occurrences per environmental partner and
73+
frame.
74+
If an environmental partner interacts multiple times with the same superfeature,
75+
aggregate them. This can happen if different atoms of an environmental partner are involved
76+
in the same superfeature.
77+
78+
Returns
79+
-------
80+
pandas.DataFrame
81+
Occurrences (0=no, 1=yes) of an environmental partner (columns) in each frame (row).
82+
"""
83+
84+
return self._envpartners_occurrences(self._data_collapsed())
85+
86+
def _envpartners_occurrences(self, method_data):
87+
"""
88+
Get the superfeature's environmental partners' occurrences per environmental partner and
89+
frame.
6590
91+
Returns
92+
-------
93+
pandas.DataFrame
94+
Occurrences (0=no, 1=yes) of an environmental partner (columns) in each frame (row).
6695
"""
6796

68-
occurrences = self._data(type="occurrences").astype("int32")
97+
occurrences = method_data.astype("int32")
6998

7099
# Sort columns by superfeature occurrence
71100
sorted_columns = occurrences.sum().sort_values(ascending=False).index
@@ -118,10 +147,45 @@ def count(self):
118147
environmental partner as well as any environmental partner.
119148
"""
120149

150+
return self._count(self.envpartners_occurrences)
151+
152+
@property
153+
def count_collapsed(self):
154+
"""
155+
Get number of frames in which the superfeature occurs, including the superfeature's
156+
environmental partners' occurrences (collapsed if they share the same residue!).
157+
158+
Returns
159+
-------
160+
pandas.Series
161+
Superfeature count: The Series shows interactions (yes/no) to each single
162+
environmental partner as well as any environmental partner.
163+
"""
164+
165+
return self._count(self.envpartners_occurrences_collapsed)
166+
167+
def _count(self, property_envpartners_occurrences):
168+
"""
169+
Count the occurrence of the superfeature's environmental partners.
170+
171+
Parameters
172+
----------
173+
property_envpartners_occurrences : pandas.DataFrame
174+
If you want un-collapsed environmental partners, use `self.envpartners_occurrences`.
175+
If you want collapsed environmental partners, use
176+
`self.envpartners_occurrences_collapsed`.
177+
178+
Returns
179+
-------
180+
pandas.Series
181+
Superfeature count: The Series shows interactions (yes/no) to each single
182+
environmental partner as well as any environmental partner.
183+
"""
184+
121185
superfeature_count = pd.Series(
122-
{"any": (self.envpartners_occurrences.sum(axis=1) != 0).sum()}
186+
{"any": (property_envpartners_occurrences.sum(axis=1) != 0).sum()}
123187
)
124-
envpartners_count = self.envpartners_occurrences.sum()
188+
envpartners_count = property_envpartners_occurrences.sum()
125189

126190
return superfeature_count.append(envpartners_count)
127191

@@ -138,7 +202,41 @@ def frequency(self):
138202
environmental partner as well as any environmental partner.
139203
"""
140204

141-
return self.count.apply(lambda x: round(x / self.n_frames * 100, 2))
205+
return self._frequency(self.count)
206+
207+
@property
208+
def frequency_collapsed(self):
209+
"""
210+
Get frequency of frames in which the superfeature occurs, including the superfeature's
211+
environmental partners' occurrences (collapsed if they share the same residue!).
212+
213+
Returns
214+
-------
215+
pandas.Series
216+
Superfeature frequency: The Series shows interactions (yes/no) to each single
217+
environmental partner as well as any environmental partner.
218+
"""
219+
220+
return self._frequency(self.count_collapsed)
221+
222+
def _frequency(self, property_count):
223+
"""
224+
Get the frequency of the occurrence of the superfeature's environmental partners.
225+
226+
Parameters
226+
----------
227+
property_count : pandas.Series
229+
If you want un-collapsed environmental partners, use `self.count`.
230+
If you want collapsed environmental partners, use `self.count_collapsed`.
231+
232+
Returns
233+
-------
234+
pandas.Series
235+
Superfeature frequency: The Series shows interactions (yes/no) to each single
236+
environmental partner as well as any environmental partner.
237+
"""
238+
239+
return property_count.apply(lambda x: round(x / self.n_frames * 100, 2))
142240

143241
def _data(self, type="occurrences"):
144242
"""
@@ -168,3 +266,40 @@ def _data(self, type="occurrences"):
168266
for envpartner_id, envpartner in self.envpartners.items()
169267
}
170268
)
269+
270+
def _data_collapsed(self):
271+
"""Get occurrences per frame, merging environmental partners that share a residue ID."""
272+
273+
# List of environmental partner IDs (e.g. ILE-10-A[169,171,172])
274+
ids = self.envpartners_occurrences.columns
275+
# Unique list of residue IDs (e.g. ILE-10-A)
276+
residue_ids = [envpartner.residue_id for _, envpartner in self.envpartners.items()]
277+
residue_ids = list(set(residue_ids))
278+
279+
occurrences_dict = {}
280+
281+
# For each unique residue ID,
282+
# we want to aggregate data for all environmental partners that belong to the same residue
283+
for residue_id in residue_ids:
284+
285+
# Get all environmental partner IDs that belong to this residue
286+
ids_to_be_collapsed = [_id for _id in ids if _id.startswith(residue_id)]
287+
288+
# Merge all atom numbers
289+
atom_numbers = []
290+
for _id in ids_to_be_collapsed:
291+
atom_numbers.extend(self.envpartners[_id].atom_numbers)
292+
atom_numbers = sorted(list(set(atom_numbers)))
293+
id_collapsed = f"{residue_id}{atom_numbers}".replace(" ", "")
294+
295+
# Merge all occurrences
296+
occurrences = [self.envpartners[_id].occurrences for _id in ids_to_be_collapsed]
297+
occurrences = pd.DataFrame(occurrences)
298+
# If frame 1 in any environmental partner, set to 1 in collapsed environmental partner
299+
occurrences = occurrences.sum().apply(lambda x: 1 if x > 0 else 0)
300+
301+
occurrences_dict[id_collapsed] = occurrences
302+
303+
occurrences = pd.DataFrame(occurrences_dict, dtype="int32")
304+
305+
return occurrences

dynophores/tests/core/test_envpartner.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,14 @@ def test_init_raises(self, envpartner_dict):
6868
with pytest.raises(ValueError):
6969
EnvPartner(**envpartner_dict)
7070

71+
@pytest.mark.parametrize("residue_id", ["ILE-10-A"])
72+
def test_residue_id(self, envpartner, residue_id):
73+
"""
74+
Test class property.
75+
"""
76+
77+
assert envpartner.residue_id == residue_id
78+
7179
@pytest.mark.parametrize("n_frames", [1002])
7280
def test_n_frames(self, envpartner, n_frames):
7381
"""

dynophores/tests/core/test_superfeature.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,21 @@ def test_envpartners_occurrences(self, superfeature):
6565
)
6666
assert data.dtypes.unique() == "int32"
6767

68+
@pytest.mark.parametrize(
69+
"envpartners_collapsed",
70+
[["ILE-10-A[169,171,172]", "PHE-82-A[1245,1246,1247,1248,1249,1250]"]],
71+
)
72+
def test_envpartners_occurrences_collapsed(self, superfeature, envpartners_collapsed):
73+
"""
74+
Test class property.
75+
"""
76+
77+
data = superfeature.envpartners_occurrences_collapsed
78+
assert isinstance(data, pd.DataFrame)
79+
assert data.index.to_list() == list(range(0, len(superfeature.occurrences)))
80+
assert sorted(data.columns.to_list()) == sorted(envpartners_collapsed)
81+
assert data.dtypes.unique() == "int32"
82+
6883
def test_envpartners_distances(self, superfeature):
6984
"""
7085
Test class property.
@@ -120,3 +135,28 @@ def test_count_frequency(self, superfeature, count, frequency, envpartner_ids):
120135

121136
frequency = pd.Series(frequency, index=envpartner_ids)
122137
assert all(superfeature.frequency == frequency)
138+
139+
@pytest.mark.parametrize(
140+
"count, frequency, envpartner_ids",
141+
[
142+
(
143+
[1001, 1001, 57],
144+
[99.90, 99.90, 5.69],
145+
[
146+
"any",
147+
"ILE-10-A[169,171,172]",
148+
"PHE-82-A[1245,1246,1247,1248,1249,1250]",
149+
],
150+
)
151+
],
152+
)
153+
def test_count_frequency_collapsed(self, superfeature, count, frequency, envpartner_ids):
154+
"""
155+
Test class property.
156+
"""
157+
158+
count = pd.Series(count, index=envpartner_ids)
159+
assert all(superfeature.count_collapsed == count)
160+
161+
frequency = pd.Series(frequency, index=envpartner_ids)
162+
assert all(superfeature.frequency_collapsed == frequency)

dynophores/tests/viz/test_plot_static.py

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -75,20 +75,44 @@ def test_superfeatures_occurrences_raises(dynophore, superfeature_ids):
7575

7676

7777
@pytest.mark.parametrize(
78-
"superfeature_ids, frames_range, frames_step_size, occurrence_min",
78+
"superfeature_ids, frames_range, frames_step_size, occurrence_min, collapse_residues",
7979
[
80-
("AR[4605,4607,4603,4606,4604]", [0, None], 1, 50),
81-
(["AR[4605,4607,4603,4606,4604]", "AR[4622,4615,4623,4613,4614,4621]"], [0, None], 10, 50),
82-
(["AR[4605,4607,4603,4606,4604]", "AR[4622,4615,4623,4613,4614,4621]"], [10, 90], 1, 50),
83-
(["AR[4605,4607,4603,4606,4604]", "AR[4622,4615,4623,4613,4614,4621]"], [10, 90], 10, 50),
80+
("AR[4605,4607,4603,4606,4604]", [0, None], 1, 50, False),
81+
(
82+
["AR[4605,4607,4603,4606,4604]", "AR[4622,4615,4623,4613,4614,4621]"],
83+
[0, None],
84+
10,
85+
50,
86+
False,
87+
),
88+
(
89+
["AR[4605,4607,4603,4606,4604]", "AR[4622,4615,4623,4613,4614,4621]"],
90+
[10, 90],
91+
1,
92+
50,
93+
False,
94+
),
95+
(
96+
["AR[4605,4607,4603,4606,4604]", "AR[4622,4615,4623,4613,4614,4621]"],
97+
[10, 90],
98+
10,
99+
50,
100+
False,
101+
),
102+
("AR[4605,4607,4603,4606,4604]", [0, None], 1, 50, True),
84103
],
85104
)
86105
def test_envpartners_occurrences(
87-
dynophore, superfeature_ids, frames_range, frames_step_size, occurrence_min
106+
dynophore, superfeature_ids, frames_range, frames_step_size, occurrence_min, collapse_residues
88107
):
89108

90109
fig, axes = plot.static.envpartners_occurrences(
91-
dynophore, superfeature_ids, frames_range, frames_step_size, occurrence_min
110+
dynophore,
111+
superfeature_ids,
112+
frames_range,
113+
frames_step_size,
114+
occurrence_min,
115+
collapse_residues,
92116
)
93117
assert isinstance(fig, matplotlib.figure.Figure)
94118
if isinstance(superfeature_ids, str):

dynophores/viz/plot/interactive.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,14 @@ def envpartners_occurrences(dynophore):
164164
style=WIDGET_STYLE,
165165
layout=WIDGET_LAYOUT,
166166
),
167+
collapse_residues=widgets.ToggleButtons(
168+
options=[False, True],
169+
description="Collapse residues?",
170+
button_style="",
171+
tooltips=["False", "True"],
172+
style=WIDGET_STYLE,
173+
layout=WIDGET_LAYOUT,
174+
),
167175
)
168176

169177

0 commit comments

Comments
 (0)