@@ -62,10 +62,39 @@ def envpartners_occurrences(self):
6262 -------
6363 pandas.DataFrame
6464 Occurrences (0=no, 1=yes) of an environmental partner (columns) in each frame (row).
65+ """
66+
67+ return self ._envpartners_occurrences (self ._data (type = "occurrences" ))
68+
@property
def envpartners_occurrences_collapsed(self):
    """
    Get the superfeature's environmental partners' occurrences per environmental partner and
    frame, with environmental partners collapsed.

    If an environmental partner interacts multiple times with the same superfeature, the
    interactions are aggregated. This can happen if different atoms of an environmental
    partner are involved in the same superfeature.

    Returns
    -------
    pandas.DataFrame
        Occurrences (0=no, 1=yes) of an environmental partner (columns) in each frame (row).
    """

    # Collapse first, then hand the data to the shared post-processing routine
    collapsed_data = self._data_collapsed()
    return self._envpartners_occurrences(collapsed_data)
85+
86+ def _envpartners_occurrences (self , method_data ):
87+ """
88+ Get the superfeature's environmental partners' occurrences per environmental partner and
89+ frame.
6590
91+ Returns
92+ -------
93+ pandas.DataFrame
94+ Occurrences (0=no, 1=yes) of an environmental partner (columns) in each frame (row).
6695 """
6796
68- occurrences = self . _data ( type = "occurrences" ) .astype ("int32" )
97+ occurrences = method_data .astype ("int32" )
6998
7099 # Sort columns by superfeature occurrence
71100 sorted_columns = occurrences .sum ().sort_values (ascending = False ).index
@@ -118,10 +147,45 @@ def count(self):
118147 environmental partner as well as any environmental partner.
119148 """
120149
150+ return self ._count (self .envpartners_occurrences )
151+
@property
def count_collapsed(self):
    """
    Get the number of frames in which the superfeature occurs, based on the collapsed
    occurrences of the superfeature's environmental partners (collapsed if they share the
    same residue!).

    Returns
    -------
    pandas.Series
        Superfeature count: The Series shows interactions (yes/no) to each single
        environmental partner as well as any environmental partner.
    """

    collapsed_occurrences = self.envpartners_occurrences_collapsed
    return self._count(collapsed_occurrences)
166+
def _count(self, property_envpartners_occurrences):
    """
    Count the occurrence of the superfeature's environmental partners.

    Parameters
    ----------
    property_envpartners_occurrences : pandas.DataFrame
        Occurrences of environmental partners (columns) in each frame (row).
        If you want un-collapsed environmental partners, use `self.envpartners_occurrences`.
        If you want collapsed environmental partners, use
        `self.envpartners_occurrences_collapsed`.

    Returns
    -------
    pandas.Series
        Superfeature count: The first entry ("any") is the number of frames with at least
        one environmental partner; the remaining entries are the per-partner frame counts.
    """

    # A frame contributes to "any" if its row sum over all partners is non-zero
    frames_with_any_partner = property_envpartners_occurrences.sum(axis=1) != 0
    superfeature_count = pd.Series({"any": frames_with_any_partner.sum()})

    # Column-wise sum: number of frames per environmental partner
    envpartners_count = property_envpartners_occurrences.sum()

    # `Series.append` was removed in pandas 2.0; `pd.concat` gives the same result
    return pd.concat([superfeature_count, envpartners_count])
127191
@@ -138,7 +202,41 @@ def frequency(self):
138202 environmental partner as well as any environmental partner.
139203 """
140204
141- return self .count .apply (lambda x : round (x / self .n_frames * 100 , 2 ))
205+ return self ._frequency (self .count )
206+
@property
def frequency_collapsed(self):
    """
    Get the frequency of frames in which the superfeature occurs, based on the collapsed
    occurrences of the superfeature's environmental partners (collapsed if they share the
    same residue!).

    Returns
    -------
    pandas.Series
        Superfeature frequency: The Series shows interactions (yes/no) to each single
        environmental partner as well as any environmental partner.
    """

    collapsed_count = self.count_collapsed
    return self._frequency(collapsed_count)
221+
def _frequency(self, property_count):
    """
    Get the frequency of the occurrence of the superfeature's environmental partners.

    Parameters
    ----------
    property_count : pandas.Series
        Counts to be converted into frequencies.
        If you want un-collapsed environmental partners, use `self.count`.
        If you want collapsed environmental partners, use `self.count_collapsed`.

    Returns
    -------
    pandas.Series
        Superfeature frequency: Each count divided by `self.n_frames`, expressed in percent
        and rounded to two decimals.
    """

    def to_percentage(n_occurrences):
        # Share of frames in percent, rounded to two decimals
        return round(n_occurrences / self.n_frames * 100, 2)

    return property_count.apply(to_percentage)
142240
143241 def _data (self , type = "occurrences" ):
144242 """
@@ -168,3 +266,40 @@ def _data(self, type="occurrences"):
168266 for envpartner_id , envpartner in self .envpartners .items ()
169267 }
170268 )
269+
def _data_collapsed(self):
    """
    Get the occurrences of the superfeature's environmental partners, aggregated per residue.

    All environmental partners listed in `self.envpartners_occurrences` that share the same
    residue ID are merged into one collapsed environmental partner: their atom numbers are
    united and a frame is set to 1 if it is non-zero in any of the merged partners.

    Returns
    -------
    pandas.DataFrame
        Occurrences (0=no, 1=yes) of each collapsed environmental partner (columns) in each
        frame (row). Column names consist of the residue ID followed by the sorted, merged
        atom numbers (e.g. ILE-10-A[169,171,172]).
    """

    # Group environmental partner IDs (e.g. ILE-10-A[169,171,172]) by residue ID
    # (e.g. ILE-10-A). Grouping on the partner's own `residue_id` instead of a string prefix
    # keeps residues apart whose IDs are prefixes of one another; using a dict keeps the
    # residues in first-seen order, so the column order is reproducible.
    ids_by_residue = {}
    for _id in self.envpartners_occurrences.columns:
        residue_id = self.envpartners[_id].residue_id
        ids_by_residue.setdefault(residue_id, []).append(_id)

    occurrences_dict = {}

    for residue_id, ids_to_be_collapsed in ids_by_residue.items():

        envpartners = [self.envpartners[_id] for _id in ids_to_be_collapsed]

        # Merge all atom numbers (unique and sorted)
        atom_numbers = sorted(
            {
                atom_number
                for envpartner in envpartners
                for atom_number in envpartner.atom_numbers
            }
        )
        id_collapsed = f"{residue_id}{atom_numbers}".replace(" ", "")

        # Merge all occurrences: one row per environmental partner, one column per frame
        occurrences = pd.DataFrame([envpartner.occurrences for envpartner in envpartners])
        # If frame is 1 in any environmental partner, set to 1 in collapsed partner
        occurrences_dict[id_collapsed] = (occurrences.sum() > 0).astype("int32")

    return pd.DataFrame(occurrences_dict, dtype="int32")
0 commit comments