Merge pull request #36 from dominiquesydow/collapse-envpartners

dominiquesydow · web-flow · commit 47d62b21cb4d · 2021-08-20T13:21:06.000+02:00
Collapse envpartners with same residue and superfeature
diff --git a/docs/tutorials/explore_plots.ipynb b/docs/tutorials/explore_plots.ipynb
diff --git a/dynophores/core/envpartner.py b/dynophores/core/envpartner.py
@@ -42,6 +42,18 @@ def __init__(
         self.occurrences = occurrences
         self.distances = distances
 
+    @property
+    def residue_id(self):
+        """
+        Get the residue's ID, built as "<residue name>-<residue number>-<chain>".
+
+        Returns
+        -------
+        str
+            Residue's ID, e.g. "ILE-10-A".
+        """
+        return f"{self.residue_name}-{self.residue_number}-{self.chain}"
+
     @property
     def n_frames(self):
         """
diff --git a/dynophores/core/superfeature.py b/dynophores/core/superfeature.py
@@ -62,10 +62,39 @@ def envpartners_occurrences(self):
         -------
         pandas.DataFrame
             Occurrences (0=no, 1=yes) of an environmental partner (columns) in each frame (row).
+        """
+
+        return self._envpartners_occurrences(self._data(type="occurrences"))
+
+    @property
+    def envpartners_occurrences_collapsed(self):
+        """
+        Get the superfeature's environmental partners' occurrences per environmental partner and
+        frame, with environmental partners of the same residue collapsed into one.
+        If a residue interacts multiple times with the same superfeature, the respective
+        environmental partners are aggregated. This can happen if different atoms of a residue
+        are involved in the same superfeature.
+
+        Returns
+        -------
+        pandas.DataFrame
+            Occurrences (0=no, 1=yes) of an environmental partner (columns) in each frame (row).
+        """
+
+        return self._envpartners_occurrences(self._data_collapsed())
+
+    def _envpartners_occurrences(self, method_data):
+        """
+        Get the superfeature's environmental partners' occurrences per environmental partner and
+        frame.
 
+        Returns
+        -------
+        pandas.DataFrame
+            Occurrences (0=no, 1=yes) of an environmental partner (columns) in each frame (row).
         """
 
-        occurrences = self._data(type="occurrences").astype("int32")
+        occurrences = method_data.astype("int32")
 
         # Sort columns by superfeature occurrence
         sorted_columns = occurrences.sum().sort_values(ascending=False).index
@@ -118,10 +147,45 @@ def count(self):
             environmental partner as well as any environmental partner.
         """
 
+        return self._count(self.envpartners_occurrences)
+
+    @property
+    def count_collapsed(self):
+        """
+        Get number of frames in which the superfeature occurs, including the superfeature's
+        environmental partners occurrences (collapsed if they share the same residue!).
+
+        Returns
+        -------
+        pandas.Series
+            Superfeature count: Number of frames with an interaction to any environmental
+            partner (index "any") as well as to each single collapsed environmental partner.
+        """
+
+        return self._count(self.envpartners_occurrences_collapsed)
+
+    def _count(self, property_envpartners_occurrences):
+        """
+        Count the occurrence of the superfeature's environmental partners.
+
+        Parameters
+        ----------
+        property_envpartners_occurrences : pandas.DataFrame
+            Occurrences (0=no, 1=yes) of an environmental partner (columns) in each frame (row).
+            If you want un-collapsed environmental partners, use `self.envpartners_occurrences`.
+            If you want collapsed environmental partners, use
+            `self.envpartners_occurrences_collapsed`.
+
+        Returns
+        -------
+        pandas.Series
+            Superfeature count: Number of frames with an interaction to any environmental
+            partner (index "any"), followed by the number of frames with an interaction to each
+            single environmental partner.
+        """
+
         superfeature_count = pd.Series(
-            {"any": (self.envpartners_occurrences.sum(axis=1) != 0).sum()}
+            {"any": (property_envpartners_occurrences.sum(axis=1) != 0).sum()}
         )
-        envpartners_count = self.envpartners_occurrences.sum()
+        envpartners_count = property_envpartners_occurrences.sum()
 
-        return superfeature_count.append(envpartners_count)
+        # `Series.append` was removed in pandas 2.0; `pd.concat` yields the same Series
+        return pd.concat([superfeature_count, envpartners_count])
 
@@ -138,7 +202,41 @@ def frequency(self):
             environmental partner as well as any environmental partner.
         """
 
-        return self.count.apply(lambda x: round(x / self.n_frames * 100, 2))
+        return self._frequency(self.count)
+
+    @property
+    def frequency_collapsed(self):
+        """
+        Get frequency of frames in which the superfeature occurs, including the superfeature's
+        environmental partners occurrences (collapsed if they share the same residue!).
+
+        Returns
+        -------
+        pandas.Series
+            Superfeature frequency: Percentage of frames with an interaction to any
+            environmental partner as well as to each single collapsed environmental partner.
+        """
+
+        return self._frequency(self.count_collapsed)
+
+    def _frequency(self, property_count):
+        """
+        Get the frequency of the occurrence of the superfeature's environmental partners.
+
+        Parameters
+        ----------
+        property_count : pandas.Series
+            Superfeature count to be converted into a frequency.
+            If you want un-collapsed environmental partners, use `self.count`.
+            If you want collapsed environmental partners, use `self.count_collapsed`.
+
+        Returns
+        -------
+        pandas.Series
+            Superfeature frequency: Each count as percentage of `self.n_frames`, rounded to two
+            decimals.
+        """
+
+        return property_count.apply(lambda x: round(x / self.n_frames * 100, 2))
 
     def _data(self, type="occurrences"):
         """
@@ -168,3 +266,40 @@ def _data(self, type="occurrences"):
                 for envpartner_id, envpartner in self.envpartners.items()
             }
         )
+
+    def _data_collapsed(self):
+        """
+        Get the occurrences of the superfeature's environmental partners, collapsed per residue.
+
+        Environmental partners that share the same residue ID (e.g. ILE-10-A) are merged into
+        one column: Their atom numbers are united and a frame counts as occurrence if any of the
+        merged environmental partners occurs in that frame.
+
+        Returns
+        -------
+        pandas.DataFrame
+            Occurrences (0=no, 1=yes) of a collapsed environmental partner (columns, e.g.
+            ILE-10-A[169,171,172]) in each frame (row).
+        """
+
+        # Group environmental partners by residue ID (e.g. ILE-10-A).
+        # Residue IDs are compared for equality; matching by ID prefix would wrongly merge
+        # residues whose ID is a prefix of another one (e.g. ILE-10-A and ILE-10-AB).
+        # A dict keeps the first-seen order, which makes the column order deterministic.
+        envpartners_by_residue = {}
+        for envpartner in self.envpartners.values():
+            envpartners_by_residue.setdefault(envpartner.residue_id, []).append(envpartner)
+
+        occurrences_dict = {}
+
+        # For each unique residue ID,
+        # we want to aggregate data for all environmental partners that belong to the same residue
+        for residue_id, envpartners in envpartners_by_residue.items():
+
+            # Merge all atom numbers (unique and sorted)
+            atom_numbers = set()
+            for envpartner in envpartners:
+                atom_numbers.update(envpartner.atom_numbers)
+            atom_numbers = sorted(atom_numbers)
+            id_collapsed = f"{residue_id}{atom_numbers}".replace(" ", "")
+
+            # Merge all occurrences (one row per environmental partner, one column per frame)
+            occurrences = pd.DataFrame([envpartner.occurrences for envpartner in envpartners])
+            # If frame 1 in any environmental partner, set to 1 in collapsed environmental partner
+            occurrences = occurrences.sum().apply(lambda x: 1 if x > 0 else 0)
+
+            occurrences_dict[id_collapsed] = occurrences
+
+        return pd.DataFrame(occurrences_dict, dtype="int32")
diff --git a/dynophores/tests/core/test_envpartner.py b/dynophores/tests/core/test_envpartner.py
@@ -68,6 +68,14 @@ def test_init_raises(self, envpartner_dict):
         with pytest.raises(ValueError):
             EnvPartner(**envpartner_dict)
 
+    @pytest.mark.parametrize("residue_id", ["ILE-10-A"])
+    def test_residue_id(self, envpartner, residue_id):
+        """
+        Test class property `residue_id` against the expected ID of the `envpartner` fixture.
+        """
+
+        assert envpartner.residue_id == residue_id
+
     @pytest.mark.parametrize("n_frames", [1002])
     def test_n_frames(self, envpartner, n_frames):
         """
diff --git a/dynophores/tests/core/test_superfeature.py b/dynophores/tests/core/test_superfeature.py
@@ -65,6 +65,21 @@ def test_envpartners_occurrences(self, superfeature):
         )
         assert data.dtypes.unique() == "int32"
 
+    @pytest.mark.parametrize(
+        "envpartners_collapsed",
+        [["ILE-10-A[169,171,172]", "PHE-82-A[1245,1246,1247,1248,1249,1250]"]],
+    )
+    def test_envpartners_occurrences_collapsed(self, superfeature, envpartners_collapsed):
+        """
+        Test class property `envpartners_occurrences_collapsed`: Check the return type, the
+        frame index, the collapsed column names (order-independent) and the dtype.
+        """
+
+        data = superfeature.envpartners_occurrences_collapsed
+        assert isinstance(data, pd.DataFrame)
+        assert data.index.to_list() == list(range(0, len(superfeature.occurrences)))
+        assert sorted(data.columns.to_list()) == sorted(envpartners_collapsed)
+        assert data.dtypes.unique() == "int32"
+
     def test_envpartners_distances(self, superfeature):
         """
         Test class property.
@@ -120,3 +135,28 @@ def test_count_frequency(self, superfeature, count, frequency, envpartner_ids):
 
         frequency = pd.Series(frequency, index=envpartner_ids)
         assert all(superfeature.frequency == frequency)
+
+    @pytest.mark.parametrize(
+        "count, frequency, envpartner_ids",
+        [
+            (
+                [1001, 1001, 57],
+                [99.90, 99.90, 5.69],
+                [
+                    "any",
+                    "ILE-10-A[169,171,172]",
+                    "PHE-82-A[1245,1246,1247,1248,1249,1250]",
+                ],
+            )
+        ],
+    )
+    def test_count_frequency_collapsed(self, superfeature, count, frequency, envpartner_ids):
+        """
+        Test class properties `count_collapsed` and `frequency_collapsed` against the expected
+        values per collapsed environmental partner (and "any").
+        """
+
+        count = pd.Series(count, index=envpartner_ids)
+        assert all(superfeature.count_collapsed == count)
+
+        frequency = pd.Series(frequency, index=envpartner_ids)
+        assert all(superfeature.frequency_collapsed == frequency)
diff --git a/dynophores/tests/viz/test_plot_static.py b/dynophores/tests/viz/test_plot_static.py
@@ -75,20 +75,44 @@ def test_superfeatures_occurrences_raises(dynophore, superfeature_ids):
 
 
 @pytest.mark.parametrize(
-    "superfeature_ids, frames_range, frames_step_size, occurrence_min",
+    "superfeature_ids, frames_range, frames_step_size, occurrence_min, collapse_residues",
     [
-        ("AR[4605,4607,4603,4606,4604]", [0, None], 1, 50),
-        (["AR[4605,4607,4603,4606,4604]", "AR[4622,4615,4623,4613,4614,4621]"], [0, None], 10, 50),
-        (["AR[4605,4607,4603,4606,4604]", "AR[4622,4615,4623,4613,4614,4621]"], [10, 90], 1, 50),
-        (["AR[4605,4607,4603,4606,4604]", "AR[4622,4615,4623,4613,4614,4621]"], [10, 90], 10, 50),
+        ("AR[4605,4607,4603,4606,4604]", [0, None], 1, 50, False),
+        (
+            ["AR[4605,4607,4603,4606,4604]", "AR[4622,4615,4623,4613,4614,4621]"],
+            [0, None],
+            10,
+            50,
+            False,
+        ),
+        (
+            ["AR[4605,4607,4603,4606,4604]", "AR[4622,4615,4623,4613,4614,4621]"],
+            [10, 90],
+            1,
+            50,
+            False,
+        ),
+        (
+            ["AR[4605,4607,4603,4606,4604]", "AR[4622,4615,4623,4613,4614,4621]"],
+            [10, 90],
+            10,
+            50,
+            False,
+        ),
+        ("AR[4605,4607,4603,4606,4604]", [0, None], 1, 50, True),
     ],
 )
 def test_envpartners_occurrences(
-    dynophore, superfeature_ids, frames_range, frames_step_size, occurrence_min
+    dynophore, superfeature_ids, frames_range, frames_step_size, occurrence_min, collapse_residues
 ):
 
     fig, axes = plot.static.envpartners_occurrences(
-        dynophore, superfeature_ids, frames_range, frames_step_size, occurrence_min
+        dynophore,
+        superfeature_ids,
+        frames_range,
+        frames_step_size,
+        occurrence_min,
+        collapse_residues,
     )
     assert isinstance(fig, matplotlib.figure.Figure)
     if isinstance(superfeature_ids, str):
diff --git a/dynophores/viz/plot/interactive.py b/dynophores/viz/plot/interactive.py
@@ -164,6 +164,14 @@ def envpartners_occurrences(dynophore):
             style=WIDGET_STYLE,
             layout=WIDGET_LAYOUT,
         ),
+        collapse_residues=widgets.ToggleButtons(
+            options=[False, True],
+            description="Collapse residues?",
+            button_style="",
+            tooltips=["False", "True"],
+            style=WIDGET_STYLE,
+            layout=WIDGET_LAYOUT,
+        ),
     )
 
 
diff --git a/dynophores/viz/plot/static.py b/dynophores/viz/plot/static.py