Merge pull request #329 from MannLabs/decouple-dataset-from-statistics
Decouple dataset from statistics
mschwoer authored Sep 20, 2024
2 parents 2903d60 + a9d2893 commit 198cbdc
Showing 11 changed files with 339 additions and 256 deletions.
10 changes: 5 additions & 5 deletions .github/workflows/build_and_test.yml
@@ -49,14 +49,14 @@ jobs:
- name: Print pip freeze
run: |
pip freeze
- name: Run tests
run: |
coverage run -m pytest
- name: Upload Coverage to Codecov
uses: codecov/codecov-action@v4
- name: Run notebooks
run: |
python3 -m ipykernel install --user
# TODO add the excluded notebook
TEST_NBS=$(find ./nbs -name "*.ipynb" | grep -v "ramus_2016.ipynb")
python -m pytest --nbmake $(echo $TEST_NBS)
- name: Run tests
run: |
coverage run -m pytest
- name: Upload Coverage to Codecov
uses: codecov/codecov-action@v4
104 changes: 76 additions & 28 deletions alphastats/DataSet.py
@@ -13,6 +13,7 @@
from alphastats.DataSet_Preprocess import Preprocess, PreprocessingStateKeys
from alphastats.DataSet_Statistics import Statistics
from alphastats.utils import LoaderError
from alphastats.statistics.tukey_test import tukey_test

plotly.io.templates["alphastats_colors"] = plotly.graph_objects.layout.Template(
layout=plotly.graph_objects.Layout(
@@ -35,7 +36,7 @@
plotly.io.templates.default = "simple_white+alphastats_colors"


class DataSet(Statistics, Plot):
class DataSet(Plot):
"""Analysis Object"""

def __init__(
@@ -100,9 +101,21 @@ def __init__(

print("DataSet has been created.")

def _get_preprocess(self) -> Preprocess:
"""Return instance of the Preprocess object."""
return Preprocess(
self.filter_columns,
self.rawinput,
self.index_column,
self.sample,
self.metadata,
self.preprocessing_info,
self.mat,
)

def preprocess(
self,
log2_transform: bool = True,
log2_transform: bool = False,
remove_contaminations: bool = False,
subset: bool = False,
data_completeness: float = 0,
@@ -111,26 +124,18 @@ def preprocess(
remove_samples: list = None,
**kwargs,
) -> None:
"""A wrapper for the preprocess() method, see documentation in Preprocess.preprocess()."""
pp = Preprocess(
self.filter_columns,
self.rawinput,
self.index_column,
self.sample,
self.metadata,
self.preprocessing_info,
self.mat,
)

self.mat, self.metadata, self.preprocessing_info = pp.preprocess(
log2_transform,
remove_contaminations,
subset,
data_completeness,
normalization,
imputation,
remove_samples,
**kwargs,
"""A wrapper for Preprocess.preprocess(), see documentation there."""
self.mat, self.metadata, self.preprocessing_info = (
self._get_preprocess().preprocess(
log2_transform,
remove_contaminations,
subset,
data_completeness,
normalization,
imputation,
remove_samples,
**kwargs,
)
)
self.preprocessed = True

Expand All @@ -149,16 +154,59 @@ def reset_preprocessing(self):
print("All preprocessing steps are reset.")

def batch_correction(self, batch: str) -> None:
pp = Preprocess(
self.filter_columns,
self.rawinput,
"""A wrapper for Preprocess.batch_correction(), see documentation there."""
self.mat = self._get_preprocess().batch_correction(batch)

def _get_statistics(self) -> Statistics:
"""Return instance of the Statistics object."""
return Statistics(
self.mat,
self.metadata,
self.index_column,
self.sample,
self.metadata,
self.preprocessing_info,
self.mat,
)
self.mat = pp.batch_correction(batch)

def diff_expression_analysis(
self,
group1: Union[str, list],
group2: Union[str, list],
column: str = None,
method: str = "ttest",
perm: int = 10,
fdr: float = 0.05,
) -> pd.DataFrame:
"""A wrapper for the Statistics.diff_expression_analysis(), see documentation there."""
return self._get_statistics().diff_expression_analysis(
group1,
group2,
column,
method,
perm,
fdr,
)

def tukey_test(self, protein_id: str, group: str) -> pd.DataFrame:
"""A wrapper for tukey_test.tukey_test(), see documentation there."""
df = self.mat[[protein_id]].reset_index().rename(columns={"index": self.sample})
df = df.merge(self.metadata, how="inner", on=[self.sample])

return tukey_test(
df,
protein_id,
group,
self.index_column,
)

def anova(self, column: str, protein_ids="all", tukey: bool = True) -> pd.DataFrame:
"""A wrapper for Statistics.anova(), see documentation there."""
return self._get_statistics().anova(column, protein_ids, tukey)

def ancova(
self, protein_id: str, covar: Union[str, list], between: str
) -> pd.DataFrame:
"""A wrapper for Statistics.ancova(), see documentation there."""
return self._get_statistics().ancova(protein_id, covar, between)

def _check_loader(self, loader):
"""Checks if the Loader is from class AlphaPeptLoader, MaxQuantLoader, DIANNLoader, FragPipeLoader
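
For orientation, a minimal usage sketch of the refactored DataSet facade: preprocessing now delegates to Preprocess via _get_preprocess() and the statistical methods delegate to Statistics via _get_statistics(), so the public API stays the same. The import path, loader setup, file names, column names and group labels below are hypothetical illustrations, not taken from this commit.

# Hypothetical end-to-end sketch; file, column and group names are placeholders.
from alphastats import DataSet, MaxQuantLoader  # assumed import path

loader = MaxQuantLoader(file="proteinGroups.txt")  # hypothetical MaxQuant output
ds = DataSet(
    loader=loader,
    metadata_path="metadata.xlsx",  # hypothetical metadata file
    sample_column="sample",         # hypothetical sample-name column
)

# Preprocessing is delegated internally to Preprocess via _get_preprocess().
ds.preprocess(remove_contaminations=True, normalization="zscore", imputation="knn")

# Statistical methods are delegated internally to Statistics via _get_statistics().
de_results = ds.diff_expression_analysis(group1="treated", group2="control", column="condition")
anova_results = ds.anova(column="condition", protein_ids="all", tukey=True)
tukey_results = ds.tukey_test(protein_id="P12345", group="condition")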
4 changes: 2 additions & 2 deletions alphastats/DataSet_Plot.py
@@ -185,7 +185,7 @@ def plot_volcano(

return volcano_plot.plot

def plot_correlation_matrix(self, method: str = "pearson"):
def plot_correlation_matrix(self, method: str = "pearson"): # TODO unused
"""Plot Correlation Matrix
Args:
@@ -369,7 +369,7 @@ plot_dendrogram(
)
return fig

def plot_imputed_values(self):
def plot_imputed_values(self): # not used
# get coordinates of missing values
df = self.mat
s = df.stack(dropna=False)
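
The plotting mixin is untouched by the decoupling, so (continuing the hypothetical ds from the sketch above) the plot methods in this file are still called directly on the DataSet. Passing "spearman" is an assumed alternative to the "pearson" default shown in the signature.

# Sketch only, reusing the hypothetical ds object from the previous example.
corr_fig = ds.plot_correlation_matrix(method="spearman")  # "pearson" is the default shown above
corr_fig.show()  # plotly figure, per the template setup in DataSet.py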