Merge pull request #329 from MannLabs/decouple-dataset-from-statistics
Decouple dataset from statistics
mschwoer authored Sep 20, 2024
2 parents 2903d60 + a9d2893 commit 198cbdc
Showing 11 changed files with 339 additions and 256 deletions.
10 changes: 5 additions & 5 deletions .github/workflows/build_and_test.yml
@@ -49,14 +49,14 @@ jobs:
- name: Print pip freeze
run: |
pip freeze
- name: Run tests
run: |
coverage run -m pytest
- name: Upload Coverage to Codecov
uses: codecov/codecov-action@v4
- name: Run notebooks
run: |
python3 -m ipykernel install --user
# TODO add the excluded notebook
TEST_NBS=$(find ./nbs -name "*.ipynb" | grep -v "ramus_2016.ipynb")
python -m pytest --nbmake $(echo $TEST_NBS)
- name: Run tests
run: |
coverage run -m pytest
- name: Upload Coverage to Codecov
uses: codecov/codecov-action@v4
104 changes: 76 additions & 28 deletions alphastats/DataSet.py
@@ -13,6 +13,7 @@
from alphastats.DataSet_Preprocess import Preprocess, PreprocessingStateKeys
from alphastats.DataSet_Statistics import Statistics
from alphastats.utils import LoaderError
from alphastats.statistics.tukey_test import tukey_test

plotly.io.templates["alphastats_colors"] = plotly.graph_objects.layout.Template(
layout=plotly.graph_objects.Layout(
@@ -35,7 +36,7 @@
plotly.io.templates.default = "simple_white+alphastats_colors"


class DataSet(Statistics, Plot):
class DataSet(Plot):
"""Analysis Object"""

def __init__(
@@ -100,9 +101,21 @@ def __init__(

print("DataSet has been created.")

def _get_preprocess(self) -> Preprocess:
"""Return instance of the Preprocess object."""
return Preprocess(
self.filter_columns,
self.rawinput,
self.index_column,
self.sample,
self.metadata,
self.preprocessing_info,
self.mat,
)

def preprocess(
self,
log2_transform: bool = True,
log2_transform: bool = False,
remove_contaminations: bool = False,
subset: bool = False,
data_completeness: float = 0,
@@ -111,26 +124,18 @@ def preprocess(
remove_samples: list = None,
**kwargs,
) -> None:
"""A wrapper for the preprocess() method, see documentation in Preprocess.preprocess()."""
pp = Preprocess(
self.filter_columns,
self.rawinput,
self.index_column,
self.sample,
self.metadata,
self.preprocessing_info,
self.mat,
)

self.mat, self.metadata, self.preprocessing_info = pp.preprocess(
log2_transform,
remove_contaminations,
subset,
data_completeness,
normalization,
imputation,
remove_samples,
**kwargs,
"""A wrapper for Preprocess.preprocess(), see documentation there."""
self.mat, self.metadata, self.preprocessing_info = (
self._get_preprocess().preprocess(
log2_transform,
remove_contaminations,
subset,
data_completeness,
normalization,
imputation,
remove_samples,
**kwargs,
)
)
self.preprocessed = True

Expand All @@ -149,16 +154,59 @@ def reset_preprocessing(self):
print("All preprocessing steps are reset.")

def batch_correction(self, batch: str) -> None:
pp = Preprocess(
self.filter_columns,
self.rawinput,
"""A wrapper for Preprocess.batch_correction(), see documentation there."""
self.mat = self._get_preprocess().batch_correction(batch)

def _get_statistics(self) -> Statistics:
"""Return instance of the Statistics object."""
return Statistics(
self.mat,
self.metadata,
self.index_column,
self.sample,
self.metadata,
self.preprocessing_info,
self.mat,
)
self.mat = pp.batch_correction(batch)

def diff_expression_analysis(
self,
group1: Union[str, list],
group2: Union[str, list],
column: str = None,
method: str = "ttest",
perm: int = 10,
fdr: float = 0.05,
) -> pd.DataFrame:
"""A wrapper for the Statistics.diff_expression_analysis(), see documentation there."""
return self._get_statistics().diff_expression_analysis(
group1,
group2,
column,
method,
perm,
fdr,
)

def tukey_test(self, protein_id: str, group: str) -> pd.DataFrame:
"""A wrapper for tukey_test.tukey_test(), see documentation there."""
df = self.mat[[protein_id]].reset_index().rename(columns={"index": self.sample})
df = df.merge(self.metadata, how="inner", on=[self.sample])

return tukey_test(
df,
protein_id,
group,
self.index_column,
)

def anova(self, column: str, protein_ids="all", tukey: bool = True) -> pd.DataFrame:
"""A wrapper for Statistics.anova(), see documentation there."""
return self._get_statistics().anova(column, protein_ids, tukey)

def ancova(
self, protein_id: str, covar: Union[str, list], between: str
) -> pd.DataFrame:
"""A wrapper for Statistics.ancova(), see documentation there."""
return self._get_statistics().ancova(protein_id, covar, between)

def _check_loader(self, loader):
"""Checks if the Loader is from class AlphaPeptLoader, MaxQuantLoader, DIANNLoader, FragPipeLoader
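
For orientation, a minimal usage sketch of the refactored DataSet facade: preprocessing now delegates to Preprocess via _get_preprocess() and the statistical methods delegate to Statistics via _get_statistics(), so the public API stays the same. The import path, loader setup, file names, column names and group labels below are hypothetical illustrations, not taken from this commit.

# Hypothetical end-to-end sketch; file, column and group names are placeholders.
from alphastats import DataSet, MaxQuantLoader  # assumed import path

loader = MaxQuantLoader(file="proteinGroups.txt")  # hypothetical MaxQuant output
ds = DataSet(
    loader=loader,
    metadata_path="metadata.xlsx",  # hypothetical metadata file
    sample_column="sample",         # hypothetical sample-name column
)

# Preprocessing is delegated internally to Preprocess via _get_preprocess().
ds.preprocess(remove_contaminations=True, normalization="zscore", imputation="knn")

# Statistical methods are delegated internally to Statistics via _get_statistics().
de_results = ds.diff_expression_analysis(group1="treated", group2="control", column="condition")
anova_results = ds.anova(column="condition", protein_ids="all", tukey=True)
tukey_results = ds.tukey_test(protein_id="P12345", group="condition")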
4 changes: 2 additions & 2 deletions alphastats/DataSet_Plot.py
@@ -185,7 +185,7 @@ def plot_volcano(

return volcano_plot.plot

def plot_correlation_matrix(self, method: str = "pearson"):
def plot_correlation_matrix(self, method: str = "pearson"): # TODO unused
"""Plot Correlation Matrix
Args:
@@ -369,7 +369,7 @@ plot_dendrogram(
)
return fig

def plot_imputed_values(self):
def plot_imputed_values(self): # not used
# get coordinates of missing values
df = self.mat
s = df.stack(dropna=False)
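
The plotting mixin is untouched by the decoupling, so (continuing the hypothetical ds from the sketch above) the plot methods in this file are still called directly on the DataSet. Passing "spearman" is an assumed alternative to the "pearson" default shown in the signature.

# Sketch only, reusing the hypothetical ds object from the previous example.
corr_fig = ds.plot_correlation_matrix(method="spearman")  # "pearson" is the default shown above
corr_fig.show()  # plotly figure, per the template setup in DataSet.py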