Skip to content

Commit

Permalink
🚚 move to vuecore
Browse files Browse the repository at this point in the history
  • Loading branch information
enryH committed Jan 31, 2025
1 parent 835a91a commit 9f67485
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 140 deletions.
130 changes: 28 additions & 102 deletions docs/api_examples/normalization_analysis.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@
},
"outputs": [],
"source": [
"import itertools\n",
"from typing import Optional\n",
"\n",
"import matplotlib.pyplot as plt\n",
Expand All @@ -60,59 +59,51 @@
"import sklearn\n",
"import sklearn.impute\n",
"import sklearn.preprocessing\n",
"import umap\n",
"from vuecore.decomposition import plot_explained_variance\n",
"import vuecore.decomposition\n",
"\n",
"import acore.decomposition\n",
"import acore.normalization\n",
"import acore.sklearn\n",
"from acore.decomposition import pca as acore_pca # ! to remove\n",
"\n",
"\n",
"def plot_umap(X_scaled, y, meta_column, random_state=42) -> plt.Axes:\n",
" \"\"\"Fit and plot UMAP embedding with two components with colors defined by meta_column.\"\"\"\n",
" reducer = umap.UMAP(random_state=random_state, n_jobs=1)\n",
" embedding = reducer.fit_transform(X_scaled)\n",
" embedding = pd.DataFrame(\n",
" embedding, index=X_scaled.index, columns=[\"UMAP 1\", \"UMAP 2\"]\n",
" ).join(y.astype(\"category\"))\n",
" embedding = acore.decomposition.umap.run_umap(\n",
" X_scaled, y, random_state=random_state\n",
" )\n",
" ax = embedding.plot.scatter(\"UMAP 1\", \"UMAP 2\", c=meta_column, cmap=\"Paired\")\n",
" return ax\n",
"\n",
"\n",
"def standard_normalize(X: pd.DataFrame) -> pd.DataFrame:\n",
" \"\"\"Standard normalize data and keep indices of DataFrame.\"\"\"\n",
" scaler = sklearn.preprocessing.StandardScaler()\n",
" X_scaled = acore.sklearn.transform_DataFrame(X, fct=scaler.fit_transform)\n",
" X_scaled = (\n",
" sklearn.preprocessing.StandardScaler()\n",
" .set_output(transform=\"pandas\")\n",
" .fit_transform(X)\n",
" )\n",
" return X_scaled\n",
"\n",
"\n",
"def median_impute(X: pd.DataFrame) -> pd.DataFrame:\n",
" median_imputer = sklearn.impute.SimpleImputer(strategy=\"median\")\n",
" X_imputed = acore.sklearn.transform_DataFrame(X, median_imputer.fit_transform)\n",
" X_imputed = (\n",
" sklearn.impute.SimpleImputer(strategy=\"median\")\n",
" .set_output(transform=\"pandas\")\n",
" .fit_transform(X)\n",
" )\n",
" return X_imputed\n",
"\n",
"\n",
"def run_and_plot_pca(\n",
" X_scaled,\n",
" y,\n",
" meta_column,\n",
" n_components=4,\n",
" meta_column: Optional[str] = None,\n",
" n_components: int = 4,\n",
") -> tuple[pd.DataFrame, plt.Figure]:\n",
" PCs, _ = acore_pca.run_pca(X_scaled, n_components=n_components)\n",
" PCs, _ = acore.decomposition.pca.run_pca(X_scaled, n_components=n_components)\n",
" PCs.columns = [s.replace(\"principal component\", \"PC\") for s in PCs.columns]\n",
" PCs = PCs.join(y.astype(\"category\"))\n",
" up_to = min(PCs.shape[-1], n_components)\n",
" fig, axes = plt.subplots(up_to - 1, 2, figsize=(6, 8), layout=\"constrained\")\n",
" for k, (pos, ax) in enumerate(\n",
" zip(itertools.combinations(range(up_to), 2), axes.flatten())\n",
" ):\n",
" i, j = pos\n",
" plot_heatmap = bool(k % 2)\n",
" PCs.plot.scatter(\n",
" i, j, c=meta_column, cmap=\"Paired\", ax=ax, colorbar=plot_heatmap\n",
" )\n",
" _ = PCs.pop(\n",
" meta_column,\n",
" fig = vuecore.decomposition.pca_grid(\n",
" PCs=PCs, meta_column=y, n_components=n_components, meta_col_name=meta_column\n",
" )\n",
" return PCs, fig"
]
Expand Down Expand Up @@ -435,91 +426,23 @@
"id": "55f6ab16",
"metadata": {},
"source": [
"## Dimensionality reduction - unnormalized data\n",
"on median imputed and standard normalized omics data."
]
},
{
"cell_type": "markdown",
"id": "85f8172a",
"metadata": {},
"source": [
"### Principal Components\n",
"Plot first 4 PCs with categorical metadata as label annotating each sample."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ded39ffb",
"metadata": {
"execution": {
"iopub.execute_input": "2024-10-15T08:04:42.488173Z",
"iopub.status.busy": "2024-10-15T08:04:42.488058Z",
"iopub.status.idle": "2024-10-15T08:04:42.621417Z",
"shell.execute_reply": "2024-10-15T08:04:42.618618Z"
},
"tags": [
"hide-input"
]
},
"outputs": [],
"source": [
"omics_imp_scaled = standard_normalize(omics_imputed)\n",
"\n",
"PCs, pca = acore_pca.run_pca(omics_imp_scaled, n_components=4)\n",
"ax = plot_explained_variance(pca)\n",
"ax.locator_params(axis=\"x\", integer=True)\n",
"omics_imp_scaled.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f2a740a0",
"metadata": {
"execution": {
"iopub.execute_input": "2024-10-15T08:04:42.718760Z",
"iopub.status.busy": "2024-10-15T08:04:42.718502Z",
"iopub.status.idle": "2024-10-15T08:04:43.184605Z",
"shell.execute_reply": "2024-10-15T08:04:43.184367Z"
},
"tags": [
"hide-input"
]
},
"outputs": [],
"source": [
"pcs, fig = run_and_plot_pca(omics_imp_scaled, y, METACOL_LABEL)"
]
},
{
"cell_type": "markdown",
"id": "3cd76469",
"metadata": {},
"source": [
"### UMAP\n",
"of median imputed and normalized omics data:"
"Explained variance by first four principal components in data."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "33b64b35",
"id": "7134cd23",
"metadata": {
"execution": {
"iopub.execute_input": "2024-10-15T08:04:43.216151Z",
"iopub.status.busy": "2024-10-15T08:04:43.215329Z",
"iopub.status.idle": "2024-10-15T08:04:45.453903Z",
"shell.execute_reply": "2024-10-15T08:04:45.449160Z"
},
"tags": [
"hide-input"
]
},
"outputs": [],
"source": [
"ax = plot_umap(omics_imp_scaled, y, METACOL_LABEL)"
"PCs, pca = acore.decomposition.pca.run_pca(omics_imputed, n_components=4)\n",
"ax = vuecore.decomposition.plot_explained_variance(pca)\n",
"ax.locator_params(axis=\"x\", integer=True)"
]
},
{
Expand Down Expand Up @@ -1043,6 +966,9 @@
}
],
"metadata": {
"jupytext": {
"cell_metadata_filter": "tags,-all"
},
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
Expand Down
46 changes: 8 additions & 38 deletions docs/api_examples/normalization_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
# %pip install acore

# %% tags=["hide-input"]
import itertools
from typing import Optional

import matplotlib.pyplot as plt
Expand All @@ -35,7 +34,7 @@
import sklearn
import sklearn.impute
import sklearn.preprocessing
from vuecore.decomposition import plot_explained_variance
import vuecore.decomposition

import acore.decomposition
import acore.normalization
Expand Down Expand Up @@ -73,24 +72,13 @@ def median_impute(X: pd.DataFrame) -> pd.DataFrame:
def run_and_plot_pca(
X_scaled,
y,
meta_column,
n_components=4,
meta_column: Optional[str] = None,
n_components: int = 4,
) -> tuple[pd.DataFrame, plt.Figure]:
PCs, _ = acore.decomposition.pca.run_pca(X_scaled, n_components=n_components)
PCs.columns = [s.replace("principal component", "PC") for s in PCs.columns]
PCs = PCs.join(y.astype("category"))
up_to = min(PCs.shape[-1], n_components)
fig, axes = plt.subplots(up_to - 1, 2, figsize=(6, 8), layout="constrained")
for k, (pos, ax) in enumerate(
zip(itertools.combinations(range(up_to), 2), axes.flatten())
):
i, j = pos
plot_heatmap = bool(k % 2)
PCs.plot.scatter(
i, j, c=meta_column, cmap="Paired", ax=ax, colorbar=plot_heatmap
)
_ = PCs.pop(
meta_column,
fig = vuecore.decomposition.pca_grid(
PCs=PCs, meta_column=y, n_components=n_components, meta_col_name=meta_column
)
return PCs, fig

Expand Down Expand Up @@ -218,30 +206,12 @@ def run_and_plot_pca(
omics_imputed.shape

# %% [markdown]
# ## Dimensionality reduction - unnormalized data
# Computed on median-imputed and standard-scaled omics data (features are scaled, but no sample normalization has been applied yet).

# %% [markdown]
# ### Principal Components
# Plot first 4 PCs with categorical metadata as label annotating each sample.
# Variance explained by the first four principal components of the data.

# %% tags=["hide-input"]
omics_imp_scaled = standard_normalize(omics_imputed)

PCs, pca = acore.decomposition.pca.run_pca(omics_imp_scaled, n_components=4)
ax = plot_explained_variance(pca)
PCs, pca = acore.decomposition.pca.run_pca(omics_imputed, n_components=4)
ax = vuecore.decomposition.plot_explained_variance(pca)
ax.locator_params(axis="x", integer=True)
omics_imp_scaled.shape

# %% tags=["hide-input"]
pcs, fig = run_and_plot_pca(omics_imp_scaled, y, METACOL_LABEL)

# %% [markdown]
# ### UMAP
# of median imputed and normalized omics data:

# %% tags=["hide-input"]
ax = plot_umap(omics_imp_scaled, y, METACOL_LABEL)

# %% [markdown]
# ## Normalization of samples in a dataset
Expand Down

0 comments on commit 9f67485

Please sign in to comment.