DEGA-137-Custom-segment-arg-for-Landscape-view #64

Draft · wants to merge 17 commits into base: main
17 changes: 15 additions & 2 deletions js/deck-gl/cell_layer.js
@@ -90,7 +90,13 @@ const cell_layer_onclick = async (info, d, deck_ist, layers_obj, viz_state) => {

export const ini_cell_layer = async (base_url, viz_state) => {

const cell_url = base_url + `/cell_metadata.parquet`;
let cell_url
if (viz_state.seg.version === 'default'){
cell_url = base_url + `/cell_metadata.parquet`;
} else {
cell_url = base_url + '/cell_metadata_' + viz_state.seg.version + '.parquet';
}

var cell_arrow_table = await get_arrow_table(cell_url, options.fetch)

set_cell_names_array(viz_state.cats, cell_arrow_table)
@@ -105,7 +111,9 @@ export const ini_cell_layer = async (base_url, viz_state) => {
viz_state.cats.cell_cats = viz_state.cats.cell_names_array.map(name => viz_state.cats.meta_cell[name])
} else {
// default clustering
var cluster_arrow_table = await get_arrow_table(base_url + `/cell_clusters/cluster.parquet`, options.fetch)

var cluster_arrow_table = await get_arrow_table(base_url + `/cell_clusters${viz_state.seg.version && viz_state.seg.version !== 'default' ? '_' + viz_state.seg.version : ''}/cluster.parquet`,
options.fetch)
set_cell_cats(viz_state.cats, cluster_arrow_table, 'cluster')
}

@@ -114,8 +122,13 @@ export const ini_cell_layer = async (base_url, viz_state) => {
// Combine names and positions into a single array of objects
const new_cell_names_array = cell_arrow_table.getChild("name").toArray()

console.log(cell_arrow_table)

const flatCoordinateArray = viz_state.spatial.cell_scatter_data.attributes.getPosition.value;

console.log('*********************')
console.log(new_cell_names_array)

// save cell positions and categories in one place for updating cluster bar plot
viz_state.combo_data.cell = new_cell_names_array.map((name, index) => ({
name: name,
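
For reference, a minimal sketch of the landscape file names the updated ini_cell_layer resolves, assuming a hypothetical custom segmentation version named 'proseg' (the name is illustrative, not part of this PR):

# Hypothetical example of the files the loader above looks for.
# 'proseg' stands in for any custom value of viz_state.seg.version.
expected_files = {
    'default': ['cell_metadata.parquet', 'cell_clusters/cluster.parquet'],
    'proseg': ['cell_metadata_proseg.parquet', 'cell_clusters_proseg/cluster.parquet'],
}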
8 changes: 8 additions & 0 deletions js/viz/landscape_ist.js
@@ -49,9 +49,13 @@ export const landscape_ist = async (
meta_cluster={},
umap={},
landscape_state='spatial',
segmentation='default',
view_change_custom_callback=null
) => {

console.log('checking segmentation', segmentation)


if (width === 0){
width = '100%'
}
@@ -240,6 +244,10 @@ export const landscape_ist = async (
viz_state.edit.visible = false
viz_state.edit.modify_index = null

// starting to set up custom segmentation support
viz_state.seg = {}
viz_state.seg.version = segmentation

if (Object.keys(viz_state.model).length !== 0){

if (Object.keys(viz_state.model.get('region')).length === 0) {
4 changes: 3 additions & 1 deletion js/widget.js
@@ -34,6 +34,7 @@ export const render_landscape_ist = async ({ model, el }) => {
const meta_cluster = model.get('meta_cluster')
const umap = model.get('umap')
const landscape_state = model.get('landscape_state')
const segmentation = model.get('segmentation')

return landscape_ist(
el,
@@ -51,7 +52,8 @@ export const render_landscape_ist = async ({ model, el }) => {
meta_cell,
meta_cluster,
umap,
landscape_state
landscape_state,
segmentation
)

}
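
The model.get('segmentation') call above assumes a matching synced trait on the Python side of the widget. A minimal sketch of what that counterpart could look like, assuming an anywidget-based class (the class name and default value are illustrative, not taken from this diff):

import anywidget
import traitlets

class Landscape(anywidget.AnyWidget):
    # Trait name must match the key read via model.get('segmentation') in widget.js.
    # The _esm bundle that loads widget.js is not shown here.
    segmentation = traitlets.Unicode('default').tag(sync=True)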
12 changes: 2 additions & 10 deletions package-lock.json

Some generated files are not rendered by default.

200 changes: 199 additions & 1 deletion src/celldega/clust/__init__.py
@@ -31,6 +31,7 @@

import numpy as np
import pandas as pd
import os
from copy import deepcopy

from . import initialize_net
@@ -47,6 +48,7 @@
from . import categories

from scipy.stats import ttest_ind, mannwhitneyu
from matplotlib.colors import to_hex
from sklearn.metrics import pairwise_distances, roc_curve, auc
from scipy.spatial.distance import pdist
from sklearn.metrics import confusion_matrix
@@ -57,6 +59,11 @@
import ipywidgets as widgets
import statsmodels.stats.multitest as smm

from spatialdata_io import xenium
import scanpy as sc
import squidpy as sq
import spatialdata as sd

def hc(df, filter_N_top=None, norm_col='total', norm_row='zscore'):

"""
Expand Down Expand Up @@ -98,6 +105,197 @@ def hc(df, filter_N_top=None, norm_col='total', norm_row='zscore'):

return network

def calc_cluster_signatures(path_landscape_files, segmentation_parameters, cbg, use_default_clustering=False, use_custom_clustering=False):

os.makedirs(os.path.join(path_landscape_files,
f"cell_clusters{'_' + segmentation_parameters['segmentation_approach'] if segmentation_parameters['segmentation_approach'] else ''}"),
exist_ok=True)

if not use_default_clustering:
if use_custom_clustering:

if segmentation_parameters['technology'] == 'custom' or segmentation_parameters['technology'] == 'Xenium':
sdata = xenium(os.path.dirname(path_landscape_files))
# could add merscope functionality later on

adata = sdata.tables["table"]

sc.pp.calculate_qc_metrics(adata, percent_top=(10, 20, 50, 150), inplace=True)
cprobes = (
adata.obs["control_probe_counts"].sum() / adata.obs["total_counts"].sum() * 100)
cwords = (
adata.obs["control_codeword_counts"].sum() / adata.obs["total_counts"].sum() * 100)

sc.pp.filter_cells(adata, min_counts=10)
sc.pp.filter_genes(adata, min_cells=5)

adata.layers["counts"] = adata.X.copy()
sc.pp.normalize_total(adata, inplace=True)
sc.pp.log1p(adata)
sc.pp.pca(adata)
sc.pp.neighbors(adata)
sc.tl.umap(adata)
sc.tl.leiden(adata)

adata.obs.set_index('cell_index', inplace=True)

meta_cell = adata.obs['leiden'].to_dict()

clusters = adata.obs['leiden'].cat.categories.tolist()
colors = adata.uns['leiden_colors']
ser_counts = adata.obs['leiden'].value_counts()
ser_color = pd.Series(colors, index=clusters, name='color')
meta_cluster_df = pd.DataFrame(ser_color)
meta_cluster_df['count'] = ser_counts

meta_cluster_df.index = [str(x) for x in meta_cluster_df.index]
meta_cluster = meta_cluster_df.to_dict(orient='index')

meta_cluster_df.to_parquet(os.path.join(path_landscape_files, f"cell_clusters_{segmentation_parameters['segmentation_approach']}/meta_cluster.parquet"))

inst_cells = cbg.index.tolist()
df_cluster = pd.DataFrame(index=inst_cells)
df_cluster['cluster'] = pd.Series('0', index=inst_cells)
df_cluster.to_parquet(os.path.join(path_landscape_files, f"cell_clusters_{segmentation_parameters['segmentation_approach']}/cluster.parquet"))

adata.obs['leiden'] = adata.obs['leiden'].astype('string')

list_ser = []
for inst_cat in adata.obs['leiden'].unique():
if inst_cat is not None:
inst_cells = adata.obs[adata.obs['leiden'] == inst_cat].index.tolist()
inst_ser = pd.Series(adata[inst_cells].X.mean(axis=0).A1, index=adata.var_names)
inst_ser.name = inst_cat
list_ser.append(inst_ser)

df_sig = pd.concat(list_ser, axis=1)
df_sig.columns = df_sig.columns.tolist()

keep_genes = df_sig.index.tolist()
keep_genes = [x for x in keep_genes if 'Unassigned' not in x]
keep_genes = [x for x in keep_genes if 'NegControl' not in x]
keep_genes = [x for x in keep_genes if 'DeprecatedCodeword' not in x]

df_sig = df_sig.loc[keep_genes]

df_sig.to_parquet(
os.path.join(
path_landscape_files,
f"df_sig{'_' + segmentation_parameters['segmentation_approach'] if segmentation_parameters['segmentation_approach'] else ''}.parquet"
)
)

else:
inst_cells = cbg.index.tolist()
df_cluster = pd.DataFrame(index=inst_cells)
df_cluster['cluster'] = pd.Series('0', index=inst_cells)
df_cluster.to_parquet(os.path.join(
path_landscape_files,
f"cell_clusters{'_' + segmentation_parameters['segmentation_approach'] if segmentation_parameters['segmentation_approach'] else ''}",
"cluster.parquet"
))

meta_clust = pd.DataFrame(index=['0'])
meta_clust.loc['0', 'color'] = '#1f77b4'
meta_clust.loc['0', 'count'] = 100
meta_clust.to_parquet(os.path.join(
path_landscape_files,
f"cell_clusters{'_' + segmentation_parameters['segmentation_approach'] if segmentation_parameters['segmentation_approach'] else ''}",
"meta_cluster.parquet"
))

df_cluster['cluster'] = df_cluster['cluster'].astype('string')

list_ser = []
for inst_cat in df_cluster['cluster'].unique():
if inst_cat is not None:
inst_cells = df_cluster[df_cluster['cluster'] == inst_cat].index.tolist()
inst_ser = cbg.loc[inst_cells].sum()/len(inst_cells)
inst_ser.name = inst_cat
list_ser.append(inst_ser)

df_sig = pd.concat(list_ser, axis=1)
df_sig.columns = df_sig.columns.tolist()

keep_genes = df_sig.index.tolist()
keep_genes = [x for x in keep_genes if 'Unassigned' not in x]
keep_genes = [x for x in keep_genes if 'NegControl' not in x]
keep_genes = [x for x in keep_genes if 'DeprecatedCodeword' not in x]

df_sig = df_sig.loc[keep_genes]

for col in df_sig.columns:
if isinstance(df_sig[col].dtype, pd.SparseDtype):
df_sig[col] = df_sig[col].sparse.to_dense()

df_sig.to_parquet(
os.path.join(
path_landscape_files,
f"df_sig{'_' + segmentation_parameters['segmentation_approach'] if segmentation_parameters['segmentation_approach'] else ''}.parquet"
)
)

else:
default_clusters_path = os.path.join(os.path.dirname(path_landscape_files), 'analysis/clustering/gene_expression_graphclust/clusters.csv')
default_clustering = pd.read_csv(default_clusters_path, index_col=0)

default_clustering = pd.DataFrame(default_clustering.values, index=default_clustering.index.tolist(), columns=['cluster'])

default_clustering_ini = pd.DataFrame(default_clustering.values, index=default_clustering.index.tolist(), columns=['cluster'])
default_clustering_ini['cluster'] = default_clustering_ini['cluster'].astype('string')

meta_cell = pd.read_parquet(os.path.join(path_landscape_files, 'cell_metadata.parquet'))

default_clustering = pd.DataFrame(index=meta_cell.index.tolist())
default_clustering.loc[default_clustering_ini.index.tolist(), 'cluster'] = default_clustering_ini['cluster']

default_clustering.to_parquet(os.path.join(path_landscape_files, f"cell_clusters_{segmentation_parameters['segmentation_approach']}/cluster.parquet"))
ser_counts = default_clustering['cluster'].value_counts()
clusters = ser_counts.index.tolist()
palettes = [plt.get_cmap(name).colors for name in plt.colormaps() if "tab" in name]
flat_colors = [color for palette in palettes for color in palette]
flat_colors_hex = [to_hex(color) for color in flat_colors]

colors = [
flat_colors_hex[i % len(flat_colors_hex)] if "Blank" not in cluster else "#FFFFFF"
for i, cluster in enumerate(clusters)
]

ser_color = pd.Series(colors, index=clusters, name='color')
meta_cluster = pd.DataFrame(ser_color)
meta_cluster['count'] = ser_counts
meta_cluster.to_parquet(os.path.join(path_landscape_files, f"cell_clusters_{segmentation_parameters['segmentation_approach']}/meta_cluster.parquet"))

df_meta = pd.read_csv(default_clusters_path, index_col=0)
df_meta['Cluster'] = df_meta['Cluster'].astype('string')
df_meta.columns = ['cluster']

meta_cell['cluster'] = df_meta['cluster']

list_ser = []
for inst_cat in meta_cell['cluster'].unique().tolist():
if inst_cat is not None:
inst_cells = meta_cell[meta_cell['cluster'] == inst_cat].index.tolist()
inst_ser = cbg.loc[inst_cells].sum()/len(inst_cells)
inst_ser.name = inst_cat

list_ser.append(inst_ser)

df_sig = pd.concat(list_ser, axis=1)
df_sig.columns = df_sig.columns.tolist()
df_sig.index = df_sig.index.tolist()

keep_genes = df_sig.index.tolist()
keep_genes = [x for x in keep_genes if 'Unassigned' not in x]
keep_genes = [x for x in keep_genes if 'NegControl' not in x]
keep_genes = [x for x in keep_genes if 'DeprecatedCodeword' not in x]

df_sig = df_sig.loc[keep_genes, clusters]

df_sig.sparse.to_dense().to_parquet(os.path.join(path_landscape_files, f"df_sig_{segmentation_parameters['segmentation_approach']}.parquet"))

class Network(object):
'''
Clustergrammer.py takes a matrix as input (either from a file of a Pandas DataFrame), normalizes/filters, hierarchically clusters, and produces the :ref:`visualization_json` for :ref:`clustergrammer_js`.
@@ -1568,4 +1766,4 @@ def make_df_from_cols(cols):

df_meta = pd.DataFrame(data=mat, index=rows, columns=cat_titles)

return df_meta
return df_meta
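
A possible follow-up sketch, not part of this diff: calc_cluster_signatures repeats the same f"cell_clusters{'_' + ...}" suffix expression in several places, and a small helper could centralize it. The helper name is illustrative:

def _seg_suffix(segmentation_parameters):
    # Returns '_<approach>' for a custom segmentation, '' for the default.
    approach = segmentation_parameters.get('segmentation_approach')
    return f'_{approach}' if approach else ''

# e.g. os.path.join(path_landscape_files, f"cell_clusters{_seg_suffix(segmentation_parameters)}", "cluster.parquet")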