Skip to content

Commit

Permalink
Add feature to extract the gene expression matrix of a given cluster.…
Browse files Browse the repository at this point in the history
… Update tutorial. Update to version 0.3.0.
  • Loading branch information
mase5 committed Nov 8, 2018
1 parent 34956c5 commit 234d42c
Show file tree
Hide file tree
Showing 12 changed files with 341 additions and 42 deletions.
9 changes: 5 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
Package: SCopeLoomR
Type: Package
Title: Build .loom files compatible with SCope
Version: 0.2.2
Title: Build .loom files (compatible with SCope) and extract data from .loom files.
Version: 0.3.0
Author: mase5
Maintainer: mase5 <[email protected]>
Description: R package to build generic .loom files aligning with the default naming convention of the .loom format and
to integrate other data types e.g.: regulons (SCENIC), clusters from Seurat, ...
Imports: hdf5r, rjson, utils, methods, base, base64enc, igraph
to integrate other data types e.g.: regulons (SCENIC), clusters from Seurat, ... The package can also be used to extract
data from .loom files.
Imports: hdf5r, rjson, utils, methods, base, base64enc, igraph, plyr, rlist
Suggests: seurat, stringr
License: Apache-2
Encoding: UTF-8
Expand Down
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ export(create_trajectory_from_monocle)
export(finalize)
export(flush)
export(get_cell_ids)
export(get_cluster_dgem_by_name)
export(get_cluster_info_by_cluster_name)
export(get_clustering_by_id)
export(get_clusterings)
export(get_col_attr_by_key)
export(get_default_embedding)
export(get_dgem)
Expand Down
107 changes: 104 additions & 3 deletions R/loom.R
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ add_global_md_clustering<-function(loom
return (list(id = cluster.id
, description = description))
})
clusterings<-get_col_attr_by_key(loom = loom, key = CA_CLUSTERINGS_NAME)
clusterings<-get_clusterings(loom = loom)
clustering<-list(id = id
, group = group
, name = name
Expand Down Expand Up @@ -584,7 +584,7 @@ add_seurat_clustering<-function(loom
append_clustering_update_ca<-function(loom
, clustering.id
, clustering) {
ca.clusterings<-get_col_attr_by_key(loom = loom, key = CA_CLUSTERINGS_NAME)
ca.clusterings<-get_clusterings(loom = loom)
colnames(clustering)<-as.character(clustering.id)
# Append this clustering
ca.clusterings<-cbind(ca.clusterings, clustering)
Expand Down Expand Up @@ -695,7 +695,7 @@ add_annotated_clustering<-function(loom
# Adding the clustering data
if(col_attrs_exists_by_key(loom = loom, key = CA_CLUSTERINGS_NAME)) {
print(paste(CA_CLUSTERINGS_NAME, "already exists..."))
ca.clusterings<-get_col_attr_by_key(loom = loom, key = CA_CLUSTERINGS_NAME)
ca.clusterings<-get_clusterings(loom = loom)
# Set the clustering id
id<-ncol(ca.clusterings) # n clusterings (start at 0)
clustering<-data.frame("x" = as.integer(as.character(x = clusters)))
Expand Down Expand Up @@ -1437,3 +1437,104 @@ get_default_embedding<-function(loom) {
return (loom[["col_attrs"]][[CA_EMBEDDING_NAME]])
}

#'@title get_clustering_idx_by_cluster_name
#'@description Get the index of the clustering containing the cluster with the given cluster.name.
#'@param loom The loom file handler.
#'@param cluster.name The name (description) of the cluster.
#'@return The index of the clustering, in the clusterings of the global meta data, that contains a cluster whose description equals the given cluster.name. NA if no clustering contains such a cluster.
get_clustering_idx_by_cluster_name<-function(loom
                                             , cluster.name) {
  # Get global meta data
  md<-get_global_meta_data(loom = loom)
  clusterings<-md$clusterings
  # Search each clustering's own clusters instead of a flattened copy of the
  # whole meta data tree: this only ever matches cluster descriptions (never a
  # clustering name or group) and directly yields the clustering index.
  for(i in seq_along(along.with = clusterings)) {
    descriptions<-unlist(x = lapply(X = clusterings[[i]]$clusters, FUN = function(cluster) {
      return (cluster$description)
    }))
    if(cluster.name %in% descriptions) {
      return (i)
    }
  }
  # Not found: callers test the result with is.na()
  return (NA_integer_)
}

#'@title get_cluster_info_by_cluster_name
#'@description Get cluster information (Clustering ID, Clustering Name, Clustering Group, Cluster ID, Cluster Name) of the given cluster.name.
#'@param loom The loom file handler.
#'@param cluster.name The name (description) of the cluster.
#'@return A list with the elements clustering.id, clustering.name, clustering.group, cluster.id and cluster.name describing the given cluster and the clustering it belongs to. An error is raised if the cluster cannot be found.
#'@export
get_cluster_info_by_cluster_name<-function(loom
                                           , cluster.name) {
  # Get global meta data
  md<-get_global_meta_data(loom = loom)
  # Get the index of the clustering in the meta data clusterings
  clustering.idx<-get_clustering_idx_by_cluster_name(loom = loom, cluster.name = cluster.name)
  if(is.na(x = clustering.idx)) {
    stop(paste0("The given cluster ", cluster.name, " does not exist in this .loom."))
  }
  clustering<-md$clusterings[[clustering.idx]]
  # Locate the cluster within the clustering by its description
  is.match<-vapply(X = clustering$clusters, FUN = function(cluster) {
    return (isTRUE(x = cluster$description == cluster.name))
  }, FUN.VALUE = logical(length = 1))
  # Fail with the same clear message rather than an obscure subscript error
  # when the selected clustering has no cluster with this description.
  if(!any(is.match)) {
    stop(paste0("The given cluster ", cluster.name, " does not exist in this .loom."))
  }
  cluster<-clustering$clusters[[which(x = is.match)[1]]]
  return (list(clustering.id=clustering$id
               , clustering.name=clustering$name
               , clustering.group=clustering$group
               , cluster.id=cluster$id
               , cluster.name=cluster$description))
}

#'@title get_clusterings
#'@description Retrieve all the clusterings stored in the given loom.
#'@param loom The loom file handler.
#'@return A N-by-M data.frame containing the clusterings of the given loom. N represents the cells and M the clusterings.
#'@export
get_clusterings<-function(loom) {
  # Look up the column attribute keyed by CA_CLUSTERINGS_NAME
  get_col_attr_by_key(key = CA_CLUSTERINGS_NAME, loom = loom)
}

#'@title get_clustering_by_id
#'@description Retrieve the clustering identified by the given clustering.id from the given loom.
#'@param loom The loom file handler.
#'@param clustering.id The ID of the clustering.
#'@return A N-by-1 vector containing the cell assignments to each of the clusters of the clustering.
#'@export
get_clustering_by_id<-function(loom
                               , clustering.id) {
  all.clusterings<-get_clusterings(loom = loom)
  # Flag the column(s) whose name equals the requested clustering ID
  is.requested<-is.element(el = colnames(all.clusterings), table = clustering.id)
  all.clusterings[, is.requested]
}

#'@title get_cell_mask_by_cluster_name
#'@description Build a cell mask selecting the cells of the given cluster.name in the given loom.
#'@param loom The loom file handler.
#'@param cluster.name The name of the cluster.
#'@return A N-by-1 boolean vector specifying which cells belong to the given cluster.name in the given loom.
get_cell_mask_by_cluster_name<-function(loom
                                        , cluster.name) {
  # Resolve the cluster.name to its clustering ID and cluster ID
  info<-get_cluster_info_by_cluster_name(loom = loom, cluster.name = cluster.name)
  # Cell assignments of the clustering the cluster belongs to
  assignments<-get_clustering_by_id(loom = loom, clustering.id = info$clustering.id)
  # TRUE for every cell assigned to the requested cluster
  is.element(el = assignments, table = info$cluster.id)
}

#'@title get_cluster_dgem_by_name
#'@description Get a subset of the digital gene expression matrix containing only the cells in the cluster annotated by the given cluster.name.
#'@param loom The loom file handler.
#'@param cluster.name The name/description of the cluster.
#'@return A N-by-M matrix containing the gene expression levels of the cells in the cluster annotated by the given cluster.name. N represents the genes and M the cells.
#'@export
get_cluster_dgem_by_name<-function(loom
                                   , cluster.name) {
  # Get the cell mask for the given cluster.name
  mask<-get_cell_mask_by_cluster_name(loom = loom, cluster.name = cluster.name)
  dgem<-get_dgem(loom = loom)
  # drop = FALSE keeps the two-dimensional shape (and the gene/cell names)
  # even when the cluster contains a single cell.
  return (dgem[, mask, drop = FALSE])
}


13 changes: 10 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# SCopeLoomR v0.2.2
An R package (compatible with SCope) to create generic .loom files and extend them with other data e.g.: SCENIC regulons, Seurat clusters and markers, ...
# SCopeLoomR v0.3.0
An R package (compatible with SCope) to create generic .loom files and extend them with other data e.g.: SCENIC regulons, Seurat clusters and markers, ... The package can also be used to extract
data from .loom files.

## Installation

Expand All @@ -12,10 +13,16 @@ install_github("aertslab/SCopeLoomR")
```

## Tutorial
You can find a tutorial on how to create .loom files [here](https://github.com/aertslab/SCopeLoomR/blob/master/vignettes/SCopeLoomR_tutorial.Rmd).
You can find a tutorial on how to create .loom files and extract data from them [here](https://github.com/aertslab/SCopeLoomR/blob/master/vignettes/SCopeLoomR_tutorial.Rmd).

## Version History

November 8, 2018

* Version 0.3.0
* Add feature to extract the gene expression matrix of a given cluster.
* Update [tutorial](https://github.com/aertslab/SCopeLoomR/blob/master/vignettes/SCopeLoomR_tutorial.Rmd).

October 31, 2018

* Version 0.2.2
Expand Down
19 changes: 19 additions & 0 deletions man/get_cell_mask_by_cluster_name.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions man/get_cluster_dgem_by_name.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions man/get_cluster_info_by_cluster_name.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions man/get_clustering_by_id.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions man/get_clustering_idx_by_cluster_name.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions man/get_clusterings.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

21 changes: 20 additions & 1 deletion vignettes/SCopeLoomR_tutorial.Rmd
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
---
title: "SCopeLoomR tutorial"
title: "SCopeLoomR Tutorial - Create a .loom file and extract data from a .loom"
package: r pkg_ver('SCopeLoomR')
output:
html_notebook:
toc: yes
html_document:
keep_md: true
df_print: paged
toc: yes
BiocStyle::html_document:
Expand Down Expand Up @@ -200,6 +201,24 @@ add_embedding(loom = loom, embedding = monocle.embedding, name = "Monocle (DDRTr
finalize(loom=loom)
```

# Extract data from a loom object

The .loom file related to this part of the tutorial can be downloaded at http://scope.aertslab.org in the left panel under `Drosophila` > `Brain`.

## Get the gene expression matrix

```{r}
loom<-open_loom(loom = "Aerts_Fly_AdultBrain_Filtered_57k.loom")
dgem<-get_dgem(loom = loom)
```

## Get the gene expression matrix of a given cluster

```{r}
loom<-open_loom(loom = "Aerts_Fly_AdultBrain_Filtered_57k.loom")
cluster.10.dgem<-get_cluster_dgem_by_name(loom = loom, cluster.name = "MBON - Cluster 57")
```




115 changes: 84 additions & 31 deletions vignettes/SCopeLoomR_tutorial.nb.html

Large diffs are not rendered by default.

0 comments on commit 234d42c

Please sign in to comment.