Title: | Calculate similarity of clusters from different single cell samples using foldchanges |
---|---|
Description: | This package calculates a similarity coefficient using the fold changes of shared features (e.g. genes) among clusters of different samples/batches/datasets. The similarity coefficient is calculated using the dot-product (Hadamard product) of every pairwise combination of Fold Changes between a source cluster i of sample/dataset n and all the target clusters j in sample/dataset m |
Authors: | Oscar Gonzalez-Velasco [cre, aut] |
Maintainer: | Oscar Gonzalez-Velasco <[email protected]> |
License: | Artistic-2.0 |
Version: | 1.3.0 |
Built: | 2024-12-19 03:29:29 UTC |
Source: | https://github.com/bioc/ClusterFoldSimilarity |
'clusterFoldSimilarity()' returns a dataframe containing the best top similarities between all possible pairs of single cell samples.
clusterFoldSimilarity( scList = NULL, sampleNames = NULL, topN = 1, topNFeatures = 1, nSubsampling = 15, parallel = FALSE )
clusterFoldSimilarity( scList = NULL, sampleNames = NULL, topN = 1, topNFeatures = 1, nSubsampling = 15, parallel = FALSE )
scList |
List. A list of Single Cell Experiments or Seurat objects. At least 2 are needed. The objects are expected to have cluster or label groups set as identity class. |
sampleNames |
Character Vector. Specify the sample names, if not a number corresponding with its position on (scList). |
topN |
Numeric. Specifies the number of target clusters with best similarity to report for each cluster comparison (default 1). If set to Inf, then all similarity values from all possible pairs of clusters are returned. |
topNFeatures |
Numeric. Number of top features that explains the clusters similarity to report for each cluster comparison (default 1). If topN = Inf then topNFeatures is automatically set to 1. |
nSubsampling |
Numeric. Number of random sampling of cells to achieve fold change stability (default 15). |
parallel |
Boolean. Whether to use parallel computing using BiocParallel or not (default FALSE). |
This function will calculate a similarity coeficient using the fold changes of shared features (e.g.:genes for a single-cell RNA-Seq, peaks for ATAC-Seq) among clusters, or user-defined-groups, from different samples/batches. The similarity coeficient is calculated using the dotproduct of every pairwise combination of Fold Changes between a source cluster/group i from sample n and all the target clusters/groups in sample j.
The function returns a DataFrame containing the best top similarities between all possible pairs of single cell samples. Column values are:
similarityValue |
The top similarity value calculated between datasetL:clusterL and datasetR. |
w |
Weight associated with the similarity score value. |
datasetL |
Dataset left, the dataset/sample which has been used to be compared. |
clusterL |
Cluster left, the cluster source from datasetL which has been compared. |
datasetR |
Dataset right, the dataset/sample used for comparison against datasetL. |
clusterR |
Cluster right, the cluster target from datasetR which is being compared with the clusterL from datasetL. |
topFeatureConserved |
The features (e.g.: genes, peaks...) that most contributed to the similarity between clusterL & clusterR. |
featureScore |
The similarity score contribution for the specific topFeatureConserved (e.g.: genes, peaks...). |
Oscar Gonzalez-Velasco
if (requireNamespace("Seurat") & requireNamespace("SeuratObject")){ library(ClusterFoldSimilarity) library(Seurat) library(SeuratObject) # data dimensions nfeatures <- 2000; ncells <- 400 # single-cell 1 counts <- matrix(rpois(n=nfeatures * ncells, lambda=10), nfeatures) rownames(counts) <- paste0("gene",seq(nfeatures)) colnames(counts) <- paste0("cell",seq(ncells)) colData <- data.frame(cluster=sample(c("Cluster1","Cluster2","Cluster3"),size = ncells,replace = TRUE), row.names=paste0("cell",seq(ncells))) seu1 <- SeuratObject::CreateSeuratObject(counts = counts, meta.data = colData) Idents(object = seu1) <- "cluster" # single-cell 2 counts <- matrix(rpois(n=nfeatures * ncells, lambda=20), nfeatures) rownames(counts) <- paste0("gene",seq(nfeatures)) colnames(counts) <- paste0("cell",seq(ncells)) colData <- data.frame(cluster=sample(c("Cluster1","Cluster2","Cluster3","Cluster4"),size = ncells,replace = TRUE), row.names=paste0("cell",seq(ncells))) seu2 <- SeuratObject::CreateSeuratObject(counts = counts, meta.data = colData) Idents(object = seu2) <- "cluster" # Create a list with the unprocessed single-cell datasets singlecellObjectList <- list(seu1, seu2) similarityTable <- clusterFoldSimilarity(scList=singlecellObjectList, sampleNames = c("sc1","sc2")) head(similarityTable) }
if (requireNamespace("Seurat") & requireNamespace("SeuratObject")){ library(ClusterFoldSimilarity) library(Seurat) library(SeuratObject) # data dimensions nfeatures <- 2000; ncells <- 400 # single-cell 1 counts <- matrix(rpois(n=nfeatures * ncells, lambda=10), nfeatures) rownames(counts) <- paste0("gene",seq(nfeatures)) colnames(counts) <- paste0("cell",seq(ncells)) colData <- data.frame(cluster=sample(c("Cluster1","Cluster2","Cluster3"),size = ncells,replace = TRUE), row.names=paste0("cell",seq(ncells))) seu1 <- SeuratObject::CreateSeuratObject(counts = counts, meta.data = colData) Idents(object = seu1) <- "cluster" # single-cell 2 counts <- matrix(rpois(n=nfeatures * ncells, lambda=20), nfeatures) rownames(counts) <- paste0("gene",seq(nfeatures)) colnames(counts) <- paste0("cell",seq(ncells)) colData <- data.frame(cluster=sample(c("Cluster1","Cluster2","Cluster3","Cluster4"),size = ncells,replace = TRUE), row.names=paste0("cell",seq(ncells))) seu2 <- SeuratObject::CreateSeuratObject(counts = counts, meta.data = colData) Idents(object = seu2) <- "cluster" # Create a list with the unprocessed single-cell datasets singlecellObjectList <- list(seu1, seu2) similarityTable <- clusterFoldSimilarity(scList=singlecellObjectList, sampleNames = c("sc1","sc2")) head(similarityTable) }
'findCommunitiesSimmilarity()' Find communities by constructing and clustering graph using the similarity values calculated by ClusterFoldSimilarity().
findCommunitiesSimmilarity(similarityTable = NULL)
findCommunitiesSimmilarity(similarityTable = NULL)
similarityTable |
Dataframe. A table obtained from ClusterFoldSimilarity that contains the similarity values as a column "similarityValue" that represents the similarity of a source cluster to a target cluster. |
This function will group together nodes of the network into communities using the InfoMap community detection algorithm.
This function returns a data frame with the community that each node of the network (cell groups defined by the user) belongs to, and plots a graph in which the nodes are clusters from a specific dataset, the edges represent the similarity and the direction of that similarity between clusters.
sample |
The sample name. |
group |
The group/cluster from sample defined by the user. |
community |
Community group to which the sample:group belongs to. |
Oscar Gonzalez-Velasco
if (requireNamespace("Seurat") & requireNamespace("SeuratObject")){ library(ClusterFoldSimilarity) library(Seurat) library(SeuratObject) # data dimensions nfeatures <- 2000; ncells <- 400 # single-cell 1 counts <- matrix(rpois(n=nfeatures * ncells, lambda=10), nfeatures) rownames(counts) <- paste0("gene",seq(nfeatures)) colnames(counts) <- paste0("cell",seq(ncells)) colData <- data.frame(cluster=sample(c("Cluster1","Cluster2","Cluster3"),size = ncells,replace = TRUE), row.names=paste0("cell",seq(ncells))) seu1 <- SeuratObject::CreateSeuratObject(counts = counts, meta.data = colData) Idents(object = seu1) <- "cluster" # single-cell 2 counts <- matrix(rpois(n=nfeatures * ncells, lambda=10), nfeatures) rownames(counts) <- paste0("gene",seq(nfeatures)) colnames(counts) <- paste0("cell",seq(ncells)) colData <- data.frame(cluster=sample(c("Cluster1","Cluster2","Cluster3","Cluster4"),size = ncells,replace = TRUE), row.names=paste0("cell",seq(ncells))) seu2 <- SeuratObject::CreateSeuratObject(counts = counts, meta.data = colData) Idents(object = seu2) <- "cluster" # Create a list with the unprocessed single-cell datasets singlecellObjectList <- list(seu1, seu2) similarityTable <- clusterFoldSimilarity(scList = singlecellObjectList, sampleNames = c("sc1","sc2")) head(similarityTable) findCommunitiesSimmilarity(similarityTable=similarityTable) }
if (requireNamespace("Seurat") & requireNamespace("SeuratObject")){ library(ClusterFoldSimilarity) library(Seurat) library(SeuratObject) # data dimensions nfeatures <- 2000; ncells <- 400 # single-cell 1 counts <- matrix(rpois(n=nfeatures * ncells, lambda=10), nfeatures) rownames(counts) <- paste0("gene",seq(nfeatures)) colnames(counts) <- paste0("cell",seq(ncells)) colData <- data.frame(cluster=sample(c("Cluster1","Cluster2","Cluster3"),size = ncells,replace = TRUE), row.names=paste0("cell",seq(ncells))) seu1 <- SeuratObject::CreateSeuratObject(counts = counts, meta.data = colData) Idents(object = seu1) <- "cluster" # single-cell 2 counts <- matrix(rpois(n=nfeatures * ncells, lambda=10), nfeatures) rownames(counts) <- paste0("gene",seq(nfeatures)) colnames(counts) <- paste0("cell",seq(ncells)) colData <- data.frame(cluster=sample(c("Cluster1","Cluster2","Cluster3","Cluster4"),size = ncells,replace = TRUE), row.names=paste0("cell",seq(ncells))) seu2 <- SeuratObject::CreateSeuratObject(counts = counts, meta.data = colData) Idents(object = seu2) <- "cluster" # Create a list with the unprocessed single-cell datasets singlecellObjectList <- list(seu1, seu2) similarityTable <- clusterFoldSimilarity(scList = singlecellObjectList, sampleNames = c("sc1","sc2")) head(similarityTable) findCommunitiesSimmilarity(similarityTable=similarityTable) }
'plotClustersGraph()' Creates a graph plot using the similarity values calculated with ClusterFoldSimilarity().
plotClustersGraph(similarityTable = NULL)
plotClustersGraph(similarityTable = NULL)
similarityTable |
Dataframe. A table obtained from ClusterFoldSimilarity that contains the similarity values as a column "similarityValue" that represents the similarity of a source cluster to a target cluster. |
This function will calculate a similarity coeficient using the fold changes of shared genes among clusters of different samples/batches. The similarity coeficient is calculated using the dotproduct of every pairwise combination of Fold Changes between a source cluster i of sample n and all the target clusters in sample j.
This function plots a graph in which the nodes are clusters from a specific dataset, the edges represent the similarity and the direction of that similarity between clusters.
Oscar Gonzalez-Velasco
if (requireNamespace("Seurat") & requireNamespace("SeuratObject")){ library(ClusterFoldSimilarity) library(Seurat) library(SeuratObject) # data dimensions nfeatures <- 2000; ncells <- 400 # single-cell 1 counts <- matrix(rpois(n=nfeatures * ncells, lambda=10), nfeatures) rownames(counts) <- paste0("gene",seq(nfeatures)) colnames(counts) <- paste0("cell",seq(ncells)) colData <- data.frame(cluster=sample(c("Cluster1","Cluster2","Cluster3"),size = ncells,replace = TRUE), row.names=paste0("cell",seq(ncells))) seu1 <- SeuratObject::CreateSeuratObject(counts = counts, meta.data = colData) Idents(object = seu1) <- "cluster" # single-cell 2 counts <- matrix(rpois(n=nfeatures * ncells, lambda=10), nfeatures) rownames(counts) <- paste0("gene",seq(nfeatures)) colnames(counts) <- paste0("cell",seq(ncells)) colData <- data.frame(cluster=sample(c("Cluster1","Cluster2","Cluster3","Cluster4"),size = ncells,replace = TRUE), row.names=paste0("cell",seq(ncells))) seu2 <- SeuratObject::CreateSeuratObject(counts = counts, meta.data = colData) Idents(object = seu2) <- "cluster" # Create a list with the unprocessed single-cell datasets singlecellObjectList <- list(seu1, seu2) similarityTable <- clusterFoldSimilarity(scList = singlecellObjectList, sampleNames = c("sc1","sc2")) head(similarityTable) plotClustersGraph(similarityTable=similarityTable) }
if (requireNamespace("Seurat") & requireNamespace("SeuratObject")){ library(ClusterFoldSimilarity) library(Seurat) library(SeuratObject) # data dimensions nfeatures <- 2000; ncells <- 400 # single-cell 1 counts <- matrix(rpois(n=nfeatures * ncells, lambda=10), nfeatures) rownames(counts) <- paste0("gene",seq(nfeatures)) colnames(counts) <- paste0("cell",seq(ncells)) colData <- data.frame(cluster=sample(c("Cluster1","Cluster2","Cluster3"),size = ncells,replace = TRUE), row.names=paste0("cell",seq(ncells))) seu1 <- SeuratObject::CreateSeuratObject(counts = counts, meta.data = colData) Idents(object = seu1) <- "cluster" # single-cell 2 counts <- matrix(rpois(n=nfeatures * ncells, lambda=10), nfeatures) rownames(counts) <- paste0("gene",seq(nfeatures)) colnames(counts) <- paste0("cell",seq(ncells)) colData <- data.frame(cluster=sample(c("Cluster1","Cluster2","Cluster3","Cluster4"),size = ncells,replace = TRUE), row.names=paste0("cell",seq(ncells))) seu2 <- SeuratObject::CreateSeuratObject(counts = counts, meta.data = colData) Idents(object = seu2) <- "cluster" # Create a list with the unprocessed single-cell datasets singlecellObjectList <- list(seu1, seu2) similarityTable <- clusterFoldSimilarity(scList = singlecellObjectList, sampleNames = c("sc1","sc2")) head(similarityTable) plotClustersGraph(similarityTable=similarityTable) }
'similarityHeatmap()' returns a ggplot heatmap representing the similarity values between pairs of clusters as obtained from clusterFoldSimilarity.
similarityHeatmap( similarityTable = NULL, mainDataset = NULL, otherDatasets = NULL, highlightTop = TRUE )
similarityHeatmap( similarityTable = NULL, mainDataset = NULL, otherDatasets = NULL, highlightTop = TRUE )
similarityTable |
A DataFrame containing the similarities between all possible pairs of single cell samples obtained with clusterFoldSimilarity using the option n_top=Inf. |
mainDataset |
Numeric. Specify the main dataset (y axis). It corresponds with the datasetL column from the similarityTable |
otherDatasets |
Numeric. Specify some specific dataset to be ploted along the mainDataset (x axis, default: all other datasets found on datasetR column from similarity_table). |
highlightTop |
Boolean. If the top 2 similarity values should be highlighted on the heatmap (default: TRUE) |
This function plots a heatmap using ggplot. It is intended to be used with the output table from clusterFoldSimilarity, which includes the columns: datasetL (the dataset used for comparison) datasetR (the dataset against datasetL has been contrasted), clusterL (clusters from datasetL), clusterR (clusters from datasetR) and the similarityValue.
The function returns a heatmap ggplot object.
Oscar Gonzalez-Velasco
if (requireNamespace("Seurat") & requireNamespace("SeuratObject")){ library(ClusterFoldSimilarity) library(Seurat) library(SeuratObject) # data dimensions nfeatures <- 2000; ncells <- 400 # single-cell 1 counts <- matrix(rpois(n=nfeatures * ncells, lambda=10), nfeatures) rownames(counts) <- paste0("gene",seq(nfeatures)) colnames(counts) <- paste0("cell",seq(ncells)) colData <- data.frame(cluster=sample(c("Cluster1","Cluster2","Cluster3"),size = ncells,replace = TRUE), row.names=paste0("cell",seq(ncells))) seu1 <- SeuratObject::CreateSeuratObject(counts = counts, meta.data = colData) Idents(object = seu1) <- "cluster" # single-cell 2 counts <- matrix(rpois(n=nfeatures * ncells, lambda=10), nfeatures) rownames(counts) <- paste0("gene",seq(nfeatures)) colnames(counts) <- paste0("cell",seq(ncells)) colData <- data.frame(cluster=sample(c("Cluster1","Cluster2","Cluster3","Cluster4"),size = ncells,replace = TRUE), row.names=paste0("cell",seq(ncells))) seu2 <- SeuratObject::CreateSeuratObject(counts = counts, meta.data = colData) Idents(object = seu2) <- "cluster" # Create a list with the unprocessed single-cell datasets singlecellObjectList <- list(seu1, seu2) # Using topN = Inf by default plots a heatmap using the similarity values: similarityTableAll <- clusterFoldSimilarity(scList=singlecellObjectList, topN=Inf) # Using the dataset 2 as a reference on the Y-axis of the heatmap: similarityHeatmap(similarityTable=similarityTableAll, mainDataset=2, highlightTop=FALSE) }
if (requireNamespace("Seurat") & requireNamespace("SeuratObject")){ library(ClusterFoldSimilarity) library(Seurat) library(SeuratObject) # data dimensions nfeatures <- 2000; ncells <- 400 # single-cell 1 counts <- matrix(rpois(n=nfeatures * ncells, lambda=10), nfeatures) rownames(counts) <- paste0("gene",seq(nfeatures)) colnames(counts) <- paste0("cell",seq(ncells)) colData <- data.frame(cluster=sample(c("Cluster1","Cluster2","Cluster3"),size = ncells,replace = TRUE), row.names=paste0("cell",seq(ncells))) seu1 <- SeuratObject::CreateSeuratObject(counts = counts, meta.data = colData) Idents(object = seu1) <- "cluster" # single-cell 2 counts <- matrix(rpois(n=nfeatures * ncells, lambda=10), nfeatures) rownames(counts) <- paste0("gene",seq(nfeatures)) colnames(counts) <- paste0("cell",seq(ncells)) colData <- data.frame(cluster=sample(c("Cluster1","Cluster2","Cluster3","Cluster4"),size = ncells,replace = TRUE), row.names=paste0("cell",seq(ncells))) seu2 <- SeuratObject::CreateSeuratObject(counts = counts, meta.data = colData) Idents(object = seu2) <- "cluster" # Create a list with the unprocessed single-cell datasets singlecellObjectList <- list(seu1, seu2) # Using topN = Inf by default plots a heatmap using the similarity values: similarityTableAll <- clusterFoldSimilarity(scList=singlecellObjectList, topN=Inf) # Using the dataset 2 as a reference on the Y-axis of the heatmap: similarityHeatmap(similarityTable=similarityTableAll, mainDataset=2, highlightTop=FALSE) }