Title: | Combination Connectivity Mapping |
---|---|
Description: | Finds drugs and drug combinations that are predicted to reverse or mimic gene expression signatures. These drugs might reverse diseases or mimic healthy lifestyles. |
Authors: | Alex Pickering |
Maintainer: | Alex Pickering <[email protected]> |
License: | MIT + file LICENSE |
Version: | 1.33.0 |
Built: | 2024-11-18 03:12:58 UTC |
Source: | https://github.com/bioc/ccmap |
Function extracts mu (overall mean effect size) and dprimes (unbiased effect sizes from each contrast).
get_dprimes(es)
get_dprimes(es)
es |
Result of call to |
Result used to query connectivity map drugs and predicted drug combinations.
List containing:
meta |
Named numeric vector with overall mean effect sizes for all genes from meta-analysis. |
contrasts |
List of named numeric vectors (one per contrast) with unbiased effect sizes for all measured genes. |
library(crossmeta) library(lydata) data_dir <- system.file("extdata", package = "lydata") # gather GSE names gse_names <- c("GSE9601", "GSE15069", "GSE50841", "GSE34817", "GSE29689") # load previous differential expression analysis anals <- load_diff(gse_names, data_dir) # run meta-analysis es <- es_meta(anals) #get dprimes dprimes <- get_dprimes(es)
library(crossmeta) library(lydata) data_dir <- system.file("extdata", package = "lydata") # gather GSE names gse_names <- c("GSE9601", "GSE15069", "GSE50841", "GSE34817", "GSE29689") # load previous differential expression analysis anals <- load_diff(gse_names, data_dir) # run meta-analysis es <- es_meta(anals) #get dprimes dprimes <- get_dprimes(es)
Drugs with the largest positive and negative cosine similarity are predicted to, respectively, mimic and reverse the query signature. Values range from +1 to -1.
query_combos(query_genes, drug_info = c("cmap", "l1000"), method = c("average", "ml"), include = NULL, ncores = parallel::detectCores())
query_combos(query_genes, drug_info = c("cmap", "l1000"), method = c("average", "ml"), include = NULL, ncores = parallel::detectCores())
query_genes |
Named numeric vector of differentual expression values for
query genes. Usually 'meta' slot of |
drug_info |
Character vector specifying which dataset to query (either 'cmap' or 'l1000'). Can also provide a matrix of differential expression values for drugs or drug combinations (rows are genes, columns are drugs). |
method |
One of 'average' (default) or 'ml' (machine learning - see details and vignette). |
include |
Character vector of drug names for which combinations with all
other drugs will be predicted and queried. If |
ncores |
Integer, number of cores to use for method 'average'. Default is to use all cores. |
To predict and query all 856086 two-drug cmap combinations, the 'average'
method
can take as little as 10 minutes (Intel Core i7-6700). The 'ml'
(machine learning) method
takes two hours on the same hardware and
requires ~10GB of RAM but is slightly more accurate. Both methods will run
faster by specifying only a subset of drugs using the include
parameter.
To speed up the 'ml' method, the MRO+MKL distribution of R can help
substantially (link).
The combinations of LINCS l1000 signatures (~26 billion) can also be queried
using the 'average' method
. In order to compare l1000 results to those
obtained with cmap, only the same genes should be queried (see example).
Vector of cosine similarities between query and drug combination signatures.
library(lydata) library(crossmeta) # location of data data_dir <- system.file("extdata", package = "lydata") # gather GSE names gse_names <- c("GSE9601", "GSE15069", "GSE50841", "GSE34817", "GSE29689") # load previous analysis anals <- load_diff(gse_names, data_dir) # perform meta-analysis es <- es_meta(anals) # get dprimes dprimes <- get_dprimes(es) # query combinations of metformin and all other cmap drugs top_met_combos <- query_combos(dprimes$all$meta, include = 'metformin', ncores = 1) # previous query but with machine learning method # top_met_combos <- query_combos(dprimes$all$meta, method = 'ml', include = 'metformin') # query all cmap drug combinations # top_combos <- query_combos(dprimes$all$meta) # query all cmap drug combinations with machine learning method # top_combos <- query_combos(dprimes$all$meta, method = 'ml') # query l1000 and cmap using same genes # library(ccdata) # data(cmap_es) # data(l1000_es) # cmap_es <- cmap_es[row.names(l1000_es), ] # met_cmap <- query_combos(dprimes$all$meta, cmap_es, include = 'metformin') # met_l1000 <- query_combos(dprimes$all$meta, l1000_es, include = 'metformin')
library(lydata) library(crossmeta) # location of data data_dir <- system.file("extdata", package = "lydata") # gather GSE names gse_names <- c("GSE9601", "GSE15069", "GSE50841", "GSE34817", "GSE29689") # load previous analysis anals <- load_diff(gse_names, data_dir) # perform meta-analysis es <- es_meta(anals) # get dprimes dprimes <- get_dprimes(es) # query combinations of metformin and all other cmap drugs top_met_combos <- query_combos(dprimes$all$meta, include = 'metformin', ncores = 1) # previous query but with machine learning method # top_met_combos <- query_combos(dprimes$all$meta, method = 'ml', include = 'metformin') # query all cmap drug combinations # top_combos <- query_combos(dprimes$all$meta) # query all cmap drug combinations with machine learning method # top_combos <- query_combos(dprimes$all$meta, method = 'ml') # query l1000 and cmap using same genes # library(ccdata) # data(cmap_es) # data(l1000_es) # cmap_es <- cmap_es[row.names(l1000_es), ] # met_cmap <- query_combos(dprimes$all$meta, cmap_es, include = 'metformin') # met_l1000 <- query_combos(dprimes$all$meta, l1000_es, include = 'metformin')
The 230829 LINCS l1000 signatures (drugs & genetic over/under expression) can also be queried. In order to compare l1000 results to those obtained with cmap, only the same genes should be included (see second example).
query_drugs(query_genes, drug_info = c("cmap", "l1000"), sorted = TRUE, ngenes = 200, path = NULL)
query_drugs(query_genes, drug_info = c("cmap", "l1000"), sorted = TRUE, ngenes = 200, path = NULL)
query_genes |
Named numeric vector of differentual expression values for
query genes. Usually 'meta' slot of |
drug_info |
Character vector specifying which dataset to query (either 'cmap' or 'l1000'). Can also provide a matrix of differential expression values for drugs or drug combinations (rows are genes, columns are drugs). |
sorted |
Would you like the results sorted by decreasing similarity? Default is TRUE. |
ngenes |
The number of top differentially-regulated (up and down) query genes
to use if |
path |
Character vector specifying KEGG pathway. Used to find drugs that most closely mimic or reverse query signature for specific pathway. |
Vector of pearson correlations between query and drug combination signatures.
query_combos
to get similarity between query and
predicted drug combination signatures. diff_path and path_meta
to perform pathway meta-analysis.
# Example 1 ----- library(crossmeta) library(ccdata) library(lydata) data_dir <- system.file("extdata", package = "lydata") data(cmap_es) # gather GSE names gse_names <- c("GSE9601", "GSE15069", "GSE50841", "GSE34817", "GSE29689") # load previous differential expression analysis anals <- load_diff(gse_names, data_dir) # run meta-analysis es <- es_meta(anals) # get meta-analysis effect size values dprimes <- get_dprimes(es) # most significant pathway (from path_meta) path <- 'Amino sugar and nucleotide sugar metabolism' # query using entire transcriptional profile topd <- query_drugs(dprimes$all$meta, cmap_es) # query restricted to transcriptional profile for above pathway topd_path <- query_drugs(dprimes$all$meta, cmap_es, path=path) # Example 2 ----- # create drug signatures genes <- paste("GENE", 1:1000, sep = "_") set.seed(0) drug_info <- data.frame(row.names = genes, drug1 = rnorm(1000, sd = 2), drug2 = rnorm(1000, sd = 2), drug3 = rnorm(1000, sd = 2)) # query signature is drug3 query_sig <- drug_info$drug3 names(query_sig) <- genes res <- query_drugs(query_sig, as.matrix(drug_info)) # use only common genes for l1000 and cmap matrices # library(ccdata) # data(cmap_es) # data(l1000_es) # cmap_es <- cmap_es[row.names(l1000_es), ]
# Example 1 ----- library(crossmeta) library(ccdata) library(lydata) data_dir <- system.file("extdata", package = "lydata") data(cmap_es) # gather GSE names gse_names <- c("GSE9601", "GSE15069", "GSE50841", "GSE34817", "GSE29689") # load previous differential expression analysis anals <- load_diff(gse_names, data_dir) # run meta-analysis es <- es_meta(anals) # get meta-analysis effect size values dprimes <- get_dprimes(es) # most significant pathway (from path_meta) path <- 'Amino sugar and nucleotide sugar metabolism' # query using entire transcriptional profile topd <- query_drugs(dprimes$all$meta, cmap_es) # query restricted to transcriptional profile for above pathway topd_path <- query_drugs(dprimes$all$meta, cmap_es, path=path) # Example 2 ----- # create drug signatures genes <- paste("GENE", 1:1000, sep = "_") set.seed(0) drug_info <- data.frame(row.names = genes, drug1 = rnorm(1000, sd = 2), drug2 = rnorm(1000, sd = 2), drug3 = rnorm(1000, sd = 2)) # query signature is drug3 query_sig <- drug_info$drug3 names(query_sig) <- genes res <- query_drugs(query_sig, as.matrix(drug_info)) # use only common genes for l1000 and cmap matrices # library(ccdata) # data(cmap_es) # data(l1000_es) # cmap_es <- cmap_es[row.names(l1000_es), ]
Equivalent to computing the cumulative sum of a matrix over rows, then over columns, then suming every value (though much faster and more memory efficient).
sum_rowcolCumsum(x, i, j)
sum_rowcolCumsum(x, i, j)
x |
Numeric vector of non-zero values of matrix. |
i |
Integer vector of row indices of x. |
j |
Integer vector of column indices of x. |
Numeric value equal to the sum of the cumulative sum computed over rows then columns of a matrix.
x <- c(1, 1, 1, -1) # non-zero values of matrix i <- c(1, 2, 3, 4) # row indices of x j <- c(4, 1, 3, 2) # col indices of x sum_rowcolCumsum(x, i, j)
x <- c(1, 1, 1, -1) # non-zero values of matrix i <- c(1, 2, 3, 4) # row indices of x j <- c(4, 1, 3, 2) # col indices of x sum_rowcolCumsum(x, i, j)