Title: | Identification of candidate causal perturbations from differential gene expression data |
---|---|
Description: | Compare differential gene expression results with those from known cellular perturbations (such as gene knock-down, overexpression or small molecules) derived from the Connectivity Map. Such analyses make it possible not only to infer the molecular causes of the observed difference in gene expression but also to identify small molecules that could drive or revert specific transcriptomic alterations. |
Authors: | Bernardo P. de Almeida [aut], Nuno Saraiva-Agostinho [aut, cre], Nuno L. Barbosa-Morais [aut, led] |
Maintainer: | Nuno Saraiva-Agostinho <[email protected]> |
License: | MIT + file LICENSE |
Version: | 1.25.0 |
Built: | 2024-12-29 06:40:28 UTC |
Source: | https://github.com/bioc/cTRAP |
Analyse drug set enrichment
analyseDrugSetEnrichment( sets, stats, col = NULL, nperm = 10000, maxSize = 500, ..., keyColSets = NULL, keyColStats = NULL )
analyseDrugSetEnrichment( sets, stats, col = NULL, nperm = 10000, maxSize = 500, ..., keyColSets = NULL, keyColStats = NULL )
sets |
Named list of characters: named sets containing compound
identifiers (obtain drug sets by running |
stats |
Named numeric vector or either a |
col |
Character: name of the column to use for statistics (only required
if class of |
nperm |
Number of permutations to do. Minimal possible nominal p-value is about 1/nperm |
maxSize |
Maximal size of a gene set to test. All pathways above the threshold are excluded. |
... |
Arguments passed on to
|
keyColSets |
Character: column from |
keyColStats |
Character: column from |
Enrichment analysis based on GSEA
Other functions for drug set enrichment analysis:
loadDrugDescriptors()
,
plotDrugSetEnrichment()
,
prepareDrugSets()
descriptors <- loadDrugDescriptors() drugSets <- prepareDrugSets(descriptors) # Analyse drug set enrichment in ranked targeting drugs for a differential # expression profile data("diffExprStat") gdsc <- loadExpressionDrugSensitivityAssociation("GDSC") predicted <- predictTargetingDrugs(diffExprStat, gdsc) analyseDrugSetEnrichment(drugSets, predicted)
descriptors <- loadDrugDescriptors() drugSets <- prepareDrugSets(descriptors) # Analyse drug set enrichment in ranked targeting drugs for a differential # expression profile data("diffExprStat") gdsc <- loadExpressionDrugSensitivityAssociation("GDSC") predicted <- predictTargetingDrugs(diffExprStat, gdsc) analyseDrugSetEnrichment(drugSets, predicted)
Cross Tabulation and Table Creation
## S3 method for class 'referenceComparison' as.table(x, ..., clean = TRUE)
## S3 method for class 'referenceComparison' as.table(x, ..., clean = TRUE)
x |
|
... |
Extra parameters not currently used |
clean |
Boolean: only show certain columns (to avoid redundancy)? |
Complete table with metadata based on a targetingDrugs
object
Other functions related with the ranking of CMap perturbations:
filterCMapMetadata()
,
getCMapConditions()
,
getCMapPerturbationTypes()
,
loadCMapData()
,
loadCMapZscores()
,
parseCMapID()
,
plot.perturbationChanges()
,
plot.referenceComparison()
,
plotTargetingDrugsVSsimilarPerturbations()
,
prepareCMapPerturbations()
,
print.similarPerturbations()
,
rankSimilarPerturbations()
Other functions related with the prediction of targeting drugs:
listExpressionDrugSensitivityAssociation()
,
loadExpressionDrugSensitivityAssociation()
,
plot.referenceComparison()
,
plotTargetingDrugsVSsimilarPerturbations()
,
predictTargetingDrugs()
Convert ENSEMBL gene identifiers to gene symbols
convertENSEMBLtoGeneSymbols( genes, dataset = "hsapiens_gene_ensembl", mart = "ensembl" )
convertENSEMBLtoGeneSymbols( genes, dataset = "hsapiens_gene_ensembl", mart = "ensembl" )
genes |
Character: ENSEMBL gene identifiers |
dataset |
Character: |
mart |
Character: |
Named character vector where names are the input ENSEMBL gene identifiers and the values are the matching gene symbols
Convert gene identifiers
convertGeneIdentifiers( genes, annotation = "Homo sapiens", key = "ENSEMBL", target = "SYMBOL", ignoreDuplicatedTargets = TRUE )
convertGeneIdentifiers( genes, annotation = "Homo sapiens", key = "ENSEMBL", target = "SYMBOL", ignoreDuplicatedTargets = TRUE )
genes |
Character: genes to be converted |
annotation |
|
key |
Character: type of identifier used, e.g. |
target |
Character: type of identifier to convert to; read
|
ignoreDuplicatedTargets |
Boolean: if |
Character vector of the respective targets of gene identifiers. The
previous identifiers remain if other identifiers have the same target (in case
ignoreDuplicatedTargets = TRUE
) or if no target was found.
genes <- c("ENSG00000012048", "ENSG00000083093", "ENSG00000141510", "ENSG00000051180") convertGeneIdentifiers(genes) convertGeneIdentifiers(genes, key="ENSEMBL", target="UNIPROT") # Explicit species name to automatically look for its OrgDb database sp <- "Homo sapiens" genes <- c("ENSG00000012048", "ENSG00000083093", "ENSG00000141510", "ENSG00000051180") convertGeneIdentifiers(genes, sp) # Alternatively, set the annotation database directly ah <- AnnotationHub::AnnotationHub() sp <- AnnotationHub::query(ah, c("OrgDb", "Homo sapiens"))[[1]] columns(sp) # these attributes can be used to change the attributes convertGeneIdentifiers(genes, sp)
genes <- c("ENSG00000012048", "ENSG00000083093", "ENSG00000141510", "ENSG00000051180") convertGeneIdentifiers(genes) convertGeneIdentifiers(genes, key="ENSEMBL", target="UNIPROT") # Explicit species name to automatically look for its OrgDb database sp <- "Homo sapiens" genes <- c("ENSG00000012048", "ENSG00000083093", "ENSG00000141510", "ENSG00000051180") convertGeneIdentifiers(genes, sp) # Alternatively, set the annotation database directly ah <- AnnotationHub::AnnotationHub() sp <- AnnotationHub::query(ah, c("OrgDb", "Homo sapiens"))[[1]] columns(sp) # these attributes can be used to change the attributes convertGeneIdentifiers(genes, sp)
Compare differential gene expression results with those from big datasets (e.g. CMap), making it possible to infer which types of perturbations may explain the observed difference in gene expression.
Optimised to run in ShinyProxy with Celery/Flower backend with argument
shinyproxy = TRUE
.
cTRAP( ..., commonPath = "data", expire = 14, fileSizeLimitMiB = 50, flowerURL = NULL, port = getOption("shiny.port"), host = getOption("shiny.host", "127.0.0.1") )
cTRAP( ..., commonPath = "data", expire = 14, fileSizeLimitMiB = 50, flowerURL = NULL, port = getOption("shiny.port"), host = getOption("shiny.host", "127.0.0.1") )
... |
Objects |
commonPath |
Character: path where to store data common to all sessions |
expire |
Numeric: days until a session expires (message purposes only) |
fileSizeLimitMiB |
Numeric: file size limit in MiB |
flowerURL |
Character: Flower REST API's URL ( |
port |
The TCP port that the application should listen on. If the
|
host |
The IPv4 address that the application should listen on. Defaults
to the |
Input: To use this package, a named vector of differential gene expression metrics is needed, where its values represent the significance and magnitude of the differentially expressed genes (e.g. t-statistic) and its names are gene symbols.
Workflow: The differentially expressed genes will be compared against selected perturbation conditions by:
Spearman or Pearson correlation with z-scores of differentially
expressed genes after perturbations from CMap. Use function
rankSimilarPerturbations
with method = "spearman"
or
method = "pearson"
Gene set enrichment analysis (GSEA) using the (around) 12 000 genes
from CMap. Use function rankSimilarPerturbations
with
method = "gsea"
.
Available perturbation conditions for CMap include:
Cell line(s).
Perturbation type (gene knockdown, gene upregulation or drug intake).
Drug concentration.
Time points.
Values for each perturbation type can be listed with
getCMapPerturbationTypes()
Output: The output includes a data frame of ranked perturbations based on the associated statistical values and respective p-values.
Launches result viewer and plotter (returns NULL
)
Maintainer: Nuno Saraiva-Agostinho [email protected]
Authors:
Bernardo P. de Almeida
Nuno L. Barbosa-Morais [lead]
Useful links:
Report bugs at https://github.com/nuno-agostinho/cTRAP/issues
Other visual interface functions:
launchCMapDataLoader()
,
launchDiffExprLoader()
,
launchDrugSetEnrichmentAnalyser()
,
launchMetadataViewer()
,
launchResultPlotter()
expressionDrugSensitivityAssociation
objects
Operations on expressionDrugSensitivityAssociation
objects
## S3 method for class 'expressionDrugSensitivityAssociation' dimnames(x) ## S3 method for class 'expressionDrugSensitivityAssociation' dim(x) ## S3 method for class 'expressionDrugSensitivityAssociation' x[i, j, drop = FALSE, ...]
## S3 method for class 'expressionDrugSensitivityAssociation' dimnames(x) ## S3 method for class 'expressionDrugSensitivityAssociation' dim(x) ## S3 method for class 'expressionDrugSensitivityAssociation' x[i, j, drop = FALSE, ...]
x |
An |
i , j
|
Character or numeric indexes specifying elements to extract |
drop |
Boolean: coerce result to the lowest possible dimension? |
... |
Extra arguments given to other methods |
Subset, dimension or dimension names
Download metadata for ENCODE knockdown experiments
downloadENCODEknockdownMetadata( cellLine = NULL, gene = NULL, file = "ENCODEmetadata.rds" )
downloadENCODEknockdownMetadata( cellLine = NULL, gene = NULL, file = "ENCODEmetadata.rds" )
cellLine |
Character: cell line |
gene |
Character: target gene |
file |
Character: RDS filepath with metadata (if file doesn't exist, it will be created) |
Data frame containing ENCODE knockdown experiment metadata
Other functions related with using ENCODE expression data:
loadENCODEsamples()
,
performDifferentialExpression()
,
prepareENCODEgeneExpression()
downloadENCODEknockdownMetadata("HepG2", "EIF4G1")
downloadENCODEknockdownMetadata("HepG2", "EIF4G1")
Filter CMap metadata
filterCMapMetadata( metadata, cellLine = NULL, timepoint = NULL, dosage = NULL, perturbationType = NULL )
filterCMapMetadata( metadata, cellLine = NULL, timepoint = NULL, dosage = NULL, perturbationType = NULL )
metadata |
Data frame (CMap metadata) or character (respective filepath) |
cellLine |
Character: cell line (if |
timepoint |
Character: timepoint (if |
dosage |
Character: dosage (if |
perturbationType |
Character: type of perturbation (if |
Filtered CMap metadata
Other functions related with the ranking of CMap perturbations:
as.table.referenceComparison()
,
getCMapConditions()
,
getCMapPerturbationTypes()
,
loadCMapData()
,
loadCMapZscores()
,
parseCMapID()
,
plot.perturbationChanges()
,
plot.referenceComparison()
,
plotTargetingDrugsVSsimilarPerturbations()
,
prepareCMapPerturbations()
,
print.similarPerturbations()
,
rankSimilarPerturbations()
cmapMetadata <- loadCMapData("cmapMetadata.txt", "metadata") filterCMapMetadata(cmapMetadata, cellLine="HEPG2", timepoint="2 h", dosage="25 ng/mL")
cmapMetadata <- loadCMapData("cmapMetadata.txt", "metadata") filterCMapMetadata(cmapMetadata, cellLine="HEPG2", timepoint="2 h", dosage="25 ng/mL")
Downloads metadata if not available
getCMapConditions( metadata, cellLine = NULL, timepoint = NULL, dosage = NULL, perturbationType = NULL, control = FALSE )
getCMapConditions( metadata, cellLine = NULL, timepoint = NULL, dosage = NULL, perturbationType = NULL, control = FALSE )
metadata |
Data frame (CMap metadata) or character (respective filepath) |
cellLine |
Character: cell line (if |
timepoint |
Character: timepoint (if |
dosage |
Character: dosage (if |
perturbationType |
Character: type of perturbation (if |
control |
Boolean: show controls for perturbation types? |
List of conditions in CMap datasets
Other functions related with the ranking of CMap perturbations:
as.table.referenceComparison()
,
filterCMapMetadata()
,
getCMapPerturbationTypes()
,
loadCMapData()
,
loadCMapZscores()
,
parseCMapID()
,
plot.perturbationChanges()
,
plot.referenceComparison()
,
plotTargetingDrugsVSsimilarPerturbations()
,
prepareCMapPerturbations()
,
print.similarPerturbations()
,
rankSimilarPerturbations()
## Not run: cmapMetadata <- loadCMapData("cmapMetadata.txt", "metadata") ## End(Not run) getCMapConditions(cmapMetadata)
## Not run: cmapMetadata <- loadCMapData("cmapMetadata.txt", "metadata") ## End(Not run) getCMapConditions(cmapMetadata)
Get CMap perturbation types
getCMapPerturbationTypes(control = FALSE)
getCMapPerturbationTypes(control = FALSE)
control |
Boolean: return perturbation types used as control? |
Perturbation types and respective codes as used by CMap datasets
Other functions related with the ranking of CMap perturbations:
as.table.referenceComparison()
,
filterCMapMetadata()
,
getCMapConditions()
,
loadCMapData()
,
loadCMapZscores()
,
parseCMapID()
,
plot.perturbationChanges()
,
plot.referenceComparison()
,
plotTargetingDrugsVSsimilarPerturbations()
,
prepareCMapPerturbations()
,
print.similarPerturbations()
,
rankSimilarPerturbations()
getCMapPerturbationTypes()
getCMapPerturbationTypes()
Load CMap data via a visual interface
launchCMapDataLoader( metadata = "cmapMetadata.txt", zscores = "cmapZscores.gctx", geneInfo = "cmapGeneInfo.txt", compoundInfo = "cmapCompoundInfo.txt", cellLine = NULL, timepoint = NULL, dosage = NULL, perturbationType = NULL )
launchCMapDataLoader( metadata = "cmapMetadata.txt", zscores = "cmapZscores.gctx", geneInfo = "cmapGeneInfo.txt", compoundInfo = "cmapCompoundInfo.txt", cellLine = NULL, timepoint = NULL, dosage = NULL, perturbationType = NULL )
metadata |
Data frame (CMap metadata) or character (respective filepath) |
zscores |
Data frame (GCTX z-scores) or character (respective filepath to load data from file) |
geneInfo |
Data frame (CMap gene info) or character (respective filepath to load data from file) |
compoundInfo |
Data frame (CMap compound info) or character (respective filepath to load data from file) |
cellLine |
Character: cell line (if |
timepoint |
Character: timepoint (if |
dosage |
Character: dosage (if |
perturbationType |
Character: type of perturbation (if |
CMap data
Other visual interface functions:
cTRAP()
,
launchDiffExprLoader()
,
launchDrugSetEnrichmentAnalyser()
,
launchMetadataViewer()
,
launchResultPlotter()
Currently only supports loading data from ENCODE knockdown experiments
launchDiffExprLoader( cellLine = NULL, gene = NULL, file = "ENCODEmetadata.rds", path = "." )
launchDiffExprLoader( cellLine = NULL, gene = NULL, file = "ENCODEmetadata.rds", path = "." )
cellLine |
Character: cell line |
gene |
Character: target gene |
file |
Character: RDS filepath with metadata (if file doesn't exist, it will be created) |
path |
Character: path where to download files |
Differential expression data
Other visual interface functions:
cTRAP()
,
launchCMapDataLoader()
,
launchDrugSetEnrichmentAnalyser()
,
launchMetadataViewer()
,
launchResultPlotter()
View and plot results via a visual interface
launchDrugSetEnrichmentAnalyser(sets, ...)
launchDrugSetEnrichmentAnalyser(sets, ...)
sets |
Named list of characters: named sets containing compound
identifiers (obtain drug sets by running |
... |
Objects |
Launches result viewer and plotter (returns NULL
)
Other visual interface functions:
cTRAP()
,
launchCMapDataLoader()
,
launchDiffExprLoader()
,
launchMetadataViewer()
,
launchResultPlotter()
View metadata via a visual interface
launchMetadataViewer(...)
launchMetadataViewer(...)
... |
Objects |
Metadata viewer (returns NULL
)
Other visual interface functions:
cTRAP()
,
launchCMapDataLoader()
,
launchDiffExprLoader()
,
launchDrugSetEnrichmentAnalyser()
,
launchResultPlotter()
View and plot results via a visual interface
launchResultPlotter(...)
launchResultPlotter(...)
... |
Objects |
Launches result viewer and plotter (returns NULL
)
Other visual interface functions:
cTRAP()
,
launchCMapDataLoader()
,
launchDiffExprLoader()
,
launchDrugSetEnrichmentAnalyser()
,
launchMetadataViewer()
List available gene expression and drug sensitivity correlation matrices
listExpressionDrugSensitivityAssociation(url = FALSE)
listExpressionDrugSensitivityAssociation(url = FALSE)
url |
Boolean: return download link? |
Character vector of available gene expression and drug sensitivity correlation matrices
Other functions related with the prediction of targeting drugs:
as.table.referenceComparison()
,
loadExpressionDrugSensitivityAssociation()
,
plot.referenceComparison()
,
plotTargetingDrugsVSsimilarPerturbations()
,
predictTargetingDrugs()
listExpressionDrugSensitivityAssociation()
listExpressionDrugSensitivityAssociation()
Load CMap data (if not found, file
will be automatically downloaded)
loadCMapData( file, type = c("metadata", "geneInfo", "zscores", "compoundInfo"), zscoresID = NULL )
loadCMapData( file, type = c("metadata", "geneInfo", "zscores", "compoundInfo"), zscoresID = NULL )
file |
Character: path to file |
type |
Character: type of data to load ( |
zscoresID |
Character: identifiers to partially load z-scores file
(for performance reasons; if |
Metadata as a data table
If type = "compoundInfo"
, two files from
The Drug Repurposing Hub will be downloaded containing information
about drugs and perturbations. The files will be named file
with
_drugs
and _samples
before their extension, respectively.
Other functions related with the ranking of CMap perturbations:
as.table.referenceComparison()
,
filterCMapMetadata()
,
getCMapConditions()
,
getCMapPerturbationTypes()
,
loadCMapZscores()
,
parseCMapID()
,
plot.perturbationChanges()
,
plot.referenceComparison()
,
plotTargetingDrugsVSsimilarPerturbations()
,
prepareCMapPerturbations()
,
print.similarPerturbations()
,
rankSimilarPerturbations()
# Load CMap metadata (data is automatically downloaded if not available) cmapMetadata <- loadCMapData("cmapMetadata.txt", "metadata") # Load CMap gene info loadCMapData("cmapGeneInfo.txt", "geneInfo") ## Not run: # Load CMap zscores based on filtered metadata cmapMetadataKnockdown <- filterCMapMetadata( cmapMetadata, cellLine="HepG2", perturbationType="Consensus signature from shRNAs targeting the same gene") loadCMapData("cmapZscores.gctx.gz", "zscores", cmapMetadataKnockdown$sig_id) ## End(Not run)
# Load CMap metadata (data is automatically downloaded if not available) cmapMetadata <- loadCMapData("cmapMetadata.txt", "metadata") # Load CMap gene info loadCMapData("cmapGeneInfo.txt", "geneInfo") ## Not run: # Load CMap zscores based on filtered metadata cmapMetadataKnockdown <- filterCMapMetadata( cmapMetadata, cellLine="HepG2", perturbationType="Consensus signature from shRNAs targeting the same gene") loadCMapData("cmapZscores.gctx.gz", "zscores", cmapMetadataKnockdown$sig_id) ## End(Not run)
Load matrix of CMap perturbation's differential expression z-scores (optional)
loadCMapZscores(data, inheritAttrs = FALSE, verbose = TRUE)
loadCMapZscores(data, inheritAttrs = FALSE, verbose = TRUE)
data |
|
inheritAttrs |
Boolean: convert to |
verbose |
Boolean: print additional details? |
Matrix containing CMap perturbation z-scores (genes as rows, perturbations as columns)
Other functions related with the ranking of CMap perturbations:
as.table.referenceComparison()
,
filterCMapMetadata()
,
getCMapConditions()
,
getCMapPerturbationTypes()
,
loadCMapData()
,
parseCMapID()
,
plot.perturbationChanges()
,
plot.referenceComparison()
,
plotTargetingDrugsVSsimilarPerturbations()
,
prepareCMapPerturbations()
,
print.similarPerturbations()
,
rankSimilarPerturbations()
metadata <- loadCMapData("cmapMetadata.txt", "metadata") metadata <- filterCMapMetadata(metadata, cellLine="HepG2") ## Not run: perts <- prepareCMapPerturbations(metadata, "cmapZscores.gctx", "cmapGeneInfo.txt") zscores <- loadCMapZscores(perts[ , 1:10]) ## End(Not run)
metadata <- loadCMapData("cmapMetadata.txt", "metadata") metadata <- filterCMapMetadata(metadata, cellLine="HepG2") ## Not run: perts <- prepareCMapPerturbations(metadata, "cmapZscores.gctx", "cmapGeneInfo.txt") zscores <- loadCMapZscores(perts[ , 1:10]) ## End(Not run)
Load table with drug descriptors
loadDrugDescriptors( source = c("NCI60", "CMap"), type = c("2D", "3D"), file = NULL, path = NULL )
loadDrugDescriptors( source = c("NCI60", "CMap"), type = c("2D", "3D"), file = NULL, path = NULL )
source |
Character: source of compounds used to calculate molecular
descriptors ( |
type |
Character: load |
file |
Character: filepath to drug descriptors (automatically downloaded if file does not exist) |
path |
Character: folder where to find files (optional; |
Data table with drug descriptors
Other functions for drug set enrichment analysis:
analyseDrugSetEnrichment()
,
plotDrugSetEnrichment()
,
prepareDrugSets()
loadDrugDescriptors()
loadDrugDescriptors()
Samples are automatically downloaded if they are not found in the current working directory.
loadENCODEsamples(metadata, path = ".")
loadENCODEsamples(metadata, path = ".")
metadata |
Character: ENCODE metadata |
path |
Character: path where to download files |
List of loaded ENCODE samples
Other functions related with using ENCODE expression data:
downloadENCODEknockdownMetadata()
,
performDifferentialExpression()
,
prepareENCODEgeneExpression()
if (interactive()) { # Load ENCODE metadata for a specific cell line and gene cellLine <- "HepG2" gene <- c("EIF4G1", "U2AF2") ENCODEmetadata <- downloadENCODEknockdownMetadata(cellLine, gene) # Load samples based on filtered ENCODE metadata loadENCODEsamples(ENCODEmetadata) }
if (interactive()) { # Load ENCODE metadata for a specific cell line and gene cellLine <- "HepG2" gene <- c("EIF4G1", "U2AF2") ENCODEmetadata <- downloadENCODEknockdownMetadata(cellLine, gene) # Load samples based on filtered ENCODE metadata loadENCODEsamples(ENCODEmetadata) }
Load gene expression and drug sensitivity correlation matrix
loadExpressionDrugSensitivityAssociation( source, file = NULL, path = NULL, rows = NULL, cols = NULL, loadValues = FALSE )
loadExpressionDrugSensitivityAssociation( source, file = NULL, path = NULL, rows = NULL, cols = NULL, loadValues = FALSE )
source |
Character: source of matrix to load; see
|
file |
Character: filepath to gene expression and drug sensitivity association dataset (automatically downloaded if file does not exist) |
path |
Character: folder where to find files (optional; |
rows |
Character or integer: rows |
cols |
Character or integer: columns |
loadValues |
Boolean: load data values (if available)? If |
Correlation matrix between gene expression (rows) and drug sensitivity (columns)
Other functions related with the prediction of targeting drugs:
as.table.referenceComparison()
,
listExpressionDrugSensitivityAssociation()
,
plot.referenceComparison()
,
plotTargetingDrugsVSsimilarPerturbations()
,
predictTargetingDrugs()
gdsc <- listExpressionDrugSensitivityAssociation()[[1]] loadExpressionDrugSensitivityAssociation(gdsc)
gdsc <- listExpressionDrugSensitivityAssociation()[[1]] loadExpressionDrugSensitivityAssociation(gdsc)
Parse CMap identifier
parseCMapID(id, cellLine = FALSE)
parseCMapID(id, cellLine = FALSE)
id |
Character: CMap identifier |
cellLine |
Boolean: if |
Character vector with information from CMap identifiers
Other functions related with the ranking of CMap perturbations:
as.table.referenceComparison()
,
filterCMapMetadata()
,
getCMapConditions()
,
getCMapPerturbationTypes()
,
loadCMapData()
,
loadCMapZscores()
,
plot.perturbationChanges()
,
plot.referenceComparison()
,
plotTargetingDrugsVSsimilarPerturbations()
,
prepareCMapPerturbations()
,
print.similarPerturbations()
,
rankSimilarPerturbations()
id <- c("CVD001_HEPG2_24H:BRD-K94818765-001-01-0:4.8", "CVD001_HEPG2_24H:BRD-K96188950-001-04-5:4.3967", "CVD001_HUH7_24H:BRD-A14014306-001-01-1:4.1") parseCMapID(id, cellLine=TRUE) parseCMapID(id, cellLine=FALSE)
id <- c("CVD001_HEPG2_24H:BRD-K94818765-001-01-0:4.8", "CVD001_HEPG2_24H:BRD-K96188950-001-04-5:4.3967", "CVD001_HUH7_24H:BRD-A14014306-001-01-1:4.1") parseCMapID(id, cellLine=TRUE) parseCMapID(id, cellLine=FALSE)
Perform differential gene expression based on ENCODE data
performDifferentialExpression(counts)
performDifferentialExpression(counts)
counts |
Data frame: gene expression |
Data frame with differential gene expression results between knockdown and control
Other functions related with using ENCODE expression data:
downloadENCODEknockdownMetadata()
,
loadENCODEsamples()
,
prepareENCODEgeneExpression()
if (interactive()) { # Download ENCODE metadata for a specific cell line and gene cellLine <- "HepG2" gene <- "EIF4G1" ENCODEmetadata <- downloadENCODEknockdownMetadata(cellLine, gene) # Download samples based on filtered ENCODE metadata ENCODEsamples <- loadENCODEsamples(ENCODEmetadata)[[1]] counts <- prepareENCODEgeneExpression(ENCODEsamples) # Remove low coverage (at least 10 counts shared across two samples) minReads <- 10 minSamples <- 2 filter <- rowSums(counts[ , -c(1, 2)] >= minReads) >= minSamples counts <- counts[filter, ] # Convert ENSEMBL identifier to gene symbol counts$gene_id <- convertGeneIdentifiers(counts$gene_id) # Perform differential gene expression analysis diffExpr <- performDifferentialExpression(counts) }
if (interactive()) { # Download ENCODE metadata for a specific cell line and gene cellLine <- "HepG2" gene <- "EIF4G1" ENCODEmetadata <- downloadENCODEknockdownMetadata(cellLine, gene) # Download samples based on filtered ENCODE metadata ENCODEsamples <- loadENCODEsamples(ENCODEmetadata)[[1]] counts <- prepareENCODEgeneExpression(ENCODEsamples) # Remove low coverage (at least 10 counts shared across two samples) minReads <- 10 minSamples <- 2 filter <- rowSums(counts[ , -c(1, 2)] >= minReads) >= minSamples counts <- counts[filter, ] # Convert ENSEMBL identifier to gene symbol counts$gene_id <- convertGeneIdentifiers(counts$gene_id) # Perform differential gene expression analysis diffExpr <- performDifferentialExpression(counts) }
perturbationChanges
object
Operations on a perturbationChanges
object
## S3 method for class 'perturbationChanges' plot( x, perturbation, input, method = c("spearman", "pearson", "gsea"), geneSize = 150, genes = c("both", "top", "bottom"), ..., title = NULL ) ## S3 method for class 'perturbationChanges' x[i, j, drop = FALSE, ...] ## S3 method for class 'perturbationChanges' dim(x) ## S3 method for class 'perturbationChanges' dimnames(x)
## S3 method for class 'perturbationChanges' plot( x, perturbation, input, method = c("spearman", "pearson", "gsea"), geneSize = 150, genes = c("both", "top", "bottom"), ..., title = NULL ) ## S3 method for class 'perturbationChanges' x[i, j, drop = FALSE, ...] ## S3 method for class 'perturbationChanges' dim(x) ## S3 method for class 'perturbationChanges' dimnames(x)
x |
|
perturbation |
Character (perturbation identifier) or a
|
input |
|
method |
Character: comparison method ( |
geneSize |
Numeric: number of top up-/down-regulated genes to use as
gene sets to test for enrichment in reference data; if a 2-length numeric
vector, the first index is the number of top up-regulated genes and the
second index is the number of down-regulated genes used to create gene
sets; only used if |
genes |
Character: when plotting gene set enrichment analysis (GSEA),
plot most up-regulated genes ( |
... |
Extra arguments |
title |
Character: plot title (if |
i , j
|
Character or numeric indexes specifying elements to extract |
drop |
Boolean: coerce result to the lowest possible dimension? |
Subset, plot or return dimensions or names of a
perturbationChanges
object
Other functions related with the ranking of CMap perturbations:
as.table.referenceComparison()
,
filterCMapMetadata()
,
getCMapConditions()
,
getCMapPerturbationTypes()
,
loadCMapData()
,
loadCMapZscores()
,
parseCMapID()
,
plot.referenceComparison()
,
plotTargetingDrugsVSsimilarPerturbations()
,
prepareCMapPerturbations()
,
print.similarPerturbations()
,
rankSimilarPerturbations()
data("diffExprStat") data("cmapPerturbationsKD") compareKD <- rankSimilarPerturbations(diffExprStat, cmapPerturbationsKD) EIF4G1knockdown <- grep("EIF4G1", compareKD[[1]], value=TRUE) plot(cmapPerturbationsKD, EIF4G1knockdown, diffExprStat, method="spearman") plot(cmapPerturbationsKD, EIF4G1knockdown, diffExprStat, method="pearson") plot(cmapPerturbationsKD, EIF4G1knockdown, diffExprStat, method="gsea") data("cmapPerturbationsCompounds") pert <- "CVD001_HEPG2_24H:BRD-A14014306-001-01-1:4.1" plot(cmapPerturbationsCompounds, pert, diffExprStat, method="spearman") plot(cmapPerturbationsCompounds, pert, diffExprStat, method="pearson") plot(cmapPerturbationsCompounds, pert, diffExprStat, method="gsea") # Multiple cell line perturbations pert <- "CVD001_24H:BRD-A14014306-001-01-1:4.1" plot(cmapPerturbationsCompounds, pert, diffExprStat, method="spearman") plot(cmapPerturbationsCompounds, pert, diffExprStat, method="pearson") plot(cmapPerturbationsCompounds, pert, diffExprStat, method="gsea")
data("diffExprStat") data("cmapPerturbationsKD") compareKD <- rankSimilarPerturbations(diffExprStat, cmapPerturbationsKD) EIF4G1knockdown <- grep("EIF4G1", compareKD[[1]], value=TRUE) plot(cmapPerturbationsKD, EIF4G1knockdown, diffExprStat, method="spearman") plot(cmapPerturbationsKD, EIF4G1knockdown, diffExprStat, method="pearson") plot(cmapPerturbationsKD, EIF4G1knockdown, diffExprStat, method="gsea") data("cmapPerturbationsCompounds") pert <- "CVD001_HEPG2_24H:BRD-A14014306-001-01-1:4.1" plot(cmapPerturbationsCompounds, pert, diffExprStat, method="spearman") plot(cmapPerturbationsCompounds, pert, diffExprStat, method="pearson") plot(cmapPerturbationsCompounds, pert, diffExprStat, method="gsea") # Multiple cell line perturbations pert <- "CVD001_24H:BRD-A14014306-001-01-1:4.1" plot(cmapPerturbationsCompounds, pert, diffExprStat, method="spearman") plot(cmapPerturbationsCompounds, pert, diffExprStat, method="pearson") plot(cmapPerturbationsCompounds, pert, diffExprStat, method="gsea")
If element = NULL
, comparison is plotted based on all elements.
Otherwise, show scatter or GSEA plots for a single element compared with
previously given differential expression results.
## S3 method for class 'referenceComparison' plot( x, element = NULL, method = c("spearman", "pearson", "gsea", "rankProduct"), n = c(3, 3), showMetadata = TRUE, plotNonRankedPerturbations = FALSE, alpha = 0.3, genes = c("both", "top", "bottom"), ..., zscores = NULL, title = NULL )
## S3 method for class 'referenceComparison' plot( x, element = NULL, method = c("spearman", "pearson", "gsea", "rankProduct"), n = c(3, 3), showMetadata = TRUE, plotNonRankedPerturbations = FALSE, alpha = 0.3, genes = c("both", "top", "bottom"), ..., zscores = NULL, title = NULL )
x |
|
element |
Character: identifier in the first column of |
method |
Character: method to plot results; choose between
|
n |
Numeric: number of top and bottom genes to label (if a vector of two
numbers is given, the first and second numbers will be used as the number
of top and bottom genes to label, respectively); only used if
|
showMetadata |
Boolean: show available metadata information instead of
identifiers (if available)? Only used if |
plotNonRankedPerturbations |
Boolean: plot non-ranked data in grey? Only
used if |
alpha |
Numeric: transparency; only used if |
genes |
Character: when plotting gene set enrichment analysis (GSEA),
plot most up-regulated genes ( |
... |
Extra arguments currently not used |
zscores |
Data frame (GCTX z-scores) or character (respective filepath to load data from file) |
title |
Character: plot title (if |
Plot illustrating the reference comparison
Other functions related with the ranking of CMap perturbations:
as.table.referenceComparison()
,
filterCMapMetadata()
,
getCMapConditions()
,
getCMapPerturbationTypes()
,
loadCMapData()
,
loadCMapZscores()
,
parseCMapID()
,
plot.perturbationChanges()
,
plotTargetingDrugsVSsimilarPerturbations()
,
prepareCMapPerturbations()
,
print.similarPerturbations()
,
rankSimilarPerturbations()
Other functions related to the prediction of targeting drugs: as.table.referenceComparison(), listExpressionDrugSensitivityAssociation(), loadExpressionDrugSensitivityAssociation(), plotTargetingDrugsVSsimilarPerturbations(), predictTargetingDrugs()
# Example of a differential expression profile data("diffExprStat") ## Not run: # Download and load CMap perturbations to compare with cellLine <- "HepG2" cmapMetadataKD <- filterCMapMetadata( "cmapMetadata.txt", cellLine=cellLine, perturbationType="Consensus signature from shRNAs targeting the same gene") cmapPerturbationsKD <- prepareCMapPerturbations( cmapMetadataKD, "cmapZscores.gctx", "cmapGeneInfo.txt", loadZscores=TRUE) ## End(Not run) # Rank similar CMap perturbations compareKD <- rankSimilarPerturbations(diffExprStat, cmapPerturbationsKD) # Plot ranked list of CMap perturbations plot(compareKD, method="spearman") plot(compareKD, method="spearman", n=c(7, 3)) plot(compareKD, method="pearson") plot(compareKD, method="gsea") # Plot results for a single perturbation pert <- compareKD[[1, 1]] plot(compareKD, pert, method="spearman", zscores=cmapPerturbationsKD) plot(compareKD, pert, method="pearson", zscores=cmapPerturbationsKD) plot(compareKD, pert, method="gsea", zscores=cmapPerturbationsKD) # Predict targeting drugs based on a given differential expression profile gdsc <- loadExpressionDrugSensitivityAssociation("GDSC 7") predicted <- predictTargetingDrugs(diffExprStat, gdsc) # Plot ranked list of targeting drugs plot(predicted, method="spearman") plot(predicted, method="spearman", n=c(7, 3)) plot(predicted, method="pearson") plot(predicted, method="gsea") # Plot results for a single targeting drug drug <- predicted$compound[[4]] plot(predicted, drug, method="spearman") plot(predicted, drug, method="pearson") plot(predicted, drug, method="gsea")
# Example of a differential expression profile data("diffExprStat") ## Not run: # Download and load CMap perturbations to compare with cellLine <- "HepG2" cmapMetadataKD <- filterCMapMetadata( "cmapMetadata.txt", cellLine=cellLine, perturbationType="Consensus signature from shRNAs targeting the same gene") cmapPerturbationsKD <- prepareCMapPerturbations( cmapMetadataKD, "cmapZscores.gctx", "cmapGeneInfo.txt", loadZscores=TRUE) ## End(Not run) # Rank similar CMap perturbations compareKD <- rankSimilarPerturbations(diffExprStat, cmapPerturbationsKD) # Plot ranked list of CMap perturbations plot(compareKD, method="spearman") plot(compareKD, method="spearman", n=c(7, 3)) plot(compareKD, method="pearson") plot(compareKD, method="gsea") # Plot results for a single perturbation pert <- compareKD[[1, 1]] plot(compareKD, pert, method="spearman", zscores=cmapPerturbationsKD) plot(compareKD, pert, method="pearson", zscores=cmapPerturbationsKD) plot(compareKD, pert, method="gsea", zscores=cmapPerturbationsKD) # Predict targeting drugs based on a given differential expression profile gdsc <- loadExpressionDrugSensitivityAssociation("GDSC 7") predicted <- predictTargetingDrugs(diffExprStat, gdsc) # Plot ranked list of targeting drugs plot(predicted, method="spearman") plot(predicted, method="spearman", n=c(7, 3)) plot(predicted, method="pearson") plot(predicted, method="gsea") # Plot results for a single targeting drug drug <- predicted$compound[[4]] plot(predicted, drug, method="spearman") plot(predicted, drug, method="pearson") plot(predicted, drug, method="gsea")
Plot drug set enrichment
plotDrugSetEnrichment( sets, stats, col = "rankProduct_rank", selectedSets = NULL, keyColSets = NULL, keyColStats = NULL )
plotDrugSetEnrichment( sets, stats, col = "rankProduct_rank", selectedSets = NULL, keyColSets = NULL, keyColStats = NULL )
sets |
Named list of characters: named sets containing compound
identifiers (obtain drug sets by running |
stats |
Named numeric vector or either a |
col |
Character: name of the column to use for statistics (only required
if class of |
selectedSets |
Character: drug sets to plot (if |
keyColSets |
Character: column from |
keyColStats |
Character: column from |
List of GSEA plots per drug set
Other functions for drug set enrichment analysis: analyseDrugSetEnrichment(), loadDrugDescriptors(), prepareDrugSets()
descriptors <- loadDrugDescriptors() drugSets <- prepareDrugSets(descriptors) # Analyse drug set enrichment in ranked targeting drugs for a differential # expression profile data("diffExprStat") gdsc <- loadExpressionDrugSensitivityAssociation("GDSC") predicted <- predictTargetingDrugs(diffExprStat, gdsc) plotDrugSetEnrichment(drugSets, predicted)
descriptors <- loadDrugDescriptors() drugSets <- prepareDrugSets(descriptors) # Analyse drug set enrichment in ranked targeting drugs for a differential # expression profile data("diffExprStat") gdsc <- loadExpressionDrugSensitivityAssociation("GDSC") predicted <- predictTargetingDrugs(diffExprStat, gdsc) plotDrugSetEnrichment(drugSets, predicted)
Plot similar perturbations against predicted targeting drugs
plotTargetingDrugsVSsimilarPerturbations( targetingDrugs, similarPerturbations, column, labelBy = "pert_iname", quantileThreshold = 0.25, showAllScores = FALSE, keyColTargetingDrugs = NULL, keyColSimilarPerturbations = NULL )
plotTargetingDrugsVSsimilarPerturbations( targetingDrugs, similarPerturbations, column, labelBy = "pert_iname", quantileThreshold = 0.25, showAllScores = FALSE, keyColTargetingDrugs = NULL, keyColSimilarPerturbations = NULL )
targetingDrugs |
|
similarPerturbations |
|
column |
Character: column to plot (must be available in both databases) |
labelBy |
Character: column in |
quantileThreshold |
Numeric: quantile (between 0 and 1) to highlight values of interest |
showAllScores |
Boolean: show all scores? If |
keyColTargetingDrugs |
Character: column from |
keyColSimilarPerturbations |
Character: column from
|
ggplot2 plot
Other functions related to the ranking of CMap perturbations: as.table.referenceComparison(), filterCMapMetadata(), getCMapConditions(), getCMapPerturbationTypes(), loadCMapData(), loadCMapZscores(), parseCMapID(), plot.perturbationChanges(), plot.referenceComparison(), prepareCMapPerturbations(), print.similarPerturbations(), rankSimilarPerturbations()
Other functions related to the prediction of targeting drugs: as.table.referenceComparison(), listExpressionDrugSensitivityAssociation(), loadExpressionDrugSensitivityAssociation(), plot.referenceComparison(), predictTargetingDrugs()
# Rank similarity against CMap compound perturbations similarPerts <- rankSimilarPerturbations(diffExprStat, cmapPerturbationsCompounds) # Predict targeting drugs gdsc <- loadExpressionDrugSensitivityAssociation("GDSC 7") predicted <- predictTargetingDrugs(diffExprStat, gdsc) plotTargetingDrugsVSsimilarPerturbations(predicted, similarPerts, "spearman_rank")
# Rank similarity against CMap compound perturbations similarPerts <- rankSimilarPerturbations(diffExprStat, cmapPerturbationsCompounds) # Predict targeting drugs gdsc <- loadExpressionDrugSensitivityAssociation("GDSC 7") predicted <- predictTargetingDrugs(diffExprStat, gdsc) plotTargetingDrugsVSsimilarPerturbations(predicted, similarPerts, "spearman_rank")
Identify compounds that may target the phenotype associated with a user-provided differential expression profile by comparing such against a correlation matrix of gene expression and drug sensitivity.
predictTargetingDrugs( input, expressionDrugSensitivityCor, method = c("spearman", "pearson", "gsea"), geneSize = 150, isDrugActivityDirectlyProportionalToSensitivity = NULL, threads = 1, chunkGiB = 1, verbose = FALSE )
predictTargetingDrugs( input, expressionDrugSensitivityCor, method = c("spearman", "pearson", "gsea"), geneSize = 150, isDrugActivityDirectlyProportionalToSensitivity = NULL, threads = 1, chunkGiB = 1, verbose = FALSE )
input |
|
expressionDrugSensitivityCor |
Matrix or character: correlation matrix
of gene expression (rows) and drug sensitivity (columns) across cell lines
or path to file containing such data; see
|
method |
Character: comparison method ("spearman", "pearson" or "gsea") |
geneSize |
Numeric: number of top up-/down-regulated genes to use as
gene sets to test for enrichment in reference data; if a 2-length numeric
vector, the first index is the number of top up-regulated genes and the
second index is the number of down-regulated genes used to create gene
sets; only used if method includes "gsea" |
isDrugActivityDirectlyProportionalToSensitivity |
Boolean: are the
values used for drug activity directly proportional to drug sensitivity?
If |
threads |
Integer: number of parallel threads |
chunkGiB |
Numeric: if second argument is a path to an HDF5 file
(.h5 extension), size in GiB of the chunks in which that file is loaded and processed (see details) |
verbose |
Boolean: print additional details? |
Data table with correlation and/or GSEA score results
If a file path to a valid HDF5 (.h5) file is provided instead of a data matrix, that file can be loaded and processed in chunks of size chunkGiB, resulting in decreased peak memory usage.
The default value of 1 GiB (1 GiB = 1024^3 bytes) allows loading chunks of ~10000 columns and 14000 rows (10000 * 14000 * 8 bytes / 1024^3 = 1.04 GiB).
When method = "gsea", weighted connectivity scores (WTCS) are calculated (https://clue.io/connectopedia/cmap_algorithms).
Other functions related to the prediction of targeting drugs: as.table.referenceComparison(), listExpressionDrugSensitivityAssociation(), loadExpressionDrugSensitivityAssociation(), plot.referenceComparison(), plotTargetingDrugsVSsimilarPerturbations()
# Example of a differential expression profile data("diffExprStat") # Load expression and drug sensitivity association derived from GDSC data gdsc <- loadExpressionDrugSensitivityAssociation("GDSC 7") # Predict targeting drugs on a differential expression profile predictTargetingDrugs(diffExprStat, gdsc)
# Example of a differential expression profile data("diffExprStat") # Load expression and drug sensitivity association derived from GDSC data gdsc <- loadExpressionDrugSensitivityAssociation("GDSC 7") # Predict targeting drugs on a differential expression profile predictTargetingDrugs(diffExprStat, gdsc)
Prepare CMap perturbation data
prepareCMapPerturbations( metadata, zscores, geneInfo, compoundInfo = NULL, ..., loadZscores = FALSE )
prepareCMapPerturbations( metadata, zscores, geneInfo, compoundInfo = NULL, ..., loadZscores = FALSE )
metadata |
Data frame (CMap metadata) or character (respective filepath to load data from file) |
zscores |
Data frame (GCTX z-scores) or character (respective filepath to load data from file) |
geneInfo |
Data frame (CMap gene info) or character (respective filepath to load data from file) |
compoundInfo |
Data frame (CMap compound info) or character (respective filepath to load data from file) |
... |
Arguments passed on to
|
loadZscores |
Boolean: load matrix of perturbation z-scores? Not
recommended in systems with less than 30GB of RAM; if |
CMap perturbation data attributes and filename
Other functions related to the ranking of CMap perturbations: as.table.referenceComparison(), filterCMapMetadata(), getCMapConditions(), getCMapPerturbationTypes(), loadCMapData(), loadCMapZscores(), parseCMapID(), plot.perturbationChanges(), plot.referenceComparison(), plotTargetingDrugsVSsimilarPerturbations(), print.similarPerturbations(), rankSimilarPerturbations()
metadata <- loadCMapData("cmapMetadata.txt", "metadata") metadata <- filterCMapMetadata(metadata, cellLine="HepG2") ## Not run: prepareCMapPerturbations(metadata, "cmapZscores.gctx", "cmapGeneInfo.txt") ## End(Not run)
metadata <- loadCMapData("cmapMetadata.txt", "metadata") metadata <- filterCMapMetadata(metadata, cellLine="HepG2") ## Not run: prepareCMapPerturbations(metadata, "cmapZscores.gctx", "cmapGeneInfo.txt") ## End(Not run)
Create a list of drug sets for each character and numeric column. For each character column, drugs are split across that column's unique values (see argument maxUniqueElems). For each numeric column, drugs are split across evenly-distributed bins.
prepareDrugSets( table, id = 1, maxUniqueElems = 15, maxBins = 15, k = 5, minPoints = NULL )
prepareDrugSets( table, id = 1, maxUniqueElems = 15, maxBins = 15, k = 5, minPoints = NULL )
table |
Data frame: drug descriptors |
id |
Integer or character: index or name of the identifier column |
maxUniqueElems |
Numeric: ignore character columns with more unique
elements than |
maxBins |
Numeric: maximum number of bins for numeric columns |
k |
Numeric: constant; the higher the constant, the smaller the bin size
(check |
minPoints |
Numeric: minimum number of points in a bin (if |
Named list of characters: named drug sets with respective compound identifiers as list elements
Other functions for drug set enrichment analysis: analyseDrugSetEnrichment(), loadDrugDescriptors(), plotDrugSetEnrichment()
descriptors <- loadDrugDescriptors("NCI60") prepareDrugSets(descriptors)
descriptors <- loadDrugDescriptors("NCI60") prepareDrugSets(descriptors)
Load ENCODE gene expression data
prepareENCODEgeneExpression(samples)
prepareENCODEgeneExpression(samples)
samples |
List of loaded ENCODE samples |
Data frame containing gene read counts
Other functions related to using ENCODE expression data: downloadENCODEknockdownMetadata(), loadENCODEsamples(), performDifferentialExpression()
if (interactive()) { # Load ENCODE metadata for a specific cell line and gene cellLine <- "HepG2" gene <- "EIF4G1" ENCODEmetadata <- downloadENCODEknockdownMetadata(cellLine, gene) # Load samples based on filtered ENCODE metadata ENCODEsamples <- loadENCODEsamples(ENCODEmetadata)[[1]] prepareENCODEgeneExpression(ENCODEsamples) }
if (interactive()) { # Load ENCODE metadata for a specific cell line and gene cellLine <- "HepG2" gene <- "EIF4G1" ENCODEmetadata <- downloadENCODEknockdownMetadata(cellLine, gene) # Load samples based on filtered ENCODE metadata ENCODEsamples <- loadENCODEsamples(ENCODEmetadata)[[1]] prepareENCODEgeneExpression(ENCODEsamples) }
Print a similarPerturbations object
## S3 method for class 'similarPerturbations' print(x, perturbation = NULL, ...)
## S3 method for class 'similarPerturbations' print(x, perturbation = NULL, ...)
x |
|
perturbation |
Character (perturbation identifier) or numeric (perturbation index) |
... |
Extra parameters passed to |
Information on similarPerturbations object or on specific perturbations
Other functions related to the ranking of CMap perturbations: as.table.referenceComparison(), filterCMapMetadata(), getCMapConditions(), getCMapPerturbationTypes(), loadCMapData(), loadCMapZscores(), parseCMapID(), plot.perturbationChanges(), plot.referenceComparison(), plotTargetingDrugsVSsimilarPerturbations(), prepareCMapPerturbations(), rankSimilarPerturbations()
Compare differential expression results against CMap perturbations.
rankSimilarPerturbations( input, perturbations, method = c("spearman", "pearson", "gsea"), geneSize = 150, cellLineMean = "auto", rankPerCellLine = FALSE, threads = 1, chunkGiB = 1, verbose = FALSE )
rankSimilarPerturbations( input, perturbations, method = c("spearman", "pearson", "gsea"), geneSize = 150, cellLineMean = "auto", rankPerCellLine = FALSE, threads = 1, chunkGiB = 1, verbose = FALSE )
input |
|
perturbations |
|
method |
Character: comparison method ("spearman", "pearson" or "gsea") |
geneSize |
Numeric: number of top up-/down-regulated genes to use as
gene sets to test for enrichment in reference data; if a 2-length numeric
vector, the first index is the number of top up-regulated genes and the
second index is the number of down-regulated genes used to create gene
sets; only used if method includes "gsea" |
cellLineMean |
Boolean: add rows with the mean of |
rankPerCellLine |
Boolean: rank results based on both individual cell
lines and mean scores across cell lines ( |
threads |
Integer: number of parallel threads |
chunkGiB |
Numeric: if second argument is a path to an HDF5 file
(.h5 extension), size in GiB of the chunks in which that file is loaded and processed (see details) |
verbose |
Boolean: print additional details? |
Data table with correlation and/or GSEA score results
If a file path to a valid HDF5 (.h5) file is provided instead of a data matrix, that file can be loaded and processed in chunks of size chunkGiB, resulting in decreased peak memory usage.
The default value of 1 GiB (1 GiB = 1024^3 bytes) allows loading chunks of ~10000 columns and 14000 rows (10000 * 14000 * 8 bytes / 1024^3 = 1.04 GiB).
When method = "gsea", weighted connectivity scores (WTCS) are calculated (https://clue.io/connectopedia/cmap_algorithms).
Other functions related to the ranking of CMap perturbations: as.table.referenceComparison(), filterCMapMetadata(), getCMapConditions(), getCMapPerturbationTypes(), loadCMapData(), loadCMapZscores(), parseCMapID(), plot.perturbationChanges(), plot.referenceComparison(), plotTargetingDrugsVSsimilarPerturbations(), prepareCMapPerturbations(), print.similarPerturbations()
# Example of a differential expression profile data("diffExprStat") ## Not run: # Download and load CMap perturbations to compare with cellLine <- c("HepG2", "HUH7") cmapMetadataCompounds <- filterCMapMetadata( "cmapMetadata.txt", cellLine=cellLine, timepoint="24 h", dosage="5 \u00B5M", perturbationType="Compound") cmapPerturbationsCompounds <- prepareCMapPerturbations( cmapMetadataCompounds, "cmapZscores.gctx", "cmapGeneInfo.txt", "cmapCompoundInfo_drugs.txt", loadZscores=TRUE) ## End(Not run) perturbations <- cmapPerturbationsCompounds # Rank similar CMap perturbations (by default, Spearman's and Pearson's # correlation are used, as well as GSEA with the top and bottom 150 genes of # the differential expression profile used as reference) rankSimilarPerturbations(diffExprStat, perturbations) # Rank similar CMap perturbations using only Spearman's correlation rankSimilarPerturbations(diffExprStat, perturbations, method="spearman")
# Example of a differential expression profile data("diffExprStat") ## Not run: # Download and load CMap perturbations to compare with cellLine <- c("HepG2", "HUH7") cmapMetadataCompounds <- filterCMapMetadata( "cmapMetadata.txt", cellLine=cellLine, timepoint="24 h", dosage="5 \u00B5M", perturbationType="Compound") cmapPerturbationsCompounds <- prepareCMapPerturbations( cmapMetadataCompounds, "cmapZscores.gctx", "cmapGeneInfo.txt", "cmapCompoundInfo_drugs.txt", loadZscores=TRUE) ## End(Not run) perturbations <- cmapPerturbationsCompounds # Rank similar CMap perturbations (by default, Spearman's and Pearson's # correlation are used, as well as GSEA with the top and bottom 150 genes of # the differential expression profile used as reference) rankSimilarPerturbations(diffExprStat, perturbations) # Rank similar CMap perturbations using only Spearman's correlation rankSimilarPerturbations(diffExprStat, perturbations, method="spearman")