Title: | Functional analysis of DNA methylome datasets |
---|---|
Description: | KnowYourCG (KYCG) is a supervised learning framework designed for the functional analysis of DNA methylation data. Unlike existing tools that focus on genes or genomic intervals, KnowYourCG directly targets CpG dinucleotides, featuring automated supervised screenings of diverse biological and technical influences, including sequence motifs, transcription factor binding, histone modifications, replication timing, cell-type-specific methylation, and trait-epigenome associations. KnowYourCG addresses the challenges of data sparsity in various methylation datasets, including low-pass Nanopore sequencing, single-cell DNA methylomes, 5-hydroxymethylation profiles, spatial DNA methylation maps, and array-based datasets for epigenome-wide association studies and epigenetic clocks. |
Authors: | Zhou Wanding [aut] , Goldberg David [aut, cre] , Fu Hongxiang [ctb] |
Maintainer: | Goldberg David <[email protected]> |
License: | MIT + file LICENSE |
Version: | 1.3.15 |
Built: | 2025-01-05 03:31:09 UTC |
Source: | https://github.com/bioc/knowYourCG |
Aggregate test enrichment results
aggregateTestEnrichments(result_list, column = "estimate", return_df = FALSE)
aggregateTestEnrichments(result_list, column = "estimate", return_df = FALSE)
result_list |
a list of results from testEnrichment |
column |
the column name to aggregate (Default: estimate) |
return_df |
whether to return a merged data frame |
a matrix for all results
## pick some big TFBS-overlapping CpG groups sesameData::sesameDataCache(data_titles= c("KYCG.MM285.TFBSconsensus.20220116","KYCG.MM285.chromHMM.20210210", "probeIDSignature", "MM285.address")) cg_lists <- getDBs("MM285.TFBS") queries <- cg_lists[(sapply(cg_lists, length) > 40000)] result_list <- lapply(queries, testEnrichment, "MM285.chromHMM") mtx <- aggregateTestEnrichments(result_list)
## pick some big TFBS-overlapping CpG groups sesameData::sesameDataCache(data_titles= c("KYCG.MM285.TFBSconsensus.20220116","KYCG.MM285.chromHMM.20210210", "probeIDSignature", "MM285.address")) cg_lists <- getDBs("MM285.TFBS") queries <- cg_lists[(sapply(cg_lists, length) > 40000)] result_list <- lapply(queries, testEnrichment, "MM285.chromHMM") mtx <- aggregateTestEnrichments(result_list)
see sesameData_annoProbes if you'd like to annotate by genomic coordinates (in GRanges)
annoProbes( probeIDs, databases, db_names = NULL, platform = NULL, sep = ",", indicator = FALSE, silent = FALSE )
annoProbes( probeIDs, databases, db_names = NULL, platform = NULL, sep = ",", indicator = FALSE, silent = FALSE )
probeIDs |
probe IDs in a character vector |
databases |
character or actual database (i.e. list of probe IDs) |
db_names |
specific database (default to all databases) |
platform |
EPIC, MM285 etc. will infer from probe IDs if not given |
sep |
delimiter used in paste |
indicator |
return the indicator matrix instead of a concatenated annotation (in the case of multiple annotations) |
silent |
suppress message |
named annotation vector, or indicator matrix
sesameData::sesameDataCache(data_titles= c("MM285.address","probeIDSignature","KYCG.MM285.designGroup.20210210")) probes <- names(sesameData::sesameData_getManifestGRanges("MM285")) anno <- annoProbes(probeIDs=probes, "designGroup", silent = TRUE)
sesameData::sesameDataCache(data_titles= c("MM285.address","probeIDSignature","KYCG.MM285.designGroup.20210210")) probes <- names(sesameData::sesameData_getManifestGRanges("MM285")) anno <- annoProbes(probeIDs=probes, "designGroup", silent = TRUE)
build gene-probe association database
buildGeneDBs( probeIDs = NULL, platform = NULL, genome = NULL, max_distance = 10000, silent = FALSE )
buildGeneDBs( probeIDs = NULL, platform = NULL, genome = NULL, max_distance = 10000, silent = FALSE )
probeIDs |
the query probe list. If NULL, use all the probes on the platform |
platform |
HM450, EPIC, MM285, Mammal40, will infer from query if not given |
genome |
hg38, mm10, ..., will infer if not given. |
max_distance |
probe-gene distance for association |
silent |
suppress messages |
gene databases
sesameData::sesameDataCache(data_titles= c("EPIC.address","genomeInfo.hg38","probeIDSignature")) query <- c("cg04707299", "cg13380562", "cg00480749") dbs <- buildGeneDBs(query, platform = "EPIC") testEnrichment(query, dbs, platform = "EPIC")
sesameData::sesameDataCache(data_titles= c("EPIC.address","genomeInfo.hg38","probeIDSignature")) query <- c("cg04707299", "cg13380562", "cg00480749") dbs <- buildGeneDBs(query, platform = "EPIC") testEnrichment(query, dbs, platform = "EPIC")
dbStats aggregates methylation of a given betas matrix over specified database set features
dbStats( betas, databases, fun = mean, na.rm = TRUE, n_min = NULL, f_min = 0.1, long = FALSE )
dbStats( betas, databases, fun = mean, na.rm = TRUE, n_min = NULL, f_min = 0.1, long = FALSE )
betas |
matrix of beta values where probes are on the rows and samples are on the columns |
databases |
List of vectors corresponding to probe locations for which the features will be extracted |
fun |
aggregation function, default to mean |
na.rm |
whether to remove NA |
n_min |
min number of non-NA for aggregation function to apply, overrides f_min |
f_min |
min fraction of non-NA for aggregation function to apply |
long |
produce long-form result |
matrix with samples on the rows and database set on the columns
library(SummarizedExperiment) sesameData::sesameDataCache(data_titles= c("MM285.467.SE.tissue20Kprobes","KYCG.MM285.probeType.20210630")) se <- sesameData::sesameDataGet("MM285.467.SE.tissue20Kprobes") head(dbStats(assay(se), "MM285.probeType")[,1:3]) sesameData::sesameDataGet_resetEnv()
library(SummarizedExperiment) sesameData::sesameDataCache(data_titles= c("MM285.467.SE.tissue20Kprobes","KYCG.MM285.probeType.20210630")) se <- sesameData::sesameDataGet("MM285.467.SE.tissue20Kprobes") head(dbStats(assay(se), "MM285.probeType")[,1:3]) sesameData::sesameDataGet_resetEnv()
Get databases by full or partial names of the database group(s)
getDBs( group_nms, db_names = NULL, platform = NULL, summary = FALSE, allow_multi = FALSE, type = NULL, silent = FALSE )
getDBs( group_nms, db_names = NULL, platform = NULL, summary = FALSE, allow_multi = FALSE, type = NULL, silent = FALSE )
group_nms |
database group names |
db_names |
name of the database, fetch only the given databases |
platform |
EPIC, HM450, MM285, ... If given, will restrict to that platform. |
summary |
return a summary of database instead of db itself |
allow_multi |
allow multiple groups to be returned for each query. |
type |
numerical, categorical, default: all |
silent |
no messages |
a list of databases, return NULL if no database is found
sesameData::sesameDataCache(data_titles= c("KYCG.MM285.chromHMM.20210210","KYCG.MM285.probeType.20210630")) dbs <- getDBs("MM285.chromHMM") dbs <- getDBs(c("MM285.chromHMM", "MM285.probeType"))
sesameData::sesameDataCache(data_titles= c("KYCG.MM285.chromHMM.20210210","KYCG.MM285.probeType.20210630")) dbs <- getDBs("MM285.chromHMM") dbs <- getDBs(c("MM285.chromHMM", "MM285.probeType"))
The input data frame should have "estimate" and "FDR" columns.
KYCG_plotBar(df, y = "-log10(FDR)", n = 20, order_by = "FDR", label = FALSE)
KYCG_plotBar(df, y = "-log10(FDR)", n = 20, order_by = "FDR", label = FALSE)
df |
KYCG result data frame |
y |
the column to be plotted on y-axis |
n |
number of CG groups to plot |
order_by |
the column by which CG groups are ordered |
label |
whether to label significant bars |
Top CG groups are determined by estimate (descending order).
grid plot object
KYCG_plotBar(data.frame( estimate=runif(10,0,10), FDR=runif(10,0,1), nD=10, overlap=as.integer(runif(10,0,30)), group="g", dbname=seq_len(10)))
KYCG_plotBar(data.frame( estimate=runif(10,0,10), FDR=runif(10,0,1), nD=10, overlap=as.integer(runif(10,0,30)), group="g", dbname=seq_len(10)))
The input data frame should have "estimate" and "FDR" columns.
KYCG_plotDot( df, y = "-log10(FDR)", n = 20, order_by = "FDR", title = "Enriched Knowledgebases", label_by = "dbname", size_by = "overlap", color_by = "estimate", short_label = FALSE )
KYCG_plotDot( df, y = "-log10(FDR)", n = 20, order_by = "FDR", title = "Enriched Knowledgebases", label_by = "dbname", size_by = "overlap", color_by = "estimate", short_label = FALSE )
df |
KYCG result data frame |
y |
the column to be plotted on y-axis |
n |
number of CG groups to plot |
order_by |
the column by which CG groups are ordered |
title |
plot title |
label_by |
the column for label |
size_by |
the column by which CG groups are sized in the plot |
color_by |
the column by which CG groups are colored |
short_label |
omit group in label |
Top CG groups are determined by estimate (descending order).
grid plot object (by ggplot)
KYCG_plotDot(data.frame( estimate=runif(10,0,10), FDR=runif(10,0,1), nD=runif(10,10,20), overlap=as.integer(runif(10,0,30)), group="g", dbname=seq_len(10)))
KYCG_plotDot(data.frame( estimate=runif(10,0,10), FDR=runif(10,0,1), nD=runif(10,10,20), overlap=as.integer(runif(10,0,30)), group="g", dbname=seq_len(10)))
plot enrichment test result
KYCG_plotEnrichAll( df, fdr_max = 25, n_label = 15, min_estimate = 0, short_label = TRUE )
KYCG_plotEnrichAll( df, fdr_max = 25, n_label = 15, min_estimate = 0, short_label = TRUE )
df |
test enrichment result data frame |
fdr_max |
maximum fdr for capping |
n_label |
number of databases to label |
min_estimate |
minimum estimate |
short_label |
use short label |
grid object
query <- getDBs("MM285.designGroup")[["PGCMeth"]] res <- testEnrichment(query, platform="MM285") KYCG_plotEnrichAll(res)
query <- getDBs("MM285.designGroup")[["PGCMeth"]] res <- testEnrichment(query, platform="MM285") KYCG_plotEnrichAll(res)
creates a lollipop plot of log(estimate) given data with fields estimate.
KYCG_plotLollipop(df, label_column = "dbname", n = 20)
KYCG_plotLollipop(df, label_column = "dbname", n = 20)
df |
DataFrame where each row is a database name with its estimate. |
label_column |
column in df to be used as the label (default: dbname) |
n |
Integer representing the number of top enrichments to report. Optional. (Default: 20) |
ggplot lollipop plot
KYCG_plotLollipop(data.frame( estimate=runif(10,0,10), FDR=runif(10,0,1), nD=runif(10,10,20), overlap=as.integer(runif(10,0,30)), group="g", dbname=as.character(seq_len(10))))
KYCG_plotLollipop(data.frame( estimate=runif(10,0,10), FDR=runif(10,0,1), nD=runif(10,10,20), overlap=as.integer(runif(10,0,30)), group="g", dbname=as.character(seq_len(10))))
KYCG_plotManhattan makes a manhattan plot to summarize EWAS results
KYCG_plotManhattan( vals, platform = NULL, genome = NULL, title = NULL, rasterize = FALSE, rasterize_thres = 3, label_min = 100, col = c("wheat1", "sienna3"), ylabel = "Value" )
KYCG_plotManhattan( vals, platform = NULL, genome = NULL, title = NULL, rasterize = FALSE, rasterize_thres = 3, label_min = 100, col = c("wheat1", "sienna3"), ylabel = "Value" )
vals |
named vector of values (P,Q etc), vector name is Probe ID. |
platform |
String corresponding to the type of platform to use for retrieving GRanges coordinates of probes. Either MM285, EPIC, HM450, or HM27. If it is not provided, it will be inferred from the query set probeIDs (Default: NULL). |
genome |
hg38, mm10, ..., will infer if not given. For additional mapping, download the GRanges object from http://zwdzwd.github.io/InfiniumAnnotation and provide the following argument ..., genome = sesameAnno_buildManifestGRanges("downloaded_file"),... to this function. |
title |
title for plot |
rasterize |
if true use ggrastr to rasterize non-significant data. |
rasterize_thres |
the threshold of rasterize |
label_min |
Threshold above which data points will be labelled with Probe ID |
col |
color |
ylabel |
y-axis label |
a ggplot object
## see vignette for examples
## see vignette for examples
Plot meta gene or other meta genomic features
KYCG_plotMeta(betas, platform = NULL)
KYCG_plotMeta(betas, platform = NULL)
betas |
a named numeric vector or a matrix (row: probes; column: samples) |
platform |
if not given and x is a SigDF, will be inferred the meta features |
a grid plot object
library(sesameData) library(sesame) sdf <- sesameDataGet("EPIC.1.SigDF") KYCG_plotMeta(getBetas(sdf))
library(sesameData) library(sesame) sdf <- sesameDataGet("EPIC.1.SigDF") KYCG_plotMeta(getBetas(sdf))
Plot meta gene or other meta genomic features
KYCG_plotMetaEnrichment(result_list)
KYCG_plotMetaEnrichment(result_list)
result_list |
one or a list of testEnrichment results |
a grid plot object
cg_lists <- getDBs("MM285.TFBS") queries <- cg_lists[(sapply(cg_lists, length) > 40000)] result_list <- lapply(queries, testEnrichment, "MM285.metagene", silent=TRUE, platform="MM285") KYCG_plotMetaEnrichment(result_list)
cg_lists <- getDBs("MM285.TFBS") queries <- cg_lists[(sapply(cg_lists, length) > 40000)] result_list <- lapply(queries, testEnrichment, "MM285.metagene", silent=TRUE, platform="MM285") KYCG_plotMetaEnrichment(result_list)
Plot point range for a list of enrichment testing results against the same set of databases
KYCG_plotPointRange(result_list)
KYCG_plotPointRange(result_list)
result_list |
a list of testEnrichment results |
grid plot object
## pick some big TFBS-overlapping CpG groups cg_lists <- getDBs("MM285.TFBS") queries <- cg_lists[(sapply(cg_lists, length) > 40000)] result_list <- lapply(queries, testEnrichment, "MM285.chromHMM", platform="MM285") KYCG_plotPointRange(result_list)
## pick some big TFBS-overlapping CpG groups cg_lists <- getDBs("MM285.TFBS") queries <- cg_lists[(sapply(cg_lists, length) > 40000)] result_list <- lapply(queries, testEnrichment, "MM285.chromHMM", platform="MM285") KYCG_plotPointRange(result_list)
Plot Set Enrichment
KYCG_plotSetEnrichment(result, n_sample = 1000, n_presence = 200)
KYCG_plotSetEnrichment(result, n_sample = 1000, n_presence = 200)
result |
result object as returned from an element of the list of testEnrichmentSEA(..., prepPlot=TRUE) |
n_sample |
number of CpGs to sample |
n_presence |
number of overlap to sample for the plot |
grid object for plot
query <- getDBs("KYCG.MM285.designGroup")[["VMR"]] db <- getDBs("MM285.seqContextN", "distToTSS") res <- testEnrichmentSEA(query, db, prepPlot = TRUE) KYCG_plotSetEnrichment(res[[1]])
query <- getDBs("KYCG.MM285.designGroup")[["VMR"]] db <- getDBs("MM285.seqContextN", "distToTSS") res <- testEnrichmentSEA(query, db, prepPlot = TRUE) KYCG_plotSetEnrichment(res[[1]])
creates a volcano plot of -log2(p.value) and log(estimate) given data with fields estimate and p.value.
KYCG_plotVolcano(df, label_by = "dbname", alpha = 0.05)
KYCG_plotVolcano(df, label_by = "dbname", alpha = 0.05)
df |
DataFrame where each row is a database name with two fields for the estimate and p.value. |
label_by |
column in df to be used as the label (default: dbname) |
alpha |
Float representing the cut-off alpha value for the plot. Optional. (Default: 0.05) |
ggplot volcano plot
KYCG_plotVolcano(data.frame( estimate=runif(10,0,10), FDR=runif(10,0,1), nD=runif(10,10,20), overlap=as.integer(runif(10,0,30)), group="g", dbname=seq_len(10)))
KYCG_plotVolcano(data.frame( estimate=runif(10,0,10), FDR=runif(10,0,1), nD=runif(10,10,20), overlap=as.integer(runif(10,0,30)), group="g", dbname=seq_len(10)))
create a waterfall plot of log(estimate) given test enrichment
KYCG_plotWaterfall( df, order_by = "Log2(OR)", size_by = "-log10(FDR)", label_by = "dbname", n_label = 10 )
KYCG_plotWaterfall( df, order_by = "Log2(OR)", size_by = "-log10(FDR)", label_by = "dbname", n_label = 10 )
df |
data frame where each row is a database with test enrichment result |
order_by |
the column by which CG groups are ordered |
size_by |
the column by which CG groups are sized in the plot |
label_by |
column in df to be used as the label (default: dbname) |
n_label |
number of datapoints to label |
grid
library(SummarizedExperiment) library(sesameData) df <- rowData(sesameDataGet('MM285.tissueSignature')) query <- df$Probe_ID[df$branch == "fetal_brain" & df$type == "Hypo"] results <- testEnrichment(query, "TFBS", platform="MM285") KYCG_plotWaterfall(results)
library(SummarizedExperiment) library(sesameData) df <- rowData(sesameDataGet('MM285.tissueSignature')) query <- df$Probe_ID[df$branch == "fetal_brain" & df$type == "Hypo"] results <- testEnrichment(query, "TFBS", platform="MM285") KYCG_plotWaterfall(results)
List database group names
listDBGroups(filter = NULL, path = NULL, type = NULL)
listDBGroups(filter = NULL, path = NULL, type = NULL)
filter |
keywords for filtering |
path |
file path to downloaded knowledgebase sets |
type |
categorical, numerical (default: all) |
a list of db group names
head(listDBGroups("chromHMM")) ## or listDBGroups(path = "~/Downloads")
head(listDBGroups("chromHMM")) ## or listDBGroups(path = "~/Downloads")
testEnrichment tests for the enrichment of query in knowledgebase sets
testEnrichment( query, databases = NULL, universe = NULL, alternative = "greater", include_genes = FALSE, platform = NULL, silent = FALSE )
testEnrichment( query, databases = NULL, universe = NULL, alternative = "greater", include_genes = FALSE, platform = NULL, silent = FALSE )
query |
For array input, it is a vector of probes of interest (e.g., significant differential methylated probes). For sequencing data input, it expect the file name for YAME-compressed CG sets. |
databases |
List of vectors corresponding to the database sets of interest with associated meta data as an attribute to each element. Optional. (Default: NULL) |
universe |
Vector of probes in the universe set containing all of the probes to be considered in the test. If it is not provided, it will be inferred from the provided platform. (Default: NULL). |
alternative |
"two.sided", "greater", or "less" |
include_genes |
include gene link enrichment testing |
platform |
String corresponding to the type of platform to use. Either MM285, EPIC, HM450, or HM27. If it is not provided, it will be inferred from the query set probeIDs (Default: NULL). |
silent |
suppress messages? (Default: FALSE) |
A data frame containing features corresponding to the test estimate, p-value, and type of test.
library(SummarizedExperiment) sesameData::sesameDataCache(data_titles= c("MM285.tissueSignature","KYCG.MM285.chromHMM.20210210","MM285.address")) df <- rowData(sesameData::sesameDataGet("MM285.tissueSignature")) probes <- df$Probe_ID[df$branch == "B_cell"] res <- testEnrichment(probes, "chromHMM", platform="MM285") sesameData::sesameDataGet_resetEnv() ## Not run: # Define temporary directory and file URLs temp_dir <- tempdir() knowledgebase <- file.path(temp_dir, "ChromHMM.20220414.cm") query <- file.path(temp_dir, "single_cell_10_samples.cg") # URLs for the knowledgebase and query files knowledgebase_url <- "https://github.com/zhou-lab/KYCGKB_mm10/raw/refs/heads/main/ChromHMM.20220414.cm" query_url <- "https://github.com/zhou-lab/YAME/raw/refs/heads/main/test/input/single_cell_10_samples.cg" # Download the files download.file(knowledgebase_url, destfile = knowledgebase) download.file(query_url, destfile = query) # Confirm file download list.files(temp_dir) res = testEnrichment(query, knowledgebase) ## End(Not run)
library(SummarizedExperiment) sesameData::sesameDataCache(data_titles= c("MM285.tissueSignature","KYCG.MM285.chromHMM.20210210","MM285.address")) df <- rowData(sesameData::sesameDataGet("MM285.tissueSignature")) probes <- df$Probe_ID[df$branch == "B_cell"] res <- testEnrichment(probes, "chromHMM", platform="MM285") sesameData::sesameDataGet_resetEnv() ## Not run: # Define temporary directory and file URLs temp_dir <- tempdir() knowledgebase <- file.path(temp_dir, "ChromHMM.20220414.cm") query <- file.path(temp_dir, "single_cell_10_samples.cg") # URLs for the knowledgebase and query files knowledgebase_url <- "https://github.com/zhou-lab/KYCGKB_mm10/raw/refs/heads/main/ChromHMM.20220414.cm" query_url <- "https://github.com/zhou-lab/YAME/raw/refs/heads/main/test/input/single_cell_10_samples.cg" # Download the files download.file(knowledgebase_url, destfile = knowledgebase) download.file(query_url, destfile = query) # Confirm file download list.files(temp_dir) res = testEnrichment(query, knowledgebase) ## End(Not run)
Test enrichment from YAME-compressed CG sets
testEnrichment2( query_fn, knowledge_fn, universe_fn = NULL, alternative = "greater" )
testEnrichment2( query_fn, knowledge_fn, universe_fn = NULL, alternative = "greater" )
query_fn |
File path to query |
knowledge_fn |
File path to knowledgebase |
universe_fn |
optional file path to universe |
alternative |
greater, less |
A single concatenated string.
if (.Platform$OS.type!="windows") { kfn = system.file("extdata", "chromhmm.cm", package = "knowYourCG") qfn = system.file("extdata", "onecell.cg", package = "knowYourCG") testEnrichment2(qfn, kfn) }
if (.Platform$OS.type!="windows") { kfn = system.file("extdata", "chromhmm.cm", package = "knowYourCG") qfn = system.file("extdata", "onecell.cg", package = "knowYourCG") testEnrichment2(qfn, kfn) }
Estimates log2 Odds ratio
testEnrichmentFisher(query, database, universe, alternative = "greater")
testEnrichmentFisher(query, database, universe, alternative = "greater")
query |
Vector of probes of interest (e.g., significant probes) |
database |
Vectors corresponding to the database set of interest with associated meta data as an attribute to each element. |
universe |
Vector of probes in the universe set containing all of the probes to be considered in the test. (Default: NULL) |
alternative |
greater or two.sided (default: greater) |
A DataFrame with the estimate/statistic, p-value, and name of test for the given results.
The estimate represents the enrichment score; a negative estimate indicates a test for depletion.
testEnrichmentSEA( query, databases, platform = NULL, silent = FALSE, precise = FALSE, prepPlot = FALSE )
testEnrichmentSEA( query, databases, platform = NULL, silent = FALSE, precise = FALSE, prepPlot = FALSE )
query |
query, if numerical, expect categorical database, if categorical expect numerical database |
databases |
database, numerical or categorical, but needs to be different from query |
platform |
EPIC, MM285, ..., infer if not given |
silent |
suppress message (default: FALSE) |
precise |
whether to compute precise p-value (up to numerical limit) of interest. |
prepPlot |
return the raw enrichment scores and presence vectors for plotting |
A DataFrame with the estimate/statistic, p-value, and name of test for the given results.
sesameData::sesameDataCache(data_titles= c("KYCG.MM285.designGroup.20210210","KYCG.MM285.seqContextN.20210630", "probeIDSignature")) query <- getDBs("KYCG.MM285.designGroup")[["TSS"]] res <- testEnrichmentSEA(query, "MM285.seqContextN")
sesameData::sesameDataCache(data_titles= c("KYCG.MM285.designGroup.20210210","KYCG.MM285.seqContextN.20210630", "probeIDSignature")) query <- getDBs("KYCG.MM285.designGroup")[["TSS"]] res <- testEnrichmentSEA(query, "MM285.seqContextN")
testEnrichmentSpearman uses the Spearman statistical test to estimate the association between two continuous variables.
testEnrichmentSpearman(num_query, num_db)
testEnrichmentSpearman(num_query, num_db)
num_query |
named numeric vector of probes of interest where names are probe IDs (e.g significant probes) |
num_db |
List of vectors corresponding to the database set of interest with associated meta data as an attribute to each element. |
A DataFrame with the estimate/statistic, p-value, and name of test for the given results.
The estimate represents the enrichment score; a negative estimate indicates a test for depletion.
testGO(probeIDs, platform = NULL, organism = "hsapiens", gene_name = TRUE, ...)
testGO(probeIDs, platform = NULL, organism = "hsapiens", gene_name = TRUE, ...)
probeIDs |
Vector of CpG probes IDs or a data frame with gene_name column, usually the output of testEnrichment() function |
platform |
EPIC, MM285, ..., infer if not given |
organism |
The organism corresponding to the CpG platform or genes in gene_name column |
gene_name |
If probeIDs is a data frame from testEnrichment output, whether to use the gene_name column. If set to FALSE, TFBS will be used (default: TRUE) |
... |
Additional arguments to sesameData_getGenesByProbes and gost() |
A list of enriched terms and meta data from gprofiler2 output
library(SummarizedExperiment) sesameData::sesameDataCache(data_titles= c("MM285.tissueSignature","probeIDSignature", "MM285.address","genomeInfo.mm10")) df <- rowData(sesameData::sesameDataGet('MM285.tissueSignature')) query <- df$Probe_ID[df$branch == "fetal_liver" & df$type == "Hypo"] res <- testGO(query,platform="MM285")
library(SummarizedExperiment) sesameData::sesameDataCache(data_titles= c("MM285.tissueSignature","probeIDSignature", "MM285.address","genomeInfo.mm10")) df <- rowData(sesameData::sesameDataGet('MM285.tissueSignature')) query <- df$Probe_ID[df$branch == "fetal_liver" & df$type == "Hypo"] res <- testGO(query,platform="MM285")
testProbeProximity tests if a query set of probes share closer genomic proximity than if randomly distributed
testProbeProximity( probeIDs, gr = NULL, platform = NULL, iterations = 100, bin_size = 1500 )
testProbeProximity( probeIDs, gr = NULL, platform = NULL, iterations = 100, bin_size = 1500 )
probeIDs |
Vector of probes of interest (e.g., significant probes) |
gr |
GRanges to draw samples and compute genomic distances |
platform |
String corresponding to the type of platform to use. Either MM285, EPIC, HM450, or HM27. If it is not provided, it will be inferred from the query set probeIDs (Default: NULL). |
iterations |
Number of random samples to generate null distribution (Default: 100). |
bin_size |
the poisson interval size for computing neighboring hits |
list containing a dataframe for the poisson statistics and a data frame for the probes in close proximity
sesameData::sesameDataCache(data_titles= c("MM285.tissueSignature","MM285.address","probeIDSignature")) library(SummarizedExperiment) df <- rowData(sesameData::sesameDataGet("MM285.tissueSignature")) probes <- df$Probe_ID[df$branch == "B_cell"] res <- testProbeProximity(probeIDs=probes,platform="MM285") sesameData::sesameDataGet_resetEnv()
sesameData::sesameDataCache(data_titles= c("MM285.tissueSignature","MM285.address","probeIDSignature")) library(SummarizedExperiment) df <- rowData(sesameData::sesameDataGet("MM285.tissueSignature")) probes <- df$Probe_ID[df$branch == "B_cell"] res <- testProbeProximity(probeIDs=probes,platform="MM285") sesameData::sesameDataGet_resetEnv()