Title: | MOSt frequently used and useful Differential Expression Functions |
---|---|
Description: | This package provides functionality to run a number of tasks in the differential expression analysis workflow. This encompasses the most widely used steps, from running various enrichment analysis tools with a unified interface to creating plots and beautifying table components linking to external websites and databases. This streamlines the generation of comprehensive analysis reports. |
Authors: | Leon Dammer [aut], Federico Marini [aut, cre] |
Maintainer: | Federico Marini <[email protected]> |
License: | MIT + file LICENSE |
Version: | 1.3.0 |
Built: | 2024-11-01 06:32:53 UTC |
Source: | https://github.com/bioc/mosdef |
Printing some info before the enrichment runs
.info_enrichrun(n_de, n_de_selected, de_type, res_de = NULL)
.info_enrichrun(n_de, n_de_selected, de_type, res_de = NULL)
n_de |
Numeric, number of DE genes (in total) |
n_de_selected |
Character vector, containing the selected DE genes |
de_type |
Character string, specifying up/down/both direction of DE regulation |
res_de |
The |
Prints out an informative summary message.
# .info_enrichrun(10, length(c("geneA", "geneB")), "up")
# .info_enrichrun(10, length(c("geneA", "geneB")), "up")
A function to turn Gene Symbols into buttons in an Rmarkdown linking to various portals for further info about these genes.
buttonifier( df, create_buttons_to = c("PUBMED", "GC", "UNIPROT"), col_to_use = "SYMBOL", output_format = "DT", ens_col = NULL, ens_species = NULL )
buttonifier( df, create_buttons_to = c("PUBMED", "GC", "UNIPROT"), col_to_use = "SYMBOL", output_format = "DT", ens_col = NULL, ens_species = NULL )
df |
A data frame with at least one column containing gene symbols, named: SYMBOL |
create_buttons_to |
At least one of: "GC", "NCBI", "GTEX", "UNIPROT", "dbPTM", "HPA", "PUBMED" |
col_to_use |
Name of the column where the gene symbols are stored. Default is SYMBOL |
output_format |
a parameter deciding which output format to return,
either a "DT" ( |
ens_col |
Character string, name of the column where the ENSEMBL IDs are stored. |
ens_species |
The species you are working with to link to the correct gene on ENSEMBL |
Currently supported portals are: GeneCards, NCBI, GTEx, UniProt, dbPTM, Human Protein Atlas, PubMed
A data.frame or a DT::datatable
object with columns adding HTML
objects that link to websites with further information on the genes in
question.
data(res_de_macrophage, package = "mosdef") res_de <- res_macrophage_IFNg_vs_naive res_df <- deresult_to_df(res_de) ## Subsetting for quicker run res_df <- res_df[1:100, ] buttonifier(res_df) buttonifier(res_df, create_buttons_to = c("NCBI", "HPA"), ens_col = "id", ens_species = "Homo_sapiens" )
data(res_de_macrophage, package = "mosdef") res_de <- res_macrophage_IFNg_vs_naive res_df <- deresult_to_df(res_de) ## Subsetting for quicker run res_df <- res_df[1:100, ] buttonifier(res_df) buttonifier(res_df, create_buttons_to = c("NCBI", "HPA"), ens_col = "id", ens_species = "Homo_sapiens" )
Link to dbPTM database
create_link_dbPTM(val)
create_link_dbPTM(val)
val |
Character, the gene symbol |
HTML for an action button
create_link_dbPTM("Oct4") data(res_de_macrophage, package = "mosdef") res_macrophage_IFNg_vs_naive$SYMBOL <- create_link_dbPTM(res_macrophage_IFNg_vs_naive$SYMBOL)
create_link_dbPTM("Oct4") data(res_de_macrophage, package = "mosdef") res_macrophage_IFNg_vs_naive$SYMBOL <- create_link_dbPTM(res_macrophage_IFNg_vs_naive$SYMBOL)
Link to ENSEMBL database
create_link_ENSEMBL(val, species = "Mus_musculus")
create_link_ENSEMBL(val, species = "Mus_musculus")
val |
Character, the ENSEMBL gene identifier |
species |
The species to be analyzed e.g "Mus_musculus" |
HTML for an action button
create_link_ENSEMBL("ENSMUSG00000024406") data(res_de_macrophage, package = "mosdef") rownames(res_macrophage_IFNg_vs_naive) <- create_link_ENSEMBL( rownames(res_macrophage_IFNg_vs_naive))
create_link_ENSEMBL("ENSMUSG00000024406") data(res_de_macrophage, package = "mosdef") rownames(res_macrophage_IFNg_vs_naive) <- create_link_ENSEMBL( rownames(res_macrophage_IFNg_vs_naive))
Link to the GeneCards database
create_link_GeneCards(val)
create_link_GeneCards(val)
val |
Character, the gene symbol of interest |
HTML for an action button
create_link_GeneCards("Oct4") data(res_de_macrophage, package = "mosdef") res_macrophage_IFNg_vs_naive$SYMBOL <- create_link_GeneCards(res_macrophage_IFNg_vs_naive$SYMBOL)
create_link_GeneCards("Oct4") data(res_de_macrophage, package = "mosdef") res_macrophage_IFNg_vs_naive$SYMBOL <- create_link_GeneCards(res_macrophage_IFNg_vs_naive$SYMBOL)
Link to AMIGO database
create_link_GO(val)
create_link_GO(val)
val |
Character, the GOID |
HTML for an action button
create_link_GO("GO:0008150")
create_link_GO("GO:0008150")
Link to the GTEx Portal
create_link_GTEX(val)
create_link_GTEX(val)
val |
Character, the gene symbol of interest |
HTML for an action button
create_link_GTEX("Oct4") data(res_de_macrophage, package = "mosdef") res_macrophage_IFNg_vs_naive$SYMBOL <- create_link_GTEX(res_macrophage_IFNg_vs_naive$SYMBOL)
create_link_GTEX("Oct4") data(res_de_macrophage, package = "mosdef") res_macrophage_IFNg_vs_naive$SYMBOL <- create_link_GTEX(res_macrophage_IFNg_vs_naive$SYMBOL)
Link to the Human Protein Atlas
create_link_HPA(val)
create_link_HPA(val)
val |
Character, the gene symbol |
HTML for an action button
create_link_HPA("Oct4") data(res_de_macrophage, package = "mosdef") res_macrophage_IFNg_vs_naive$SYMBOL <- create_link_HPA(res_macrophage_IFNg_vs_naive$SYMBOL)
create_link_HPA("Oct4") data(res_de_macrophage, package = "mosdef") res_macrophage_IFNg_vs_naive$SYMBOL <- create_link_HPA(res_macrophage_IFNg_vs_naive$SYMBOL)
Link to NCBI database
create_link_NCBI(val)
create_link_NCBI(val)
val |
Character, the gene symbol |
HTML for an action button
create_link_NCBI("Oct4") data(res_de_macrophage, package = "mosdef") res_macrophage_IFNg_vs_naive$SYMBOL <- create_link_NCBI(res_macrophage_IFNg_vs_naive$SYMBOL)
create_link_NCBI("Oct4") data(res_de_macrophage, package = "mosdef") res_macrophage_IFNg_vs_naive$SYMBOL <- create_link_NCBI(res_macrophage_IFNg_vs_naive$SYMBOL)
Link to Pubmed
create_link_PubMed(val)
create_link_PubMed(val)
val |
Character, the gene symbol |
HTML for an action button
create_link_PubMed("Oct4") data(res_de_macrophage, package = "mosdef") res_macrophage_IFNg_vs_naive$SYMBOL <- create_link_PubMed(res_macrophage_IFNg_vs_naive$SYMBOL)
create_link_PubMed("Oct4") data(res_de_macrophage, package = "mosdef") res_macrophage_IFNg_vs_naive$SYMBOL <- create_link_PubMed(res_macrophage_IFNg_vs_naive$SYMBOL)
Link to UniProt database
create_link_UniProt(val)
create_link_UniProt(val)
val |
Character, the gene symbol |
HTML for an action button
create_link_UniProt("Oct4") data(res_de_macrophage, package = "mosdef") res_macrophage_IFNg_vs_naive$SYMBOL <- create_link_UniProt(res_macrophage_IFNg_vs_naive$SYMBOL)
create_link_UniProt("Oct4") data(res_de_macrophage, package = "mosdef") res_macrophage_IFNg_vs_naive$SYMBOL <- create_link_UniProt(res_macrophage_IFNg_vs_naive$SYMBOL)
Beautifying the aspect and looks of a DE results table
de_table_painter( res_de, rounding_digits = NULL, signif_digits = NULL, up_DE_color = "darkred", down_DE_color = "navyblue", logfc_column = "log2FoldChange", basemean_column = "baseMean", lfcse_column = "lfcSE", stat_column = "stat", pvalue_column = "pvalue", padj_column = "padj" )
de_table_painter( res_de, rounding_digits = NULL, signif_digits = NULL, up_DE_color = "darkred", down_DE_color = "navyblue", logfc_column = "log2FoldChange", basemean_column = "baseMean", lfcse_column = "lfcSE", stat_column = "stat", pvalue_column = "pvalue", padj_column = "padj" )
res_de |
An object containing the results of the Differential Expression
analysis workflow (e.g. |
rounding_digits |
Numeric value, specifying the number of digits to round the numeric values of the DE table (except the p-values) |
signif_digits |
Numeric value, specifying the number of significant digits to display for the p-values in the DE table |
up_DE_color |
Character string, specifying the color to use for coloring the bar of upregulated genes. |
down_DE_color |
Character string, specifying the color to use for coloring the bar of downregulated genes. |
logfc_column |
Character string, defining the name of the column in which to find the log2 fold change. |
basemean_column |
Character string, defining the name of the column in which to find the average expression value. |
lfcse_column |
Character string, defining the name of the column in which to find the standard error of the log2 fold change. |
stat_column |
Character string, defining the name of the column in which to find the values of the test statistic. |
pvalue_column |
Character string, defining the name of the column in which to find the unadjusted p-values. |
padj_column |
Character string, defining the name of the column in which to find the adjusted p-values. |
Feeding on the classical results of DE workflows, this function formats and tries to prettify the representation of the key values in it.
A datatable
object, ready to be rendered as a widget inside an
analysis Rmarkdown report.
data(res_de_macrophage, package = "mosdef") de_table_painter(res_macrophage_IFNg_vs_naive, rounding_digits = 3, signif_digits = 5) ## It is also possible to pass the "buttonified" table, res_df_small <- deresult_to_df(res_macrophage_IFNg_vs_naive)[1:100, ] buttonified_df <- buttonifier(res_df_small, create_buttons_to = c("NCBI", "HPA"), ens_col = "id", ens_species = "Homo_sapiens", output_format = "DF" ) de_table_painter(buttonified_df, rounding_digits = 3, signif_digits = 5)
data(res_de_macrophage, package = "mosdef") de_table_painter(res_macrophage_IFNg_vs_naive, rounding_digits = 3, signif_digits = 5) ## It is also possible to pass the "buttonified" table, res_df_small <- deresult_to_df(res_macrophage_IFNg_vs_naive)[1:100, ] buttonified_df <- buttonifier(res_df_small, create_buttons_to = c("NCBI", "HPA"), ens_col = "id", ens_species = "Homo_sapiens", output_format = "DF" ) de_table_painter(buttonified_df, rounding_digits = 3, signif_digits = 5)
This function generates a base volcano plot for differentially expressed genes that can then be expanded upon using further ggplot functions.
de_volcano( res_de, mapping = "org.Mm.eg.db", logfc_cutoff = 1, FDR = 0.05, labeled_genes = 30 )
de_volcano( res_de, mapping = "org.Mm.eg.db", logfc_cutoff = 1, FDR = 0.05, labeled_genes = 30 )
res_de |
An object containing the results of the Differential Expression
analysis workflow (e.g. |
mapping |
Which |
logfc_cutoff |
A numeric value that sets the cutoff for the xintercept argument of ggplot |
FDR |
The p-value threshold to use for counting genes as DE, and therefore also where to draw the line in the plot. Default is 0.05 |
labeled_genes |
A numeric value describing the number of genes to be labeled. This uses the Top(x) highest differentially expressed genes |
A ggplot2
volcano plot object that can be extended upon by the
user
library("ggplot2") library("RColorBrewer") library("ggrepel") library("DESeq2") library("org.Hs.eg.db") data(res_de_macrophage, package = "mosdef") p <- de_volcano(res_macrophage_IFNg_vs_naive, logfc_cutoff = 1, labeled_genes = 20, mapping = "org.Hs.eg.db" ) p
library("ggplot2") library("RColorBrewer") library("ggrepel") library("DESeq2") library("org.Hs.eg.db") data(res_de_macrophage, package = "mosdef") p <- de_volcano(res_macrophage_IFNg_vs_naive, logfc_cutoff = 1, labeled_genes = 20, mapping = "org.Hs.eg.db" ) p
DESeq2 results
Generate a tidy table with the results of DESeq2
deresult_to_df(res_de, FDR = NULL)
deresult_to_df(res_de, FDR = NULL)
res_de |
An object containing the results of the Differential Expression
analysis workflow (e.g. |
FDR |
Numeric value, specifying the significance level for thresholding adjusted p-values. Defaults to NULL, which would return the full set of results without performing any subsetting based on FDR. |
A tidy data.frame
with the results from differential expression,
sorted by adjusted p-value. If FDR is specified, the table contains only
genes with adjusted p-value smaller than the value.
library("DESeq2") library("macrophage") data(res_de_macrophage, package = "mosdef") head(res_macrophage_IFNg_vs_naive) res_df <- deresult_to_df(res_macrophage_IFNg_vs_naive) head(res_df)
library("DESeq2") library("macrophage") data(res_de_macrophage, package = "mosdef") head(res_macrophage_IFNg_vs_naive) res_df <- deresult_to_df(res_macrophage_IFNg_vs_naive) head(res_df)
Plot expression values (e.g. normalized counts) for a gene of interest, grouped by experimental group(s) of interest
gene_plot( de_container, gene, intgroup = "condition", assay = "counts", annotation_obj = NULL, normalized = TRUE, transform = TRUE, labels_display = TRUE, labels_repel = TRUE, plot_type = "auto", return_data = FALSE )
gene_plot( de_container, gene, intgroup = "condition", assay = "counts", annotation_obj = NULL, normalized = TRUE, transform = TRUE, labels_display = TRUE, labels_repel = TRUE, plot_type = "auto", return_data = FALSE )
de_container |
An object containing the data for a Differential
Expression workflow (e.g. |
gene |
Character, specifies the identifier of the feature (gene) to be plotted |
intgroup |
A character vector of names in |
assay |
Character, specifies which assay of the |
annotation_obj |
A |
normalized |
Logical value, whether the expression values should be
normalized by their size factor. Defaults to TRUE, applies when |
transform |
Logical value, corresponding whether to have log scale y-axis or not. Defaults to TRUE. |
labels_display |
Logical value. Whether to display the labels of samples, defaults to TRUE. |
labels_repel |
Logical value. Whether to use |
plot_type |
Character, one of "auto", "jitteronly", "boxplot", "violin",
or "sina". Defines the type of |
return_data |
Logical, whether the function should just return the data.frame of expression values and covariates for custom plotting. Defaults to FALSE. |
The result of this function can be fed directly to plotly::ggplotly()
for interactive visualization, instead of the static ggplot
viz.
A ggplot
object
library("macrophage") library("DESeq2") library("org.Hs.eg.db") # dds object data(gse, package = "macrophage") dds_macrophage <- DESeqDataSet(gse, design = ~ line + condition) rownames(dds_macrophage) <- substr(rownames(dds_macrophage), 1, 15) keep <- rowSums(counts(dds_macrophage) >= 10) >= 6 dds_macrophage <- dds_macrophage[keep, ] # dds_macrophage <- DESeq(dds_macrophage) # annotation object anno_df <- data.frame( gene_id = rownames(dds_macrophage), gene_name = mapIds(org.Hs.eg.db, keys = rownames(dds_macrophage), column = "SYMBOL", keytype = "ENSEMBL" ), stringsAsFactors = FALSE, row.names = rownames(dds_macrophage) ) gene_plot( de_container = dds_macrophage, gene = "ENSG00000125347", intgroup = "condition", annotation_obj = anno_df )
library("macrophage") library("DESeq2") library("org.Hs.eg.db") # dds object data(gse, package = "macrophage") dds_macrophage <- DESeqDataSet(gse, design = ~ line + condition) rownames(dds_macrophage) <- substr(rownames(dds_macrophage), 1, 15) keep <- rowSums(counts(dds_macrophage) >= 10) >= 6 dds_macrophage <- dds_macrophage[keep, ] # dds_macrophage <- DESeq(dds_macrophage) # annotation object anno_df <- data.frame( gene_id = rownames(dds_macrophage), gene_name = mapIds(org.Hs.eg.db, keys = rownames(dds_macrophage), column = "SYMBOL", keytype = "ENSEMBL" ), stringsAsFactors = FALSE, row.names = rownames(dds_macrophage) ) gene_plot( de_container = dds_macrophage, gene = "ENSG00000125347", intgroup = "condition", annotation_obj = anno_df )
Assembles information, in HTML format, regarding a gene symbol identifier
geneinfo_to_html(gene_id, res_de = NULL, col_to_use = "SYMBOL")
geneinfo_to_html(gene_id, res_de = NULL, col_to_use = "SYMBOL")
gene_id |
Character specifying the gene identifier for which to retrieve information |
res_de |
An object containing the results of the Differential Expression
analysis workflow (e.g. |
col_to_use |
The column of your res_de object containing the gene symbols. Default is "SYMBOL" |
Creates links to the NCBI and the GeneCards databases
HTML content related to a gene identifier, to be displayed in web applications (or inserted in Rmd documents)
geneinfo_to_html("ACTB") geneinfo_to_html("Pf4")
geneinfo_to_html("ACTB") geneinfo_to_html("Pf4")
Get an annotation data frame from org db packages
get_annotation_orgdb( de_container, orgdb_package, id_type, key_for_genenames = "SYMBOL" )
get_annotation_orgdb( de_container, orgdb_package, id_type, key_for_genenames = "SYMBOL" )
de_container |
An object containing the data for a Differential
Expression workflow (e.g. |
orgdb_package |
Character string, named as the |
id_type |
Character, the ID type of the genes as in the row names of
the |
key_for_genenames |
Character, corresponding to the column name for the
key in the orgDb package containing the official gene name (often called
gene symbol).
This parameter defaults to "SYMBOL", but can be adjusted in case the key is not
found in the annotation package (e.g. for |
A data frame to be used for annotation of genes, with the main
information encoded in the gene_id
and gene_name
columns.
library("macrophage") library("DESeq2") library("org.Hs.eg.db") # dds object data(gse, package = "macrophage") dds_macrophage <- DESeqDataSet(gse, design = ~ line + condition) rownames(dds_macrophage) <- substr(rownames(dds_macrophage), 1, 15) anno_df <- get_annotation_orgdb(dds_macrophage, "org.Hs.eg.db", "ENSEMBL") head(anno_df)
library("macrophage") library("DESeq2") library("org.Hs.eg.db") # dds object data(gse, package = "macrophage") dds_macrophage <- DESeqDataSet(gse, design = ~ line + condition) rownames(dds_macrophage) <- substr(rownames(dds_macrophage), 1, 15) anno_df <- get_annotation_orgdb(dds_macrophage, "org.Hs.eg.db", "ENSEMBL") head(anno_df)
Extract expression values, with the possibility to select other assay slots
get_expr_values( de_container, gene, intgroup, assay = "counts", normalized = TRUE )
get_expr_values( de_container, gene, intgroup, assay = "counts", normalized = TRUE )
de_container |
An object containing the data for a Differential
Expression workflow (e.g. |
gene |
Character, specifies the identifier of the feature (gene) to be extracted |
intgroup |
A character vector of names in |
assay |
Character, specifies which assay of the |
normalized |
Logical value, whether the expression values should be
normalized by their size factor. Defaults to TRUE, applies when |
A tidy data.frame with the expression values and covariates for further processing
library("macrophage") library("DESeq2") library("org.Hs.eg.db") library("AnnotationDbi") # dds object data(gse, package = "macrophage") dds_macrophage <- DESeqDataSet(gse, design = ~ line + condition) rownames(dds_macrophage) <- substr(rownames(dds_macrophage), 1, 15) keep <- rowSums(counts(dds_macrophage) >= 10) >= 6 dds_macrophage <- dds_macrophage[keep, ] # dds_macrophage <- DESeq(dds_macrophage) df_exp <- get_expr_values( de_container = dds_macrophage, gene = "ENSG00000125347", intgroup = "condition" ) head(df_exp)
library("macrophage") library("DESeq2") library("org.Hs.eg.db") library("AnnotationDbi") # dds object data(gse, package = "macrophage") dds_macrophage <- DESeqDataSet(gse, design = ~ line + condition) rownames(dds_macrophage) <- substr(rownames(dds_macrophage), 1, 15) keep <- rowSums(counts(dds_macrophage) >= 10) >= 6 dds_macrophage <- dds_macrophage[keep, ] # dds_macrophage <- DESeq(dds_macrophage) df_exp <- get_expr_values( de_container = dds_macrophage, gene = "ENSG00000125347", intgroup = "condition" ) head(df_exp)
Assembles information, in HTML format, regarding a Gene Ontology identifier
go_to_html(go_id, res_enrich = NULL)
go_to_html(go_id, res_enrich = NULL)
go_id |
Character, specifying the GeneOntology identifier for which to retrieve information |
res_enrich |
A |
Also creates a link to the AmiGO database
HTML content related to a GeneOntology identifier, to be displayed in web applications (or inserted in Rmd documents)
go_to_html("GO:0002250") go_to_html("GO:0043368")
go_to_html("GO:0002250") go_to_html("GO:0043368")
Generates a volcano plot using ggplot2. This function generates a base volcano plot highlighting genes associated with a certain GO term that can then be expanded upon using further ggplot functions.
go_volcano( res_de, res_enrich, mapping = "org.Hs.eg.db", term_index, logfc_cutoff = 1, FDR = 0.05, col_to_use = NULL, enrich_col = "genes", gene_col_separator = ",", down_col = "black", up_col = "black", highlight_col = "tomato", n_overlaps = 20 )
go_volcano( res_de, res_enrich, mapping = "org.Hs.eg.db", term_index, logfc_cutoff = 1, FDR = 0.05, col_to_use = NULL, enrich_col = "genes", gene_col_separator = ",", down_col = "black", up_col = "black", highlight_col = "tomato", n_overlaps = 20 )
res_de |
An object containing the results of the Differential Expression
analysis workflow (e.g. |
res_enrich |
An enrichment result object created, for example, by using
|
mapping |
Which |
term_index |
The location (row) of your GO term of interest in your enrichment result |
logfc_cutoff |
A numeric value that sets the cutoff for the xintercept argument of ggplot |
FDR |
The p-value threshold to use for counting genes as DE, and therefore also where to draw the line in the plot. Default is 0.05 |
col_to_use |
The column in your differential expression results containing your gene symbols. If you don't have one it is created automatically |
enrich_col |
column name from your res_enrich where the genes associated
with your GOterm are stored (for example see the |
gene_col_separator |
The separator used to split the genes.
If you used topGO or goseq this is a "," which is the default. (For an
example see the |
down_col |
The colour for your downregulated genes, default is "black" |
up_col |
The colour for your upregulated genes, default is "black" |
highlight_col |
The colour for the genes associated with your GOterm default is "tomato" |
n_overlaps |
Number of overlaps ggrepel is supposed to allow when labeling (for more info check ggrepel documentation) |
A ggplot2
volcano plot object that can be extended upon by the user
library("org.Hs.eg.db") data(res_de_macrophage, package = "mosdef") data(res_enrich_macrophage_topGO, package = "mosdef") p <- go_volcano( res_macrophage_IFNg_vs_naive, res_enrich = res_enrich_macrophage_topGO, term_index = 1, logfc_cutoff = 1, mapping = "org.Hs.eg.db", n_overlaps = 20 ) p
library("org.Hs.eg.db") data(res_de_macrophage, package = "mosdef") data(res_enrich_macrophage_topGO, package = "mosdef") p <- go_volcano( res_macrophage_IFNg_vs_naive, res_enrich = res_enrich_macrophage_topGO, term_index = 1, logfc_cutoff = 1, mapping = "org.Hs.eg.db", n_overlaps = 20 ) p
Maps numeric continuous values to values in a color palette
map_to_color(x, pal, symmetric = TRUE, limits = NULL)
map_to_color(x, pal, symmetric = TRUE, limits = NULL)
x |
A vector of numeric values (e.g. log2FoldChange values) to be converted to a vector of colors |
pal |
A vector of characters specifying the definition of colors for the
palette, e.g. obtained via |
symmetric |
Logical value, whether to return a palette which is symmetrical
with respect to the minimum and maximum values - "respecting" the zero.
Defaults to |
limits |
A vector containing the limits of the values to be mapped. If
not specified, defaults to the range of values in the |
A vector of colors, each corresponding to an element in the original vector
a <- 1:9 pal <- RColorBrewer::brewer.pal(9, "Set1") map_to_color(a, pal) plot(a, col = map_to_color(a, pal), pch = 20, cex = 4) b <- 1:50 pal2 <- grDevices::colorRampPalette( RColorBrewer::brewer.pal(name = "RdYlBu", 11) )(50) plot(b, col = map_to_color(b, pal2), pch = 20, cex = 3)
a <- 1:9 pal <- RColorBrewer::brewer.pal(9, "Set1") map_to_color(a, pal) plot(a, col = map_to_color(a, pal), pch = 20, cex = 4) b <- 1:50 pal2 <- grDevices::colorRampPalette( RColorBrewer::brewer.pal(name = "RdYlBu", 11) )(50) plot(b, col = map_to_color(b, pal2), pch = 20, cex = 3)
de_container
contains everything you need
A function checking if your de_container
contains everything you need
mosdef_de_container_check(de_container, verbose = FALSE)
mosdef_de_container_check(de_container, verbose = FALSE)
de_container |
An object containing the data for a Differential
Expression workflow (e.g. |
verbose |
Logical, whether to add messages telling the user which steps were taken. |
An invisible NULL
after performing the checks
library("macrophage") library("DESeq2") data(gse, package = "macrophage") dds_macrophage <- DESeqDataSet(gse, design = ~ line + condition) rownames(dds_macrophage) <- substr(rownames(dds_macrophage), 1, 15) keep <- rowSums(counts(dds_macrophage) >= 10) >= 6 dds_macrophage <- dds_macrophage[keep, ] # dds_macrophage <- DESeq(dds_macrophage) mosdef_de_container_check(dds_macrophage)
library("macrophage") library("DESeq2") data(gse, package = "macrophage") dds_macrophage <- DESeqDataSet(gse, design = ~ line + condition) rownames(dds_macrophage) <- substr(rownames(dds_macrophage), 1, 15) keep <- rowSums(counts(dds_macrophage) >= 10) >= 6 dds_macrophage <- dds_macrophage[keep, ] # dds_macrophage <- DESeq(dds_macrophage) mosdef_de_container_check(dds_macrophage)
A function checking if your res_de contains everything you need
mosdef_res_check(res_de, verbose = FALSE)
mosdef_res_check(res_de, verbose = FALSE)
res_de |
An object containing the results of the Differential Expression
analysis workflow (e.g. |
verbose |
Logical, whether to add messages telling the user which steps were taken |
An invisible NULL
after performing the checks
data(res_de_macrophage, package = "mosdef") mosdef_res_check(res_macrophage_IFNg_vs_naive)
data(res_de_macrophage, package = "mosdef") mosdef_res_check(res_macrophage_IFNg_vs_naive)
Pairwise scatter plot matrix and correlation plot of counts
pair_corr(df, log = TRUE, method = "pearson", use_subset = TRUE)
pair_corr(df, log = TRUE, method = "pearson", use_subset = TRUE)
df |
A data frame, containing the (raw/normalized/transformed) counts |
log |
Logical, whether to convert the input values to log2 (with addition of a pseudocount). Defaults to TRUE. |
method |
Character string, one of |
use_subset |
Logical value. If TRUE, only 1000 values per sample will be used to speed up the plotting operations. |
A plot with pairwise scatter plots and correlation coefficients
library("macrophage") library("DESeq2") data(gse, package = "macrophage") ## dds object dds_macrophage <- DESeqDataSet(gse, design = ~ line + condition) rownames(dds_macrophage) <- substr(rownames(dds_macrophage), 1, 15) dds_macrophage <- estimateSizeFactors(dds_macrophage) ## Using just a subset for the example pair_corr(counts(dds_macrophage, normalized = TRUE)[1:100, 1:8])
library("macrophage") library("DESeq2") data(gse, package = "macrophage") ## dds object dds_macrophage <- DESeqDataSet(gse, design = ~ line + condition) rownames(dds_macrophage) <- substr(rownames(dds_macrophage), 1, 15) dds_macrophage <- estimateSizeFactors(dds_macrophage) ## Using just a subset for the example pair_corr(counts(dds_macrophage, normalized = TRUE)[1:100, 1:8])
MA-plot from base means and log fold changes, in the ggplot2 framework, with additional support to annotate genes if provided.
plot_ma( res_de, FDR = 0.05, point_alpha = 0.2, sig_color = "red", annotation_obj = NULL, draw_y0 = TRUE, hlines = NULL, title = NULL, xlab = "mean of normalized counts - log10 scale", ylim = NULL, add_rug = TRUE, intgenes = NULL, intgenes_color = "steelblue", labels_intgenes = TRUE, labels_repel = TRUE )
plot_ma( res_de, FDR = 0.05, point_alpha = 0.2, sig_color = "red", annotation_obj = NULL, draw_y0 = TRUE, hlines = NULL, title = NULL, xlab = "mean of normalized counts - log10 scale", ylim = NULL, add_rug = TRUE, intgenes = NULL, intgenes_color = "steelblue", labels_intgenes = TRUE, labels_repel = TRUE )
res_de |
An object containing the results of the Differential Expression
analysis workflow (e.g. |
FDR |
Numeric value, the significance level for thresholding adjusted p-values |
point_alpha |
Alpha transparency value for the points (0 = transparent, 1 = opaque) |
sig_color |
Color to use to mark differentially expressed genes. Defaults to red |
annotation_obj |
A |
draw_y0 |
Logical, whether to draw the horizontal line at y=0. Defaults to TRUE. |
hlines |
The y coordinate (in absolute value) where to draw horizontal lines, optional |
title |
A title for the plot, optional |
xlab |
X axis label, defaults to "mean of normalized counts - log10 scale" |
ylim |
Vector of two numeric values, Y axis limits to restrict the view |
add_rug |
Logical, whether to add rug plots in the margins |
intgenes |
Vector of genes of interest. Gene symbols if a |
intgenes_color |
The color to use to mark the genes on the main plot. |
labels_intgenes |
Logical, whether to add the gene identifiers/names close to the marked plots |
labels_repel |
Logical, whether to use |
The genes of interest are to be provided as gene symbols if a symbol
column is provided in res_de, or else by using the identifiers
specified in the row names
An object created by ggplot
data(res_de_macrophage, package = "mosdef") plot_ma(res_macrophage_IFNg_vs_naive, FDR = 0.05, hlines = 1) plot_ma(res_macrophage_IFNg_vs_naive, FDR = 0.1, intgenes = c( "ENSG00000103196", # CRISPLD2 "ENSG00000120129", # DUSP1 "ENSG00000163884", # KLF15 "ENSG00000179094" # PER1 ) )
data(res_de_macrophage, package = "mosdef") plot_ma(res_macrophage_IFNg_vs_naive, FDR = 0.05, hlines = 1) plot_ma(res_macrophage_IFNg_vs_naive, FDR = 0.1, intgenes = c( "ENSG00000103196", # CRISPLD2 "ENSG00000120129", # DUSP1 "ENSG00000163884", # KLF15 "ENSG00000179094" # PER1 ) )
A sample enrichment object, generated in the mosdef
and clusterProfiler
framework
An enrichResult
object
This enrichment object is on the data from the macrophage
package.
Specifically, this set of enrichment results was created using the
Biological Process ontology, mapping the gene identifiers through the
org.Hs.eg.db
package.
Details on how this object has been created are included in the
create_mosdef_data.R
script, included in the (installed) inst/scripts
folder of the mosdef
package. This is also available at
https://github.com/imbeimainz/mosdef/blob/devel/inst/scripts/create_mosdef_data.R
Alasoo, et al. "Shared genetic effects on chromatin and gene expression indicate a role for enhancer priming in immune response", Nature Genetics, January 2018 doi: 10.1038/s41588-018-0046-7.
A sample enrichment object, generated in the mosdef
and goseq
framework
A data.frame
object
This enrichment object is on the data from the macrophage
package.
Specifically, this set of enrichment results was created using the
Biological Process ontology, mapping the gene symbol identifiers through the
org.Hs.eg.db
package - the gene length information is retrieved by the
internal routines of goseq
.
Details on how this object has been created are included in the
create_mosdef_data.R
script, included in the (installed) inst/scripts
folder of the mosdef
package. This is also available at
https://github.com/imbeimainz/mosdef/blob/devel/inst/scripts/create_mosdef_data.R
Alasoo, et al. "Shared genetic effects on chromatin and gene expression indicate a role for enhancer priming in immune response", Nature Genetics, January 2018 doi: 10.1038/s41588-018-0046-7.
A sample enrichment object, generated in the mosdef
and topGO
framework
A data.frame
object
This enrichment object is on the data from the macrophage
package.
Specifically, this set of enrichment results was created using the
Biological Process ontology, mapping the gene symbol identifiers through the
org.Hs.eg.db
package.
Details on how this object has been created are included in the
create_mosdef_data.R
script, included in the (installed) inst/scripts
folder of the mosdef
package. This is also available at
https://github.com/imbeimainz/mosdef/blob/devel/inst/scripts/create_mosdef_data.R
Alasoo, et al. "Shared genetic effects on chromatin and gene expression indicate a role for enhancer priming in immune response", Nature Genetics, January 2018 doi: 10.1038/s41588-018-0046-7.
DESeqResults object
A sample DESeqResults
object, generated in the DESeq2
framework
A DESeqResults
object
This DESeqResults
object is on the data from the macrophage
package. This result set has been created by setting the design to
~line + condition
to detect the effect of the condition
while accounting
for the different cell line
s included.
Specifically, this object contains the differences between the IFNg
vs
naive
samples, testing against a logFC threshold of 1 for robustness.
Details on how this object has been created are included in the
create_mosdef_data.R
script, included in the (installed) inst/scripts
folder of the mosdef
package. This is also available at
https://github.com/imbeimainz/mosdef/blob/devel/inst/scripts/create_mosdef_data.R
Alasoo, et al. "Shared genetic effects on chromatin and gene expression indicate a role for enhancer priming in immune response", Nature Genetics, January 2018 doi: 10.1038/s41588-018-0046-7.
A wrapper for extracting functional GO terms enriched in a list of (DE) genes, based on the algorithm and the implementation in the clusterProfiler package
run_cluPro( de_container = NULL, res_de = NULL, de_genes = NULL, bg_genes = NULL, top_de = NULL, FDR_threshold = 0.05, min_counts = 0, mapping = "org.Hs.eg.db", de_type = "up_and_down", keyType = "SYMBOL", verbose = TRUE, ... )
run_cluPro( de_container = NULL, res_de = NULL, de_genes = NULL, bg_genes = NULL, top_de = NULL, FDR_threshold = 0.05, min_counts = 0, mapping = "org.Hs.eg.db", de_type = "up_and_down", keyType = "SYMBOL", verbose = TRUE, ... )
de_container |
An object containing the data for a Differential
Expression workflow (e.g. |
res_de |
An object containing the results of the Differential Expression
analysis workflow (e.g. |
de_genes |
A vector of (differentially expressed) genes |
bg_genes |
A vector of background genes, e.g. all (expressed) genes in the assays |
top_de |
numeric, how many of the top differentially expressed genes to use for the enrichment analysis. Attempts to reduce redundancy. Assumes the data is sorted by padj (default in DESeq2). |
FDR_threshold |
The p-value threshold to use for counting genes as DE. Default is 0.05 |
min_counts |
numeric, min number of counts a gene needs to have to be included in the geneset that the de genes are compared to. Default is 0, recommended only for advanced users. |
mapping |
Which |
de_type |
One of: 'up', 'down', or 'up_and_down'. Which genes to use for GO term calculations: upregulated, downregulated or both |
keyType |
Gene format to input into enrichGO from clusterProfiler. If res_de and de_container are used, use "SYMBOL". For more information, check the enrichGO documentation. |
verbose |
Logical, whether to add messages telling the user which steps were taken |
... |
Further parameters to use for the |
A table containing the computed GO Terms and related enrichment scores.
clusterProfiler::enrichGO()
for the underlying method
Other Enrichment functions:
run_goseq()
,
run_topGO()
library("macrophage") library("DESeq2") data(gse, package = "macrophage") dds_macrophage <- DESeqDataSet(gse, design = ~ line + condition) rownames(dds_macrophage) <- substr(rownames(dds_macrophage), 1, 15) keep <- rowSums(counts(dds_macrophage) >= 10) >= 6 dds_macrophage <- dds_macrophage[keep, ] dds_macrophage <- DESeq(dds_macrophage) data(res_de_macrophage, package = "mosdef") library("AnnotationDbi") library("org.Hs.eg.db") library("clusterProfiler") CluProde_macrophage <- run_cluPro( res_de = res_macrophage_IFNg_vs_naive, de_container = dds_macrophage, mapping = "org.Hs.eg.db" )
library("macrophage") library("DESeq2") data(gse, package = "macrophage") dds_macrophage <- DESeqDataSet(gse, design = ~ line + condition) rownames(dds_macrophage) <- substr(rownames(dds_macrophage), 1, 15) keep <- rowSums(counts(dds_macrophage) >= 10) >= 6 dds_macrophage <- dds_macrophage[keep, ] dds_macrophage <- DESeq(dds_macrophage) data(res_de_macrophage, package = "mosdef") library("AnnotationDbi") library("org.Hs.eg.db") library("clusterProfiler") CluProde_macrophage <- run_cluPro( res_de = res_macrophage_IFNg_vs_naive, de_container = dds_macrophage, mapping = "org.Hs.eg.db" )
A wrapper for extracting functional GO terms enriched in a list of (DE) genes, based on the algorithm and the implementation in the goseq package
run_goseq( de_container = NULL, res_de = NULL, de_genes = NULL, bg_genes = NULL, top_de = NULL, FDR_threshold = 0.05, min_counts = 0, genome = "hg38", id = "ensGene", de_type = "up_and_down", testCats = c("GO:BP", "GO:MF", "GO:CC"), mapping = "org.Hs.eg.db", add_gene_to_terms = TRUE, verbose = TRUE )
run_goseq( de_container = NULL, res_de = NULL, de_genes = NULL, bg_genes = NULL, top_de = NULL, FDR_threshold = 0.05, min_counts = 0, genome = "hg38", id = "ensGene", de_type = "up_and_down", testCats = c("GO:BP", "GO:MF", "GO:CC"), mapping = "org.Hs.eg.db", add_gene_to_terms = TRUE, verbose = TRUE )
de_container |
An object containing the data for a Differential
Expression workflow (e.g. |
res_de |
An object containing the results of the Differential Expression
analysis workflow (e.g. |
de_genes |
A vector of (differentially expressed) genes |
bg_genes |
A vector of background genes, e.g. all (expressed) genes in the assays |
top_de |
numeric, how many of the top differentially expressed genes to use for the enrichment analysis. Attempts to reduce redundancy. Assumes the data is sorted by padj (default in DESeq2). |
FDR_threshold |
The p-value threshold to use for counting genes as DE. Default is 0.05 |
min_counts |
numeric, min number of counts a gene needs to have to be included in the geneset that the de genes are compared to. Default is 0, recommended only for advanced users. |
genome |
A string identifying the genome that genes refer to, as in the
|
id |
A string identifying the gene identifier used by genes, as in the
|
de_type |
One of: 'up', 'down', or 'up_and_down' Which genes to use for GOterm calculations: upregulated, downregulated or both |
testCats |
A vector specifying which categories to test for overrepresentation amongst DE genes - can be any combination of "GO:CC", "GO:BP", "GO:MF" & "KEGG" |
mapping |
Character string, named as the |
add_gene_to_terms |
Logical, whether to add a column with all genes annotated to each GO term |
verbose |
Logical, whether to add messages telling the user which steps were taken |
Note: the feature length retrieval is based on the goseq::goseq()
function, and requires that the corresponding TxDb packages are installed
and available
A table containing the computed GO Terms and related enrichment scores
goseq::goseq()
for the underlying method
Other Enrichment functions:
run_cluPro()
,
run_topGO()
library("macrophage") library("DESeq2") data(gse, package = "macrophage") dds_macrophage <- DESeqDataSet(gse, design = ~ line + condition) rownames(dds_macrophage) <- substr(rownames(dds_macrophage), 1, 15) keep <- rowSums(counts(dds_macrophage) >= 10) >= 6 dds_macrophage <- dds_macrophage[keep, ] dds_macrophage <- DESeq(dds_macrophage) data(res_de_macrophage, package = "mosdef") res_de <- res_macrophage_IFNg_vs_naive mygo <- run_goseq( res_de = res_macrophage_IFNg_vs_naive, de_container = dds_macrophage, mapping = "org.Hs.eg.db", testCats = "GO:BP", add_gene_to_terms = TRUE ) head(mygo)
library("macrophage") library("DESeq2") data(gse, package = "macrophage") dds_macrophage <- DESeqDataSet(gse, design = ~ line + condition) rownames(dds_macrophage) <- substr(rownames(dds_macrophage), 1, 15) keep <- rowSums(counts(dds_macrophage) >= 10) >= 6 dds_macrophage <- dds_macrophage[keep, ] dds_macrophage <- DESeq(dds_macrophage) data(res_de_macrophage, package = "mosdef") res_de <- res_macrophage_IFNg_vs_naive mygo <- run_goseq( res_de = res_macrophage_IFNg_vs_naive, de_container = dds_macrophage, mapping = "org.Hs.eg.db", testCats = "GO:BP", add_gene_to_terms = TRUE ) head(mygo)
A wrapper for extracting functional GO terms enriched in the DE genes, based on the algorithm and the implementation in the topGO package
run_topGO( de_container = NULL, res_de = NULL, de_genes = NULL, bg_genes = NULL, top_de = NULL, FDR_threshold = 0.05, min_counts = 0, ontology = "BP", annot = annFUN.org, mapping = "org.Mm.eg.db", gene_id = "symbol", full_names_in_rows = TRUE, add_gene_to_terms = TRUE, de_type = "up_and_down", topGO_method2 = "elim", do_padj = FALSE, verbose = TRUE )
run_topGO( de_container = NULL, res_de = NULL, de_genes = NULL, bg_genes = NULL, top_de = NULL, FDR_threshold = 0.05, min_counts = 0, ontology = "BP", annot = annFUN.org, mapping = "org.Mm.eg.db", gene_id = "symbol", full_names_in_rows = TRUE, add_gene_to_terms = TRUE, de_type = "up_and_down", topGO_method2 = "elim", do_padj = FALSE, verbose = TRUE )
de_container |
An object containing the data for a Differential
Expression workflow (e.g. |
res_de |
An object containing the results of the Differential Expression
analysis workflow (e.g. |
de_genes |
A vector of (differentially expressed) genes |
bg_genes |
A vector of background genes, e.g. all (expressed) genes in the assays |
top_de |
numeric, how many of the top differentially expressed genes to use for the enrichment analysis. Attempts to reduce redundancy. Assumes the data is sorted by padj (default in DESeq2). |
FDR_threshold |
The p-value threshold to use for counting genes as DE. Default is 0.05 |
min_counts |
numeric, min number of counts a gene needs to have to be included in the geneset that the de genes are compared to. Default is 0, recommended only for advanced users. |
ontology |
Which Gene Ontology domain to analyze: |
annot |
Which function to use for annotating genes to GO terms. Defaults
to |
mapping |
Which |
gene_id |
Which format the genes are provided. Defaults to |
full_names_in_rows |
Logical, whether to display or not the full names for the GO terms |
add_gene_to_terms |
Logical, whether to add a column with all genes annotated to each GO term |
de_type |
One of: 'up', 'down', or 'up_and_down' Which genes to use for GOterm calculations: upregulated, downregulated or both |
topGO_method2 |
Character, specifying which of the methods implemented
by |
do_padj |
Logical, whether to perform the adjustment on the p-values from the specific topGO method, based on the FDR correction. Defaults to FALSE, since the assumption of independent hypotheses is somewhat violated by the intrinsic DAG-structure of the Gene Ontology Terms |
verbose |
Logical, whether to add messages telling the user which steps were taken |
Allowed values assumed by the topGO_method2
parameter are one of the
following: elim
, weight
, weight01
, lea
, parentchild
.
For more details on this, please refer to the original
documentation of the topGO
package itself
A table containing the computed GO Terms and related enrichment scores
topGO::topGOdata-class()
and topGO::runTest()
for the
class objects and underlying methods
Other Enrichment functions:
run_cluPro()
,
run_goseq()
library("macrophage") library("DESeq2") data(gse, package = "macrophage") dds_macrophage <- DESeqDataSet(gse, design = ~ line + condition) rownames(dds_macrophage) <- substr(rownames(dds_macrophage), 1, 15) keep <- rowSums(counts(dds_macrophage) >= 10) >= 6 dds_macrophage <- dds_macrophage[keep, ] dds_macrophage <- DESeq(dds_macrophage) data(res_de_macrophage, package = "mosdef") library("AnnotationDbi") library("org.Hs.eg.db") library("topGO") topgoDE_macrophage <- run_topGO( de_container = dds_macrophage, res_de = res_macrophage_IFNg_vs_naive, ontology = "BP", mapping = "org.Hs.eg.db", gene_id = "symbol", )
library("macrophage") library("DESeq2") data(gse, package = "macrophage") dds_macrophage <- DESeqDataSet(gse, design = ~ line + condition) rownames(dds_macrophage) <- substr(rownames(dds_macrophage), 1, 15) keep <- rowSums(counts(dds_macrophage) >= 10) >= 6 dds_macrophage <- dds_macrophage[keep, ] dds_macrophage <- DESeq(dds_macrophage) data(res_de_macrophage, package = "mosdef") library("AnnotationDbi") library("org.Hs.eg.db") library("topGO") topgoDE_macrophage <- run_topGO( de_container = dds_macrophage, res_de = res_macrophage_IFNg_vs_naive, ontology = "BP", mapping = "org.Hs.eg.db", gene_id = "symbol", )
Style DT color bars for values that diverge from 0.
styleColorBar_divergent(data, color_pos, color_neg)
styleColorBar_divergent(data, color_pos, color_neg)
data |
The numeric vector whose range will be used for scaling the table data from 0-100 before being represented as color bars. A vector of length 2 is acceptable here for specifying a range possibly wider or narrower than the range of the table data itself. |
color_pos |
The color of the bars for the positive values |
color_neg |
The color of the bars for the negative values |
This function draws background color bars behind table cells in a column, with the width of the bars being proportional to the column values and the color dependent on the sign of the value.
A typical usage is for values such as log2FoldChange
for tables resulting from
differential expression analysis.
Still, the functionality of this can be quickly generalized to other cases -
see in the examples.
The code of this function is heavily inspired by styleColorBar, and borrows extensively from an excellent post on StackOverflow - https://stackoverflow.com/questions/33521828/stylecolorbar-center-and-shift-left-right-dependent-on-sign/33524422#33524422
This function generates JavaScript and CSS code from the values specified in R, to be used in DT tables formatting.
# With a very simple data frame simplest_df <- data.frame( a = c(rep("a", 9)), value = c(-4, -3, -2, -1, 0, 1, 2, 3, 4) ) library("DT") DT::datatable(simplest_df) |> formatStyle( "value", background = styleColorBar_divergent( simplest_df$value, scales::alpha("forestgreen", 0.4), scales::alpha("gold", 0.4) ), backgroundSize = "100% 90%", backgroundRepeat = "no-repeat", backgroundPosition = "center" )
# With a very simple data frame simplest_df <- data.frame( a = c(rep("a", 9)), value = c(-4, -3, -2, -1, 0, 1, 2, 3, 4) ) library("DT") DT::datatable(simplest_df) |> formatStyle( "value", background = styleColorBar_divergent( simplest_df$value, scales::alpha("forestgreen", 0.4), scales::alpha("gold", 0.4) ), backgroundSize = "100% 90%", backgroundRepeat = "no-repeat", backgroundPosition = "center" )