Title: | Visualise methylation data from Oxford Nanopore sequencing |
---|---|
Description: | NanoMethViz is a toolkit for visualising methylation data from Oxford Nanopore sequencing. It can be used to explore methylation patterns from reads derived from Oxford Nanopore direct DNA sequencing with methylation called by callers including nanopolish, f5c and megalodon. The plots in this package allow the visualisation of methylation profiles aggregated over experimental groups and across classes of genomic features. |
Authors: | Shian Su [cre, aut] |
Maintainer: | Shian Su <[email protected]> |
License: | Apache License (>= 2.0) |
Version: | 3.3.0 |
Built: | 2024-10-30 08:33:16 UTC |
Source: | https://github.com/bioc/NanoMethViz |
Convert BSseq object to edgeR methylation matrix
bsseq_to_edger(bsseq, regions = NULL)
bsseq_to_edger(bsseq, regions = NULL)
bsseq |
the BSseq object. |
regions |
the regions to calculate log-methylation ratios over. If left NULL, ratios will be calculated per site. |
a matrix compatible with the edgeR differential methylation pipeline
methy <- system.file("methy_subset.tsv.bgz", package = "NanoMethViz") bsseq <- methy_to_bsseq(methy) edger_mat <- bsseq_to_edger(bsseq)
methy <- system.file("methy_subset.tsv.bgz", package = "NanoMethViz") bsseq <- methy_to_bsseq(methy) edger_mat <- bsseq_to_edger(bsseq)
Creates a log-methylation-ratio matrix from a BSseq object that is useful for dimensionality reduction plots.
bsseq_to_log_methy_ratio( bsseq, regions = NULL, prior_count = 2, drop_na = TRUE )
bsseq_to_log_methy_ratio( bsseq, regions = NULL, prior_count = 2, drop_na = TRUE )
bsseq |
the BSseq object. |
regions |
the regions to calculate log-methylation ratios over. If left NULL, ratios will be calculated per site. |
prior_count |
the prior count added to avoid taking log of 0. |
drop_na |
whether to drop rows with all NA values. |
a matrix containing log-methylation-ratios.
nmr <- load_example_nanomethresult() bsseq <- methy_to_bsseq(nmr) regions <- exons_to_genes(NanoMethViz::exons(nmr)) log_m_ratio <- bsseq_to_log_methy_ratio(bsseq, regions)
nmr <- load_example_nanomethresult() bsseq <- methy_to_bsseq(nmr) regions <- exons_to_genes(NanoMethViz::exons(nmr)) log_m_ratio <- bsseq_to_log_methy_ratio(bsseq, regions)
Cluster reads based on methylation
cluster_reads(x, chr, start, end, min_pts = 5)
cluster_reads(x, chr, start, end, min_pts = 5)
x |
a ModBamResult object. |
chr |
the chromosome name where to find the region. |
start |
the start position of the region. |
end |
the end position of the region. |
min_pts |
the minimum number of points needed to form a cluster (default = 10). |
A tibble with information about each read's cluster assignment and read statistics.
Cluster regions by k-means based on their methylation profiles. In order to cluster using k-means the methylation profile of each region is interpolated and sampled at fixed points. The first 10 principal components are used for the k-means clustering. The clustering is best behaved in regions of similar width and CpG density.
cluster_regions(x, regions, centers = 2, grid_method = c("density", "uniform"))
cluster_regions(x, regions, centers = 2, grid_method = c("density", "uniform"))
x |
the NanoMethResult object. |
regions |
a table of regions containing at least columns chr, strand, start and end. |
centers |
number of centers for k-means, identical to the number of output clusters. |
grid_method |
the method for generating the sampling grid. The default option "density" attempts to create a grid with similar density as the data, "uniform" creates a grid of uniform density. |
the table of regions given by the 'regions' argument with the column 'cluster' added.
nmr <- load_example_nanomethresult() gene_anno <- exons_to_genes(NanoMethViz::exons(nmr)) # uniform grid due to low number of input features gene_anno_clustered <- cluster_regions(nmr, gene_anno, centers = 2, grid_method = "uniform") plot_agg_regions(nmr, gene_anno_clustered, group_col = "cluster")
nmr <- load_example_nanomethresult() gene_anno <- exons_to_genes(NanoMethViz::exons(nmr)) # uniform grid due to low number of input features gene_anno_clustered <- cluster_regions(nmr, gene_anno, centers = 2, grid_method = "uniform") plot_agg_regions(nmr, gene_anno_clustered, group_col = "cluster")
Create a tabix file using methylation calls
create_tabix_file( input_files, output_file, samples = extract_file_names(input_files), verbose = TRUE )
create_tabix_file( input_files, output_file, samples = extract_file_names(input_files), verbose = TRUE )
input_files |
the files to convert |
output_file |
the output file to write results to (must end in .bgz) |
samples |
the names of samples corresponding to each file |
verbose |
TRUE if progress messages are to be printed |
invisibly returns the output file path, creates a tabix file (.bgz) and its index (.bgz.tbi)
methy_calls <- system.file(package = "NanoMethViz", c("sample1_nanopolish.tsv.gz", "sample2_nanopolish.tsv.gz")) temp_file <- paste0(tempfile(), ".tsv.bgz") create_tabix_file(methy_calls, temp_file)
methy_calls <- system.file(package = "NanoMethViz", c("sample1_nanopolish.tsv.gz", "sample2_nanopolish.tsv.gz")) temp_file <- paste0(tempfile(), ".tsv.bgz") create_tabix_file(methy_calls, temp_file)
Convert exon annotation to genes
exons_to_genes(x)
exons_to_genes(x)
x |
the exon level annotation containing columns "gene_id", "chr", "strand" and "symbol". |
the gene level annotation where each gene is taken to span the earliest start position and latest end position of its exons.
nmr <- load_example_nanomethresult() exons_to_genes(NanoMethViz::exons(nmr))
nmr <- load_example_nanomethresult() exons_to_genes(NanoMethViz::exons(nmr))
Create a filtered methylation file from an existing one.
filter_methy(x, output_file, ...)
filter_methy(x, output_file, ...)
x |
the path to the methylation file or a NanoMethResult object. |
output_file |
the output file to write results to (must end in .bgz). |
... |
filtering criteria given in dplyr syntax. Use methy_col_names() to get available column names. |
invisibly returns 'output_file' if x is a file path, otherwise returns NanoMethResult object with methy(x) replaced with filtered value.
nmr <- load_example_nanomethresult() output_file <- paste0(tempfile(), ".tsv.bgz") filter_methy(nmr, output_file = output_file, chr == "chrX") filter_methy(methy(nmr), output_file = output_file, chr == "chrX")
nmr <- load_example_nanomethresult() output_file <- paste0(tempfile(), ".tsv.bgz") filter_methy(nmr, output_file = output_file, chr == "chrX") filter_methy(methy(nmr), output_file = output_file, chr == "chrX")
Helper functions are provided for obtaining exon annotations from relevant TxDb packages on Bioconductor for the construction of NanoMethResults objects.
get_cgi_mm10() get_cgi_grcm39() get_cgi_hg19() get_cgi_hg38() get_exons_mm10() get_exons_grcm39() get_exons_hg19() get_exons_hg38()
get_cgi_mm10() get_cgi_grcm39() get_cgi_hg19() get_cgi_hg38() get_exons_mm10() get_exons_grcm39() get_exons_hg19() get_exons_hg38()
tibble (data.frame) object containing exon annotation.
cgi_mm10 <- get_cgi_mm10() cgi_GRCm39 <- get_cgi_grcm39() cgi_hg19 <- get_cgi_hg19() cgi_hg38 <- get_cgi_hg38() mm10_exons <- get_exons_mm10() grcm39_exons <- get_exons_grcm39() hg19_exons <- get_exons_hg19() hg38_exons <- get_exons_hg38()
cgi_mm10 <- get_cgi_mm10() cgi_GRCm39 <- get_cgi_grcm39() cgi_hg19 <- get_cgi_hg19() cgi_hg38 <- get_cgi_hg38() mm10_exons <- get_exons_mm10() grcm39_exons <- get_exons_grcm39() hg19_exons <- get_exons_hg19() hg38_exons <- get_exons_hg38()
This is a small subset of the exons returned by
get_exons_mus_musculus()
for demonstrative purposes. It contains
the exons for the genes Brca1, Brca2, Impact, Meg3, Peg3 and Xist.
get_example_exons_mus_musculus()
get_example_exons_mus_musculus()
data.frame containing exons
example_exons <- get_example_exons_mus_musculus()
example_exons <- get_example_exons_mus_musculus()
Get exon annotations for Homo sapiens (hg19)
get_exons_homo_sapiens()
get_exons_homo_sapiens()
data.frame containing exons
h_sapiens_exons <- get_exons_homo_sapiens()
h_sapiens_exons <- get_exons_homo_sapiens()
Get exon annotations for Mus musculus (mm10)
get_exons_mus_musculus()
get_exons_mus_musculus()
data.frame containing exons
m_musculus_exons <- get_exons_mus_musculus()
m_musculus_exons <- get_exons_mus_musculus()
Load an example ModBamResult object for demonstration of plotting
functions. Run load_example_modbamresult
without the function call to
see how the object is constructed.
load_example_modbamresult()
load_example_modbamresult()
a ModBamResult object
mbr <- load_example_modbamresult()
mbr <- load_example_modbamresult()
Load an example NanoMethResult object for demonstration of plotting
functions. Run load_example_nanomethresults
without the function call to
see how the object is constructed.
load_example_nanomethresult()
load_example_nanomethresult()
a NanoMethResults object
nmr <- load_example_nanomethresult()
nmr <- load_example_nanomethresult()
Column names for methylation data
methy_col_names()
methy_col_names()
column names for methylation data
methy_col_names()
methy_col_names()
Create BSSeq object from methylation tabix file
methy_to_bsseq(methy, out_folder = tempdir(), verbose = TRUE)
methy_to_bsseq(methy, out_folder = tempdir(), verbose = TRUE)
methy |
the NanoMethResult object or path to the methylation tabix file. |
out_folder |
the folder to store intermediate files. One file is created for each sample and contains columns "chr", "pos", "total" and "methylated". |
verbose |
TRUE if progress messages are to be printed |
a BSSeq object.
nmr <- load_example_nanomethresult() bsseq <- methy_to_bsseq(nmr)
nmr <- load_example_nanomethresult() bsseq <- methy_to_bsseq(nmr)
Convert NanoMethResult object to edgeR methylation matrix
methy_to_edger(methy, regions = NULL, out_folder = tempdir(), verbose = TRUE)
methy_to_edger(methy, regions = NULL, out_folder = tempdir(), verbose = TRUE)
methy |
the NanoMethResult object or path to the methylation tabix file. |
regions |
the regions to calculate log-methylation ratios over. If left NULL, ratios will be calculated per site. |
out_folder |
the folder to store intermediate files. One file is created for each sample and contains columns "chr", "pos", "total" and "methylated". |
verbose |
TRUE if progress messages are to be printed |
a matrix compatible with the edgeR differential methylation pipeline
nmr <- load_example_nanomethresult() edger_mat <- methy_to_edger(nmr)
nmr <- load_example_nanomethresult() edger_mat <- methy_to_edger(nmr)
The modbam_to_tabix
function takes a ModBamResult object and
converts it into a tabix file format, which is efficient for indexing and
querying large datasets.
modbam_to_tabix(x, out_file, mod_code = NanoMethViz::mod_code(x))
modbam_to_tabix(x, out_file, mod_code = NanoMethViz::mod_code(x))
x |
the |
out_file |
the path of the output tabix. |
mod_code |
the modification code to use, defaults to 'm' for 5mC methylation. |
The possible tags for mod_code can be found at https://samtools.github.io/hts-specs/SAMtags.pdf under the 'Base modifications' section.
invisibly returns the name of the created tabix file.
out_file <- paste0(tempfile(), ".tsv.bgz") mbr <- ModBamResult( methy = ModBamFiles( samples = "sample1", paths = system.file("peg3.bam", package = "NanoMethViz") ), samples = data.frame( sample = "sample1", group = "group1" ) ) modbam_to_tabix(mbr, out_file)
out_file <- paste0(tempfile(), ".tsv.bgz") mbr <- ModBamResult( methy = ModBamFiles( samples = "sample1", paths = system.file("peg3.bam", package = "NanoMethViz") ), samples = data.frame( sample = "sample1", group = "group1" ) ) modbam_to_tabix(mbr, out_file)
This function creates a ModBamFiles object containing information about the samples and file paths. This constructor checks that the files are readable and have an index.
ModBamFiles(samples, paths) ## S4 method for signature 'ModBamFiles' show(object)
ModBamFiles(samples, paths) ## S4 method for signature 'ModBamFiles' show(object)
samples |
a character vector with the names of the samples. |
paths |
a character vector with the file paths for the BAM files. |
object |
a ModBamFiles object. |
A ModBamFiles object with the sample and path information.
This is a class for holding information about modbam files. It is a data.frame containing information about samples and paths to modbam files.
A ModBamResult object stores modbam data used for NanoMethViz visualisation. It contains stores a ModBamFiles object, sample information and optional exon information. The object is constructed using the ModBamResult() constructor function described in "Usage".
## S4 method for signature 'ModBamResult' methy(object) ## S4 replacement method for signature 'ModBamResult,ModBamFiles' methy(object) <- value ## S4 method for signature 'ModBamResult' samples(object) ## S4 replacement method for signature 'ModBamResult,data.frame' samples(object) <- value ## S4 method for signature 'ModBamResult' exons(object) ## S4 replacement method for signature 'ModBamResult,data.frame' exons(object) <- value ## S4 method for signature 'ModBamResult' mod_code(object) ## S4 replacement method for signature 'ModBamResult,character' mod_code(object) <- value ModBamResult(methy, samples, exons = NULL, mod_code = "m")
## S4 method for signature 'ModBamResult' methy(object) ## S4 replacement method for signature 'ModBamResult,ModBamFiles' methy(object) <- value ## S4 method for signature 'ModBamResult' samples(object) ## S4 replacement method for signature 'ModBamResult,data.frame' samples(object) <- value ## S4 method for signature 'ModBamResult' exons(object) ## S4 replacement method for signature 'ModBamResult,data.frame' exons(object) <- value ## S4 method for signature 'ModBamResult' mod_code(object) ## S4 replacement method for signature 'ModBamResult,character' mod_code(object) <- value ModBamResult(methy, samples, exons = NULL, mod_code = "m")
object |
the ModBamResult object. |
value |
the mod code. |
methy |
a ModBamFiles object. |
samples |
the data.frame of sample annotation containing at least columns sample and group. |
exons |
(optional) the data.frame of exon information containing at least columns gene_id, chr, strand, start, end, transcript_id and symbol. |
mod_code |
a character with the mod code of interest. Defaults to "m" for 5mC. See details for other options. |
The possible tags for mod_code can be found at https://samtools.github.io/hts-specs/SAMtags.pdf under the 'Base modifications' section.
a NanoMethResult object to be used with plotting functions
a ModBamFiles data.frame.
the sample annotation.
the exon annotation.
the mod code.
methy(ModBamResult)
: modbam information getter.
methy(object = ModBamResult) <- value
: modbam information setter.
samples(ModBamResult)
: sample annotation getter.
samples(object = ModBamResult) <- value
: sample annotation setter.
exons(ModBamResult)
: exon annotation getter.
exons(object = ModBamResult) <- value
: exon annotation setter.
mod_code(ModBamResult)
: mod code getter.
mod_code(object = ModBamResult) <- value
: mod code setter.
ModBamResult()
: Constructor
methy
a ModBamFiles data.frame specifying the samples and paths to bam files.
samples
the data.frame of sample annotation containing at least columns sample and group.
exons
the data.frame of exon information containing at least columns gene_id, chr, strand, start, end, transcript_id and symbol.
mod_code
the modification code of interest.
A NanoMethResult object stores data used for NanoMethViz visualisation. It contains stores a path to the methylation data, sample information and optional exon information. The object is constructed using the NanoMethResult() constructor function described in "Usage".
NanoMethResult(methy, samples, exons = NULL) ## S4 method for signature 'NanoMethResult' methy(object) ## S4 replacement method for signature 'NanoMethResult,ANY' methy(object) <- value ## S4 method for signature 'NanoMethResult' samples(object) ## S4 replacement method for signature 'NanoMethResult,data.frame' samples(object) <- value ## S4 method for signature 'NanoMethResult' exons(object) ## S4 replacement method for signature 'NanoMethResult,data.frame' exons(object) <- value
NanoMethResult(methy, samples, exons = NULL) ## S4 method for signature 'NanoMethResult' methy(object) ## S4 replacement method for signature 'NanoMethResult,ANY' methy(object) <- value ## S4 method for signature 'NanoMethResult' samples(object) ## S4 replacement method for signature 'NanoMethResult,data.frame' samples(object) <- value ## S4 method for signature 'NanoMethResult' exons(object) ## S4 replacement method for signature 'NanoMethResult,data.frame' exons(object) <- value
methy |
the path to the methylation tabix file. |
samples |
the data.frame of sample annotation containing at least columns sample and group. |
exons |
(optional) the data.frame of exon information containing at least columns gene_id, chr, strand, start, end, transcript_id and symbol. |
object |
the NanoMethResult object. |
value |
the exon annotation. |
a NanoMethResult object to be used with plotting functions
the path to the methylation data.
the sample annotation.
the exon annotation.
NanoMethResult()
: Constructor
methy(NanoMethResult)
: methylation data path getter.
methy(object = NanoMethResult) <- value
: methylation data path setter.
samples(NanoMethResult)
: sample annotation getter.
samples(object = NanoMethResult) <- value
: sample annotation setter.
exons(NanoMethResult)
: exon annotation getter.
exons(object = NanoMethResult) <- value
: exon annotation setter.
methy
the path to the methylation tabix file.
samples
the data.frame of sample annotation containing at least columns sample and group.
exons
the data.frame of exon information containing at least columns gene_id, chr, strand, start, end, transcript_id and symbol.
methy <- system.file(package = "NanoMethViz", "methy_subset.tsv.bgz") sample <- c( "B6Cast_Prom_1_bl6", "B6Cast_Prom_1_cast", "B6Cast_Prom_2_bl6", "B6Cast_Prom_2_cast", "B6Cast_Prom_3_bl6", "B6Cast_Prom_3_cast" ) group <- c( "bl6", "cast", "bl6", "cast", "bl6", "cast" ) sample_anno <- data.frame(sample, group, stringsAsFactors = FALSE) exon_tibble <- get_example_exons_mus_musculus() NanoMethResult(methy, sample_anno, exon_tibble) x <- load_example_nanomethresult() methy(x)
methy <- system.file(package = "NanoMethViz", "methy_subset.tsv.bgz") sample <- c( "B6Cast_Prom_1_bl6", "B6Cast_Prom_1_cast", "B6Cast_Prom_2_bl6", "B6Cast_Prom_2_cast", "B6Cast_Prom_3_bl6", "B6Cast_Prom_3_cast" ) group <- c( "bl6", "cast", "bl6", "cast", "bl6", "cast" ) sample_anno <- data.frame(sample, group, stringsAsFactors = FALSE) exon_tibble <- get_example_exons_mus_musculus() NanoMethResult(methy, sample_anno, exon_tibble) x <- load_example_nanomethresult() methy(x)
Plot gene aggregate plot
plot_agg_genes( x, genes = NULL, binary_threshold = 0.5, group_col = NULL, flank = 2000, stranded = TRUE, span = 0.05, palette = ggplot2::scale_colour_brewer(palette = "Set1") )
plot_agg_genes( x, genes = NULL, binary_threshold = 0.5, group_col = NULL, flank = 2000, stranded = TRUE, span = 0.05, palette = ggplot2::scale_colour_brewer(palette = "Set1") )
x |
the NanoMethResult object. |
genes |
a character vector of genes to include in aggregate plot, if NULL then all genes are used. |
binary_threshold |
the modification probability such that calls with modification probability above the threshold are considered methylated, and those with probability equal or below are considered unmethylated. |
group_col |
the column to group aggregated trends by. This column can be in from the regions table or samples(x). |
flank |
the number of flanking bases to add to each side of each region. |
stranded |
TRUE if negative strand features should have coordinates flipped to reflect features like transcription start sites. |
span |
the span for loess smoothing. |
palette |
the ggplot colour palette used for groups. |
a ggplot object containing the aggregate methylation trend of genes.
nmr <- load_example_nanomethresult() plot_agg_genes(nmr)
nmr <- load_example_nanomethresult() plot_agg_genes(nmr)
Plot aggregate regions
plot_agg_regions( x, regions, binary_threshold = 0.5, group_col = NULL, flank = 2000, stranded = TRUE, span = 0.05, palette = ggplot2::scale_colour_brewer(palette = "Set1") )
plot_agg_regions( x, regions, binary_threshold = 0.5, group_col = NULL, flank = 2000, stranded = TRUE, span = 0.05, palette = ggplot2::scale_colour_brewer(palette = "Set1") )
x |
the NanoMethResult object. |
regions |
a table of regions containing at least columns chr, strand, start and end. Any additional columns can be used for grouping. |
binary_threshold |
the modification probability such that calls with modification probability above the threshold are considered methylated, and those with probability equal or below are considered unmethylated. |
group_col |
the column to group aggregated trends by. This column can be in from the regions table or samples(x). |
flank |
the number of flanking bases to add to each side of each region. |
stranded |
TRUE if negative strand features should have coordinates flipped to reflect features like transcription start sites. |
span |
the span for loess smoothing. |
palette |
the ggplot colour palette used for groups. |
a ggplot object containing the aggregate methylation trend.
nmr <- load_example_nanomethresult() gene_anno <- exons_to_genes(NanoMethViz::exons(nmr)) plot_agg_regions(nmr, gene_anno) plot_agg_regions(nmr, gene_anno, group_col = "sample") plot_agg_regions(nmr, gene_anno, group_col = "group")
nmr <- load_example_nanomethresult() gene_anno <- exons_to_genes(NanoMethViz::exons(nmr)) plot_agg_regions(nmr, gene_anno) plot_agg_regions(nmr, gene_anno, group_col = "sample") plot_agg_regions(nmr, gene_anno, group_col = "group")
Plot the methylation of a gene symbol specified within the exon(x) slot.
plot_gene(x, gene, ...) ## S4 method for signature 'NanoMethResult,character' plot_gene( x, gene, window_prop = 0.3, anno_regions = NULL, binary_threshold = NULL, avg_method = c("mean", "median"), spaghetti = FALSE, heatmap = TRUE, heatmap_subsample = 50, smoothing_window = 2000, gene_anno = TRUE, palette = ggplot2::scale_colour_brewer(palette = "Set1"), line_size = 1, mod_scale = c(0, 1), span = NULL ) ## S4 method for signature 'ModBamResult,character' plot_gene( x, gene, window_prop = 0.3, anno_regions = NULL, binary_threshold = NULL, avg_method = c("mean", "median"), spaghetti = FALSE, heatmap = TRUE, heatmap_subsample = 50, smoothing_window = 2000, gene_anno = TRUE, palette = ggplot2::scale_colour_brewer(palette = "Set1"), line_size = 1, mod_scale = c(0, 1), span = NULL )
plot_gene(x, gene, ...) ## S4 method for signature 'NanoMethResult,character' plot_gene( x, gene, window_prop = 0.3, anno_regions = NULL, binary_threshold = NULL, avg_method = c("mean", "median"), spaghetti = FALSE, heatmap = TRUE, heatmap_subsample = 50, smoothing_window = 2000, gene_anno = TRUE, palette = ggplot2::scale_colour_brewer(palette = "Set1"), line_size = 1, mod_scale = c(0, 1), span = NULL ) ## S4 method for signature 'ModBamResult,character' plot_gene( x, gene, window_prop = 0.3, anno_regions = NULL, binary_threshold = NULL, avg_method = c("mean", "median"), spaghetti = FALSE, heatmap = TRUE, heatmap_subsample = 50, smoothing_window = 2000, gene_anno = TRUE, palette = ggplot2::scale_colour_brewer(palette = "Set1"), line_size = 1, mod_scale = c(0, 1), span = NULL )
x |
the NanoMethResult or ModBamResult object. |
gene |
the gene symbol for the gene to plot. |
... |
additional arguments. |
window_prop |
the size of flanking region to plot. Can be a vector of two values for left and right window size. Values indicate proportion of gene length. |
anno_regions |
the data.frame of regions to be annotated. |
binary_threshold |
the modification probability such that calls with modification probability above the threshold are set to 1 and probabilities equal to or below the threshold are set to 0. |
avg_method |
the average method for pre-smoothing at each genomic position. Data is pre-smoothed at each genomic position before the smoothed aggregate line is generated for performance reasons. The default is "mean" which corresponds to the average methylation fraction. The alternative "median" option is closer to an average within the more common methylation state. |
spaghetti |
whether or not individual reads should be shown. |
heatmap |
whether or not read-methylation heatmap should be shown. |
heatmap_subsample |
how many packed rows of reads to subsample to. |
smoothing_window |
the window size for smoothing the trend line. |
gene_anno |
whether to show gene annotation. |
palette |
the ggplot colour palette used for groups. |
line_size |
the size of the lines. |
mod_scale |
the scale range for modification probabilities. Default c(0, 1), set to "auto" for automatic limits. |
span |
DEPRECATED, use smoothing_window instead. Will be removed in next version. |
This function plots the methylation data for a given gene. Since V3.0.0 NanoMethViz has changed the smoothing strategy from a loess smoothing to a weighted moving average. This is because the loess smoothing was too computationally expensive for large datasets and had a span parameter that was difficult to tune. The new smoothing strategy is controlled by the smoothing_window argument.
a patchwork plot containing the methylation profile in the specified region.
plot_gene(x = ModBamResult, gene = character)
: S4 method for ModBamResult
nmr <- load_example_nanomethresult() plot_gene(nmr, "Peg3")
nmr <- load_example_nanomethresult() plot_gene(nmr, "Peg3")
Plot the methylation heatmap of a gene symbol specified within the exon(x) slot.
plot_gene_heatmap(x, gene, ...) ## S4 method for signature 'NanoMethResult,character' plot_gene_heatmap( x, gene, window_prop = 0.3, pos_style = c("to_scale", "compact"), subsample = 50 ) ## S4 method for signature 'ModBamResult,character' plot_gene_heatmap( x, gene, window_prop = 0.3, pos_style = c("to_scale", "compact"), subsample = 50 )
plot_gene_heatmap(x, gene, ...) ## S4 method for signature 'NanoMethResult,character' plot_gene_heatmap( x, gene, window_prop = 0.3, pos_style = c("to_scale", "compact"), subsample = 50 ) ## S4 method for signature 'ModBamResult,character' plot_gene_heatmap( x, gene, window_prop = 0.3, pos_style = c("to_scale", "compact"), subsample = 50 )
x |
the NanoMethResult or ModBamResult object. |
gene |
the gene symbol for the gene to plot. |
... |
additional arguments. |
window_prop |
the size of flanking region to plot. Can be a vector of two values for left and right window size. Values indicate proportion of gene length. |
pos_style |
the style for plotting the base positions along the x-axis. Defaults to "to_scale", plotting (potentially) overlapping squares along the genomic position to scale. The "compact" options plots only the positions with measured modification. |
subsample |
the number of read of packed read rows to subsample to. |
a ggplot object of the heatmap
a ggplot plot containing the heatmap.
nmr <- load_example_nanomethresult() plot_gene_heatmap(nmr, "Peg3")
nmr <- load_example_nanomethresult() plot_gene_heatmap(nmr, "Peg3")
Plot GRanges
plot_grange( x, grange, anno_regions = NULL, binary_threshold = NULL, avg_method = c("mean", "median"), spaghetti = FALSE, heatmap = TRUE, heatmap_subsample = 50, gene_anno = TRUE, smoothing_window = 2000, window_prop = 0, palette = ggplot2::scale_colour_brewer(palette = "Set1"), line_size = 1, span = NULL )
plot_grange( x, grange, anno_regions = NULL, binary_threshold = NULL, avg_method = c("mean", "median"), spaghetti = FALSE, heatmap = TRUE, heatmap_subsample = 50, gene_anno = TRUE, smoothing_window = 2000, window_prop = 0, palette = ggplot2::scale_colour_brewer(palette = "Set1"), line_size = 1, span = NULL )
x |
the NanoMethResult object. |
grange |
the GRanges object with one entry. |
anno_regions |
the data.frame of regions to be annotated. |
binary_threshold |
the modification probability such that calls with modification probability above the threshold are set to 1 and probabilities equal to or below the threshold are set to 0. |
avg_method |
the average method for pre-smoothing at each genomic position. Data is pre-smoothed at each genomic position before the smoothed aggregate line is generated for performance reasons. The default is "mean" which corresponds to the average methylation fraction. The alternative "median" option is closer to an average within the more common methylation state. |
spaghetti |
whether or not individual reads should be shown. |
heatmap |
whether or not read-methylation heatmap should be shown. |
heatmap_subsample |
how many packed rows of reads to subsample to. |
gene_anno |
whether to show gene annotation. |
smoothing_window |
the window size for smoothing the trend line. |
window_prop |
the size of flanking region to plot. Can be a vector of two values for left and right window size. Values indicate proportion of gene length. |
palette |
the ggplot colour palette used for groups. |
line_size |
the size of the lines. |
span |
DEPRECATED, use smoothing_window instead. Will be removed in next version. |
a patchwork plot containing the methylation profile in the specified region.
nmr <- load_example_nanomethresult() plot_grange(nmr, GenomicRanges::GRanges("chr7:6703892-6730431"))
nmr <- load_example_nanomethresult() plot_grange(nmr, GenomicRanges::GRanges("chr7:6703892-6730431"))
Plot GRanges heatmap
plot_grange_heatmap( x, grange, pos_style = c("to_scale", "compact"), window_prop = 0, subsample = 50 )
plot_grange_heatmap( x, grange, pos_style = c("to_scale", "compact"), window_prop = 0, subsample = 50 )
x |
the NanoMethResult object. |
grange |
the GRanges object with one entry. |
pos_style |
the style for plotting the base positions along the x-axis. Defaults to "to_scale", plotting (potentially) overlapping squares along the genomic position to scale. The "compact" options plots only the positions with measured modification. |
window_prop |
the size of flanking region to plot. Can be a vector of two values for left and right window size. Values indicate proportion of gene length. |
subsample |
the number of read of packed read rows to subsample to. |
a ggplot plot containing the heatmap.
nmr <- load_example_nanomethresult() plot_grange_heatmap(nmr, GenomicRanges::GRanges("chr7:6703892-6730431"))
nmr <- load_example_nanomethresult() plot_grange_heatmap(nmr, GenomicRanges::GRanges("chr7:6703892-6730431"))
Plot multi-dimensional scaling plot using algorithm of limma::plotMDS(). It is recommended this be done with the log-methylation-ratio matrix generated by bsseq_to_log_methy_ratio().
plot_mds( x, top = 500, plot_dims = c(1, 2), labels = colnames(x), groups = NULL, legend_name = "group" )
plot_mds( x, top = 500, plot_dims = c(1, 2), labels = colnames(x), groups = NULL, legend_name = "group" )
x |
the log-methylation-ratio matrix. |
top |
the number of top genes used to calculate pairwise distances. |
plot_dims |
the numeric vector of the two dimensions to be plotted. |
labels |
the character vector of labels for data points. By default uses column names of x, set to NULL to plot points. |
groups |
the character vector of groups the data points will be coloured by. Colour palette can be adjusted using scale_colour_*() functions from ggplot2. If groups is numeric, the points will be coloured by a continuous colour palette. By default, groups is NULL and the points will not be coloured. |
legend_name |
the name for the legend. |
ggplot object of the MDS plot.
nmr <- load_example_nanomethresult() bss <- methy_to_bsseq(nmr) lmr <- bsseq_to_log_methy_ratio(bss) plot_mds(lmr)
nmr <- load_example_nanomethresult() bss <- methy_to_bsseq(nmr) lmr <- bsseq_to_log_methy_ratio(bss) plot_mds(lmr)
Plot multi-dimensional scaling plot using algorithm of BiocSingular::runPCA(). It is recommended this be done with the log-methylation-ratio matrix generated by bsseq_to_log_methy_ratio().
plot_pca( x, plot_dims = c(1, 2), labels = colnames(x), groups = NULL, legend_name = "group" )
plot_pca( x, plot_dims = c(1, 2), labels = colnames(x), groups = NULL, legend_name = "group" )
x |
the log-methylation-ratio matrix. |
plot_dims |
the numeric vector of the two dimensions to be plotted. |
labels |
the character vector of labels for data points. By default uses column names of x, set to NULL to plot points. |
groups |
the character vector of groups the data points will be coloured by. |
legend_name |
the name for the legend. |
ggplot object of the MDS plot.
nmr <- load_example_nanomethresult() bss <- methy_to_bsseq(nmr) lmr <- bsseq_to_log_methy_ratio(bss) plot_pca(lmr)
nmr <- load_example_nanomethresult() bss <- methy_to_bsseq(nmr) lmr <- bsseq_to_log_methy_ratio(bss) plot_pca(lmr)
Plot the methylation of a genomic region.
plot_region(x, chr, start, end, ...) ## S4 method for signature 'NanoMethResult,character,numeric,numeric' plot_region( x, chr, start, end, anno_regions = NULL, binary_threshold = NULL, avg_method = c("mean", "median"), spaghetti = FALSE, heatmap = TRUE, heatmap_subsample = 50, smoothing_window = 2000, gene_anno = TRUE, window_prop = 0, palette = ggplot2::scale_colour_brewer(palette = "Set1"), line_size = 1, mod_scale = c(0, 1), span = NULL ) ## S4 method for signature 'ModBamResult,character,numeric,numeric' plot_region( x, chr, start, end, anno_regions = NULL, binary_threshold = NULL, avg_method = c("mean", "median"), spaghetti = FALSE, heatmap = TRUE, heatmap_subsample = 50, smoothing_window = 2000, gene_anno = TRUE, window_prop = 0, palette = ggplot2::scale_colour_brewer(palette = "Set1"), line_size = 1, mod_scale = c(0, 1), span = NULL ) ## S4 method for signature 'NanoMethResult,factor,numeric,numeric' plot_region( x, chr, start, end, anno_regions = NULL, binary_threshold = NULL, avg_method = c("mean", "median"), spaghetti = FALSE, heatmap = TRUE, heatmap_subsample = 50, smoothing_window = 2000, gene_anno = TRUE, window_prop = 0, palette = ggplot2::scale_colour_brewer(palette = "Set1"), line_size = 1, mod_scale = c(0, 1), span = NULL ) ## S4 method for signature 'ModBamResult,factor,numeric,numeric' plot_region( x, chr, start, end, anno_regions = NULL, binary_threshold = NULL, avg_method = c("mean", "median"), spaghetti = FALSE, heatmap = TRUE, heatmap_subsample = 50, smoothing_window = 2000, gene_anno = TRUE, window_prop = 0, palette = ggplot2::scale_colour_brewer(palette = "Set1"), line_size = 1, mod_scale = c(0, 1), span = NULL )
plot_region(x, chr, start, end, ...) ## S4 method for signature 'NanoMethResult,character,numeric,numeric' plot_region( x, chr, start, end, anno_regions = NULL, binary_threshold = NULL, avg_method = c("mean", "median"), spaghetti = FALSE, heatmap = TRUE, heatmap_subsample = 50, smoothing_window = 2000, gene_anno = TRUE, window_prop = 0, palette = ggplot2::scale_colour_brewer(palette = "Set1"), line_size = 1, mod_scale = c(0, 1), span = NULL ) ## S4 method for signature 'ModBamResult,character,numeric,numeric' plot_region( x, chr, start, end, anno_regions = NULL, binary_threshold = NULL, avg_method = c("mean", "median"), spaghetti = FALSE, heatmap = TRUE, heatmap_subsample = 50, smoothing_window = 2000, gene_anno = TRUE, window_prop = 0, palette = ggplot2::scale_colour_brewer(palette = "Set1"), line_size = 1, mod_scale = c(0, 1), span = NULL ) ## S4 method for signature 'NanoMethResult,factor,numeric,numeric' plot_region( x, chr, start, end, anno_regions = NULL, binary_threshold = NULL, avg_method = c("mean", "median"), spaghetti = FALSE, heatmap = TRUE, heatmap_subsample = 50, smoothing_window = 2000, gene_anno = TRUE, window_prop = 0, palette = ggplot2::scale_colour_brewer(palette = "Set1"), line_size = 1, mod_scale = c(0, 1), span = NULL ) ## S4 method for signature 'ModBamResult,factor,numeric,numeric' plot_region( x, chr, start, end, anno_regions = NULL, binary_threshold = NULL, avg_method = c("mean", "median"), spaghetti = FALSE, heatmap = TRUE, heatmap_subsample = 50, smoothing_window = 2000, gene_anno = TRUE, window_prop = 0, palette = ggplot2::scale_colour_brewer(palette = "Set1"), line_size = 1, mod_scale = c(0, 1), span = NULL )
x |
the NanoMethResult or ModBamResult object. |
chr |
the chromosome to plot. |
start |
the start of the plotting region. |
end |
the end of the plotting region. |
... |
additional arguments. |
anno_regions |
the data.frame of regions to be annotated. |
binary_threshold |
the modification probability such that calls with modification probability above the threshold are set to 1 and probabilities equal to or below the threshold are set to 0. |
avg_method |
the average method for pre-smoothing at each genomic position. Data is pre-smoothed at each genomic position before the smoothed aggregate line is generated for performance reasons. The default is "mean" which corresponds to the average methylation fraction. The alternative "median" option is closer to an average within the more common methylation state. |
spaghetti |
whether or not individual reads should be shown. |
heatmap |
whether or not read-methylation heatmap should be shown. |
heatmap_subsample |
how many packed rows of reads to subsample to. |
smoothing_window |
the window size for smoothing the trend line. |
gene_anno |
whether to show gene annotation. |
window_prop |
the size of flanking region to plot. Can be a vector of two values for left and right window size. Values indicate proportion of gene length. |
palette |
the ggplot colour palette used for groups. |
line_size |
the size of the lines. |
mod_scale |
the scale range for modification probabilities. Default c(0, 1), set to "auto" for automatic limits. |
span |
DEPRECATED, use smoothing_window instead. Will be removed in next version. |
This function plots the methylation data for a given region. The region is specified by chromosome, start and end positions. The basic plot contains a smoothed line plot of the methylation of each experimental group. Since V3.0.0 NanoMethViz has changed the smoothing strategy from a loess smoothing to a weighted moving average. This is because the loess smoothing was too computationally expensive for large datasets and had a span parameter that was difficult to tune. The new smoothing strategy is controlled by the smoothing_window argument.
a patchwork plot containing the methylation profile in the specified region.
nmr <- load_example_nanomethresult() plot_region(nmr, "chr7", 6703892, 6730431)
nmr <- load_example_nanomethresult() plot_region(nmr, "chr7", 6703892, 6730431)
Plot the methylation heatmap of a genomic region.
plot_region_heatmap(x, chr, start, end, ...) ## S4 method for signature 'NanoMethResult,character,numeric,numeric' plot_region_heatmap( x, chr, start, end, pos_style = c("to_scale", "compact"), window_prop = 0, subsample = 50 ) ## S4 method for signature 'ModBamResult,character,numeric,numeric' plot_region_heatmap( x, chr, start, end, pos_style = c("to_scale", "compact"), window_prop = 0, subsample = 50 ) ## S4 method for signature 'NanoMethResult,factor,numeric,numeric' plot_region_heatmap( x, chr, start, end, pos_style = c("to_scale", "compact"), window_prop = 0, subsample = 50 ) ## S4 method for signature 'ModBamResult,factor,numeric,numeric' plot_region_heatmap( x, chr, start, end, pos_style = c("to_scale", "compact"), window_prop = 0, subsample = 50 )
plot_region_heatmap(x, chr, start, end, ...) ## S4 method for signature 'NanoMethResult,character,numeric,numeric' plot_region_heatmap( x, chr, start, end, pos_style = c("to_scale", "compact"), window_prop = 0, subsample = 50 ) ## S4 method for signature 'ModBamResult,character,numeric,numeric' plot_region_heatmap( x, chr, start, end, pos_style = c("to_scale", "compact"), window_prop = 0, subsample = 50 ) ## S4 method for signature 'NanoMethResult,factor,numeric,numeric' plot_region_heatmap( x, chr, start, end, pos_style = c("to_scale", "compact"), window_prop = 0, subsample = 50 ) ## S4 method for signature 'ModBamResult,factor,numeric,numeric' plot_region_heatmap( x, chr, start, end, pos_style = c("to_scale", "compact"), window_prop = 0, subsample = 50 )
x |
the NanoMethResult or ModBamResult object. |
chr |
the chromosome to plot. |
start |
the start of the plotting region. |
end |
the end of the plotting region. |
... |
additional arguments. |
pos_style |
the style for plotting the base positions along the x-axis. Defaults to "to_scale", plotting (potentially) overlapping squares along the genomic position to scale. The "compact" options plots only the positions with measured modification. |
window_prop |
the size of flanking region to plot. Can be a vector of two values for left and right window size. Values indicate proportion of gene length. |
subsample |
the number of read of packed read rows to subsample to. |
a ggplot object of the heatmap.
a ggplot plot containing the heatmap.
nmr <- load_example_nanomethresult() plot_region_heatmap(nmr, "chr7", 6703892, 6730431)
nmr <- load_example_nanomethresult() plot_region_heatmap(nmr, "chr7", 6703892, 6730431)
This function plots a violin plot of the methylation proportion for each region in the regions table. The methylation proportion is calculated as the mean of the modification probability wihtin each region and the violin represents the . The regions are then grouped and coloured by the group_col column in the regions table or samples(x).
plot_violin( x, regions, binary_threshold = 0.5, group_col = "group", palette = ggplot2::scale_colour_brewer(palette = "Set1") )
plot_violin( x, regions, binary_threshold = 0.5, group_col = "group", palette = ggplot2::scale_colour_brewer(palette = "Set1") )
x |
the NanoMethResult object. |
regions |
a table of regions containing at least columns chr, strand, start and end. Any additional columns can be used for grouping. |
binary_threshold |
the modification probability such that calls with modification probability above the threshold are considered methylated, and those with probability equal or below are considered unmethylated. |
group_col |
the column to group aggregated trends by. This column can be in from the regions table or samples(x). |
palette |
the ggplot colour palette used for groups. |
a ggplot object containing the methylation violin plot.
nmr <- load_example_nanomethresult() gene_anno <- exons_to_genes(NanoMethViz::exons(nmr)) plot_violin(nmr, gene_anno) plot_violin(nmr, gene_anno, group_col = "sample")
nmr <- load_example_nanomethresult() gene_anno <- exons_to_genes(NanoMethViz::exons(nmr)) plot_violin(nmr, gene_anno) plot_violin(nmr, gene_anno, group_col = "sample")
Query a data.frame, NanoMethResult or ModBamResult for exon annotation.
query_exons_region(x, chr, start, end) query_exons_gene_id(x, gene_id) query_exons_symbol(x, symbol)
query_exons_region(x, chr, start, end) query_exons_gene_id(x, gene_id) query_exons_symbol(x, symbol)
x |
the object to query. |
chr |
the chromosome to query. |
start |
the start of the query region. |
end |
the end of the query region. |
gene_id |
the gene_id to query. |
symbol |
the gene_id to query. |
data.frame of queried exons.
query_exons_region()
: Query region.
query_exons_gene_id()
: Query gene ID.
query_exons_symbol()
: Query gene symbol.
Query methylation data
query_methy( x, chr, start, end, simplify = TRUE, force = FALSE, truncate = TRUE, site_filter = getOption("NanoMethViz.site_filter", 3L) )
query_methy( x, chr, start, end, simplify = TRUE, force = FALSE, truncate = TRUE, site_filter = getOption("NanoMethViz.site_filter", 3L) )
x |
the NanoMethResults object or a path to the methylation data (tabix-bgzipped). |
chr |
the vector of chromosomes |
start |
the vector of start positions |
end |
the vector of end positions |
simplify |
whether returned results should be row-concatenated |
force |
whether to force empty output when query region 'chr' does not appear in data. Without 'force', an empty result indicates that the requested 'chr' appears in the data but no data overlaps with requested region, and an invalid 'chr' will cause an error. |
truncate |
when querying from ModBamFiles, whether or not to truncate returned results to only those within the specified region. Otherwise methylation data for entire reads overlapping the region will be returned. |
site_filter |
the minimum amount of coverage to report a site. This
filters the queried data such that any site with less than the filter is
not returned. The default is 1, which means that all sites are returned.
This option can be set globally using the |
The argument site_filter
can be set globally using the options(site_filter = ...)
command. The same data entry may appear multiple times in the output
if it overlaps multiple regions.
A table containing the data within the queried regions. If simplify is TRUE (default) then returns all data in a single table, otherwise returns a list of tables where each table is the data from one region.
nmr <- load_example_nanomethresult() query_methy(methy(nmr), "chr7", 6703892, 6730431)
nmr <- load_example_nanomethresult() query_methy(methy(nmr), "chr7", 6703892, 6730431)
Calculate the average methylation probability and prevalence based on specified probability threshold.
region_methy_stats(nmr, regions, threshold = 0.5)
region_methy_stats(nmr, regions, threshold = 0.5)
nmr |
the NanoMethResult object. |
regions |
the table of regions to query statistics for. |
threshold |
the threshold to use for determining methylation calls for the calculation of prevalence. |
table of regions with additional columns of methylation summary statistics.
nmr <- load_example_nanomethresult() gene_anno <- exons_to_genes(NanoMethViz::exons(nmr)) region_methy_stats(nmr, gene_anno)
nmr <- load_example_nanomethresult() gene_anno <- exons_to_genes(NanoMethViz::exons(nmr)) region_methy_stats(nmr, gene_anno)