Title: | Methylation array and sequencing spatial analysis methods |
---|---|
Description: | De novo identification and extraction of differentially methylated regions (DMRs) from the human genome using Whole Genome Bisulfite Sequencing (WGBS) and Illumina Infinium Array (450K and EPIC) data. Provides functionality for filtering probes possibly confounded by SNPs and cross-hybridisation. Includes GRanges generation and plotting functions. |
Authors: | Tim Peters |
Maintainer: | Tim Peters <[email protected]> |
License: | file LICENSE |
Version: | 3.3.0 |
Built: | 2024-10-31 01:31:25 UTC |
Source: | https://github.com/bioc/DMRcate |
De novo identification and extraction of differentially
methylated regions (DMRs) in the human genome using Illumina array and bisulfite sequencing
data. DMRcate
extracts and annotates differentially methylated regions
(DMRs) using a kernel-smoothed estimate. Functions are
provided for filtering probes possibly confounded by SNPs and
cross-hybridisation. Includes GRanges generation and plotting functions.
Tim J. Peters <[email protected]>
Peters TJ, Buckley MJ, Statham A, Pidsley R, Samaras K, Lord RV, Clark SJ and Molloy PL. De novo identification of differentially methylated regions in the human genome. Epigenetics & Chromatin 2015, 8:6, doi:10.1186/1756-8935-8-6
Peters TJ, Buckley MJ, Chen Y, Smyth GK, Goodnow CC and Clark SJ. Calling differentially methylated regions from whole genome bisulphite sequencing with DMRcate. Nucleic Acids Research 2021, 49(19):e109. doi:10.1093/nar/gkab637.
library(ExperimentHub) library(limma) eh <- ExperimentHub() FlowSorted.Blood.EPIC <- eh[["EH1136"]] tcell <- FlowSorted.Blood.EPIC[,colData(FlowSorted.Blood.EPIC)$CD4T==100 | colData(FlowSorted.Blood.EPIC)$CD8T==100] detP <- minfi::detectionP(tcell) remove <- apply(detP, 1, function (x) any(x > 0.01)) tcell <- tcell[!remove,] tcell <- minfi::preprocessFunnorm(tcell) #Subset to chr2 only tcell <- tcell[seqnames(tcell) == "chr2",] tcellms <- minfi::getM(tcell) tcellms.noSNPs <- rmSNPandCH(tcellms, dist=2, mafcut=0.05) tcell$Replicate[tcell$Replicate==""] <- tcell$Sample_Name[tcell$Replicate==""] tcellms.noSNPs <- avearrays(tcellms.noSNPs, tcell$Replicate) tcell <- tcell[,!duplicated(tcell$Replicate)] tcell <- tcell[rownames(tcellms.noSNPs),] colnames(tcellms.noSNPs) <- colnames(tcell) assays(tcell)[["M"]] <- tcellms.noSNPs assays(tcell)[["Beta"]] <- minfi::ilogit2(tcellms.noSNPs) type <- factor(tcell$CellType) design <- model.matrix(~type) myannotation <- cpg.annotate("array", tcell, arraytype = "EPICv1", analysis.type="differential", design=design, coef=2) dmrcoutput <- dmrcate(myannotation, lambda=1000, C=2) results.ranges <- extractRanges(dmrcoutput, genome = "hg19") groups <- c(CD8T="magenta", CD4T="forestgreen") cols <- groups[as.character(type)] DMR.plot(ranges=results.ranges, dmr=1, CpGs=minfi::getBeta(tcell), what="Beta", arraytype = "EPICv1", phen.col=cols, genome="hg19")
library(ExperimentHub) library(limma) eh <- ExperimentHub() FlowSorted.Blood.EPIC <- eh[["EH1136"]] tcell <- FlowSorted.Blood.EPIC[,colData(FlowSorted.Blood.EPIC)$CD4T==100 | colData(FlowSorted.Blood.EPIC)$CD8T==100] detP <- minfi::detectionP(tcell) remove <- apply(detP, 1, function (x) any(x > 0.01)) tcell <- tcell[!remove,] tcell <- minfi::preprocessFunnorm(tcell) #Subset to chr2 only tcell <- tcell[seqnames(tcell) == "chr2",] tcellms <- minfi::getM(tcell) tcellms.noSNPs <- rmSNPandCH(tcellms, dist=2, mafcut=0.05) tcell$Replicate[tcell$Replicate==""] <- tcell$Sample_Name[tcell$Replicate==""] tcellms.noSNPs <- avearrays(tcellms.noSNPs, tcell$Replicate) tcell <- tcell[,!duplicated(tcell$Replicate)] tcell <- tcell[rownames(tcellms.noSNPs),] colnames(tcellms.noSNPs) <- colnames(tcell) assays(tcell)[["M"]] <- tcellms.noSNPs assays(tcell)[["Beta"]] <- minfi::ilogit2(tcellms.noSNPs) type <- factor(tcell$CellType) design <- model.matrix(~type) myannotation <- cpg.annotate("array", tcell, arraytype = "EPICv1", analysis.type="differential", design=design, coef=2) dmrcoutput <- dmrcate(myannotation, lambda=1000, C=2) results.ranges <- extractRanges(dmrcoutput, genome = "hg19") groups <- c(CD8T="magenta", CD4T="forestgreen") cols <- groups[as.character(type)] DMR.plot(ranges=results.ranges, dmr=1, CpGs=minfi::getBeta(tcell), what="Beta", arraytype = "EPICv1", phen.col=cols, genome="hg19")
Takes a CpGannotated-class
object and a specified FDR > 0 and < 1, and re-indexes the object in order to call DMRs at the specified rate.
changeFDR(annot, FDR)
changeFDR(annot, FDR)
annot |
A |
FDR |
The desired individual CpG FDR, which will index the rate at which DMRs are called. |
The number of CpG sites called as significant by this function will set the post-smoothing threshold for DMR constituents in dmrcate
.
A re-indexed CpGannotated-class
object.
Tim Peters <[email protected]>
library(GenomicRanges) stats <- rt(1000, 2) pvals <- pt(-abs(stats), 100) fdrs <- p.adjust(2*pvals, "BH") annotated <- GRanges(rep("chr1", 1000), IRanges(1:1000, 1:1000), stat = stats, rawpval = pvals, diff = 0, ind.fdr = fdrs, is.sig = fdrs < 0.05) names(annotated) <- paste0("CpG_", 1:1000) myannotation <- new("CpGannotated", ranges=annotated) changeFDR(myannotation, 0.1)
library(GenomicRanges) stats <- rt(1000, 2) pvals <- pt(-abs(stats), 100) fdrs <- p.adjust(2*pvals, "BH") annotated <- GRanges(rep("chr1", 1000), IRanges(1:1000, 1:1000), stat = stats, rawpval = pvals, diff = 0, ind.fdr = fdrs, is.sig = fdrs < 0.05) names(annotated) <- paste0("CpG_", 1:1000) myannotation <- new("CpGannotated", ranges=annotated) changeFDR(myannotation, 0.1)
Annotate a matrix/GenomicRatioSet representing EPICv2, EPICv1 or 450K data with probe weights and chromosomal position. Provides replicate filtering and remapping functions for EPICv2 probes.
cpg.annotate(datatype = c("array", "sequencing"), object, what = c("Beta", "M"), arraytype = c("EPICv2", "EPICv1", "EPIC", "450K"), epicv2Remap = TRUE, epicv2Filter = c("mean", "sensitivity", "precision", "random"), analysis.type = c("differential", "variability", "ANOVA", "diffVar"), design, contrasts = FALSE, cont.matrix = NULL, fdr = 0.05, coef, varFitcoef = NULL, topVarcoef = NULL, ...)
cpg.annotate(datatype = c("array", "sequencing"), object, what = c("Beta", "M"), arraytype = c("EPICv2", "EPICv1", "EPIC", "450K"), epicv2Remap = TRUE, epicv2Filter = c("mean", "sensitivity", "precision", "random"), analysis.type = c("differential", "variability", "ANOVA", "diffVar"), design, contrasts = FALSE, cont.matrix = NULL, fdr = 0.05, coef, varFitcoef = NULL, topVarcoef = NULL, ...)
datatype |
Character string representing the type of data being analysed. |
object |
Either: - A matrix of M-values, with unique Illumina probe IDs as rownames and unique sample IDs as column names or, - A GenomicRatioSet, appropriately annotated. |
what |
Does the data matrix contain Beta or M-values? Not needed if object is a GenomicRatioSet. |
arraytype |
Is the data matrix sourced from EPIC or 450K data? Not needed if object is a GenomicRatioSet. |
epicv2Remap |
Logical indicating whether to remap 11,878 cross-hybridising EPICv2 probes to their more likely CpG target (see Peters et al. 2024). |
epicv2Filter |
Strategy for filtering probe replicates that map to the same CpG site.
|
analysis.type |
|
design |
Study design matrix. Identical context to differential analysis
pipeline in |
contrasts |
Logical denoting whether a |
cont.matrix |
|
fdr |
FDR cutoff (Benjamini-Hochberg) for which CpG sites are individually called
as significant. Used to index default thresholding in dmrcate(). Highly
recommended as the primary thresholding parameter for calling DMRs.
Not used when |
coef |
The column index in |
varFitcoef |
The columns of the design matrix containing the comparisons to test for
differential variability. If left |
topVarcoef |
Column number or column name specifying which coefficient of the linear
model fit is of interest. It should be the same coefficient that
the differential variability testing was performed on. Default is last
column of fit object. Identical context to |
... |
Extra arguments passed to the |
Tim J. Peters <[email protected]>
Ritchie, M. E., Phipson, B., Wu, D., Hu, Y., Law, C. W., Shi, W., & Smyth, G. K. (2015). limma powers differential expression analyses for RNA-sequencing and microarray studies. Nucleic Acids Research, 43(7), e47.
Phipson, B., & Oshlack, A. (2014). DiffVar: a new method for detecting differential variability with application to methylation in cancer and aging. Genome Biol, 15(9), 465.
Peters T.J., Buckley M.J., Statham, A., Pidsley R., Samaras K., Lord R.V., Clark S.J. and Molloy P.L. De novo identification of differentially methylated regions in the human genome. Epigenetics & Chromatin 2015, 8:6, doi:10.1186/1756-8935-8-6.
Peters, T.J., Meyer, B., Ryan, L., Achinger-Kawecka, J., Song, J., Campbell, E.M., Qu, W., Nair, S., Loi-Luu, P., Stricker, P., Lim, E., Stirzaker, C., Clark, S.J. and Pidsley, R. (2024). Characterisation and reproducibility of the HumanMethylationEPIC v2.0 BeadChip for DNA methylation profiling. BMC Genomics, 25, 251. doi:10.1186/s12864-024-10027-5.
library(AnnotationHub) ah <- AnnotationHub() EPICv2manifest <- ah[["AH116484"]] object <- minfi::logit2(matrix(rbeta(10000, 3, 1), 1000, 10)) rownames(object) <- sample(rownames(EPICv2manifest), 1000) type <- rep(c("Ctrl", "Treat"), each=5) design <- model.matrix(~type) myannotation <- cpg.annotate("array", object, what = "M", arraytype = "EPICv2", analysis.type="differential", design=design, coef=2)
library(AnnotationHub) ah <- AnnotationHub() EPICv2manifest <- ah[["AH116484"]] object <- minfi::logit2(matrix(rbeta(10000, 3, 1), 1000, 10)) rownames(object) <- sample(rownames(EPICv2manifest), 1000) type <- rep(c("Ctrl", "Treat"), each=5) design <- model.matrix(~type) myannotation <- cpg.annotate("array", object, what = "M", arraytype = "EPICv2", analysis.type="differential", design=design, coef=2)
An S4 class that stores output from either cpg.annotate
or sequencing.annotate
.
ranges
:A GRanges object, containing CpG-level information to be passed to dmrcate
. Mcols of this object include:
- stat: Per-CpG test statistic; t if from limma
or Wald if from DSS
if using differential mode. Variance if using variability mode, sqrt(F) if using ANOVA mode, t if using diffVar mode.
- rawpval: Raw p-value from DMP fitting.
- diff: Methylation difference/coefficient. In beta space for cpg.annotate
output and output passed from DSS::DMLtest()
. In logit space for when a BSseq
object is passed from sequencing.annotate
. Not available for output passed from DSS::DMLtest.multiFactor()
. Not applicable in variability, ANOVA or diffVar modes.
- ind.fdr: False discovery rate as calculated on individual CpG sites.
- is.sig: Logical determining whether a CpG site is individually significant or not. Can be adjusted using changeFDR
.
betas
:A matrix of per-CpG beta values matching the annotated loci.
CpGannotated
objects have a show method that describes the data therein.
Tim Peters <[email protected]>
Plots an individual DMR (in context of possibly other DMRs) as found by dmrcate
.
Heatmaps are shown as well as proximal coding regions, smoothed methylation values
(with an option for smoothed group means) and chromosome ideogram.
DMR.plot(ranges, dmr, CpGs, what = c("Beta", "M"), arraytype = c("EPICv2", "EPICv1", "450K"), phen.col, genome = c("hg19", "hg38", "mm10"), labels = names(ranges), flank = 5000, heatmap = TRUE, extra.ranges = NULL, extra.title = names(extra.ranges))
DMR.plot(ranges, dmr, CpGs, what = c("Beta", "M"), arraytype = c("EPICv2", "EPICv1", "450K"), phen.col, genome = c("hg19", "hg38", "mm10"), labels = names(ranges), flank = 5000, heatmap = TRUE, extra.ranges = NULL, extra.title = names(extra.ranges))
ranges |
A GRanges object (ostensibly created by |
dmr |
Index of |
CpGs |
Either: - A CpGannotated object (preferred), - A matrix of beta values for plotting, with unique Illumina probe IDs as rownames, - A GenomicRatioSet, annotated with the appropriate array and data types, or - A BSseq object containing per-CpG methylation and coverage counts for the samples to be plotted |
what |
Does |
arraytype |
Is |
phen.col |
Vector of colors denoting phenotypes of all samples described in
|
genome |
Reference genome for annotating DMRs. Can be one of |
labels |
Vector of DMR names to be displayed. Defaults to |
flank |
Size, in base pairs, of the plotted region either side of the DMR. Cannot be less than 10bp or greater than 10kb. |
heatmap |
Should the heatmap be plotted? Default is |
extra.ranges |
Optional GRanges object. Will plot any range overlapping a DMR.. |
extra.title |
Vector of names for ranges from |
A plot to the current device.
Tim J. Peters <[email protected]>, Aaron Statham <[email protected]>, Braydon Meyer <[email protected]>
library(GenomicRanges) library(AnnotationHub) ah <- AnnotationHub() EPICv2manifest <- ah[["AH116484"]] dmrranges <- GRanges("chr2:86787856-86793994") probes <- EPICv2manifest$IlmnID[EPICv2manifest$CHR=="chr2" & EPICv2manifest$MAPINFO > 86770000 & EPICv2manifest$MAPINFO < 86810000] probes <- probes[order(EPICv2manifest[probes, "MAPINFO"])] object <- minfi::logit2(matrix(rbeta(length(probes)*10, 3, 1), length(probes), 10)) rownames(object) <- probes object[9:35, 6:10] <- minfi::logit2(matrix(rbeta(135, 1, 3), 27, 5)) cols <- c(rep("forestgreen", 5), rep("magenta", 5)) names(cols) <- rep(c("Ctrl", "Treat"), each=5) DMR.plot(dmrranges, dmr = 1, CpGs=object, what = "M", arraytype="EPICv2", phen.col=cols, genome="hg38")
library(GenomicRanges) library(AnnotationHub) ah <- AnnotationHub() EPICv2manifest <- ah[["AH116484"]] dmrranges <- GRanges("chr2:86787856-86793994") probes <- EPICv2manifest$IlmnID[EPICv2manifest$CHR=="chr2" & EPICv2manifest$MAPINFO > 86770000 & EPICv2manifest$MAPINFO < 86810000] probes <- probes[order(EPICv2manifest[probes, "MAPINFO"])] object <- minfi::logit2(matrix(rbeta(length(probes)*10, 3, 1), length(probes), 10)) rownames(object) <- probes object[9:35, 6:10] <- minfi::logit2(matrix(rbeta(135, 1, 3), 27, 5)) cols <- c(rep("forestgreen", 5), rep("magenta", 5)) names(cols) <- rep(c("Ctrl", "Treat"), each=5) DMR.plot(dmrranges, dmr = 1, CpGs=object, what = "M", arraytype="EPICv2", phen.col=cols, genome="hg38")
The main function of this package. Computes a kernel estimate against a null comparison to identify significantly differentially (or variable) methylated regions.
dmrcate(object, lambda = 1000, C=NULL, pcutoff = "fdr", consec = FALSE, conseclambda = 10, betacutoff = NULL, min.cpgs = 2 )
dmrcate(object, lambda = 1000, C=NULL, pcutoff = "fdr", consec = FALSE, conseclambda = 10, betacutoff = NULL, min.cpgs = 2 )
object |
A |
lambda |
Gaussian kernel bandwidth for smoothed-function estimation. Also informs DMR
bookend definition; gaps >= |
C |
Scaling factor for bandwidth. Gaussian kernel is calculated where
|
pcutoff |
Threshold to determine DMRs. Default implies indexing at the rate of individually significant CpGs and can be set on the |
consec |
Use |
conseclambda |
Bandwidth in CpGs (rather than nucleotides) to use when
|
betacutoff |
Optional filter; removes any region from the results where the absolute mean beta shift is less than the given value. Only available for Illumina array data and results produced from DSS::DMLtest(). |
min.cpgs |
Minimum number of consecutive CpGs constituting a DMR. |
The values of lambda
and C
should be chosen with care. For array data, we currently recommend that half a kilobase represent 1 standard deviation of support (lambda=1000
and C=2
). If lambda
is too small or C
too large then the kernel estimator will not have enough support to significantly differentiate the weighted estimate from the null distribution. If lambda
is too large then dmrcate
will report very long DMRs spanning multiple gene loci, and the large amount of support will likely give Type I errors. If you are concerned about Type I errors we highly
recommend using the default value of pcutoff
, although this will return no DMRs if no DM CpGs are returned by limma/DSS
either.
A DMResults object.
Tim J. Peters <[email protected]>, Mike J. Buckley <[email protected]>, Tim Triche Jr. <[email protected]>
Peters, T. J., Buckley, M.J., Chen, Y., Smyth, G.K., Goodnow, C. C. and Clark, S. J. (2021). Calling differentially methylated regions from whole genome bisulphite sequencing with DMRcate. Nucleic Acids Research, 49(19), e109.
Peters T.J., Buckley M.J., Statham, A., Pidsley R., Samaras K., Lord R.V., Clark S.J. and Molloy P.L. De novo identification of differentially methylated regions in the human genome. Epigenetics & Chromatin 2015, 8:6, doi:10.1186/1756-8935-8-6
Wand, M.P. & Jones, M.C. (1995) Kernel Smoothing. Chapman & Hall.
Duong T. (2013) Local significant differences from nonparametric two-sample tests. Journal of Nonparametric Statistics. 2013 25(3), 635-645.
library(AnnotationHub) library(GenomicRanges) ah <- AnnotationHub() EPICv2manifest <- ah[["AH116484"]] chr21probes <- rownames(EPICv2manifest)[EPICv2manifest$CHR=="chr21"] coords <- EPICv2manifest[chr21probes, "MAPINFO"] stats <- rt(length(chr21probes), 2) pvals <- pt(-abs(stats), 100) fdrs <- p.adjust(2*pvals, "BH") annotated <- GRanges(rep("chr21", length(stats)), IRanges(coords, coords), stat = stats, diff = 0, rawpval = pvals, ind.fdr = fdrs, is.sig = fdrs < 0.05) names(annotated) <- chr21probes myannotation <- new("CpGannotated", ranges=annotated) dmrcoutput <- dmrcate(myannotation, lambda=1000, C=2)
library(AnnotationHub) library(GenomicRanges) ah <- AnnotationHub() EPICv2manifest <- ah[["AH116484"]] chr21probes <- rownames(EPICv2manifest)[EPICv2manifest$CHR=="chr21"] coords <- EPICv2manifest[chr21probes, "MAPINFO"] stats <- rt(length(chr21probes), 2) pvals <- pt(-abs(stats), 100) fdrs <- p.adjust(2*pvals, "BH") annotated <- GRanges(rep("chr21", length(stats)), IRanges(coords, coords), stat = stats, diff = 0, rawpval = pvals, ind.fdr = fdrs, is.sig = fdrs < 0.05) names(annotated) <- chr21probes myannotation <- new("CpGannotated", ranges=annotated) dmrcoutput <- dmrcate(myannotation, lambda=1000, C=2)
An S4 class that stores DMR information as output from dmrcate
.
This class has eight slots, summarising DMR information to be passed to extractRanges
:
coord
:DMR coordinates in UCSC style.
no.cpgs
:Number of constituent CpG sites of DMR.
min_smoothed_fdr
:Minimum FDR of the smoothed estimate.
Stouffer
:Stouffer summary transform of the individual CpG p-values (FDR-corrected).
HMFDR
:Harmonic mean of the individual CpG p-values (FDR-corrected).
Fisher
:Fisher combined probability transform of the individual CpG p-values (FDR corrected).
maxdiff
:Maximum differential/coefficient within the DMR (logit2-space for sequencing mode).
meandiff
:Mean differential/coefficient across the DMR (logit2-space for sequencing mode).
DMResults
objects have a show
method describing the number of DMRs called.
Tim Peters <[email protected]>
dmrcate
output.
Takes a DMResults object and produces the corresponding GRanges object.
extractRanges(dmrcoutput, genome = c("hg19", "hg38", "mm10"))
extractRanges(dmrcoutput, genome = c("hg19", "hg38", "mm10"))
dmrcoutput |
A DMResults object. |
genome |
Reference genome for annotating DMRs with promoter overlaps.
Can be one of |
A GRanges object.
Tim Triche Jr. <[email protected]>, Tim Peters <[email protected]>
library(AnnotationHub) library(GenomicRanges) ah <- AnnotationHub() EPICv2manifest <- ah[["AH116484"]] chr21probes <- rownames(EPICv2manifest)[EPICv2manifest$CHR=="chr21"] coords <- EPICv2manifest[chr21probes, "MAPINFO"] stats <- rt(length(chr21probes), 2) pvals <- pt(-abs(stats), 100) fdrs <- p.adjust(2*pvals, "BH") annotated <- GRanges(rep("chr21", length(stats)), IRanges(coords, coords), stat = stats, rawpval = pvals, diff = 0, ind.fdr = fdrs, is.sig = fdrs < 0.05) names(annotated) <- chr21probes myannotation <- new("CpGannotated", ranges=annotated) dmrcoutput <- dmrcate(myannotation, lambda=1000, C=2) extractRanges(dmrcoutput, genome = "hg38")
library(AnnotationHub) library(GenomicRanges) ah <- AnnotationHub() EPICv2manifest <- ah[["AH116484"]] chr21probes <- rownames(EPICv2manifest)[EPICv2manifest$CHR=="chr21"] coords <- EPICv2manifest[chr21probes, "MAPINFO"] stats <- rt(length(chr21probes), 2) pvals <- pt(-abs(stats), 100) fdrs <- p.adjust(2*pvals, "BH") annotated <- GRanges(rep("chr21", length(stats)), IRanges(coords, coords), stat = stats, rawpval = pvals, diff = 0, ind.fdr = fdrs, is.sig = fdrs < 0.05) names(annotated) <- chr21probes myannotation <- new("CpGannotated", ranges=annotated) dmrcoutput <- dmrcate(myannotation, lambda=1000, C=2) extractRanges(dmrcoutput, genome = "hg38")
Retrieves the matrix of per-CpG beta values matching the annotated loci. For EPICv2 data, these betas have been (optionally) remapped to more likely target, and collapsed to one value per CpG. See cpg.annotate
for details on EPICv2 replicate filtering.
getCollapsedBetas(annot, ranges=NULL)
getCollapsedBetas(annot, ranges=NULL)
annot |
A |
ranges |
A GRanges object, over which the desired beta values are subsetted. |
Only applicable to CpGannotated
objects that have been created by cpg.annotate
; for those created by sequencing.annotate
please use bsseq::getMeth
.
A matrix of beta values, with individual CpG coordinates as rownames.
Tim Peters <[email protected]>
library(AnnotationHub) ah <- AnnotationHub() EPICv2manifest <- ah[["AH116484"]] object <- minfi::logit2(matrix(rbeta(10000, 3, 1), 1000, 10)) rownames(object) <- sample(rownames(EPICv2manifest), 1000) type <- rep(c("Ctrl", "Treat"), each=5) design <- model.matrix(~type) myannotation <- cpg.annotate("array", object, what = "M", arraytype = "EPICv2", analysis.type="differential", design=design, coef=2) getCollapsedBetas(myannotation)
library(AnnotationHub) ah <- AnnotationHub() EPICv2manifest <- ah[["AH116484"]] object <- minfi::logit2(matrix(rbeta(10000, 3, 1), 1000, 10)) rownames(object) <- sample(rownames(EPICv2manifest), 1000) type <- rep(c("Ctrl", "Treat"), each=5) design <- model.matrix(~type) myannotation <- cpg.annotate("array", object, what = "M", arraytype = "EPICv2", analysis.type="differential", design=design, coef=2) getCollapsedBetas(myannotation)
Filters a matrix of M-values (or beta values) by distance to SNP/variant. Also (optionally) removes cross-hybridising probes and sex-chromosome probes. Also removes “rs” and “nv” probes from the matrix.
rmSNPandCH(object, dist = 2, mafcut = 0.05, and = TRUE, rmcrosshyb = TRUE, rmXY = FALSE)
rmSNPandCH(object, dist = 2, mafcut = 0.05, and = TRUE, rmcrosshyb = TRUE, rmXY = FALSE)
object |
A matrix of M-values or beta values, with unique Illumina probe IDs as rownames. |
dist |
Maximum distance (from CpG to SNP/variant) of probes to be filtered out. See details for when Illumina occasionally lists a CpG-to-SNP distance as being < 0. |
mafcut |
Minimum minor allele frequency of probes to be filtered out. |
and |
If |
rmcrosshyb |
If |
rmXY |
If |
Probes in -1:dist
will be filtered out for any integer
specification of dist
. When a probe is listed as being “-1”
nucleotides from a SNP, that SNP is immediately adjacent to the end of
the probe, and is likely to
confound the measurement, in addition to those listed as 0, 1 or 2
nucleotides away. See vignette for further details.
A matrix, attenuated from object
, with rows corresponding to
probes matching user input filtered out.
Tim Peters <[email protected]>
Pidsley R, Zotenko E, Peters TJ, Lawrence MG, Risbridger GP, Molloy P, Van Dijk S, Muhlhausler B, Stirzaker C, Clark SJ. Critical evaluation of the Illumina MethylationEPIC BeadChip microarray for whole-genome DNA methylation profiling. Genome Biology. 2016 17(1), 208.
Chen YA, Lemire M, Choufani S, Butcher DT, Grafodatskaya D, Zanke BW, Gallinger S, Hudson TJ, Weksberg R. Discovery of cross-reactive probes and polymorphic CpGs in the Illumina Infinium HumanMethylation450 microarray. Epigenetics. 2013 Jan 11;8(2).
Peters, T.J., Meyer, B., Ryan, L., Achinger-Kawecka, J., Song, J., Campbell, E.M., Qu, W., Nair, S., Loi-Luu, P., Stricker, P., Lim, E., Stirzaker, C., Clark, S.J. and Pidsley, R. (2024). Characterisation and reproducibility of the HumanMethylationEPIC v2.0 BeadChip for DNA methylation profiling. BMC Genomics 25, 251. doi:10.1186/s12864-024-10027-5.
library(ExperimentHub) eh <- ExperimentHub() ALLbetas <- eh[["EH9451"]] ALLbetas <- ALLbetas[1:1000,] ALLMs <- minfi::logit2(ALLbetas) ALLMs.noSNPs <- rmSNPandCH(ALLMs, rmcrosshyb = FALSE)
library(ExperimentHub) eh <- ExperimentHub() ALLbetas <- eh[["EH9451"]] ALLbetas <- ALLbetas[1:1000,] ALLMs <- minfi::logit2(ALLbetas) ALLMs.noSNPs <- rmSNPandCH(ALLMs, rmcrosshyb = FALSE)
Either:
- Annotate a BSseq object with chromosome position and test statistic, or
- Parse output from DSS::DMLtest()
or DSS::DMLtest.multiFactor()
into a CpGannotated object.
sequencing.annotate(obj, methdesign, all.cov=FALSE, contrasts = FALSE, cont.matrix = NULL, fdr = 0.05, coef, ...)
sequencing.annotate(obj, methdesign, all.cov=FALSE, contrasts = FALSE, cont.matrix = NULL, fdr = 0.05, coef, ...)
obj |
A BSseq object or data.frame output from |
methdesign |
Methylation study design matrix describing samples and groups. Use of
edgeR::modelMatrixMeth() to make this matrix is highly recommended, since it
transforms a regular model.matrix (as one would construct for a microarray or
RNA-Seq experiment) into a “two-channel” matrix representing methylated and
unmethylated reads for each sample. Only applicable when |
all.cov |
If |
contrasts |
Logical denoting whether a |
cont.matrix |
|
fdr |
FDR cutoff (Benjamini-Hochberg) for which CpG sites are individually called
as significant. Used to index default thresholding in dmrcate(). Highly
recommended as the primary thresholding parameter for calling DMRs.
Only applicable when |
coef |
The column index in |
... |
Extra arguments passed to the |
Tim J. Peters <[email protected]>
Peters, T. J., Buckley, M.J., Chen, Y., Smyth, G.K., Goodnow, C. C. and Clark, S. J. (2021). Calling differentially methylated regions from whole genome bisulphite sequencing with DMRcate. Nucleic Acids Research, 49(19), e109.
Ritchie, M. E., Phipson, B., Wu, D., Hu, Y., Law, C. W., Shi, W., & Smyth, G. K. (2015). limma powers differential expression analyses for RNA-sequencing and microarray studies. Nucleic Acids Research, 43(7), e47.
library(ExperimentHub) library(SummarizedExperiment) library(bsseq) library(GenomeInfoDb) eh = ExperimentHub() bis_1072 <- eh[["EH1072"]] pData(bis_1072) <- data.frame(replicate=gsub(".*-", "", colnames(bis_1072)), tissue=substr(colnames(bis_1072), 1, nchar(colnames(bis_1072))-3), row.names=colnames(bis_1072)) colData(bis_1072)$tissue <- gsub("-", "_", colData(bis_1072)$tissue) bis_1072 <- renameSeqlevels(bis_1072, mapSeqlevels(seqlevels(bis_1072), "UCSC")) bis_1072 <- bis_1072[seqnames(bis_1072)=="chr19",] bis_1072 <- bis_1072[240201:240300,] tissue <- factor(pData(bis_1072)$tissue) tissue <- relevel(tissue, "Liver_Treg") design <- model.matrix(~tissue) colnames(design) <- gsub("tissue", "", colnames(design)) colnames(design)[1] <- "Intercept" rownames(design) <- colnames(bis_1072) methdesign <- edgeR::modelMatrixMeth(design) cont.mat <- limma::makeContrasts(treg_vs_tcon=Lymph_N_Treg-Lymph_N_Tcon, fat_vs_ln=Fat_Treg-Lymph_N_Treg, skin_vs_ln=Skin_Treg-Lymph_N_Treg, fat_vs_skin=Fat_Treg-Skin_Treg, levels=methdesign) seq_annot <- sequencing.annotate(bis_1072, methdesign, all.cov = TRUE, contrasts = TRUE, cont.matrix = cont.mat, coef = "treg_vs_tcon", fdr=0.05)
library(ExperimentHub) library(SummarizedExperiment) library(bsseq) library(GenomeInfoDb) eh = ExperimentHub() bis_1072 <- eh[["EH1072"]] pData(bis_1072) <- data.frame(replicate=gsub(".*-", "", colnames(bis_1072)), tissue=substr(colnames(bis_1072), 1, nchar(colnames(bis_1072))-3), row.names=colnames(bis_1072)) colData(bis_1072)$tissue <- gsub("-", "_", colData(bis_1072)$tissue) bis_1072 <- renameSeqlevels(bis_1072, mapSeqlevels(seqlevels(bis_1072), "UCSC")) bis_1072 <- bis_1072[seqnames(bis_1072)=="chr19",] bis_1072 <- bis_1072[240201:240300,] tissue <- factor(pData(bis_1072)$tissue) tissue <- relevel(tissue, "Liver_Treg") design <- model.matrix(~tissue) colnames(design) <- gsub("tissue", "", colnames(design)) colnames(design)[1] <- "Intercept" rownames(design) <- colnames(bis_1072) methdesign <- edgeR::modelMatrixMeth(design) cont.mat <- limma::makeContrasts(treg_vs_tcon=Lymph_N_Treg-Lymph_N_Tcon, fat_vs_ln=Fat_Treg-Lymph_N_Treg, skin_vs_ln=Skin_Treg-Lymph_N_Treg, fat_vs_skin=Fat_Treg-Skin_Treg, levels=methdesign) seq_annot <- sequencing.annotate(bis_1072, methdesign, all.cov = TRUE, contrasts = TRUE, cont.matrix = cont.mat, coef = "treg_vs_tcon", fdr=0.05)