| Title: | dnaEPICO: Analysis Pipeline for Illumina DNA Methylation Array Data |
|---|---|
| Description: | A modular and reproducible workflow for preprocessing and analysing Illumina DNA methylation array data from the EPICv2, EPIC, and 450K platforms. The package integrates quality control, probe filtering, cell-type deconvolution, phenotype preparation, generalised linear models, linear mixed-effects models, and automated report generation. It builds on established Bioconductor infrastructure and wraps commonly used tools including 'minfi', 'ENmix', and 'wateRmelon', with support for both local execution and high-performance computing workflows. |
| Authors: | Paul Ruiz [aut, cre] (ORCID: <https://orcid.org/0009-0007-6714-3566>), Divya Mehta [aut] (ORCID: <https://orcid.org/0000-0001-7971-7255>) |
| Maintainer: | Paul Ruiz <[email protected]> |
| License: | AGPL-3 + file LICENSE |
| Version: | 0.99.22 |
| Built: | 2026-05-30 09:37:40 UTC |
| Source: | https://github.com/bioc/dnaEPICO |
Fit linear models for each surrogate variable against Sentrix chip and
Sentrix position, perform backward elimination with MASS::dropterm(), and
return the in-memory analysis objects.
analyzeSvaEnmix(sva, RGSet, SentrixIDColumn = "Sentrix_ID", SentrixPositionColumn = "Sentrix_Position", verbose = FALSE, logs = FALSE, log_dir = NULL, log_file = "log_analyzeSvaEnmix.txt")
sva |
Numeric matrix of surrogate variables with samples in rows. |
RGSet |
An |
SentrixIDColumn |
Character. Name of the chip identifier column in
|
SentrixPositionColumn |
Character. Name of the chip position column in
|
verbose |
Logical. If |
logs |
Logical. If |
log_dir |
Character or |
log_file |
Character. File name used when |
A list with class "dnaEPICO_svaEnmix_analysis" containing the
aligned Sentrix factors, full and reduced linear models, and ANOVA tables.
ex <- dnaEPICO:::exampleSvaAnalysisStateDnaEpico()
analysis_data <- analyzeSvaEnmix(sva = ex$sva, RGSet = ex$RGSet, SentrixIDColumn = "Sentrix_ID", SentrixPositionColumn = "Sentrix_Position", verbose = FALSE, logs = FALSE)
analysis_data$K
Merge phenotype-specific CpG summary tables with probe annotation metadata and return a single annotated result table.
annotateMethylationGLM_T1Summaries(modelSummaries, annotationObject, annotationCols = c("Name", "chr", "pos", "UCSC_RefGene_Group", "UCSC_RefGene_Name", "Relation_to_Island", "GencodeV41_Group"), verbose = FALSE, logs = FALSE, log_dir = NULL, log_file = "log_methylationGLM_T1.txt")
modelSummaries |
Object returned by |
annotationObject |
Character package/object name, annotation data frame,
or annotation object understood by |
annotationCols |
Character vector or comma-separated string of annotation columns to append. |
verbose |
Logical. If |
logs |
Logical. If |
log_dir |
Character or |
log_file |
Character. File name used when |
A list with class "dnaEPICO_methylationGLM_T1_annotation"
containing the annotated summary table and any requested annotation columns
that were unavailable in the chosen annotation object.
ex <- dnaEPICO:::exampleMethylationGLMStateDnaEpico()
annotation_data <- annotateMethylationGLM_T1Summaries(modelSummaries = ex$modelSummaries, annotationObject = ex$annotationData, annotationCols = "Name,chr,pos", verbose = FALSE, logs = FALSE)
names(annotation_data)
Merge phenotype-specific longitudinal summary tables with probe annotation metadata and return a single annotated result table.
annotateMethylationGLMM_T1T2Summaries(modelSummaries, annotationObject, annotationCols = c("Name", "chr", "pos", "UCSC_RefGene_Group", "UCSC_RefGene_Name", "Relation_to_Island", "GencodeV41_Group"), verbose = FALSE, logs = FALSE, log_dir = NULL, log_file = "log_methylationGLMM_T1T2.txt")
modelSummaries |
Object returned by
|
annotationObject |
Character package/object name, annotation data frame,
or annotation object understood by |
annotationCols |
Character vector or comma-separated string of annotation columns to append. |
verbose |
Logical. If |
logs |
Logical. If |
log_dir |
Character or |
log_file |
Character. File name used when |
A list with class "dnaEPICO_methylationGLMM_T1T2_annotation"
containing the annotated summary table and any requested annotation columns
that were unavailable in the chosen annotation object.
ex <- dnaEPICO:::exampleMethylationGLMMStateDnaEpico()
annotation_data <- annotateMethylationGLMM_T1T2Summaries(modelSummaries = ex$modelSummaries, annotationObject = ex$annotationData, annotationCols = "Name,chr,pos", verbose = FALSE, logs = FALSE)
names(annotation_data)
Compute minfi QC metrics and detection P values, identify failed samples using the requested threshold, and return the assessment as a single object.
assessSamplesMinfiEwasWater(rawData, RGSet, qcCutoff = 10.5, detPtype = "m+u", detPThreshold = 0.05, verbose = FALSE, logs = FALSE, log_dir = NULL, log_file = "log_assessSamplesMinfiEwasWater.txt")
rawData |
Object returned by |
RGSet |
An |
qcCutoff |
Numeric. Cutoff passed to |
detPtype |
Character. Detection P-value mode passed to
|
detPThreshold |
Numeric. Samples with mean detection P value above this threshold are marked as failed. |
verbose |
Logical. If |
logs |
Logical. If |
log_dir |
Character or |
log_file |
Character. File name used when |
A list with class "dnaEPICO_minfiEwasWater_assessment" containing
the QC object, detection P matrix, mean detection P values, and failed
sample identifiers.
ex <- dnaEPICO:::exampleMinfiBaseDataDnaEpico()
raw_data <- buildRawMinfiEwasWater(ex$RGSet, verbose = FALSE, logs = FALSE)
assessment <- assessSamplesMinfiEwasWater(rawData = raw_data, RGSet = ex$RGSet, detPThreshold = 1, verbose = FALSE, logs = FALSE)
names(assessment)
Prepare the beta and phenotype tables commonly exported for Clock Foundation style downstream workflows, without writing them to disk.
buildClockFoundationInputsPreprocessingPheno(beta, pheno, SampleID = "Sample_Name", sexColumn = "Sex", verbose = FALSE, logs = FALSE, log_dir = NULL, log_file = "log_buildClockFoundationInputsPreprocessingPheno.txt")
beta |
Numeric matrix of beta values with probes in rows and samples in columns. |
pheno |
Phenotype data frame aligned with the beta matrix columns. |
SampleID |
Character. Name of the phenotype sample identifier column. |
sexColumn |
Character. Name of the phenotype sex column. |
verbose |
Logical. If |
logs |
Logical. If |
log_dir |
Character or |
log_file |
Character. File name used when |
A list with class "dnaEPICO_preprocessingPheno_clock"
containing betaCSV and phenoCF.
ex <- dnaEPICO:::examplePreprocessingPhenoStateDnaEpico()
clock_inputs <- buildClockFoundationInputsPreprocessingPheno(beta = ex$timepointData$data[["1"]]$beta, pheno = ex$timepointData$data[["1"]]$pheno, SampleID = "Sample_Name", sexColumn = "Sex", verbose = FALSE, logs = FALSE)
names(clock_inputs)
Create a raw MethylSet, RatioSet, and genome-mapped object from an
RGChannelSet, and return them together in a single structured object.
buildRawMinfiEwasWater(RGSet, verbose = FALSE, logs = FALSE, log_dir = NULL, log_file = "log_buildRawMinfiEwasWater.txt")
RGSet |
An |
verbose |
Logical. If |
logs |
Logical. If |
log_dir |
Character or |
log_file |
Character. File name used when |
A list with class "dnaEPICO_minfiEwasWater_raw" containing MSet,
RatioSet, and GSet.
ex <- dnaEPICO:::exampleMinfiBaseDataDnaEpico()
raw_data <- buildRawMinfiEwasWater(RGSet = ex$RGSet, verbose = FALSE, logs = FALSE)
names(raw_data)
Collect the raw coefficient tables for CpGs whose phenotype main effect or interaction p-value passes the requested threshold.
collectSignificantCpGsMethylationGLM_T1(modelResults, pvalThreshold = 0.05, interactionTerm = NULL, verbose = FALSE, logs = FALSE, log_dir = NULL, log_file = "log_methylationGLM_T1.txt")
modelResults |
Object returned by |
pvalThreshold |
Numeric. Threshold applied to phenotype main-effect or interaction p-values. |
interactionTerm |
Character or |
verbose |
Logical. If |
logs |
Logical. If |
log_dir |
Character or |
log_file |
Character. File name used when |
A list with class "dnaEPICO_methylationGLM_T1_significant_cpgs".
ex <- dnaEPICO:::exampleMethylationGLMStateDnaEpico()
significant_cpgs <- collectSignificantCpGsMethylationGLM_T1(modelResults = ex$modelResults, pvalThreshold = 1, verbose = FALSE, logs = FALSE)
names(significant_cpgs)
Collect raw coefficient tables for CpGs whose phenotype main effect or requested interaction p-value passes the chosen threshold.
collectSignificantInteractionsMethylationGLMM_T1T2(modelResults, pvalThreshold = 0.05, interactionTerm = NULL, verbose = FALSE, logs = FALSE, log_dir = NULL, log_file = "log_methylationGLMM_T1T2.txt")
modelResults |
Object returned by |
pvalThreshold |
Numeric. Threshold applied to the extracted phenotype or interaction p-values. |
interactionTerm |
Character or |
verbose |
Logical. If |
logs |
Logical. If |
log_dir |
Character or |
log_file |
Character. File name used when |
A list with class "dnaEPICO_methylationGLMM_T1T2_significant"
containing the retained coefficient tables for each phenotype.
ex <- dnaEPICO:::exampleMethylationGLMMStateDnaEpico()
significant_hits <- collectSignificantInteractionsMethylationGLMM_T1T2(modelResults = ex$modelResults, pvalThreshold = 1, verbose = FALSE, logs = FALSE)
names(significant_hits)
Combine selected timepoints that were already aligned by
splitTimepointsPreprocessingPheno() into the wide phenotype-plus-beta
objects used by downstream longitudinal models.
combineTimepointsPreprocessingPheno(timepointData, combineTimepoints = "1,2", verbose = FALSE, logs = FALSE, log_dir = NULL, log_file = "log_combineTimepointsPreprocessingPheno.txt")
timepointData |
Object returned by |
combineTimepoints |
Character vector or comma-separated string of timepoints to combine. |
verbose |
Logical. If |
logs |
Logical. If |
log_dir |
Character or |
log_file |
Character. File name used when |
A list with class "dnaEPICO_preprocessingPheno_combined"
containing the combined phenotype table, merged phenotype-plus-beta table,
selected timepoints, and output suffix.
ex <- dnaEPICO:::examplePreprocessingPhenoStateDnaEpico()
combined_data <- combineTimepointsPreprocessingPheno(timepointData = ex$timepointData, combineTimepoints = "1,2", verbose = FALSE, logs = FALSE)
combined_data$suffix
The dnaEPICO package provides a structured workflow for preprocessing and
analyzing Illumina DNA methylation array data, including quality control,
normalization, cell-type estimation, surrogate-variable analysis, phenotype
preparation, cross-sectional modeling, longitudinal mixed-effects modeling,
and PDF reporting.
The package supports two complementary usage styles:
interactive use, where functions return structured in-memory result objects for inspection and composition; and
file-based pipeline use, where the same functions can write logs,
plots, tables, and serialized objects when saveOutputs = TRUE.
The main high-level entry points are:
Maintainer: Paul Ruiz [email protected] (ORCID)
Authors:
Divya Mehta (ORCID)
Useful links:
Objects of class "dnaEPICO_dnamReport_prepared" are list-based results
returned by prepareDnamReportInputs(). They capture normalized report paths,
available figures, and logging metadata before rendering.
Requested output file name.
Normalized report output directory.
Normalized full path to the intended report output file.
Normalized directory used by the report template for copied figures.
Named list describing the available figures for each report section.
Character vector of expected figure directories that were not present at preparation time.
Resolved path to the optional log file, or NULL when
logging was disabled.
Objects of class "dnaEPICO_dnamReport_render" are list-based results
returned by renderDnamReport(). They describe whether a prepared report was
rendered, skipped, or failed.
The input object supplied to renderDnamReport().
Render status string such as "rendered", "skipped", or
"failed".
Normalized path to the rendered PDF file when rendering
succeeded, otherwise NULL.
Render error or skip message when available, otherwise
NULL.
Resolved path to the optional log file, or NULL when
logging was disabled.
Objects of class "dnaEPICO_dnamReport" are list-based results returned by
dnamReport(). They combine the prepared report inputs, render result, and
final status metadata into one convenience object.
Object returned by prepareDnamReportInputs().
Structured render metadata created by dnamReport().
Final status string such as "rendered", "skipped", or
"failed".
Path to docs/index.html.
Final render error or skip message when available,
otherwise NULL.
Resolved path to the optional log file, or NULL when
logging was disabled.
Objects of class "dnaEPICO_methylationGLM_T1" are list-based results
returned by methylationGLM_T1(). They collect the prepared analysis table,
fitted models, summaries, diagnostics, annotations, and optional saved files.
Object returned by prepareMethylationGLM_T1Data().
Object returned by
plotMethylationGLM_T1Distributions().
Object returned by fitMethylationGLM_T1Models().
Object returned by
summarizeMethylationGLM_T1Models().
Object returned by
collectSignificantCpGsMethylationGLM_T1().
Object returned by
plotMethylationGLM_T1Diagnostics().
Object returned by
annotateMethylationGLM_T1Summaries().
Object returned by writeMethylationGLM_T1Outputs() when
saveOutputs = TRUE, otherwise NULL.
Objects of class "dnaEPICO_methylationGLMM_T1T2" are list-based results
returned by methylationGLMM_T1T2(). They collect the prepared longitudinal
analysis table, fitted mixed models, summaries, diagnostics, annotations, and
optional saved files.
Object returned by prepareMethylationGLMM_T1T2Data().
Object returned by fitMethylationGLMM_T1T2Models().
Object returned by
summarizeMethylationGLMM_T1T2Models().
Object returned by
collectSignificantInteractionsMethylationGLMM_T1T2().
Object returned by
plotMethylationGLMM_T1T2Diagnostics().
Object returned by
annotateMethylationGLMM_T1T2Summaries().
Object returned by
writeMethylationGLMM_T1T2Outputs() when saveOutputs = TRUE, otherwise
NULL.
Objects of class "dnaEPICO_preprocessingMinfiEwasWater" are list-based
results returned by preprocessingMinfiEwasWater(). They are lightweight
S3-style containers rather than formal S4 classes.
Filtered phenotype table aligned to the retained samples.
Filtered RGChannelSet used in downstream preprocessing.
Object returned by buildRawMinfiEwasWater().
Object returned by assessSamplesMinfiEwasWater().
Object returned by predictSexMinfiEwasWater().
Object returned by normalizeMinfiEwasWater().
Object returned by filterProbesMinfiEwasWater().
Object returned by extractMetricsMinfiEwasWater().
Object returned by estimateLCMinfiEwasWater().
Resolved path to the optional log file, or NULL when
logging was disabled.
Objects of class "dnaEPICO_preprocessingPheno" are list-based results
returned by preprocessingPheno(). They describe the phenotype data,
methylation matrices, timepoint splits, longitudinal merges, and optional
exported files.
Phenotype table read from phenoFile.
Object returned by loadMetricsPreprocessingPheno().
Object returned by splitTimepointsPreprocessingPheno().
Object returned by combineTimepointsPreprocessingPheno().
Object returned by
buildClockFoundationInputsPreprocessingPheno().
Object returned by writePreprocessingPhenoOutputs()
when saveOutputs = TRUE, otherwise NULL.
Resolved path to the optional log file, or NULL when
logging was disabled.
Objects of class "dnaEPICO_svaEnmix" are list-based results returned by
svaEnmix(). They collect the loaded inputs, surrogate-variable results,
association-analysis summaries, and optional file outputs.
Phenotype table read from phenoFile after any optional row
subsetting.
Loaded RGChannelSet with sample names reset to match
targets.
Object returned by estimateSvaEnmixControls().
Phenotype table returned by mergeSvaTargetsEnmix()
after surrogate variables were appended.
Object returned by analyzeSvaEnmix().
Named list of TIFF output paths for the SVA figures when
saveOutputs = TRUE, otherwise NULL entries for plots not written.
Object returned by writeSvaEnmixOutputs() when
saveOutputs = TRUE, otherwise NULL.
Resolved path to the optional log file, or NULL when
logging was disabled.
Generate a DNA methylation dashboard report
dnamReport(
  outputDir = "reports",
  phenoTab = NULL,
  enmixTab = file.path("figures", "preprocessingMinfiEwasWater", "enmix"),
  qcTab = file.path("figures", "preprocessingMinfiEwasWater", "qc"),
  svaTab = file.path("figures", "svaEnmix"),
  metricTab = file.path("figures", "preprocessingMinfiEwasWater", "metrics"),
  glmTab = NULL,
  lmerTab = NULL,
  logTab = outputDir,
  verbose = FALSE,
  logs = FALSE,
  projectName = "dnaEPICO",
  detPPath = NULL,
  detPThreshold = 0.01,
  cpgDetectionPath = NULL,
  sampleDetectionPath = NULL,
  logoPath = system.file("extdata", "dnaEPICO.svg", package = "dnaEPICO"),
  imagePattern = "\\.(png|jpg|jpeg|gif|webp|svg|tif|tiff)$",
  recursive = TRUE
)
outputDir |
Character. Directory where the Quarto project is written. |
phenoTab |
Character or |
enmixTab |
Character. Directory containing ENmix quality-control figures. |
qcTab |
Character. Directory containing Quality Control figures. |
svaTab |
Character. Directory containing Batch Effect or SVA figures. |
metricTab |
Character. Directory containing Metrics figures. |
glmTab |
Character or |
lmerTab |
Character or |
logTab |
Character. Directory containing workflow logs shown in the Logs tab. |
verbose |
Logical. If |
logs |
Logical. If |
projectName |
Character. Name used for the generated Quarto project. |
detPPath |
Character or |
detPThreshold |
Numeric. Detection P-value threshold used when
summarising the |
cpgDetectionPath |
Character or |
sampleDetectionPath |
Character or |
logoPath |
Character. Path to the navbar logo. Defaults to the packaged
|
imagePattern |
Character. Regular expression used to identify image files inside the section directories. |
recursive |
Logical. If |
A list with class "dnaEPICO_dnamReport".
report_root <- file.path(tempdir(), "dnaepico-dnam-report")
pheno_file <- file.path(report_root, "data", "model1", "preprocessingMinfiEwasWater", "phenoLC.csv")
dir.create(dirname(pheno_file), recursive = TRUE, showWarnings = FALSE)
utils::write.csv(
  data.frame(UID = c("sample1", "sample2"), Timepoint = c(1, 2), Sex = c("F", "M")),
  pheno_file,
  row.names = FALSE
)
result <- dnamReport(
  outputDir = file.path(report_root, "reports", "model1"),
  phenoTab = pheno_file,
  enmixTab = file.path(report_root, "figures", "model1", "preprocessingMinfiEwasWater", "enmix"),
  qcTab = file.path(report_root, "figures", "model1", "preprocessingMinfiEwasWater", "qc"),
  svaTab = file.path(report_root, "figures", "model1", "svaEnmix"),
  metricTab = file.path(report_root, "figures", "model1", "preprocessingMinfiEwasWater", "metrics"),
  logTab = file.path(report_root, "logs", "model1")
)
result$status
Estimate cell-type proportions with the saliva reference panels bundled in
dnaEPICO. This function keeps the original estimateLC() interface used by
the package while using the internal reference files distributed in
inst/extdata.
estimateLC(meth, ref, constrained = FALSE)
meth |
Numeric matrix of beta values with CpGs in rows and samples in columns. Row names must contain probe identifiers compatible with the selected reference. |
ref |
Character. Reference panel name. Supported values are |
constrained |
Logical. If |
A data.table with one row per sample and one column per estimated
cell type.
Murat K, et al. Ewastools: Infinium Human Methylation BeadChip pipeline for population epigenetics integrated into Galaxy. GigaScience. 2020;9(5):giaa049.
Houseman EA, Accomando WP, Koestler DC, et al. DNA methylation arrays as surrogate measures of cell mixture distribution. BMC Bioinformatics. 2012;13:86.
Reinius LE, Acevedo N, Joerink M, et al. Differential DNA methylation in purified human blood cells: implications for cell lineage and studies on disease susceptibility. PLoS One. 2012;7(7):e41361.
Bakulski KM, Feinberg JI, Andrews SV, et al. DNA methylation of cord blood cell types: applications for mixed cell birth studies. Epigenetics. 2016;11(5):354-362.
de Goede OM, Razzaghian HR, Price EM, et al. Nucleated red blood cells impact DNA methylation and expression analyses of cord blood hematopoietic cells. Clinical Epigenetics. 2015;7:95.
Gervin K, Salas LA, Bakulski KM, et al. Cell type specific DNA methylation in cord blood: a 450K reference data set and cell count-based validation of estimated cell type composition. Epigenetics. 2016;11(9):690-698.
Gervin K, Salas LA, Bakulski KM, et al. Systematic evaluation and validation of reference and library selection methods for deconvolution of cord blood DNA methylation data. bioRxiv. 2019. doi:10.1101/570457.
Salas LA, Koestler DC, Butler RA, et al. An optimized library for reference-based deconvolution of whole-blood biospecimens assayed using the Illumina HumanMethylationEPIC BeadArray. Genome Biology. 2018;19:64.
Heiss JA, Just AC, Brenner H. Training a model for estimating leukocyte composition using whole-blood DNA methylation and cell counts as reference. Epigenomics. 2017;9(1):13-20.
Middleton LYM, Dou J, Mill J, et al. Saliva cell type DNA methylation reference panel for epidemiology studies in children. 2020.
ref_file <- system.file("extdata", "saliva.txt", package = "dnaEPICO")
ref_panel <- as.matrix(utils::read.table(ref_file))
meth <- ref_panel[1:20, , drop = FALSE]
colnames(meth) <- c("sample1", "sample2")
estimateLC(meth = meth, ref = "saliva", constrained = FALSE)
Estimate cell proportions from beta values using estimateLC() for saliva
reference panels or ENmix::estimateCellProp() for other supported
references, then merge the estimates into the phenotype table.
estimateLCMinfiEwasWater(beta, targets, lcRef = "salivaEPIC", phenoOrder = "Sample_Name;Timepoint;Sex;PredSex;Basename;Sentrix_ID;Sentrix_Position", constrained = FALSE, verbose = FALSE, logs = FALSE, log_dir = NULL, log_file = "log_estimateLCMinfiEwasWater.txt")
beta |
Numeric matrix of beta values with probes in rows and samples in columns. |
targets |
Phenotype data frame aligned with the columns of |
lcRef |
Character. Cell-composition reference. Internal saliva-based
references supported through |
phenoOrder |
Character vector or semicolon-separated string describing
the phenotype columns that should appear first in the merged |
constrained |
Logical. Passed to |
verbose |
Logical. If |
logs |
Logical. If |
log_dir |
Character or |
log_file |
Character. File name used when |
A list with class "dnaEPICO_minfiEwasWater_lc" containing the cell
proportion matrix, merged phenotype table, reference name, and method used.
ref_file <- system.file("extdata", "saliva.txt", package = "dnaEPICO")
beta <- as.matrix(utils::read.table(ref_file))[1:20, , drop = FALSE]
colnames(beta) <- c("sample1", "sample2")
targets <- data.frame(Sample_Name = colnames(beta), Timepoint = c("T1", "T2"), stringsAsFactors = FALSE)
lc_data <- estimateLCMinfiEwasWater(beta = beta, targets = targets, lcRef = "saliva", phenoOrder = "Sample_Name;Timepoint")
stopifnot(is.data.frame(lc_data$phenoLC))
Run ENmix::ctrlsva() on an RGChannelSet and return the surrogate variable
matrix as an in-memory object.
estimateSvaEnmixControls(RGSet, ctrlSvaPercVar = 0.9, ctrlSvaFlag = 1, verbose = FALSE, logs = FALSE, log_dir = NULL, log_file = "log_estimateSvaEnmixControls.txt")
RGSet |
An |
ctrlSvaPercVar |
Numeric. Proportion of variance explained by control
probes, passed to |
ctrlSvaFlag |
Integer. Control-probe flag passed to
|
verbose |
Logical. If |
logs |
Logical. If |
log_dir |
Character or |
log_file |
Character. File name used when |
A list with class "dnaEPICO_svaEnmix_sva" containing the surrogate
variable matrix and the parameters used to estimate it.
ex <- dnaEPICO:::exampleMinfiBaseDataDnaEpico()
sva_data <- estimateSvaEnmixControls(RGSet = ex$RGSet, ctrlSvaPercVar = 0.5, ctrlSvaFlag = 1, verbose = FALSE, logs = FALSE)
sva_data$K
Copies the example Makefile pipeline shipped with dnaEPICO to a user-specified directory for local execution or modification.
extractMake(destDir, overwrite = FALSE)
destDir |
Character. Destination directory where the Makefile will be copied. |
overwrite |
Logical. Whether to overwrite an existing |
Character scalar containing the path to the copied Makefile.
tmp <- file.path(tempdir(), "dnaEPICO-make-example")
dir.create(tmp, recursive = TRUE, showWarnings = FALSE)
makefile_path <- extractMake(destDir = tmp, overwrite = TRUE)
stopifnot(file.exists(makefile_path))
Extract beta, M, and copy-number matrices from a filtered object
extractMetricsMinfiEwasWater(filteredData, verbose = FALSE, logs = FALSE, log_dir = NULL, log_file = "log_extractMetricsMinfiEwasWater.txt")
filteredData |
Object returned by |
verbose |
Logical. If |
logs |
Logical. If |
log_dir |
Character or |
log_file |
Character. File name used when |
A list with class "dnaEPICO_minfiEwasWater_metrics" containing
beta, m, and cn.
ex <- dnaEPICO:::exampleMinfiMetricsStateDnaEpico()
metrics_data <- extractMetricsMinfiEwasWater( filteredData = ex$filteredData, verbose = FALSE, logs = FALSE )
names(metrics_data)
Apply detection P-value, chromosome, SNP, and cross-reactive probe filters to the primary normalized object and return the filtered result.
filterProbesMinfiEwasWater( normData, RGSet, pvalThreshold = 0.01, chrToRemove = "chrX,chrY", snpsToRemove = "SBE,CpG", mafThreshold = 0.1, crossReactivePath, detPtype = "m+u", verbose = FALSE, logs = FALSE, log_dir = NULL, log_file = "log_filterProbesMinfiEwasWater.txt" )
normData |
Object returned by |
RGSet |
Filtered |
pvalThreshold |
Numeric. Probes must have detection P values below this threshold in all samples to be retained. |
chrToRemove |
Character vector or comma-separated string of chromosome
names to remove, for example "chrX,chrY". |
snpsToRemove |
Character vector or comma-separated string of SNP probe
types to remove, for example "SBE,CpG". |
mafThreshold |
Numeric. Minor allele frequency threshold passed to
|
crossReactivePath |
Character. Path to a CSV file containing a |
detPtype |
Character. Detection P-value mode passed to
|
verbose |
Logical. If |
logs |
Logical. If |
log_dir |
Character or |
log_file |
Character. File name used when |
A list with class "dnaEPICO_minfiEwasWater_filter" containing the
filtered object and counts for each filtering stage.
ex <- dnaEPICO:::exampleMinfiWorkflowStateDnaEpico()
filtered_data <- filterProbesMinfiEwasWater( normData = ex$normData, RGSet = ex$sampleData$RGSet, pvalThreshold = 1, chrToRemove = "chrY", snpsToRemove = "SBE", mafThreshold = 1, crossReactivePath = ex$crossReactivePath, detPtype = "m+u", verbose = FALSE, logs = FALSE )
filtered_data$counts[["crossReactive"]]
Remove failed samples identified during sample assessment and return the
filtered RGChannelSet together with the aligned phenotype table.
filterSamplesMinfiEwasWater( RGSet, targets, failedSamples = character(0), SampleID = "Sample_Name", verbose = FALSE, logs = FALSE, log_dir = NULL, log_file = "log_filterSamplesMinfiEwasWater.txt" )
RGSet |
An |
targets |
Data frame containing phenotype information. |
failedSamples |
Character vector of sample identifiers to remove. |
SampleID |
Character. Name of the sample identifier column in |
verbose |
Logical. If |
logs |
Logical. If |
log_dir |
Character or |
log_file |
Character. File name used when |
A list with class "dnaEPICO_minfiEwasWater_samples" containing the
filtered RGSet, aligned phenotype table, and failed sample identifiers.
ex <- dnaEPICO:::exampleMinfiBaseDataDnaEpico()
filtered_samples <- filterSamplesMinfiEwasWater( RGSet = ex$RGSet, targets = ex$targets, failedSamples = ex$targets$Sample_Name[1], SampleID = "Sample_Name", verbose = FALSE, logs = FALSE )
nrow(filtered_samples$targets)
Fit one Gaussian GLM per CpG for each phenotype requested in the object
returned by prepareMethylationGLM_T1Data().
fitMethylationGLM_T1Models( preparedData, nCores = 1L, libPath = NULL, glmLibs = "glm2", verbose = FALSE, logs = FALSE, log_dir = NULL, log_file = "log_methylationGLM_T1.txt" )
preparedData |
Object returned by prepareMethylationGLM_T1Data(). |
nCores |
Integer. Number of worker processes to use. |
libPath |
Character vector or |
glmLibs |
Character vector or comma-separated string of package names to
check on worker processes. The default is "glm2". |
verbose |
Logical. If |
logs |
Logical. If |
log_dir |
Character or |
log_file |
Character. File name used when |
A list with class "dnaEPICO_methylationGLM_T1_models" containing
fitted model lists, model formulas, and counts of failed CpG fits.
ex <- dnaEPICO:::exampleMethylationGLMStateDnaEpico()
model_results <- fitMethylationGLM_T1Models( preparedData = ex$preparedData, nCores = 1, verbose = FALSE, logs = FALSE )
names(model_results$fits)
Fit one linear mixed-effects model per CpG for each phenotype requested in the
object returned by prepareMethylationGLMM_T1T2Data().
fitMethylationGLMM_T1T2Models( preparedData, nCores = 1L, libPath = NULL, lmeLibs = "lme4,lmerTest", verbose = FALSE, logs = FALSE, log_dir = NULL, log_file = "log_methylationGLMM_T1T2.txt" )
preparedData |
Object returned by prepareMethylationGLMM_T1T2Data(). |
nCores |
Integer. Number of worker processes to use. |
libPath |
Character vector or |
lmeLibs |
Character vector or comma-separated string of package names to
check on worker processes. The default is "lme4,lmerTest". |
verbose |
Logical. If |
logs |
Logical. If |
log_dir |
Character or |
log_file |
Character. File name used when |
A list with class "dnaEPICO_methylationGLMM_T1T2_models"
containing fitted model lists, model formulas, and counts of failed CpG
fits.
ex <- dnaEPICO:::exampleMethylationGLMMStateDnaEpico()
model_results <- fitMethylationGLMM_T1T2Models( preparedData = ex$preparedData, nCores = 1, verbose = FALSE, logs = FALSE )
names(model_results$fits)
Load the metric matrices generated by preprocessingMinfiEwasWater() and
return them as a single in-memory object for downstream phenotype alignment.
loadMetricsPreprocessingPheno( betaPath, mPath, cnPath, verbose = FALSE, logs = FALSE, log_dir = NULL, log_file = "log_loadMetricsPreprocessingPheno.txt" )
betaPath |
Character. Path to the saved beta-value object. |
mPath |
Character. Path to the saved M-value object. |
cnPath |
Character. Path to the saved copy-number object. |
verbose |
Logical. If |
logs |
Logical. If |
log_dir |
Character or |
log_file |
Character. File name used when |
A list with class "dnaEPICO_preprocessingPheno_metrics"
containing beta, m, and cn.
ex <- dnaEPICO:::examplePreprocessingPhenoStateDnaEpico()
metrics_data <- loadMetricsPreprocessingPheno( betaPath = ex$betaPath, mPath = ex$mPath, cnPath = ex$cnPath, verbose = FALSE, logs = FALSE )
names(metrics_data)
Merge the surrogate variable matrix back into the phenotype table while
preserving the original row order of targets.
mergeSvaTargetsEnmix( targets, sva, SampleID = "Sample_Name", verbose = FALSE, logs = FALSE, log_dir = NULL, log_file = "log_mergeSvaTargetsEnmix.txt" )
targets |
Phenotype data frame aligned with the samples in |
sva |
Numeric matrix of surrogate variables with samples in rows. |
SampleID |
Character. Name of the phenotype sample identifier column. |
verbose |
Logical. If |
logs |
Logical. If |
log_dir |
Character or |
log_file |
Character. File name used when |
A phenotype data frame with the surrogate variables appended.
ex <- dnaEPICO:::exampleSvaAnalysisStateDnaEpico()
merged_pheno <- mergeSvaTargetsEnmix( targets = ex$targets, sva = ex$sva, SampleID = "Sample_Name", verbose = FALSE, logs = FALSE )
colnames(merged_pheno)[seq_len(4)]
methylationGLM_T1() is the high-level coordinator for the one-timepoint GLM
stage of the dnaEPICO workflow. It prepares the merged phenotype-plus-beta
input, optionally creates exploratory plots, fits one Gaussian GLM per CpG for
each requested phenotype, extracts CpG-level summaries, optionally collects
significant CpG coefficient tables, generates diagnostic plots, annotates the
combined summary table, and optionally writes legacy-style outputs to disk.
The default behavior is now in-memory and quiet, which makes the function
easier to compose with other package functions and more aligned with typical
Bioconductor usage.
methylationGLM_T1(
  inputPheno = "rData/preprocessingPheno/mergeData/phenoBetaT1.RData",
  outputLogs = "logs",
  outputRData = "rData/methylationGLM_T1/models",
  outputPlots = "figures/methylationGLM_T1",
  phenotypes = c("DASS_Depression", "DASS_Anxiety", "DASS_Stress", "PCL5_TotalScore", "MHCSF_TotalScore", "BRS_TotalScore"),
  covariates = "Sex,Age,Ethnicity,TraumaDefinition,Leukocytes,Epithelial.cells",
  factorVars = "Sex,Ethnicity,TraumaDefinition",
  cpgPrefix = "cg",
  cpgLimit = NA,
  nCores = 32,
  plotWidth = 2000,
  plotHeight = 1000,
  plotDPI = 150,
  interactionTerm = NULL,
  libPath = NULL,
  glmLibs = "glm2",
  prsMap = NULL,
  summaryPval = NA,
  summaryResidualSD = TRUE,
  saveSignificantCpGs = FALSE,
  significantCpGDir = "preliminaryResults/cpgs/methylationGLM_T1",
  significantCpGPval = 0.05,
  saveTxtSummaries = TRUE,
  chunkSize = NULL,
  summaryTxtDir = "preliminaryResults/summary/methylationGLM_T1/glm",
  fdrThreshold = 0.05,
  padjmethod = "fdr",
  annotationPackage = "IlluminaHumanMethylationEPICv2anno.20a1.hg38",
  annotationCols = c("Name", "chr", "pos", "UCSC_RefGene_Group", "UCSC_RefGene_Name", "Relation_to_Island", "GencodeV41_Group"),
  annotatedGLMOut = "data/methylationGLM_T1",
  display = FALSE,
  verbose = FALSE,
  logs = FALSE,
  saveOutputs = FALSE
)
inputPheno |
Character. Path to the merged phenotype-plus-beta |
outputLogs |
Character. Directory used for optional log files. |
outputRData |
Character. Directory used for optional serialized model and summary outputs. |
outputPlots |
Character. Directory used for optional TIFF plots. |
phenotypes |
Character vector or comma-separated phenotype variables to model. |
covariates |
Character. Comma-separated covariate variables included in each GLM. |
factorVars |
Character. Comma-separated variables that should be treated as factors before modeling. |
cpgPrefix |
Character. Prefix used to identify methylation columns in the
merged phenotype-plus-beta input object. The default is "cg". |
cpgLimit |
Integer or |
nCores |
Integer. Number of worker processes to use while fitting models and extracting summaries. |
plotWidth |
Integer. TIFF width in pixels when plots are written to disk. |
plotHeight |
Integer. TIFF height in pixels when plots are written to disk. |
plotDPI |
Integer. TIFF resolution in DPI when plots are written to disk. |
interactionTerm |
Character or |
libPath |
Character vector or |
glmLibs |
Character. Comma-separated package names to validate on worker
processes. The default is "glm2". |
prsMap |
Character or |
summaryPval |
Numeric or |
summaryResidualSD |
Logical. If |
saveSignificantCpGs |
Logical. If |
significantCpGDir |
Character. Directory used for optional significant CpG coefficient tables. |
significantCpGPval |
Numeric. P-value threshold used to collect or write significant CpG coefficient tables. |
saveTxtSummaries |
Logical. If |
chunkSize |
Integer or |
summaryTxtDir |
Character. Directory used for optional tab-delimited GLM summary tables. |
fdrThreshold |
Numeric. False-discovery-rate threshold used to highlight CpGs in the residual-significance diagnostic plots. |
padjmethod |
Character. P-value adjustment method passed to
|
annotationPackage |
Character. Annotation package or object name passed
to |
annotationCols |
Character vector or comma-separated annotation columns to append to the combined GLM summary table. Available columns depend on the selected annotation package. |
annotatedGLMOut |
Character. Directory used for the optional annotated GLM summary XLSX workbook. |
display |
Logical. If |
verbose |
Logical. If |
logs |
Logical. If |
saveOutputs |
Logical. If |
A list with class "dnaEPICO_methylationGLM_T1".
Object returned by prepareMethylationGLM_T1Data()
containing the merged phenotype-plus-beta analysis table and modeling
metadata.
Object returned by
plotMethylationGLM_T1Distributions() describing any exploratory plots that
were generated or written.
Object returned by fitMethylationGLM_T1Models()
containing the per-phenotype CpG model fits.
Object returned by
summarizeMethylationGLM_T1Models() containing the combined CpG summary
tables used for reporting and annotation.
Object returned by
collectSignificantCpGsMethylationGLM_T1() containing optional
phenotype-specific significant-CpG tables.
Object returned by
plotMethylationGLM_T1Diagnostics() describing the diagnostic plot objects
and any written TIFF files.
Object returned by
annotateMethylationGLM_T1Summaries() containing the annotated combined
summary table.
Object returned by writeMethylationGLM_T1Outputs() when
saveOutputs = TRUE, otherwise NULL.
See dnaEPICO_methylationGLM_T1 for a class-level overview.
if (requireNamespace("IlluminaHumanMethylation450kanno.ilmn12.hg19", quietly = TRUE)) {
  tmp <- tempdir()
  toy_path <- file.path(tmp, "phenoBetaT1.RData")
  phenoBT1 <- data.frame(
    Sample_Name = c("S1", "S2", "S3", "S4"),
    status = factor(c("Case", "Case", "Control", "Control")),
    sex = factor(c("F", "M", "F", "M")),
    cg00000029 = c(0.20, 0.25, 0.22, 0.27),
    cg00000108 = c(0.60, 0.55, 0.52, 0.58),
    check.names = FALSE
  )
  save(phenoBT1, file = toy_path)
  result <- methylationGLM_T1(
    inputPheno = toy_path,
    phenotypes = "status",
    covariates = "sex",
    factorVars = "status,sex",
    cpgLimit = 2,
    nCores = 1,
    summaryPval = 1,
    annotationPackage = "IlluminaHumanMethylation450kanno.ilmn12.hg19",
    annotationCols = "Name,chr,pos",
    display = FALSE,
    verbose = FALSE,
    logs = FALSE,
    saveOutputs = FALSE
  )
  class(result)
}
methylationGLMM_T1T2() is the high-level coordinator for the longitudinal
linear mixed-effects stage of the dnaEPICO workflow. It prepares the merged
phenotype-plus-beta input, fits one mixed-effects model per CpG for each
requested phenotype, extracts phenotype-specific coefficient summaries,
optionally collects significant interaction tables, generates diagnostic plots,
annotates the combined summary table, and optionally writes legacy-style
outputs to disk. The default behavior is now in-memory and quiet, which makes
the function easier to compose with other package functions and more aligned
with typical Bioconductor usage.
methylationGLMM_T1T2(
  inputPheno = "rData/preprocessingPheno/mergeData/phenoBetaT1T2.RData",
  outputLogs = "logs",
  outputRData = "rData/methylationGLMM_T1T2/models",
  outputPlots = "figures/methylationGLMM_T1T2",
  personVar = "person",
  timeVar = "Timepoint",
  phenotypes = c("DASS_Depression", "DASS_Anxiety", "DASS_Stress", "PCL5_TotalScore", "MHCSF_TotalScore", "BRS_TotalScore"),
  covariates = "Sex,Age,Ethnicity,TraumaDefinition,Leukocytes,Epithelial.cells",
  factorVars = "Sex,Ethnicity,TraumaDefinition,Timepoint",
  lmeLibs = "lme4,lmerTest",
  prsMap = NULL,
  libPath = NULL,
  cpgPrefix = "cg",
  cpgLimit = NA,
  nCores = 32,
  summaryPval = NA,
  plotWidth = 2000,
  plotHeight = 1000,
  plotDPI = 150,
  interactionTerm = NULL,
  saveSignificantInteractions = TRUE,
  significantInteractionDir = "preliminaryResults/cpgs/methylationGLMM_T1T2",
  significantInteractionPval = 0.05,
  saveTxtSummaries = TRUE,
  chunkSize = NULL,
  summaryTxtDir = "preliminaryResults/summary/methylationGLMM_T1T2/lmer",
  fdrThreshold = 0.05,
  padjmethod = "fdr",
  annotationPackage = "IlluminaHumanMethylationEPICv2anno.20a1.hg38",
  annotationCols = c("Name", "chr", "pos", "UCSC_RefGene_Group", "UCSC_RefGene_Name", "Relation_to_Island", "GencodeV41_Group"),
  annotatedLMEOut = "data/methylationGLMM_T1T2",
  display = FALSE,
  verbose = FALSE,
  logs = FALSE,
  saveOutputs = FALSE
)
inputPheno |
Character. Path to the merged longitudinal phenotype-plus-beta
|
outputLogs |
Character. Directory used for optional log files. |
outputRData |
Character. Directory used for optional serialized mixed-model and summary outputs. |
outputPlots |
Character. Directory used for optional TIFF diagnostic plots. |
personVar |
Character. Subject identifier variable used for the random
intercept. When this column is missing, it is derived from |
timeVar |
Character. Name of the longitudinal time variable included as a fixed effect in every model. |
phenotypes |
Character vector or comma-separated phenotype variables to model. |
covariates |
Character. Comma-separated fixed-effect covariates included in every mixed model. |
factorVars |
Character. Comma-separated variables that should be coerced
to factors before modeling. This usually includes categorical covariates and
|
lmeLibs |
Character. Comma-separated package names to validate on worker
processes. The default is "lme4,lmerTest". |
prsMap |
Character or |
libPath |
Character vector or |
cpgPrefix |
Character. Prefix used to identify methylation columns in the
merged phenotype-plus-beta input object. The default is "cg". |
cpgLimit |
Integer or |
nCores |
Integer. Number of worker processes to use while fitting models and extracting summaries. |
summaryPval |
Numeric or |
plotWidth |
Integer. TIFF width in pixels when plots are written to disk. |
plotHeight |
Integer. TIFF height in pixels when plots are written to disk. |
plotDPI |
Integer. TIFF resolution in DPI when plots are written to disk. |
interactionTerm |
Character or |
saveSignificantInteractions |
Logical. If |
significantInteractionDir |
Character. Directory used for optional significant-interaction coefficient tables. |
significantInteractionPval |
Numeric. P-value threshold used to collect or write significant interaction coefficient tables. |
saveTxtSummaries |
Logical. If |
chunkSize |
Integer or |
summaryTxtDir |
Character. Directory used for optional tab-delimited LME summary tables. |
fdrThreshold |
Numeric. False-discovery-rate threshold used to highlight CpGs in the residual-significance diagnostic plots. |
padjmethod |
Character. P-value adjustment method passed to
|
annotationPackage |
Character. Annotation package or object name passed to
|
annotationCols |
Character vector or comma-separated annotation columns to append to the combined LME summary table. Available columns depend on the selected annotation package. |
annotatedLMEOut |
Character. Directory used for the optional annotated LME summary XLSX workbook. |
display |
Logical. If |
verbose |
Logical. If |
logs |
Logical. If |
saveOutputs |
Logical. If |
A list with class "dnaEPICO_methylationGLMM_T1T2".
Object returned by prepareMethylationGLMM_T1T2Data()
containing the merged longitudinal phenotype-plus-beta analysis table and
modeling metadata.
Object returned by fitMethylationGLMM_T1T2Models()
containing the per-phenotype CpG mixed-effects model fits.
Object returned by
summarizeMethylationGLMM_T1T2Models() containing the combined CpG summary
tables used for reporting and annotation.
Object returned by
collectSignificantInteractionsMethylationGLMM_T1T2() containing optional
phenotype-specific significant-interaction tables.
Object returned by
plotMethylationGLMM_T1T2Diagnostics() describing the diagnostic plot
objects and any written TIFF files.
Object returned by
annotateMethylationGLMM_T1T2Summaries() containing the annotated combined
summary table.
Object returned by
writeMethylationGLMM_T1T2Outputs() when saveOutputs = TRUE, otherwise
NULL.
See dnaEPICO_methylationGLMM_T1T2 for a class-level overview.
if (
  requireNamespace("IlluminaHumanMethylation450kanno.ilmn12.hg19", quietly = TRUE) &&
    requireNamespace("lmerTest", quietly = TRUE)
) {
  tmp <- tempdir()
  toy_path <- file.path(tmp, "phenoBetaT1T2.RData")
  phenoBT1T2 <- data.frame(
    SID = c("P1A", "P1B", "P2A", "P2B", "P3A", "P3B", "P4A", "P4B"),
    person = c(1, 1, 2, 2, 3, 3, 4, 4),
    Timepoint = factor(c("1", "2", "1", "2", "1", "2", "1", "2")),
    score = c(10, 12, 9, 11, 13, 14, 8, 9),
    sex = factor(c("F", "F", "M", "M", "F", "F", "M", "M")),
    cg00000029 = c(0.25, 0.27, 0.20, 0.22, 0.30, 0.31, 0.18, 0.20),
    cg00000108 = c(0.50, 0.53, 0.55, 0.57, 0.48, 0.49, 0.60, 0.61),
    check.names = FALSE
  )
  save(phenoBT1T2, file = toy_path)
  result <- methylationGLMM_T1T2(
    inputPheno = toy_path,
    phenotypes = "score",
    covariates = "sex",
    factorVars = "sex,Timepoint",
    cpgLimit = 2,
    nCores = 1,
    summaryPval = 1,
    annotationPackage = "IlluminaHumanMethylation450kanno.ilmn12.hg19",
    annotationCols = "Name,chr,pos",
    display = FALSE,
    verbose = FALSE,
    logs = FALSE,
    saveOutputs = FALSE
  )
  class(result)
}
Apply one or more supported normalization methods to a filtered RGSet and
return all normalized objects together in a single result object.
normalizeMinfiEwasWater( sampleData, sexColumn = "Sex", normMethods = "adjustedfunnorm", verbose = FALSE, logs = FALSE, log_dir = NULL, log_file = "log_normalizeMinfiEwasWater.txt" )
sampleData |
Object returned by |
sexColumn |
Character. Name of the phenotype column used as the optional sex covariate for normalization methods that support it. |
normMethods |
Character vector or semicolon-separated string of
normalization methods. Supported values are |
verbose |
Logical. If |
logs |
Logical. If |
log_dir |
Character or |
log_file |
Character. File name used when |
A list with class "dnaEPICO_minfiEwasWater_norm" containing the
requested normalized objects and the first method as primary.
ex <- dnaEPICO:::exampleMinfiBaseDataDnaEpico()
sample_data <- filterSamplesMinfiEwasWater( RGSet = ex$RGSet, targets = ex$targets, failedSamples = character(0), SampleID = "Sample_Name", verbose = FALSE, logs = FALSE )
norm_data <- normalizeMinfiEwasWater( sampleData = sample_data, sexColumn = "Sex", normMethods = "quantile", verbose = FALSE, logs = FALSE )
names(norm_data$normalized)
Draw either the minfi QC plot or the detection P-value plot from an
assessment object returned by assessSamplesMinfiEwasWater().
plotAssessmentMinfiEwasWater( assessment, plot = c("qc", "detection"), display = FALSE, file = NULL, width = 2000L, height = 1000L, res = 150L, verbose = FALSE, logs = FALSE, log_dir = NULL, log_file = "log_plotAssessmentMinfiEwasWater.txt" )
assessment |
Object returned by assessSamplesMinfiEwasWater(). |
plot |
Character. Plot type: "qc" for the minfi QC plot or "detection" for the detection P-value plot. |
display |
Logical. If |
file |
Character or |
width |
Integer. TIFF width in pixels when file is supplied. |
height |
Integer. TIFF height in pixels when file is supplied. |
res |
Integer. TIFF resolution in DPI when file is supplied. |
verbose |
Logical. If |
logs |
Logical. If |
log_dir |
Character or |
log_file |
Character. File name used when |
Invisibly returns the saved TIFF path when file is supplied,
otherwise NULL.
assessment <- list( meanDetP = c(S1 = 0.01, S2 = 0.02, S3 = 0.04), detPThreshold = 0.05 )
plotAssessmentMinfiEwasWater( assessment = assessment, plot = "detection", display = FALSE, verbose = FALSE, logs = FALSE )
Call ENmix::plotCtrl() for a supplied RGSet. This function only writes
files when output_dir is provided because ENmix::plotCtrl() produces JPG
files on disk rather than returning a plot object.
plotCtrlMinfiEwasWater( RGSet, output_dir = NULL, verbose = FALSE, logs = FALSE, log_dir = NULL, log_file = "log_plotCtrlMinfiEwasWater.txt" )
RGSet |
An |
output_dir |
Character or |
verbose |
Logical. If |
logs |
Logical. If |
log_dir |
Character or |
log_file |
Character. File name used when |
Invisibly returns output_dir.
ex <- dnaEPICO:::exampleMinfiBaseDataDnaEpico()
output_dir <- file.path(tempdir(), "enmix-control-plots")
plotCtrlMinfiEwasWater( RGSet = ex$RGSet, output_dir = output_dir, verbose = FALSE, logs = FALSE )
dir.exists(output_dir)
Create Q-Q and residual-diagnostic plots from the CpG summary tables returned
by summarizeMethylationGLM_T1Models().
plotMethylationGLM_T1Diagnostics( modelSummaries, preparedData, fdrThreshold = 0.05, padjmethod = "fdr", outputDir = NULL, plotWidth = 2000L, plotHeight = 1000L, plotDPI = 150L, display = FALSE, verbose = FALSE, logs = FALSE, log_dir = NULL, log_file = "log_methylationGLM_T1.txt" )
modelSummaries |
Object returned by summarizeMethylationGLM_T1Models(). |
preparedData |
Object returned by prepareMethylationGLM_T1Data(). |
fdrThreshold |
Numeric. False-discovery-rate threshold used to highlight CpGs in the diagnostic plots. |
padjmethod |
Character. P-value adjustment method passed to
|
outputDir |
Character or |
plotWidth |
Integer. TIFF width in pixels when plots are written to disk. |
plotHeight |
Integer. TIFF height in pixels when plots are written to disk. |
plotDPI |
Integer. TIFF resolution in DPI when plots are written to disk. |
display |
Logical. If |
verbose |
Logical. If |
logs |
Logical. If |
log_dir |
Character or |
log_file |
Character. File name used when |
A list with class "dnaEPICO_methylationGLM_T1_diagnostic_plots"
containing the generated ggplot2 objects, genomic inflation factors, and
any saved TIFF file paths.
ex <- dnaEPICO:::exampleMethylationGLMStateDnaEpico()
diagnostic_plots <- plotMethylationGLM_T1Diagnostics( modelSummaries = ex$modelSummaries, preparedData = ex$preparedData, display = FALSE, verbose = FALSE, logs = FALSE )
names(diagnostic_plots$plots)
Create phenotype, factor-variable, and numeric-covariate distribution plots
from the object returned by prepareMethylationGLM_T1Data().
plotMethylationGLM_T1Distributions(
  preparedData,
  plotWidth = 2000L,
  plotHeight = 1000L,
  plotDPI = 150L,
  outputDir = NULL,
  display = FALSE,
  verbose = FALSE,
  logs = FALSE,
  log_dir = NULL,
  log_file = "log_methylationGLM_T1.txt"
)
preparedData |
Object returned by |
plotWidth |
Integer. TIFF width in pixels when plots are written to disk. |
plotHeight |
Integer. TIFF height in pixels when plots are written to disk. |
plotDPI |
Integer. TIFF resolution in DPI when plots are written to disk. |
outputDir |
Character or |
display |
Logical. If |
verbose |
Logical. If |
logs |
Logical. If |
log_dir |
Character or |
log_file |
Character. File name used when |
A list with class "dnaEPICO_methylationGLM_T1_distribution_plots"
containing the generated ggplot2 objects and any saved TIFF file paths.
ex <- dnaEPICO:::exampleMethylationGLMStateDnaEpico()
distribution_plots <- plotMethylationGLM_T1Distributions(
  preparedData = ex$preparedData,
  display = FALSE,
  verbose = FALSE,
  logs = FALSE
)
names(distribution_plots)
Create Q-Q and standard-error diagnostic plots from the mixed-effects summary
tables returned by summarizeMethylationGLMM_T1T2Models().
plotMethylationGLMM_T1T2Diagnostics(
  modelSummaries,
  preparedData,
  fdrThreshold = 0.05,
  padjmethod = "fdr",
  outputDir = NULL,
  plotWidth = 2000L,
  plotHeight = 1000L,
  plotDPI = 150L,
  display = FALSE,
  verbose = FALSE,
  logs = FALSE,
  log_dir = NULL,
  log_file = "log_methylationGLMM_T1T2.txt"
)
modelSummaries |
Object returned by |
preparedData |
Object returned by |
fdrThreshold |
Numeric. False-discovery-rate threshold used to highlight CpGs in the diagnostic plots. |
padjmethod |
Character. P-value adjustment method passed to
|
outputDir |
Character or |
plotWidth |
Integer. TIFF width in pixels when plots are written to disk. |
plotHeight |
Integer. TIFF height in pixels when plots are written to disk. |
plotDPI |
Integer. TIFF resolution in DPI when plots are written to disk. |
display |
Logical. If |
verbose |
Logical. If |
logs |
Logical. If |
log_dir |
Character or |
log_file |
Character. File name used when |
A list with class "dnaEPICO_methylationGLMM_T1T2_diagnostic_plots"
containing the generated ggplot2 objects, genomic inflation factors, and
any saved TIFF file paths.
ex <- dnaEPICO:::exampleMethylationGLMMStateDnaEpico()
diagnostic_plots <- plotMethylationGLMM_T1T2Diagnostics(
  modelSummaries = ex$modelSummaries,
  preparedData = ex$preparedData,
  display = FALSE,
  verbose = FALSE,
  logs = FALSE
)
names(diagnostic_plots$plots)
Plot multidimensional scaling or density summaries from final metrics
plotMetricsMinfiEwasWater(
  metricsData,
  targets,
  plot = c("mds", "density"),
  plotGroupVar = "Sex",
  sexColumn = "Sex",
  display = FALSE,
  file = NULL,
  width = 2000L,
  height = 1000L,
  res = 150L,
  verbose = FALSE,
  logs = FALSE,
  log_dir = NULL,
  log_file = "log_plotMetricsMinfiEwasWater.txt"
)
metricsData |
Object returned by |
targets |
Filtered phenotype data aligned with |
plot |
Character. Plot type: |
plotGroupVar |
Character. Phenotype column used for the main grouping. |
sexColumn |
Character. Phenotype column used for the sex grouping in the MDS plot. |
display |
Logical. If |
file |
Character or |
width |
Integer. TIFF width in pixels when |
height |
Integer. TIFF height in pixels when |
res |
Integer. TIFF resolution in DPI when |
verbose |
Logical. If |
logs |
Logical. If |
log_dir |
Character or |
log_file |
Character. File name used when |
Invisibly returns the saved TIFF path when file is supplied,
otherwise NULL.
ex <- dnaEPICO:::exampleMinfiMetricsStateDnaEpico()
plotMetricsMinfiEwasWater(
  metricsData = ex$metricsData,
  targets = ex$targets,
  plot = "density",
  plotGroupVar = "Sex",
  sexColumn = "Sex",
  display = FALSE,
  verbose = FALSE,
  logs = FALSE
)
Draw the density comparison plot used to inspect raw versus normalized data.
plotNormalizationMinfiEwasWater(
  RGSet,
  normData,
  targets,
  sexColumn = "Sex",
  display = FALSE,
  file = NULL,
  width = 2000L,
  height = 1000L,
  res = 150L,
  verbose = FALSE,
  logs = FALSE,
  log_dir = NULL,
  log_file = "log_plotNormalizationMinfiEwasWater.txt"
)
RGSet |
An |
normData |
Object returned by |
targets |
Filtered phenotype data aligned with |
sexColumn |
Character. Name of the phenotype column used to colour the density curves. |
display |
Logical. If |
file |
Character or |
width |
Integer. TIFF width in pixels when |
height |
Integer. TIFF height in pixels when |
res |
Integer. TIFF resolution in DPI when |
verbose |
Logical. If |
logs |
Logical. If |
log_dir |
Character or |
log_file |
Character. File name used when |
Invisibly returns the saved TIFF path when file is supplied,
otherwise NULL.
ex <- dnaEPICO:::exampleMinfiMetricsStateDnaEpico()
plotNormalizationMinfiEwasWater(
  RGSet = ex$beta,
  normData = ex$normData,
  targets = ex$targets,
  sexColumn = "Sex",
  display = FALSE,
  verbose = FALSE,
  logs = FALSE
)
Draw the pre-normalization beta density plot from a raw minfi object and a grouping variable in the phenotype table.
plotRawDensityMinfiEwasWater(
  rawData,
  targets,
  plotGroupVar = "Sex",
  display = FALSE,
  file = NULL,
  width = 2000L,
  height = 1000L,
  res = 150L,
  verbose = FALSE,
  logs = FALSE,
  log_dir = NULL,
  log_file = "log_plotRawDensityMinfiEwasWater.txt"
)
rawData |
Object returned by |
targets |
Filtered phenotype data aligned with |
plotGroupVar |
Character. Phenotype column used to group samples in the density plot. |
display |
Logical. If |
file |
Character or |
width |
Integer. TIFF width in pixels when |
height |
Integer. TIFF height in pixels when |
res |
Integer. TIFF resolution in DPI when |
verbose |
Logical. If |
logs |
Logical. If |
log_dir |
Character or |
log_file |
Character. File name used when |
Invisibly returns the saved TIFF path when file is supplied,
otherwise NULL.
ex <- dnaEPICO:::exampleMinfiMetricsStateDnaEpico()
plotRawDensityMinfiEwasWater(
  rawData = ex$rawData,
  targets = ex$targets,
  plotGroupVar = "Sex",
  display = FALSE,
  verbose = FALSE,
  logs = FALSE
)
Plot predicted or clinical sex from the object returned by predictSexMinfiEwasWater().
plotSexMinfiEwasWater(
  sexData,
  type = c("predicted", "clinical"),
  display = FALSE,
  file = NULL,
  width = 2000L,
  height = 1000L,
  res = 70L,
  verbose = FALSE,
  logs = FALSE,
  log_dir = NULL,
  log_file = "log_plotSexMinfiEwasWater.txt"
)
sexData |
Object returned by |
type |
Character. Plot type: |
display |
Logical. If |
file |
Character or |
width |
Integer. TIFF width in pixels when |
height |
Integer. TIFF height in pixels when |
res |
Integer. TIFF resolution in DPI when |
verbose |
Logical. If |
logs |
Logical. If |
log_dir |
Character or |
log_file |
Character. File name used when |
Invisibly returns the saved TIFF path when file is supplied,
otherwise NULL.
ex <- dnaEPICO:::exampleSexPlotStateDnaEpico()
plotSexMinfiEwasWater(
  sexData = ex,
  type = "predicted",
  display = FALSE,
  verbose = FALSE,
  logs = FALSE
)
Draw one of the standard surrogate-variable plots used by svaEnmix().
plotSvaEnmix(
  analysisData,
  plot = c("sentrix_id", "sentrix_position", "matrix"),
  display = FALSE,
  file = NULL,
  width = 2000L,
  height = 1000L,
  res = 150L,
  verbose = FALSE,
  logs = FALSE,
  log_dir = NULL,
  log_file = "log_plotSvaEnmix.txt"
)
analysisData |
Object returned by |
plot |
Character. Plot type: |
display |
Logical. If |
file |
Character or |
width |
Integer. Plot width in pixels when |
height |
Integer. Plot height in pixels when |
res |
Integer. TIFF resolution in DPI when |
verbose |
Logical. If |
logs |
Logical. If |
log_dir |
Character or |
log_file |
Character. File name used when |
Invisibly returns file when a TIFF is written, otherwise NULL.
ex <- dnaEPICO:::exampleSvaAnalysisStateDnaEpico()
plotSvaEnmix(
  analysisData = ex$analysisData,
  plot = "sentrix_id",
  display = FALSE,
  verbose = FALSE,
  logs = FALSE
)
Predict sample sex from a genome-mapped methylation object, align the predictions with phenotype data, and return a structured object that can be plotted or merged into downstream phenotype tables.
predictSexMinfiEwasWater(
  rawData,
  targets,
  SampleID = "Sample_Name",
  sexColumn = "Sex",
  verbose = FALSE,
  logs = FALSE,
  log_dir = NULL,
  log_file = "log_predictSexMinfiEwasWater.txt"
)
rawData |
Object returned by |
targets |
Filtered phenotype data frame aligned with |
SampleID |
Character. Name of the sample identifier column in |
sexColumn |
Character. Name of the phenotype column containing reported sex. |
verbose |
Logical. If |
logs |
Logical. If |
log_dir |
Character or |
log_file |
Character. File name used when |
A list with class "dnaEPICO_minfiEwasWater_sex" containing the sex
prediction result, aligned phenotype data, plotting data, and mismatch
table.
ex <- dnaEPICO:::exampleMinfiWorkflowStateDnaEpico()
sex_data <- predictSexMinfiEwasWater(
  rawData = ex$rawFiltered,
  targets = ex$sampleData$targets,
  SampleID = "Sample_Name",
  sexColumn = "Sex",
  verbose = FALSE,
  logs = FALSE
)
names(sex_data)
Prepare inputs for a DNA methylation report
prepareDnamReportInputs(
  outputDir = "reports",
  qcDir = file.path("figures", "preprocessingMinfiEwasWater", "enmix"),
  preprocessingDir = file.path("figures", "preprocessingMinfiEwasWater", "qc"),
  postprocessingDir = file.path("figures", "preprocessingMinfiEwasWater", "metrics"),
  svaDir = file.path("figures", "svaEnmix"),
  glmDir = file.path("figures", "methylationGLM_T1"),
  glmmDir = file.path("figures", "methylationGLMM_T1T2"),
  figDir = file.path(outputDir, "assets", "figures"),
  verbose = FALSE,
  logs = FALSE,
  logDir = outputDir
)
outputDir |
Character. Directory where the report project is written. |
qcDir |
Character. Directory containing ENmix quality-control figures. |
preprocessingDir |
Character. Directory containing preprocessing quality-control figures. |
postprocessingDir |
Character. Directory containing postprocessing metric figures. |
svaDir |
Character. Directory containing SVA or batch-effect figures. |
glmDir |
Character. Directory containing GLM figures. |
glmmDir |
Character. Directory containing GLMM figures. |
figDir |
Character. Directory used for generated report figure assets. |
verbose |
Logical. If |
logs |
Logical. If |
logDir |
Character. Directory for optional log files. |
A list with class "dnaEPICO_dnamReport_prepared".
report_root <- file.path(tempdir(), "dnaepico-report-inputs")
prepared <- prepareDnamReportInputs(
  outputDir = file.path(report_root, "reports"),
  qcDir = file.path(
    report_root, "figures", "preprocessingMinfiEwasWater", "enmix"
  ),
  preprocessingDir = file.path(
    report_root, "figures", "preprocessingMinfiEwasWater", "qc"
  ),
  postprocessingDir = file.path(
    report_root, "figures", "preprocessingMinfiEwasWater", "metrics"
  ),
  svaDir = file.path(report_root, "figures", "svaEnmix")
)
inherits(prepared, "dnaEPICO_dnamReport_prepared")
Load the merged phenotype-plus-beta input object, validate the requested modeling variables, convert selected variables to factors, and return a single in-memory object for downstream helpers.
prepareMethylationGLM_T1Data(
  inputPheno,
  phenotypes,
  covariates,
  factorVars,
  cpgPrefix = "cg",
  cpgLimit = NA,
  interactionTerm = NULL,
  prsMap = NULL,
  verbose = FALSE,
  logs = FALSE,
  log_dir = NULL,
  log_file = "log_methylationGLM_T1.txt"
)
inputPheno |
Character. Path to the merged phenotype-plus-beta object
created by |
phenotypes |
Character vector or comma-separated string of phenotype variables to model. |
covariates |
Character vector or comma-separated string of covariate variables to adjust for. |
factorVars |
Character vector or comma-separated string of variables that should be converted to factors before modeling. |
cpgPrefix |
Character. Prefix used to identify methylation columns. |
cpgLimit |
Integer or |
interactionTerm |
Character or |
prsMap |
Character vector or comma-separated string of phenotype-to-PRS
mappings in the form |
verbose |
Logical. If |
logs |
Logical. If |
log_dir |
Character or |
log_file |
Character. File name used when |
A list with class "dnaEPICO_methylationGLM_T1_data" containing the
prepared analysis data, parsed variable selections, CpG columns, and
exploratory summaries.
ex <- dnaEPICO:::exampleMethylationGLMStateDnaEpico()
prepared_data <- prepareMethylationGLM_T1Data(
  inputPheno = ex$inputPath,
  phenotypes = "status",
  covariates = "sex,age",
  factorVars = "status,sex",
  cpgLimit = 2,
  verbose = FALSE,
  logs = FALSE
)
names(prepared_data)
Load the merged longitudinal phenotype-plus-beta object, ensure that a subject identifier column is available, validate the requested modeling variables, convert selected variables to factors, and return a single in-memory object for downstream mixed-effects modeling helpers.
prepareMethylationGLMM_T1T2Data(
  inputPheno,
  personVar = "person",
  timeVar = "Timepoint",
  phenotypes,
  covariates,
  factorVars,
  prsMap = NULL,
  cpgPrefix = "cg",
  cpgLimit = NA,
  interactionTerm = NULL,
  verbose = FALSE,
  logs = FALSE,
  log_dir = NULL,
  log_file = "log_methylationGLMM_T1T2.txt"
)
inputPheno |
Character. Path to the merged longitudinal phenotype-plus-beta object
created by |
personVar |
Character. Name of the subject identifier column. |
timeVar |
Character. Name of the time variable. |
phenotypes |
Character vector or comma-separated string of phenotype variables to model. |
covariates |
Character vector or comma-separated string of covariate variables to adjust for. |
factorVars |
Character vector or comma-separated string of variables that should be converted to factors before modeling. |
prsMap |
Character vector or comma-separated string of phenotype-to-PRS
mappings in the form |
cpgPrefix |
Character. Prefix used to identify methylation columns. |
cpgLimit |
Integer or |
interactionTerm |
Character or |
verbose |
Logical. If |
logs |
Logical. If |
log_dir |
Character or |
log_file |
Character. File name used when |
A list with class "dnaEPICO_methylationGLMM_T1T2_data" containing
the prepared analysis data, parsed variable selections, CpG columns,
timepoint summaries, and subject-ID diagnostics.
ex <- dnaEPICO:::exampleMethylationGLMMStateDnaEpico()
prepared_data <- prepareMethylationGLMM_T1T2Data(
  inputPheno = ex$inputPath,
  personVar = "person",
  timeVar = "Timepoint",
  phenotypes = "score",
  covariates = "sex",
  factorVars = "sex,Timepoint",
  cpgLimit = 2,
  verbose = FALSE,
  logs = FALSE
)
names(prepared_data)
Run the dnaEPICO preprocessing workflow as a convenience wrapper around the
smaller minfi/ENmix/wateRmelon helper functions in this package. The wrapper
now returns a structured result object containing the in-memory outputs from
each stage. Legacy files are written only when saveOutputs = TRUE.
preprocessingMinfiEwasWater(
  phenoFile = "data/preprocessingMinfiEwasWater/pheno.csv",
  idatFolder = "data/preprocessingMinfiEwasWater/idats",
  outputLogs = "logs",
  nSamples = NA,
  SampleID = "Sample_Name",
  arrayType = "IlluminaHumanMethylationEPICv2",
  annotationVersion = "20a1.hg38",
  scriptLabel = "preprocessingMinfiEwasWater",
  baseDataFolder = "rData",
  figureBaseDir = "figures",
  sepType = "",
  tiffWidth = 2000,
  tiffHeight = 1000,
  tiffRes = 150,
  qcCutoff = 10.5,
  detPtype = "m+u",
  detPThreshold = 0.05,
  normMethods = "adjustedfunnorm",
  sexColumn = "Sex",
  pvalThreshold = 0.01,
  chrToRemove = "chrX,chrY",
  snpsToRemove = "SBE,CpG",
  mafThreshold = 0.1,
  crossReactivePath = "data/preprocessingMinfiEwasWater/12864_2024_10027_MOESM8_ESM.csv",
  plotGroupVar = "Sex",
  lcRef = "salivaEPIC",
  phenoOrder = "Sample_Name;Timepoint;Sex;PredSex;Basename;Sentrix_ID;Sentrix_Position",
  lcPhenoDir = "data/preprocessingMinfiEwasWater",
  display = FALSE,
  verbose = FALSE,
  logs = FALSE,
  saveOutputs = FALSE
)
phenoFile |
Character. Path to the phenotype CSV file. |
idatFolder |
Character. Directory containing the IDAT files. |
outputLogs |
Character. Directory used for log files when |
nSamples |
Integer or |
SampleID |
Character. Name of the phenotype column containing sample identifiers. |
arrayType |
Character. Illumina array identifier passed to
|
annotationVersion |
Character. Annotation build passed to
|
scriptLabel |
Character. Label used to name output folders when
|
baseDataFolder |
Character. Base directory used for saved |
figureBaseDir |
Character. Base directory used for saved figure outputs
when |
sepType |
Character. Field separator used in |
tiffWidth |
Integer. Width of saved TIFF plots in pixels. |
tiffHeight |
Integer. Height of saved TIFF plots in pixels. |
tiffRes |
Integer. Resolution in DPI for saved TIFF plots. |
qcCutoff |
Numeric. QC cutoff passed to |
detPtype |
Character. Detection P-value mode passed to
|
detPThreshold |
Numeric. Samples with mean detection P value above this threshold are removed. |
normMethods |
Character vector or semicolon-separated string of
normalization methods. Supported values are |
sexColumn |
Character. Name of the phenotype column containing reported sex. |
pvalThreshold |
Numeric. Probe-level detection P-value threshold used in the probe filter. |
chrToRemove |
Character vector or comma-separated string of chromosome
names to remove, for example |
snpsToRemove |
Character vector or comma-separated string of SNP probe
types to remove, for example |
mafThreshold |
Numeric. Minor allele frequency threshold passed to
|
crossReactivePath |
Character. Path to a CSV file containing a |
plotGroupVar |
Character. Phenotype column used for density and MDS grouping plots. |
lcRef |
Character. Reference panel used for cell composition estimation.
|
phenoOrder |
Character vector or semicolon-separated string describing
which phenotype columns should appear first in the merged |
lcPhenoDir |
Character. Directory used for the saved |
display |
Logical. If |
verbose |
Logical. If |
logs |
Logical. If |
saveOutputs |
Logical. If |
A list with class "dnaEPICO_preprocessingMinfiEwasWater" containing the following elements:
Filtered phenotype table aligned to the retained samples.
Filtered RGChannelSet used in downstream preprocessing and
available for direct interactive inspection.
Object returned by buildRawMinfiEwasWater() containing the
raw MSet, RatioSet, and genome-mapped object derived from RGSet.
Object returned by assessSamplesMinfiEwasWater()
containing detection P values, QC summaries, and failed-sample tracking.
Object returned by predictSexMinfiEwasWater() containing
predicted sex labels, mismatch summaries, and plotting data.
Object returned by normalizeMinfiEwasWater() containing
the requested normalized objects and metadata on the methods that were run.
Object returned by filterProbesMinfiEwasWater()
containing the probe-filtered methylation objects at each filtering stage.
Object returned by extractMetricsMinfiEwasWater()
containing the beta-value, M-value, and copy-number matrices used by later
workflow steps.
Object returned by estimateLCMinfiEwasWater() containing
the estimated cell-type proportions and the phenotype table augmented with
those proportions.
Resolved path to the optional log file, or NULL when
logging was disabled.
See dnaEPICO_preprocessingMinfiEwasWater for a class-level overview.
dnaEPICO_preprocessingMinfiEwasWater
if (requireNamespace("minfiData", quietly = TRUE) &&
  requireNamespace("IlluminaHumanMethylation450kmanifest", quietly = TRUE) &&
  requireNamespace("IlluminaHumanMethylation450kanno.ilmn12.hg19", quietly = TRUE)) {
  ex <- dnaEPICO:::exampleMinfiIdatInputsDnaEpico(n = 4)
  result <- preprocessingMinfiEwasWater(
    phenoFile = ex$phenoFile,
    idatFolder = ex$idatFolder,
    outputLogs = file.path(ex$tempDir, "logs"),
    nSamples = 4,
    SampleID = "Sample_Name",
    arrayType = ex$arrayType,
    annotationVersion = ex$annotationVersion,
    scriptLabel = "preprocessingMinfiEwasWater",
    baseDataFolder = file.path(ex$tempDir, "rData"),
    figureBaseDir = file.path(ex$tempDir, "figures"),
    detPThreshold = 1,
    normMethods = "quantile",
    sexColumn = "Sex",
    pvalThreshold = 1,
    chrToRemove = "",
    snpsToRemove = "SBE",
    mafThreshold = 1,
    crossReactivePath = ex$crossReactivePath,
    plotGroupVar = "Sex",
    lcRef = "saliva",
    phenoOrder = "Sample_Name;Sex;Basename;Sentrix_ID;Sentrix_Position",
    lcPhenoDir = ex$tempDir,
    saveOutputs = FALSE,
    verbose = FALSE,
    logs = FALSE
  )
  inherits(result, "dnaEPICO_preprocessingMinfiEwasWater")
}
Read the phenotype table and the preprocessed beta, M-value, and copy-number
matrices; align them by sample identifier; split them by timepoint; prepare
combined longitudinal objects; and build Clock Foundation export tables. The
function returns a structured in-memory result, while legacy files are
written only when saveOutputs = TRUE.
preprocessingPheno(
  phenoFile = "data/preprocessingMinfiEwasWater/phenoLC.csv",
  sepType = "",
  betaPath = "rData/preprocessingMinfiEwasWater/metrics/beta_NomFilt_MSetF_Flt_Rxy_Ds_Rc.RData",
  mPath = "rData/preprocessingMinfiEwasWater/metrics/m_NomFilt_MSetF_Flt_Rxy_Ds_Rc.RData",
  cnPath = "rData/preprocessingMinfiEwasWater/metrics/cn_NomFilt_MSetF_Flt_Rxy_Ds_Rc.RData",
  SampleID = "Sample_Name",
  timeVar = "Timepoint",
  timepoints = "1,2",
  combineTimepoints = "1,2",
  outputPheno = "data/preprocessingPheno",
  outputRData = "rData/preprocessingPheno/metrics",
  outputRDataMerge = "rData/preprocessingPheno/mergeData",
  sexColumn = "Sex",
  outputLogs = "logs",
  outputDir = "data/preprocessingPheno",
  verbose = FALSE,
  logs = FALSE,
  saveOutputs = FALSE
)
phenoFile |
Character. Path to the phenotype CSV file. |
sepType |
Character. Field separator used in phenoFile. |
betaPath |
Character. Path to the saved beta-value object. Both |
mPath |
Character. Path to the saved M-value object. Both |
cnPath |
Character. Path to the saved copy-number object. Both |
SampleID |
Character. Name of the phenotype column containing sample identifiers used to align phenotype and methylation data. |
timeVar |
Character. Name of the phenotype column containing timepoint labels. |
timepoints |
Character vector or comma-separated string of timepoints to retain and split into separate in-memory subsets. |
combineTimepoints |
Character vector or comma-separated string of timepoints to combine into the longitudinal phenotype-plus-beta object. |
outputPheno |
Character. Directory used for saved phenotype CSV files
when saveOutputs = TRUE. |
outputRData |
Character. Directory used for saved metric |
outputRDataMerge |
Character. Directory used for saved merged
phenotype-plus-beta |
sexColumn |
Character. Name of the phenotype sex column used when building Clock Foundation exports. |
outputLogs |
Character. Directory used for log files when logs = TRUE. |
outputDir |
Character. Directory used for Clock Foundation export files
when saveOutputs = TRUE. |
verbose |
Logical. If |
logs |
Logical. If |
saveOutputs |
Logical. If TRUE, write the legacy output files to disk; otherwise only the in-memory result is returned. |
A list with class "dnaEPICO_preprocessingPheno".
Phenotype table read from phenoFile.
Object returned by loadMetricsPreprocessingPheno()
containing the beta-value, M-value, and copy-number matrices loaded from
betaPath, mPath, and cnPath.
Object returned by splitTimepointsPreprocessingPheno()
containing per-timepoint phenotype tables and methylation matrices.
Object returned by
combineTimepointsPreprocessingPheno() containing the merged longitudinal
phenotype-plus-beta object and the timepoint combination metadata.
Object returned by
buildClockFoundationInputsPreprocessingPheno() containing the beta table
and phenotype table prepared for Clock Foundation export.
Object returned by writePreprocessingPhenoOutputs() when
saveOutputs = TRUE, otherwise NULL.
Resolved path to the optional log file, or NULL when
logging was disabled.
See dnaEPICO_preprocessingPheno for a class-level overview.
tmp <- tempdir()
pheno <- data.frame(
  Sample_Name = c("S1", "S2", "S3"),
  Timepoint = c("1", "1", "2"),
  Sex = c(0, 1, 0),
  stringsAsFactors = FALSE
)
beta <- matrix(
  c(0.10, 0.20, 0.30, 0.40, 0.50, 0.60),
  nrow = 2,
  dimnames = list(c("cg1", "cg2"), pheno$Sample_Name)
)
m <- beta * 10
cn <- beta * 100
pheno_file <- file.path(tmp, "pheno.csv")
beta_path <- file.path(tmp, "beta.RData")
m_path <- file.path(tmp, "m.RData")
cn_path <- file.path(tmp, "cn.RData")
utils::write.csv(pheno, pheno_file, row.names = FALSE)
save(beta, file = beta_path)
save(m, file = m_path)
save(cn, file = cn_path)
result <- preprocessingPheno(
  phenoFile = pheno_file,
  betaPath = beta_path,
  mPath = m_path,
  cnPath = cn_path,
  SampleID = "Sample_Name",
  timeVar = "Timepoint",
  timepoints = "1,2",
  combineTimepoints = "1,2",
  outputPheno = file.path(tmp, "data", "preprocessingPheno"),
  outputRData = file.path(tmp, "rData", "preprocessingPheno", "metrics"),
  outputRDataMerge = file.path(tmp, "rData", "preprocessingPheno", "mergeData"),
  sexColumn = "Sex",
  outputLogs = file.path(tmp, "logs"),
  outputDir = file.path(tmp, "clockFoundation"),
  saveOutputs = FALSE
)
stopifnot(inherits(result, "dnaEPICO_preprocessingPheno"))
Print a DNA methylation report result
## S3 method for class 'dnaEPICO_dnamReport'
print(x, ...)
x |
Object returned by |
... |
Additional arguments ignored. |
Invisibly returns x.
Read the phenotype table used by shared dnaEPICO workflows, validate the
sample identifier column, optionally subset the first nSamples, and return
the targets as a base data.frame.
readPhenotypeTargets(
  phenoFile,
  sepType = "",
  nSamples = NA,
  SampleID = "Sample_Name",
  verbose = FALSE,
  logs = FALSE,
  log_dir = NULL,
  log_file = "log_readPhenotypeTargets.txt"
)
phenoFile |
Character. Path to the phenotype table on disk. |
sepType |
Character. Field separator used in phenoFile. |
nSamples |
Integer or |
SampleID |
Character. Name of the column containing sample identifiers that will later be used to name methylation-array samples. |
verbose |
Logical. If |
logs |
Logical. If |
log_dir |
Character or |
log_file |
Character. File name used when |
A data.frame containing the phenotype targets.
tmp <- tempdir()
pheno <- data.frame(
  Sample_Name = c("S1", "S2"),
  Sex = c("F", "M"),
  stringsAsFactors = FALSE
)
pheno_file <- file.path(tmp, "pheno.csv")
utils::write.csv(pheno, pheno_file, row.names = FALSE)
targets <- readPhenotypeTargets(
  phenoFile = pheno_file,
  SampleID = "Sample_Name"
)
stopifnot(is.data.frame(targets))
stopifnot(nrow(targets) == 2L)
Read methylation-array IDAT files with minfi::read.metharray.exp(), set
sample names from the phenotype table, apply the requested annotation, and
return the resulting RGChannelSet.
readRGSetMinfiEwasWater(
  idatFolder,
  targets,
  SampleID = "Sample_Name",
  arrayType = "IlluminaHumanMethylationEPICv2",
  annotationVersion = "20a1.hg38",
  verbose = FALSE,
  logs = FALSE,
  log_dir = NULL,
  log_file = "log_readRGSetMinfiEwasWater.txt"
)
idatFolder |
Character. Directory containing the IDAT files. |
targets |
Data frame returned by |
SampleID |
Character. Name of the phenotype column containing sample
identifiers used to label the |
arrayType |
Character. Array name passed to
|
annotationVersion |
Character. Annotation build passed to
|
verbose |
Logical. If |
logs |
Logical. If |
log_dir |
Character or |
log_file |
Character. File name used when |
An annotated RGChannelSet.
if (requireNamespace("minfiData", quietly = TRUE) &&
    requireNamespace("IlluminaHumanMethylation450kmanifest", quietly = TRUE) &&
    requireNamespace("IlluminaHumanMethylation450kanno.ilmn12.hg19",
                     quietly = TRUE)) {
  ex <- dnaEPICO:::exampleMinfiIdatInputsDnaEpico(n = 4)
  rgset <- readRGSetMinfiEwasWater(
    idatFolder = ex$idatFolder,
    targets = ex$targets,
    SampleID = "Sample_Name",
    arrayType = ex$arrayType,
    annotationVersion = ex$annotationVersion
  )
  class(rgset)
}
Render a prepared DNA methylation report
renderDnamReport(
  preparedReport,
  verbose = FALSE,
  logs = FALSE,
  logDir = NULL,
  clean = TRUE
)
preparedReport |
Object returned by |
verbose |
Logical. If |
logs |
Logical. If |
logDir |
Character or |
clean |
Logical. Retained for backwards compatibility. |
A list with class "dnaEPICO_dnamReport_render".
report_root <- file.path(tempdir(), "dnaepico-render-example")
prepared <- prepareDnamReportInputs(
  outputDir = file.path(report_root, "reports")
)
rendered <- renderDnamReport(prepared)
rendered$status
Align phenotype rows and metric matrices for each requested timepoint, and precompute the per-timepoint phenotype-plus-beta objects used by downstream modeling functions.
splitTimepointsPreprocessingPheno(
  pheno,
  metricsData,
  SampleID = "Sample_Name",
  timeVar = "Timepoint",
  timepoints = "1,2",
  verbose = FALSE,
  logs = FALSE,
  log_dir = NULL,
  log_file = "log_splitTimepointsPreprocessingPheno.txt"
)
pheno |
Data frame containing phenotype information. |
metricsData |
Object returned by |
SampleID |
Character. Name of the sample identifier column in |
timeVar |
Character. Name of the timepoint column in |
timepoints |
Character vector or comma-separated string of timepoints to retain. |
verbose |
Logical. If |
logs |
Logical. If |
log_dir |
Character or |
log_file |
Character. File name used when |
A list with class "dnaEPICO_preprocessingPheno_timepoints"
containing the parsed timepoints and aligned per-timepoint subsets.
ex <- dnaEPICO:::examplePreprocessingPhenoStateDnaEpico()
timepoint_data <- splitTimepointsPreprocessingPheno(
  pheno = ex$pheno,
  metricsData = ex$metricsData,
  SampleID = "Sample_Name",
  timeVar = "Timepoint",
  timepoints = "1,2",
  verbose = FALSE,
  logs = FALSE
)
timepoint_data$timepoints
Extract phenotype-specific CpG coefficient tables from the fitted model
object returned by fitMethylationGLM_T1Models().
summarizeMethylationGLM_T1Models(
  modelResults,
  preparedData,
  summaryResidualSD = TRUE,
  summaryPval = NA,
  nCores = 1L,
  libPath = NULL,
  glmLibs = "glm2",
  chunkSize = NULL,
  verbose = FALSE,
  logs = FALSE,
  log_dir = NULL,
  log_file = "log_methylationGLM_T1.txt"
)
modelResults |
Object returned by |
preparedData |
Object returned by |
summaryResidualSD |
Logical. If |
summaryPval |
Numeric or |
nCores |
Integer. Number of worker processes to use while extracting summary rows. |
libPath |
Character vector or |
glmLibs |
Character vector or comma-separated string of package names to
check on worker processes. The default is |
chunkSize |
Integer or |
verbose |
Logical. If |
logs |
Logical. If |
log_dir |
Character or |
log_file |
Character. File name used when |
A list with class "dnaEPICO_methylationGLM_T1_summaries"
containing one CpG-level summary data frame per phenotype.
ex <- dnaEPICO:::exampleMethylationGLMStateDnaEpico()
summary_results <- summarizeMethylationGLM_T1Models(
  modelResults = ex$modelResults,
  preparedData = ex$preparedData,
  summaryResidualSD = TRUE,
  summaryPval = NA,
  nCores = 1,
  verbose = FALSE,
  logs = FALSE
)
names(summary_results$summaries)
Extract phenotype-specific fixed-effect tables from the fitted mixed-effects
model object returned by fitMethylationGLMM_T1T2Models().
summarizeMethylationGLMM_T1T2Models(
  modelResults,
  preparedData,
  summaryPval = NA,
  nCores = 1L,
  chunkSize = NULL,
  verbose = FALSE,
  logs = FALSE,
  log_dir = NULL,
  log_file = "log_methylationGLMM_T1T2.txt"
)
modelResults |
Object returned by |
preparedData |
Object returned by |
summaryPval |
Numeric or |
nCores |
Integer. Number of worker processes to use while extracting summary rows. |
chunkSize |
Integer or |
verbose |
Logical. If |
logs |
Logical. If |
log_dir |
Character or |
log_file |
Character. File name used when |
A list with class "dnaEPICO_methylationGLMM_T1T2_summaries"
containing one CpG-level summary data frame per phenotype.
ex <- dnaEPICO:::exampleMethylationGLMMStateDnaEpico()
summary_results <- summarizeMethylationGLMM_T1T2Models(
  modelResults = ex$modelResults,
  preparedData = ex$preparedData,
  summaryPval = NA,
  nCores = 1,
  verbose = FALSE,
  logs = FALSE
)
names(summary_results$summaries)
Summarize the requested phenotype variables by timepoint. Numeric phenotypes are reported with mean, standard deviation, and non-missing counts; non-numeric phenotypes are reported with non-missing counts and the observed levels.
summarizeTimepointsMethylationGLMM_T1T2(
  data,
  timeVar = "Timepoint",
  phenotypes,
  verbose = FALSE,
  logs = FALSE,
  log_dir = NULL,
  log_file = "log_methylationGLMM_T1T2.txt"
)
data |
Data frame containing the longitudinal phenotype-plus-beta data. |
timeVar |
Character. Name of the time variable. |
phenotypes |
Character vector or comma-separated string of phenotype variables to summarize. |
verbose |
Logical. If |
logs |
Logical. If |
log_dir |
Character or |
log_file |
Character. File name used when |
A data frame with one row per timepoint and summary columns for each requested phenotype.
ex <- dnaEPICO:::exampleMethylationGLMMStateDnaEpico()
timepoint_summary <- summarizeTimepointsMethylationGLMM_T1T2(
  data = ex$preparedData$data,
  timeVar = "Timepoint",
  phenotypes = "score",
  verbose = FALSE,
  logs = FALSE
)
nrow(timepoint_summary)
Read the phenotype table and a saved RGChannelSet, estimate surrogate
variables from ENmix control probes, analyze their association with Sentrix
chip and position factors, and return a structured in-memory result. Legacy
CSV, .RData, text-summary, and figure outputs are written only when
saveOutputs = TRUE.
svaEnmix(
  phenoFile = "data/preprocessingMinfiEwasWater/phenoLC.csv",
  rgsetData = "rData/preprocessingMinfiEwasWater/objects/RGSet.RData",
  sepType = "",
  outputLogs = "logs",
  nSamples = NA,
  SampleID = "Sample_Name",
  arrayType = "IlluminaHumanMethylationEPICv2",
  annotationVersion = "20a1.hg38",
  SentrixIDColumn = "Sentrix_ID",
  SentrixPositionColumn = "Sentrix_Position",
  ctrlSvaPercVar = 0.9,
  ctrlSvaFlag = 1,
  scriptLabel = "svaEnmix",
  tiffWidth = 2000,
  tiffHeight = 1000,
  tiffRes = 150,
  figureBaseDir = "figures",
  dataBaseDir = "data",
  rBaseDir = "rData",
  display = FALSE,
  verbose = FALSE,
  logs = FALSE,
  saveOutputs = FALSE
)
phenoFile |
Character. Path to the phenotype file with cell-composition data. |
rgsetData |
Character. Path to a saved RGChannelSet object. |
sepType |
Character. Field separator used in phenoFile. |
outputLogs |
Character. Directory used for log files when logs = TRUE. |
nSamples |
Integer or |
SampleID |
Character. Name of the phenotype column containing sample identifiers. |
arrayType |
Character. Illumina array identifier assigned to
|
annotationVersion |
Character. Annotation build assigned to
|
SentrixIDColumn |
Character. Name of the chip identifier column in the phenotype data. |
SentrixPositionColumn |
Character. Name of the chip position column in the phenotype data. |
ctrlSvaPercVar |
Numeric. Proportion of control-probe variance explained
when running |
ctrlSvaFlag |
Integer. Control-probe flag passed to |
scriptLabel |
Character. Label used to name output folders when
saveOutputs = TRUE. |
tiffWidth |
Integer. Width of saved TIFF plots in pixels. |
tiffHeight |
Integer. Height of saved TIFF plots in pixels. |
tiffRes |
Integer. Resolution in DPI for saved TIFF plots. |
figureBaseDir |
Character. Base directory used for saved figure outputs
when saveOutputs = TRUE. |
dataBaseDir |
Character. Base directory used for saved CSV and text
outputs when saveOutputs = TRUE. |
rBaseDir |
Character. Base directory used for saved |
display |
Logical. If |
verbose |
Logical. If |
logs |
Logical. If |
saveOutputs |
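Logical. If TRUE, write the legacy CSV, .RData, text-summary, and figure outputs to disk; otherwise only the in-memory result is returned. |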
A list with class "dnaEPICO_svaEnmix".
Phenotype table read from phenoFile after any optional row
subsetting.
Loaded RGChannelSet with sample names realigned to
targets[[SampleID]].
Object returned by estimateSvaEnmixControls() containing
the surrogate-variable matrix and the control-probe settings used to
estimate it.
Phenotype table returned by mergeSvaTargetsEnmix()
after the surrogate variables were appended as additional columns.
Object returned by analyzeSvaEnmix() containing the
surrogate-variable association models, ANOVA tables, and Sentrix metadata.
Named list describing the plot file paths requested for the
SVA figures. When saveOutputs = FALSE, the entries are typically NULL.
Object returned by writeSvaEnmixOutputs() when
saveOutputs = TRUE, otherwise NULL.
Resolved path to the optional log file, or NULL when
logging was disabled.
See dnaEPICO_svaEnmix for a class-level overview.
tmp <- tempdir()
stopifnot(dir.exists(tmp))
if (requireNamespace("minfiData", quietly = TRUE)) {
  ex <- dnaEPICO:::exampleMinfiBaseDataDnaEpico()
  pheno_file <- file.path(tmp, "pheno.csv")
  rgset_path <- file.path(tmp, "RGSet.RData")
  RGSet <- ex$RGSet
  utils::write.csv(ex$targets, pheno_file, row.names = FALSE)
  save(RGSet, file = rgset_path)
  sva_result <- svaEnmix(
    phenoFile = pheno_file,
    rgsetData = rgset_path,
    SampleID = "Sample_Name",
    arrayType = "IlluminaHumanMethylation450k",
    annotationVersion = "ilmn12.hg19",
    SentrixIDColumn = "Sentrix_ID",
    SentrixPositionColumn = "Sentrix_Position",
    outputLogs = file.path(tmp, "logs"),
    figureBaseDir = file.path(tmp, "figures"),
    dataBaseDir = file.path(tmp, "data"),
    rBaseDir = file.path(tmp, "rData"),
    saveOutputs = FALSE
  )
  stopifnot(inherits(sva_result, "dnaEPICO_svaEnmix"))
}
Write optional serialized outputs, summary tables, significant-CpG tables, and annotated results from the one-timepoint GLM workflow.
writeMethylationGLM_T1Outputs(
  modelResults,
  modelSummaries,
  annotatedResults,
  significantCpGs = NULL,
  outputRData,
  summaryTxtDir,
  significantCpGDir,
  annotatedGLMOut,
  saveTxtSummaries = TRUE,
  saveSignificantCpGs = FALSE,
  verbose = FALSE,
  logs = FALSE,
  log_dir = NULL,
  log_file = "log_methylationGLM_T1.txt"
)
modelResults |
Object returned by |
modelSummaries |
Object returned by |
annotatedResults |
Object returned by
|
significantCpGs |
Object returned by
|
outputRData |
Character. Directory used for serialized model and summary outputs. |
summaryTxtDir |
Character. Directory used for tab-delimited summary tables. |
significantCpGDir |
Character. Directory used for significant-CpG coefficient tables. |
annotatedGLMOut |
Character. Directory used for the annotated summary XLSX workbook. |
saveTxtSummaries |
Logical. If |
saveSignificantCpGs |
Logical. If |
verbose |
Logical. If |
logs |
Logical. If |
log_dir |
Character or |
log_file |
Character. File name used when |
A list with class "dnaEPICO_methylationGLM_T1_paths" containing
the paths of the files written to disk.
ex <- dnaEPICO:::exampleMethylationGLMStateDnaEpico()
annotation_data <- annotateMethylationGLM_T1Summaries(
  modelSummaries = ex$modelSummaries,
  annotationObject = ex$annotationData,
  annotationCols = "Name,chr,pos",
  verbose = FALSE,
  logs = FALSE
)
significant_cpgs <- collectSignificantCpGsMethylationGLM_T1(
  modelResults = ex$modelResults,
  pvalThreshold = 1,
  verbose = FALSE,
  logs = FALSE
)
output_paths <- writeMethylationGLM_T1Outputs(
  modelResults = ex$modelResults,
  modelSummaries = ex$modelSummaries,
  annotatedResults = annotation_data,
  significantCpGs = significant_cpgs,
  outputRData = file.path(ex$tempDir, "models"),
  summaryTxtDir = file.path(ex$tempDir, "summary"),
  significantCpGDir = file.path(ex$tempDir, "significant"),
  annotatedGLMOut = file.path(ex$tempDir, "annotated"),
  saveTxtSummaries = TRUE,
  saveSignificantCpGs = TRUE,
  verbose = FALSE,
  logs = FALSE
)
names(output_paths)
Write optional serialized outputs, summary tables, significant interaction tables, and annotated results from the longitudinal mixed-effects workflow.
writeMethylationGLMM_T1T2Outputs(
  modelResults,
  modelSummaries,
  annotatedResults,
  significantInteractions = NULL,
  outputRData,
  summaryTxtDir,
  significantInteractionDir,
  annotatedLMEOut,
  saveTxtSummaries = TRUE,
  saveSignificantInteractions = FALSE,
  verbose = FALSE,
  logs = FALSE,
  log_dir = NULL,
  log_file = "log_methylationGLMM_T1T2.txt"
)
modelResults |
Object returned by |
modelSummaries |
Object returned by |
annotatedResults |
Object returned by
|
significantInteractions |
Object returned by
|
outputRData |
Character. Directory used for serialized model and summary outputs. |
summaryTxtDir |
Character. Directory used for tab-delimited summary tables. |
significantInteractionDir |
Character. Directory used for significant interaction coefficient tables. |
annotatedLMEOut |
Character. Directory used for the annotated summary XLSX workbook. |
saveTxtSummaries |
Logical. If |
saveSignificantInteractions |
Logical. If |
verbose |
Logical. If |
logs |
Logical. If |
log_dir |
Character or |
log_file |
Character. File name used when |
A list with class "dnaEPICO_methylationGLMM_T1T2_paths"
containing the paths of the files written to disk.
ex <- dnaEPICO:::exampleMethylationGLMMStateDnaEpico()
annotation_data <- annotateMethylationGLMM_T1T2Summaries(
  modelSummaries = ex$modelSummaries,
  annotationObject = ex$annotationData,
  annotationCols = "Name,chr,pos",
  verbose = FALSE,
  logs = FALSE
)
significant_hits <- collectSignificantInteractionsMethylationGLMM_T1T2(
  modelResults = ex$modelResults,
  pvalThreshold = 1,
  verbose = FALSE,
  logs = FALSE
)
output_paths <- writeMethylationGLMM_T1T2Outputs(
  modelResults = ex$modelResults,
  modelSummaries = ex$modelSummaries,
  annotatedResults = annotation_data,
  significantInteractions = significant_hits,
  outputRData = file.path(ex$tempDir, "models"),
  summaryTxtDir = file.path(ex$tempDir, "summary"),
  significantInteractionDir = file.path(ex$tempDir, "significant"),
  annotatedLMEOut = file.path(ex$tempDir, "annotated"),
  saveTxtSummaries = TRUE,
  saveSignificantInteractions = TRUE,
  verbose = FALSE,
  logs = FALSE
)
names(output_paths)
Write the merged phenotype plus cell-composition table
writePhenoLCMinfiEwasWater(
  lcData,
  file,
  verbose = FALSE,
  logs = FALSE,
  log_dir = NULL,
  log_file = "log_writePhenoLCMinfiEwasWater.txt"
)
lcData |
Object returned by |
file |
Character. Path to the CSV file to write. |
verbose |
Logical. If |
logs |
Logical. If |
log_dir |
Character or |
log_file |
Character. File name used when |
Invisibly returns file.
ref_file <- system.file("extdata", "saliva.txt", package = "dnaEPICO")
beta <- as.matrix(utils::read.table(ref_file))[1:20, , drop = FALSE]
colnames(beta) <- c("sample1", "sample2")
targets <- data.frame(
  Sample_Name = colnames(beta),
  Timepoint = c("T1", "T2"),
  stringsAsFactors = FALSE
)
lc_data <- estimateLCMinfiEwasWater(
  beta = beta,
  targets = targets,
  lcRef = "saliva",
  phenoOrder = "Sample_Name;Timepoint"
)
output_file <- file.path(tempdir(), "phenoLC.csv")
writePhenoLCMinfiEwasWater(lcData = lc_data, file = output_file)
file.exists(output_file)
Write the legacy CSV, ZIP, and .RData outputs produced by
preprocessingPheno(). This helper keeps file writing separate from the
in-memory preprocessing steps.
writePreprocessingPhenoOutputs(
  preprocessingData,
  outputPheno = "data/preprocessingPheno",
  outputRData = "rData/preprocessingPheno/metrics",
  outputRDataMerge = "rData/preprocessingPheno/mergeData",
  outputDir = "data/preprocessingPheno",
  verbose = FALSE,
  logs = FALSE,
  log_dir = NULL,
  log_file = "log_writePreprocessingPhenoOutputs.txt"
)
preprocessingData |
Object returned by |
outputPheno |
Character. Directory used for saved phenotype CSV files. |
outputRData |
Character. Directory used for saved metric |
outputRDataMerge |
Character. Directory used for saved merged
phenotype-plus-beta |
outputDir |
Character. Directory used for the Clock Foundation export files. |
verbose |
Logical. If |
logs |
Logical. If |
log_dir |
Character or |
log_file |
Character. File name used when |
A list with class "dnaEPICO_preprocessingPheno_paths"
containing the paths written to disk.
```r
ex <- dnaEPICO:::examplePreprocessingPhenoStateDnaEpico()
output_paths <- writePreprocessingPhenoOutputs(
  preprocessingData = ex$preprocessingData,
  outputPheno = file.path(ex$tempDir, "pheno"),
  outputRData = file.path(ex$tempDir, "metrics"),
  outputRDataMerge = file.path(ex$tempDir, "merge"),
  outputDir = file.path(ex$tempDir, "clock"),
  verbose = FALSE,
  logs = FALSE
)
names(output_paths)
```
Write the legacy CSV, .RData, and text-summary outputs used by the original
svaEnmix() workflow.
```r
writeSvaEnmixOutputs(
  svaData,
  mergedPheno,
  analysisData = NULL,
  phenoFile = NULL,
  dataBaseDir = "data",
  rBaseDir = "rData",
  scriptLabel = "svaEnmix",
  verbose = FALSE,
  logs = FALSE,
  log_dir = NULL,
  log_file = "log_writeSvaEnmixOutputs.txt"
)
```
svaData |
Object returned by |
mergedPheno |
Phenotype data frame returned by |
analysisData |
Optional object returned by |
phenoFile |
Character or |
dataBaseDir |
Character. Base directory used for saved data outputs. |
rBaseDir |
Character. Base directory used for saved |
scriptLabel |
Character. Label used to create the output subdirectory. |
verbose |
Logical. If |
logs |
Logical. If |
log_dir |
Character or |
log_file |
Character. File name used when |
A list with class "dnaEPICO_svaEnmix_paths" containing the paths
written to disk.
```r
ex <- dnaEPICO:::exampleSvaAnalysisStateDnaEpico()
temp_dir <- tempdir()
output_paths <- writeSvaEnmixOutputs(
  svaData = list(sva = ex$sva),
  mergedPheno = ex$mergedPheno,
  analysisData = ex$analysisData,
  phenoFile = file.path(temp_dir, "phenoLC.csv"),
  dataBaseDir = file.path(temp_dir, "data"),
  rBaseDir = file.path(temp_dir, "rData"),
  scriptLabel = "svaEnmixExample",
  verbose = FALSE,
  logs = FALSE
)
names(output_paths)
```