Title: | A Hidden Markov Model for high throughput genotyping arrays |
---|---|
Description: | Hidden Markov Models for characterizing chromosomal alteration in high throughput SNP arrays. |
Authors: | Robert Scharpf [aut, cre] |
Maintainer: | Robert Scharpf <[email protected]> |
License: | LGPL-2 |
Version: | 1.69.0 |
Built: | 2024-12-27 04:46:12 UTC |
Source: | https://github.com/bioc/VanillaICE |
A wrapper for the function acf that returns the autocorrelation for the specified lag. Missing values are removed.
acf2(x, lag = 10, ...)
acf2(x, lag = 10, ...)
x |
numeric vector |
lag |
integer |
... |
additional arguments to |
ArrayViews provides views to the low-level data – log R ratios, B allele frequencies, and genotypes that are stored in parsed files on disk, often scaled and coerced to an integer. Accessors to the low-level data are provided that extract the marker-level summaries from disk, rescaling when appropriate.
ArrayViews( class = "ArrayViews", colData, rowRanges = GRanges(), sourcePaths = character(), scale = 1000, sample_ids, parsedPath = getwd(), lrrFiles = character(), bafFiles = character(), gtFiles = character() ) ## S4 method for signature 'ArrayViews,ANY,ANY,ANY' x[i, j, ..., drop = FALSE] colnames(x) <- value ## S4 method for signature 'ArrayViews' colnames(x, do.NULL = TRUE, prefix = "col") ## S4 method for signature 'ArrayViews' x$name ## S4 replacement method for signature 'ArrayViews' x$name <- value ## S4 method for signature 'ArrayViews' show(object) ## S4 method for signature 'ArrayViews' sapply(X, FUN, ..., simplify = TRUE, USE.NAMES = TRUE) ## S4 method for signature 'ArrayViews' ncol(x) ## S4 method for signature 'ArrayViews' nrow(x) ## S4 method for signature 'ArrayViews' dim(x) ## S4 method for signature 'ArrayViews' start(x)
ArrayViews( class = "ArrayViews", colData, rowRanges = GRanges(), sourcePaths = character(), scale = 1000, sample_ids, parsedPath = getwd(), lrrFiles = character(), bafFiles = character(), gtFiles = character() ) ## S4 method for signature 'ArrayViews,ANY,ANY,ANY' x[i, j, ..., drop = FALSE] colnames(x) <- value ## S4 method for signature 'ArrayViews' colnames(x, do.NULL = TRUE, prefix = "col") ## S4 method for signature 'ArrayViews' x$name ## S4 replacement method for signature 'ArrayViews' x$name <- value ## S4 method for signature 'ArrayViews' show(object) ## S4 method for signature 'ArrayViews' sapply(X, FUN, ..., simplify = TRUE, USE.NAMES = TRUE) ## S4 method for signature 'ArrayViews' ncol(x) ## S4 method for signature 'ArrayViews' nrow(x) ## S4 method for signature 'ArrayViews' dim(x) ## S4 method for signature 'ArrayViews' start(x)
class |
character string |
colData |
DataFrame |
rowRanges |
GRanges object |
sourcePaths |
character string provide complete path to plain text source files (one file per sample) containing log R ratios and B allele frequencies |
scale |
log R ratios and B allele frequencies can be stored as integers on disk to increase IO speed. If scale =1, the raw data is not transformed. If scale = 1000 (default), the log R ratios and BAFs are multipled by 1000 and coerced to an integer. |
sample_ids |
character vector indicating how to name samples. Ignored if colData is specified. |
parsedPath |
character vector indicating where parsed files should be saved |
lrrFiles |
character vector of file names for storing log R ratios |
bafFiles |
character vector of file names for storing BAFs |
gtFiles |
character vector of file names for storing genotypes |
x |
a |
i |
numeric vector or missing |
j |
numeric vector or missing |
... |
additional arguments to |
drop |
ignored |
value |
a character-string vector |
do.NULL |
ignored |
prefix |
ignored |
name |
character string indicating name in colData slot of ArrayViews object |
object |
a |
X |
a |
FUN |
a function to apply to each column of |
simplify |
logical indicating whether result should be simplied |
USE.NAMES |
whether the output should be a named vector |
colData
A character string
rowRanges
A DataFrame
. WARNING: The accessor for this slot is rowRanges
, not rowRanges
!
index
A GRanges
object
sourcePaths
A character string providing complete path to source files (one file per sample) containing low-level summaries (Log R ratios, B allele frequencies, genotypes)
scale
A length-one numeric vector
parsedPath
A character string providing full path to where parsed files should be saved
lrrFiles
character vector of filenames for log R ratios
bafFiles
character vector of filenames for BAFs
gtFiles
character vector of filenames for genotypes
CopyNumScanParams
parseSourceFile
ArrayViews() ## From unit test require(BSgenome.Hsapiens.UCSC.hg18) require(data.table) extdir <- system.file("extdata", package="VanillaICE", mustWork=TRUE) features <- suppressWarnings(fread(file.path(extdir, "SNP_info.csv"))) fgr <- GRanges(paste0("chr", features$Chr), IRanges(features$Position, width=1), isSnp=features[["Intensity Only"]]==0) fgr <- SnpGRanges(fgr) names(fgr) <- features[["Name"]] bsgenome <- BSgenome.Hsapiens.UCSC.hg18 seqlevels(fgr, pruning.mode="coarse") <- seqlevels(bsgenome)[seqlevels(bsgenome) %in% seqlevels(fgr)] seqinfo(fgr) <- seqinfo(bsgenome)[seqlevels(fgr),] fgr <- sort(fgr) files <- list.files(extdir, full.names=TRUE, recursive=TRUE, pattern="FinalReport") ids <- gsub(".rds", "", gsub("FinalReport", "", basename(files))) views <- ArrayViews(rowRanges=fgr, sourcePaths=files, sample_ids=ids) lrrFile(views) ## view of first 10 markers and samples 3 and 5 views <- views[1:10, c(3,5)]
ArrayViews() ## From unit test require(BSgenome.Hsapiens.UCSC.hg18) require(data.table) extdir <- system.file("extdata", package="VanillaICE", mustWork=TRUE) features <- suppressWarnings(fread(file.path(extdir, "SNP_info.csv"))) fgr <- GRanges(paste0("chr", features$Chr), IRanges(features$Position, width=1), isSnp=features[["Intensity Only"]]==0) fgr <- SnpGRanges(fgr) names(fgr) <- features[["Name"]] bsgenome <- BSgenome.Hsapiens.UCSC.hg18 seqlevels(fgr, pruning.mode="coarse") <- seqlevels(bsgenome)[seqlevels(bsgenome) %in% seqlevels(fgr)] seqinfo(fgr) <- seqinfo(bsgenome)[seqlevels(fgr),] fgr <- sort(fgr) files <- list.files(extdir, full.names=TRUE, recursive=TRUE, pattern="FinalReport") ids <- gsub(".rds", "", gsub("FinalReport", "", basename(files))) views <- ArrayViews(rowRanges=fgr, sourcePaths=files, sample_ids=ids) lrrFile(views) ## view of first 10 markers and samples 3 and 5 views <- views[1:10, c(3,5)]
This function is not meant to be called directly by the user. It is exported in the package NAMESPACE for internal use by other BioC packages.
baumWelchUpdate(param, assay_list)
baumWelchUpdate(param, assay_list)
param |
A container for the HMM parameters |
assay_list |
list of log R ratios and B allele frequencies |
Given the data and an object containing parameters for the HMM, this function computes emission probabilities. This function is not intended to be called by the user and is exported for internal use by other BioC packages.
calculateEmission(x, param = EmissionParam())
calculateEmission(x, param = EmissionParam())
x |
list of low-level data with two elements: a numeric vector of log R ratios and a numeric vector of B allele frequencies |
param |
parameters for the 6-state HMM |
A matrix of emission probabilities. Column correspond to the HMM states and rows correspond to markers on the array (SNPs and nonpolymorphic markers)
baumWelchUpdate
Parameters for computing emission probabilities for a 6-state HMM, including starting values for the mean and standard deviations for log R ratios (assumed to be Gaussian) and B allele frequencies (truncated Gaussian), and initial state probabilities.
This function is exported primarily for internal use by other BioC packages.
cn_means(object) cn_sds(object) baf_means(object) baf_sds(object) baf_means(object) <- value baf_sds(object) <- value cn_sds(object) <- value cn_means(object) <- value EmissionParam( cn_means = CN_MEANS(), cn_sds = CN_SDS(), baf_means = BAF_MEANS(), baf_sds = BAF_SDS(), initial = rep(1/6, 6), EMupdates = 5L, CN_range = c(-5, 3), temper = 1, p_outlier = 1/100, modelHomozygousRegions = FALSE ) EMupdates(object) ## S4 method for signature 'EmissionParam' show(object)
cn_means(object) cn_sds(object) baf_means(object) baf_sds(object) baf_means(object) <- value baf_sds(object) <- value cn_sds(object) <- value cn_means(object) <- value EmissionParam( cn_means = CN_MEANS(), cn_sds = CN_SDS(), baf_means = BAF_MEANS(), baf_sds = BAF_SDS(), initial = rep(1/6, 6), EMupdates = 5L, CN_range = c(-5, 3), temper = 1, p_outlier = 1/100, modelHomozygousRegions = FALSE ) EMupdates(object) ## S4 method for signature 'EmissionParam' show(object)
object |
see |
value |
numeric vector |
cn_means |
numeric vector of starting values for log R ratio means (order is by copy number state) |
cn_sds |
numeric vector of starting values for log R ratio standard deviations (order is by copy number state) |
baf_means |
numeric vector of starting values for BAF means ordered. See example for details on how these are ordered. |
baf_sds |
numeric vector of starting values for BAF means ordered. See example for details on how these are ordered. |
initial |
numeric vector of intial state probabilities |
EMupdates |
number of EM updates |
CN_range |
the allowable range of log R ratios. Log R ratios outside this range are thresholded. |
temper |
Emission probabilities can be tempered by emit^temper. This is highly experimental. |
p_outlier |
probability that an observation is an outlier (assumed to be the same for all markers) |
modelHomozygousRegions |
logical. If FALSE (default), the emission probabilities for BAFs are modeled from a mixture of truncated normals and a Unif(0,1) where the mixture probabilities are given by the probability that the SNP is heterozygous. See Details below for a discussion of the implications. |
The log R ratios are assumed to be emitted from a normal distribution with a mean and standard deviation that depend on the latent copy number. Similarly, the BAFs are assumed to be emitted from a truncated normal distribution with a mean and standard deviation that depends on the latent number of B alleles relative to the total number of alleles (A+B).
numeric vector
When modelHomozygousRegions
is FALSE (the default in
versions >= 1.28.0), emission probabilities for B allele frequences
are calculated from a mixture of a truncated normal densities and a
Unif(0,1) density with the mixture probabilities given by the
probability that a SNP is homozygous. In particular, let p
denote a 6 dimensional vector of density estimates from a truncated
normal distribution for the latent genotypes 'A', 'B', 'AB', 'AAB',
'ABB', 'AAAB', and 'ABBB'. The probability that a genotype is
homozygous is estimated as
and the probability that the genotype is heterozygous (any latent genotype that is not 'A' or 'B') is given by
Since the density of a Unif(0,1) is 1, the 6-dimensional vector of emission probability at a SNP is given by
The above has the effect of minimizing the influence of BAFs near 0 and 1 on the state path estimated by the Viterbi algorithm. In particular, the emission probability at homozygous SNPs will be virtually the same for states 3 and 4, but at heterozygous SNPs the emission probability for state 3 will be an order of magnitude greater for state 3 (diploid) compared to state 4 (diploid region of homozygosity). The advantage of this parameterization are fewer false positive hemizygous deletion calls. [ Log R ratios tend to be more sensitive to technical sources of variation than the corresponding BAFs/ genotypes. Regions in which the log R ratios are low due to technical sources of variation will be less likely to be interpreted as evidence of copy number loss if heterozygous genotypes have more 'weight' in the emission estimates than homozgous genotypes. ] The trade-off is that only states estimated by the HMM are those with copy number alterations. In particular, copy-neutral regions of homozygosity will not be called.
By setting modelHomozygousRegions = TRUE
, the emission
probabilities at a SNP are given simply by the p
vector
described above and copy-neutral regions of homozygosity will be
called.#'
ep <- EmissionParam() cn_means(ep) ep <- EmissionParam() cn_sds(ep) ep <- EmissionParam() baf_means(ep) ep <- EmissionParam() baf_sds(ep) ep <- EmissionParam() baf_means(ep) <- baf_means(ep) ep <- EmissionParam() baf_sds(ep) <- baf_sds(ep) ep <- EmissionParam() cn_sds(ep) <- cn_sds(ep) ep <- EmissionParam() cn_means(ep) <- cn_means(ep) ep <- EmissionParam() show(ep) cn_means(ep) cn_sds(ep) baf_means(ep) baf_sds(ep)
ep <- EmissionParam() cn_means(ep) ep <- EmissionParam() cn_sds(ep) ep <- EmissionParam() baf_means(ep) ep <- EmissionParam() baf_sds(ep) ep <- EmissionParam() baf_means(ep) <- baf_means(ep) ep <- EmissionParam() baf_sds(ep) <- baf_sds(ep) ep <- EmissionParam() cn_sds(ep) <- cn_sds(ep) ep <- EmissionParam() cn_means(ep) <- cn_means(ep) ep <- EmissionParam() show(ep) cn_means(ep) cn_sds(ep) baf_means(ep) baf_sds(ep)
The HMM-derived genomic ranges are represented as a
GRanges
-derived object. cnvFilter
returns a
GRanges
object using the filters stipulated in the
filters
argument.
cnvFilter(object, filters = FilterParam()) cnvSegs(object, filters = FilterParam(state = c("1", "2", "5", "6"))) duplication(object, filters = FilterParam(state = c("5", "6"))) deletion(object, filters = FilterParam(state = c("1", "2"))) hemizygous(object, filters = FilterParam(state = "2")) homozygous(object, filters = FilterParam(state = "1")) ## S4 method for signature 'HMM' cnvSegs(object, filters = FilterParam(state = as.character(c(1, 2, 5, 6)))) ## S4 method for signature 'HMMList' segs(object) ## S4 method for signature 'HMMList' hemizygous(object) ## S4 method for signature 'HMMList' homozygous(object) ## S4 method for signature 'HMMList' duplication(object) ## S4 method for signature 'HMMList' cnvSegs(object, filters = FilterParam(state = as.character(c(1, 2, 5, 6)))) ## S4 method for signature 'HMMList' cnvFilter(object, filters = FilterParam()) ## S4 method for signature 'HmmGRanges' cnvSegs(object, filters = FilterParam(state = as.character(c(1, 2, 5, 6))))
cnvFilter(object, filters = FilterParam()) cnvSegs(object, filters = FilterParam(state = c("1", "2", "5", "6"))) duplication(object, filters = FilterParam(state = c("5", "6"))) deletion(object, filters = FilterParam(state = c("1", "2"))) hemizygous(object, filters = FilterParam(state = "2")) homozygous(object, filters = FilterParam(state = "1")) ## S4 method for signature 'HMM' cnvSegs(object, filters = FilterParam(state = as.character(c(1, 2, 5, 6)))) ## S4 method for signature 'HMMList' segs(object) ## S4 method for signature 'HMMList' hemizygous(object) ## S4 method for signature 'HMMList' homozygous(object) ## S4 method for signature 'HMMList' duplication(object) ## S4 method for signature 'HMMList' cnvSegs(object, filters = FilterParam(state = as.character(c(1, 2, 5, 6)))) ## S4 method for signature 'HMMList' cnvFilter(object, filters = FilterParam()) ## S4 method for signature 'HmmGRanges' cnvSegs(object, filters = FilterParam(state = as.character(c(1, 2, 5, 6))))
object |
see |
filters |
a |
data(snp_exp) fit <- hmm2(snp_exp) segs(fit) ## all intervals cnvSegs(fit) filter_param <- FilterParam(probability=0.95, numberFeatures=10, state=c("1", "2")) cnvSegs(fit, filter_param) filter_param <- FilterParam(probability=0.5, numberFeatures=2, state=c("1", "2")) cnvSegs(fit, filter_param) hemizygous(fit) homozygous(fit) duplication(fit)
data(snp_exp) fit <- hmm2(snp_exp) segs(fit) ## all intervals cnvSegs(fit) filter_param <- FilterParam(probability=0.95, numberFeatures=10, state=c("1", "2")) cnvSegs(fit, filter_param) filter_param <- FilterParam(probability=0.5, numberFeatures=2, state=c("1", "2")) cnvSegs(fit, filter_param) hemizygous(fit) homozygous(fit) duplication(fit)
Raw SNP array processed files have headers and variable labels that
may depend the software, how the output files was saved, the
software version, and other factors. The purpose of this container
is to collect the parameters relevant for reading in the source
files for a particular project in a single container. This may
require some experimentation as the example illustrates. The
function fread
in the data.table
package
greatly simplifies this process.
CopyNumScanParams( cnvar = "Log R Ratio", bafvar = "B Allele Freq", gtvar = c("Allele1 - AB", "Allele2 - AB"), index_genome = integer(), select = integer(), scale = 1000, row.names = 1L ) ## S4 method for signature 'CopyNumScanParams' show(object)
CopyNumScanParams( cnvar = "Log R Ratio", bafvar = "B Allele Freq", gtvar = c("Allele1 - AB", "Allele2 - AB"), index_genome = integer(), select = integer(), scale = 1000, row.names = 1L ) ## S4 method for signature 'CopyNumScanParams' show(object)
cnvar |
length-one character vector providing name of variable for log R ratios |
bafvar |
length-one character vector providing name of variable for B allele frequencies |
gtvar |
length-one character vector providing name of variable for genotype calls |
index_genome |
integer vector indicating which rows of the of the source files (e.g., GenomeStudio) to keep. By matching on a sorted GRanges object containing the feature annotation (see example), the information on the markers will also be sorted. |
select |
integer vector specifying indicating which columns of the source files to import (see examples) |
scale |
length-one numeric vector for rescaling the raw data and coercing to class integer. By default, the low-level data will be scaled and saved on disk as integers. |
row.names |
length-one numeric vector indicating which column the SNP names are in |
object |
a |
index_genome
an integer vector
cnvar
the column label for the log R ratios
bafvar
the column label for the B allele frequencies
gtvar
the column label(s) for the genotypes
scale
length-one numeric vector indicating how the low-level data should be scaled prior to saving on disk
select
numeric vector indicating which columns to read
row.names
length-one numeric vector indicating which column the SNP names are in
CopyNumScanParams() ## empty container
CopyNumScanParams() ## empty container
This function is not intended to be called directly by the user, and is exported only for internal use by other BioC packages.
doUpdate(param)
doUpdate(param)
param |
An object containing parameters for the HMM |
If there are multiple markers on the same chromosome with the same annotated position, only the first is kept.
dropDuplicatedMapLocs(object)
dropDuplicatedMapLocs(object)
object |
a container for which the methods seqnames and start are defined |
an object of the same class with duplicated genomic positions removed
data(snp_exp) g <- rowRanges(snp_exp) ## duplicate the first row g[length(g)] <- g[1] rowRanges(snp_exp) <- g snp_exp2 <- dropDuplicatedMapLocs(snp_exp)
data(snp_exp) g <- rowRanges(snp_exp) ## duplicate the first row g[length(g)] <- g[1] rowRanges(snp_exp) <- g snp_exp2 <- dropDuplicatedMapLocs(snp_exp)
Removes markers on chromosomes X and Y.
dropSexChrom(object)
dropSexChrom(object)
object |
an object for which the methods |
an object of the same class as the input
Get or set a matrix of emission probabilities. This function is exported primarily for internal use by other BioC packages.
emission(object) emission(object) <- value
emission(object) emission(object) <- value
object |
see |
value |
a matrix of emission probabilities |
matrix
Parameters for computing emission probabilities include the starting values for the Baum Welch update and initial state probabilities.
emissionParam(object) emissionParam(object) <- value
emissionParam(object) emissionParam(object) <- value
object |
an object of class |
value |
an object of class |
EmissionParam
instance
hparam <- HmmParam() emissionParam(hparam) ep <- EmissionParam() cn_means(ep) <- log2(c(.1/2, 1/2, 2/2, 2/2, 3/2, 4/2)) emissionParam(hparam) <- ep
hparam <- HmmParam() emissionParam(hparam) ep <- EmissionParam() cn_means(ep) <- log2(c(.1/2, 1/2, 2/2, 2/2, 3/2, 4/2)) emissionParam(hparam) <- ep
The maximum a posteriori estimate of the trio copy number state for
each genomic range is represented in a
GRanges
-derived class. Ultimately, these ranges will
be filtered based on the trio copy number state (e.g., denovo
deletions), size, number of features (SNPs), or chromosome.
FilterParam
is a container for the parameters commmonly used
to filter the genomic ranges.
FilterParam( probability = 0.99, numberFeatures = 10, seqnames = paste0("chr", c(1:22, "X", "Y")), state = as.character(1:6), width = 1L ) ## S4 method for signature 'FilterParam' probability(object) ## S4 method for signature 'FilterParam' state(object) ## S4 method for signature 'FilterParam' show(object)
FilterParam( probability = 0.99, numberFeatures = 10, seqnames = paste0("chr", c(1:22, "X", "Y")), state = as.character(1:6), width = 1L ) ## S4 method for signature 'FilterParam' probability(object) ## S4 method for signature 'FilterParam' state(object) ## S4 method for signature 'FilterParam' show(object)
probability |
minumum probability for the call |
numberFeatures |
minumum number of SNPs/nonpolymorphic features in a region |
seqnames |
the seqnames (character string or |
state |
character: the HMM states to keep |
width |
the minimum widht of a region |
object |
a |
probability
a length-one numeric vector indicating the
minimum posterior probability for the called state. Genomic
intervals with posterior probabilities below probability
will be filtered.
numberFeatures
a positive integer indicating the minimum number of features in a segment
seqnames
a character vector of seqnames
to select
(i.e., 'chr1' for only those intervals on chromosome 1)
width
positive integer indicating the minimal width of genomic intervals
state
character string indicating which hidden Markov model states to select
fp <- FilterParam() width(fp) numberFeatures(fp) seqnames(fp) ## To select CNV segments for which ## - the CNV call has a 'posterior' probability of at least 0.95 ## - the number of features is at least 10 ## - the HMM states are 1 (homozygous deletion) or 2 (hemizygous deletion) FilterParam(probability=0.95, numberFeatures=10, state=c("1", "2"))
fp <- FilterParam() width(fp) numberFeatures(fp) seqnames(fp) ## To select CNV segments for which ## - the CNV call has a 'posterior' probability of at least 0.95 ## - the number of features is at least 10 ## - the HMM states are 1 (homozygous deletion) or 2 (hemizygous deletion) FilterParam(probability=0.95, numberFeatures=10, state=c("1", "2"))
Accessor for HMM filter parameters
filters(object)
filters(object)
object |
see |
Extract SNP genotypes. Genotypes are assumed to be represented as integers: 1=AA, 2=AB, 3=BB.
genotypes(object) ## S4 method for signature 'ArrayViews' lrr(object) ## S4 method for signature 'ArrayViews' baf(object) ## S4 method for signature 'ArrayViews' genotypes(object) ## S4 method for signature 'SnpArrayExperiment' baf(object) ## S4 method for signature 'SnpArrayExperiment' copyNumber(object) ## S4 method for signature 'SnpArrayExperiment' lrr(object) ## S4 method for signature 'SnpArrayExperiment' genotypes(object)
genotypes(object) ## S4 method for signature 'ArrayViews' lrr(object) ## S4 method for signature 'ArrayViews' baf(object) ## S4 method for signature 'ArrayViews' genotypes(object) ## S4 method for signature 'SnpArrayExperiment' baf(object) ## S4 method for signature 'SnpArrayExperiment' copyNumber(object) ## S4 method for signature 'SnpArrayExperiment' lrr(object) ## S4 method for signature 'SnpArrayExperiment' genotypes(object)
object |
see |
copyNumber
Create an example SnpArrayExperiment from source files containing marker-level genomic data that are provided in this package
getExampleSnpExperiment(bsgenome)
getExampleSnpExperiment(bsgenome)
bsgenome |
a |
## Not run: if(require("BSgenome.Hsapiens.UCSC.hg18")){ genome <- BSgenome.Hsapiens.UCSC.hg18 snp_exp <- getExampleSnpExperiment(genome) } ## End(Not run)
## Not run: if(require("BSgenome.Hsapiens.UCSC.hg18")){ genome <- BSgenome.Hsapiens.UCSC.hg18 snp_exp <- getExampleSnpExperiment(genome) } ## End(Not run)
Accessor for HMM model parameters
getHmmParams(object)
getHmmParams(object)
object |
see |
hmm_object <- HMM() getHmmParams(hmm_object)
hmm_object <- HMM() getHmmParams(hmm_object)
The contructor HMM
creates and object of class
HMM
. Not typically called directly by the user.
HMM( granges = GRanges(), param = HmmParam(), posterior = matrix(), filters = FilterParam() ) ## S4 method for signature 'HMM' state(object) ## S4 method for signature 'HMM' show(object)
HMM( granges = GRanges(), param = HmmParam(), posterior = matrix(), filters = FilterParam() ) ## S4 method for signature 'HMM' state(object) ## S4 method for signature 'HMM' show(object)
granges |
a |
param |
a |
posterior |
matrix of posterior probabilities |
filters |
an object of class |
object |
a |
granges
a GRanges
object
param
a HmmParam
object
posterior
a matrix of posterior probabilities
filters
a FilterParam
object
data(snp_exp) hmm_list <- hmm2(snp_exp[,1]) resultsFirstSample <- hmm_list[[1]] resultsFirstSample HMM()
data(snp_exp) hmm_list <- hmm2(snp_exp[,1]) resultsFirstSample <- hmm_list[[1]] resultsFirstSample HMM()
This function is intended for estimating the integer copy number from germline or DNA of clonal origin using a 6-state HMM. The states are homozygous deletion, hemizygous deletion, diploid copy number, diploid region of homozygosity, single copy gain, and two+ copy gain. Because heterozygous markers are more informative for copy number than homozygous markers and regions of homozgosity are common in normal genomes, we currently computed a weighted average of the BAF emission matrix with a uniform 0,1 distribution by the probability that the marker is heterozygous, thereby downweighting the contribution of homozygous SNPs to the likelihood. In addition to making the detection of copy-neutral regions of homozgosity less likely, it also helps prevent confusing hemizygous deletions with copy neutral regions of homozygosity – the former would be driven mostly by the log R ratios. This is experimental and subject to change.
hmm2( object, emission_param = EmissionParam(), transition_param = TransitionParam(), ... ) ## S4 method for signature 'SnpArrayExperiment' hmm2( object, emission_param = EmissionParam(), transition_param = TransitionParam(), ... ) ## S4 method for signature 'oligoSnpSet' hmm2( object, emission_param = EmissionParam(), transition_param = TransitionParam(), ... ) ## S4 method for signature 'ArrayViews' hmm2( object, emission_param = EmissionParam(), transition_param = TransitionParam(), tolerance = 2, verbose = FALSE, ... )
hmm2( object, emission_param = EmissionParam(), transition_param = TransitionParam(), ... ) ## S4 method for signature 'SnpArrayExperiment' hmm2( object, emission_param = EmissionParam(), transition_param = TransitionParam(), ... ) ## S4 method for signature 'oligoSnpSet' hmm2( object, emission_param = EmissionParam(), transition_param = TransitionParam(), ... ) ## S4 method for signature 'ArrayViews' hmm2( object, emission_param = EmissionParam(), transition_param = TransitionParam(), tolerance = 2, verbose = FALSE, ... )
object |
|
emission_param |
A |
transition_param |
A |
... |
currently ignored |
tolerance |
length-one numeric vector. When the difference in the log-likelihood of the Viterbi state path between successive models (updated by Baum Welch) is less than the tolerance, no additional model updates are performed. |
verbose |
logical. Whether to display messages indicating progress. |
The hmm2
method allows parallelization across samples using
the foreach paradigm. Parallelization is automatic when enabled
via packages such as snow/doSNOW.
tp <- TransitionParam() TransitionParam(taup=1e12) data(snp_exp) emission_param <- EmissionParam(temper=1/2) fit <- hmm2(snp_exp, emission_param) unlist(fit) cnvSegs(fit) ## There is too little data to infer cnv reliably in this trivial example. ## To illustrate filtering options on the results, we select ## CNVs for which ## - the CNV call has a posterior probability of at least 0.5 ## - the number of features is 2 or more ## - the HMM states are 1 (homozygous deletion) or 2 (hemizygous deletion) fp <- FilterParam(probability=0.5, numberFeatures=2, state=c("1", "2")) cnvSegs(fit, fp) ## for parallelization ## Not run: library(snow) library(doSNOW) cl <- makeCluster(2, type = "SOCK") registerDoSNOW(cl) fit <- hmm2(snp_exp, emission_param) ## End(Not run)
tp <- TransitionParam() TransitionParam(taup=1e12) data(snp_exp) emission_param <- EmissionParam(temper=1/2) fit <- hmm2(snp_exp, emission_param) unlist(fit) cnvSegs(fit) ## There is too little data to infer cnv reliably in this trivial example. ## To illustrate filtering options on the results, we select ## CNVs for which ## - the CNV call has a posterior probability of at least 0.5 ## - the number of features is 2 or more ## - the HMM states are 1 (homozygous deletion) or 2 (hemizygous deletion) fp <- FilterParam(probability=0.5, numberFeatures=2, state=c("1", "2")) cnvSegs(fit, fp) ## for parallelization ## Not run: library(snow) library(doSNOW) cl <- makeCluster(2, type = "SOCK") registerDoSNOW(cl) fit <- hmm2(snp_exp, emission_param) ## End(Not run)
HMMList
classThe constructor function for the HMMList
class. The
constructor is useful for representing a list of HMM
objects.
HMMList(object)
HMMList(object)
object |
a list. Each element of the list is in instance of
the |
Each element of the HMMList contains the genomic intervals of the
HMM segmentation (GRanges-derived object), parameters from the
Baum-Welch, and a FilterParam
object.
## S4 method for signature 'HMMList' show(object) ## S4 method for signature 'HMMList' unlist(x, recursive = TRUE, use.names = TRUE)
## S4 method for signature 'HMMList' show(object) ## S4 method for signature 'HMMList' unlist(x, recursive = TRUE, use.names = TRUE)
object |
a |
x |
a |
recursive |
logical; currently ignored |
use.names |
logical; currently ignored |
.Data
a list. Each element of the list should be a HMM
object.
data(snp_exp) fit <- hmm2(snp_exp) class(fit) identical(length(fit), ncol(snp_exp)) unlist(fit)
data(snp_exp) fit <- hmm2(snp_exp) class(fit) identical(length(fit), ncol(snp_exp)) unlist(fit)
Contains emission probabilities, parameters for emission probabilities, and transition probabilities required for computing the most likely state path via the Viterbi algorithm
HmmParam( emission = matrix(0, 0, 0), emission_param = EmissionParam(), transition = rep(0.99, nrow(emission)), chromosome = character(nrow(emission)), loglik = LogLik(), viterbi = Viterbi(), compute_posteriors = TRUE, verbose = FALSE ) ## S4 method for signature 'HmmParam' show(object) ## S4 method for signature 'HmmParam' nrow(x) ## S4 method for signature 'HmmParam' ncol(x)
HmmParam( emission = matrix(0, 0, 0), emission_param = EmissionParam(), transition = rep(0.99, nrow(emission)), chromosome = character(nrow(emission)), loglik = LogLik(), viterbi = Viterbi(), compute_posteriors = TRUE, verbose = FALSE ) ## S4 method for signature 'HmmParam' show(object) ## S4 method for signature 'HmmParam' nrow(x) ## S4 method for signature 'HmmParam' ncol(x)
emission |
A matrix of emission probabilities |
emission_param |
an object of class |
transition |
vector of transition probabilities whose length is N-1, where N is the number of markers. User should provide the probability that the state at marker j is the same as the state at marker j-1. It is assumed that the probability of transitioning to state_j from state_j-1 is the same for all states != state_j-1. |
chromosome |
character vector |
loglik |
an object of class |
viterbi |
an object of class |
compute_posteriors |
logical |
verbose |
logical |
object |
a |
x |
a |
HmmParam()
HmmParam()
The results of a 6-state HMM fit to simulated copy number and genotype data.
a GRanges
object
Constructor for HmmTrellisParam class
HmmTrellisParam( ylimits = list(c(0, 1), c(-3, 1)), expandfun = function(g) { width(g) * 50 } )
HmmTrellisParam( ylimits = list(c(0, 1), c(-3, 1)), expandfun = function(g) { width(g) * 50 } )
ylimits |
length-two list of the y-axis limits for B allele frequencies and log R ratios, respectively |
expandfun |
a function that takes a length-one |
Parameters for plotting idiograms
IdiogramParams( seqnames = character(), seqlengths = numeric(), unit = "kb", genome = "hg19", box = list(color = "blue", lwd = 1) ) ## S4 method for signature 'IdiogramParams,ANY' plot(x, y, ...)
IdiogramParams( seqnames = character(), seqlengths = numeric(), unit = "kb", genome = "hg19", box = list(color = "blue", lwd = 1) ) ## S4 method for signature 'IdiogramParams,ANY' plot(x, y, ...)
seqnames |
length-one character vector providing chromosome name |
seqlengths |
length-one numeric vector indicating size of chromosome |
unit |
character string indicating unit for genomic position |
genome |
character string indicating genome build |
box |
a list of parameters for plotting the box around the part of the idiogram that is plotted |
x |
an IdiogramParam object |
y |
ignored |
... |
ignored |
IdiogramParam object
Paramater class for plotting idiograms
## S4 method for signature 'IdiogramParams' show(object)
## S4 method for signature 'IdiogramParams' show(object)
object |
an IdiogramParam object |
seqnames
length-one character vector providing chromosome name
seqlengths
length-one numeric vector indicating size of chromosome
unit
character string indicating unit for genomic position (default is 'kb')
genome
character string indicating genome build
box
a list of parameters for plotting the box around the part of the idiogram that is plotted.
if(require(BSgenome.Hsapiens.UCSC.hg18) && require(grid)){ si <- seqinfo(BSgenome.Hsapiens.UCSC.hg18) iparam <- IdiogramParams(seqnames="chr1", genome="hg18", seqlengths=seqlengths(si)["chr1"], box=list(xlim=c(20e6L, 25e6L), color="blue", lwd=2)) iparam idiogram <- plot(iparam) vp <- viewport(x=0.05, y=0.8, width=unit(0.9, "npc"), height=unit(0.2, "npc"), name="vp1", just=c("left", "bottom")) grid.newpage() pushViewport(vp) print(idiogram, vp=vp, newpage=FALSE) }
if(require(BSgenome.Hsapiens.UCSC.hg18) && require(grid)){ si <- seqinfo(BSgenome.Hsapiens.UCSC.hg18) iparam <- IdiogramParams(seqnames="chr1", genome="hg18", seqlengths=seqlengths(si)["chr1"], box=list(xlim=c(20e6L, 25e6L), color="blue", lwd=2)) iparam idiogram <- plot(iparam) vp <- viewport(x=0.05, y=0.8, width=unit(0.9, "npc"), height=unit(0.2, "npc"), name="vp1", just=c("left", "bottom")) grid.newpage() pushViewport(vp) print(idiogram, vp=vp, newpage=FALSE) }
Assess whether genotype is heterozygous based on BAFs
isHeterozygous(object, cutoff) ## S4 method for signature 'ArrayViews' isHeterozygous(object, cutoff) ## S4 method for signature 'SnpArrayExperiment' isHeterozygous(object, cutoff) ## S4 method for signature 'numeric' isHeterozygous(object, cutoff) ## S4 method for signature 'matrix' isHeterozygous(object, cutoff)
isHeterozygous(object, cutoff) ## S4 method for signature 'ArrayViews' isHeterozygous(object, cutoff) ## S4 method for signature 'SnpArrayExperiment' isHeterozygous(object, cutoff) ## S4 method for signature 'numeric' isHeterozygous(object, cutoff) ## S4 method for signature 'matrix' isHeterozygous(object, cutoff)
object |
a SnpArrayExperiment or ArrayViews object containing BAFs, a matrix of BAFs, or a numeric vector of BAFs. vector of BAFs |
cutoff |
a length-two numeric vector providing the range of BAFs consistent with allelic heterozygosity |
if(require("BSgenome.Hsapiens.UCSC.hg18")){ bsgenome <- BSgenome.Hsapiens.UCSC.hg18 snp_exp <- getExampleSnpExperiment(bsgenome) is_het <- isHeterozygous(snp_exp[, 1], c(0.4, 0.6)) table(is_het) }
if(require("BSgenome.Hsapiens.UCSC.hg18")){ bsgenome <- BSgenome.Hsapiens.UCSC.hg18 snp_exp <- getExampleSnpExperiment(bsgenome) is_het <- isHeterozygous(snp_exp[, 1], c(0.4, 0.6)) table(is_het) }
A container for the log likelihood of the Viterbi state path. Stores the log likelihood from succesive updates of model parameters. When the difference between the log likelihoods at iteration i and i-1 is below the tolerance, no additional updates are performed.
LogLik(loglik = numeric(), tolerance = 1L)
LogLik(loglik = numeric(), tolerance = 1L)
loglik |
length-one numeric vector for the log likelihood of the Viterbi state path |
tolerance |
if the difference in the log-likelihood of the Viterbi state path after the Baum-Welch update is less than the specified tolerance, no additional Baum-Welch updates are required |
Exported for internal use by other BioC packages
## S4 method for signature 'LogLik' length(x) ## S4 method for signature 'LogLik' show(object)
## S4 method for signature 'LogLik' length(x) ## S4 method for signature 'LogLik' show(object)
x |
object of class |
object |
a |
loglik
a numeric vector
tolerance
a numeric vector
Accessors for objects of class ArrayViews
lrrFile(object) lrrFile(object) <- value bafFile(object) gtFile(object) ## S4 method for signature 'ArrayViews' lrrFile(object) ## S4 replacement method for signature 'ArrayViews' lrrFile(object) <- value ## S4 method for signature 'ArrayViews' bafFile(object) ## S4 method for signature 'ArrayViews' gtFile(object)
lrrFile(object) lrrFile(object) <- value bafFile(object) gtFile(object) ## S4 method for signature 'ArrayViews' lrrFile(object) ## S4 replacement method for signature 'ArrayViews' lrrFile(object) <- value ## S4 method for signature 'ArrayViews' bafFile(object) ## S4 method for signature 'ArrayViews' gtFile(object)
object |
see showMethods("lrrFile") |
value |
a character vector of filenames for the log R ratios |
views <- ArrayViews(parsedPath=tempdir()) sourcePaths(views) lrrFile(views) bafFile(views) gtFile(views)
views <- ArrayViews(parsedPath=tempdir()) sourcePaths(views) lrrFile(views) bafFile(views) gtFile(views)
Exported for internal use by other BioC packages
Remove SNPs with NAs in any of the low-level estimates
NA_filter(x, i)
NA_filter(x, i)
x |
a container for SNP data ( |
i |
integer vector to subset |
An object of the same class
The number of SNP/nonpolymorphic probes contained in a genomic interval
numberFeatures(object)
numberFeatures(object)
object |
see |
A character string indicating the complete path for storing parsed files.
parsedPath(object) ## S4 method for signature 'ArrayViews' parsedPath(object)
parsedPath(object) ## S4 method for signature 'ArrayViews' parsedPath(object)
object |
a |
This function parses genome studio files, writing the low-level data for log R ratios, B allele frequencies, and genotypes to disk as integers (1 file per subject per data type).
parseSourceFile(object, param) ## S4 method for signature 'ArrayViews,CopyNumScanParams' parseSourceFile(object, param)
parseSourceFile(object, param) ## S4 method for signature 'ArrayViews,CopyNumScanParams' parseSourceFile(object, param)
object |
An |
param |
An object of class |
ArrayViews
ArrayViews
CopyNumScanParams
require(BSgenome.Hsapiens.UCSC.hg18) bsgenome <- BSgenome.Hsapiens.UCSC.hg18 require(data.table) extdir <- system.file("extdata", package="VanillaICE", mustWork=TRUE) features <- suppressWarnings(fread(file.path(extdir, "SNP_info.csv"))) fgr <- GRanges(paste0("chr", features$Chr), IRanges(features$Position, width=1), isSnp=features[["Intensity Only"]]==0) fgr <- SnpGRanges(fgr) names(fgr) <- features[["Name"]] seqlevels(fgr) <- seqlevels(bsgenome)[seqlevels(bsgenome) %in% seqlevels(fgr)] seqinfo(fgr) <- seqinfo(bsgenome)[seqlevels(fgr),] fgr <- sort(fgr) files <- list.files(extdir, full.names=TRUE, recursive=TRUE, pattern="FinalReport") views <- ArrayViews(rowRanges=fgr, sourcePaths=files, parsedPath=tempdir()) show(views) ## read the first file dat <- fread(files[1], skip="[Data]") ## information to store on the markers select <- match(c("SNP Name", "Allele1 - AB", "Allele2 - AB", "Log R Ratio", "B Allele Freq"), names(dat)) ## ## which rows to keep in the MAP file. By matching on the sorted GRanges object ## containing the feature annotation, the low-level data for the log R ratios/ ## B allele frequencies will also be sorted ## index_genome <- match(names(fgr), dat[["SNP Name"]]) scan_params <- CopyNumScanParams(index_genome=index_genome, select=select) ## ## parse the source files ## parseSourceFile(views, scan_params) list.files(parsedPath(views)) ## ## Inspecting source data through accessors defined on the views object ## require(oligoClasses) ## log R ratios r <- head(lrr(views)) ## B allele frequencies b <- head(baf(views)) g <- head(genotypes(views))
require(BSgenome.Hsapiens.UCSC.hg18) bsgenome <- BSgenome.Hsapiens.UCSC.hg18 require(data.table) extdir <- system.file("extdata", package="VanillaICE", mustWork=TRUE) features <- suppressWarnings(fread(file.path(extdir, "SNP_info.csv"))) fgr <- GRanges(paste0("chr", features$Chr), IRanges(features$Position, width=1), isSnp=features[["Intensity Only"]]==0) fgr <- SnpGRanges(fgr) names(fgr) <- features[["Name"]] seqlevels(fgr) <- seqlevels(bsgenome)[seqlevels(bsgenome) %in% seqlevels(fgr)] seqinfo(fgr) <- seqinfo(bsgenome)[seqlevels(fgr),] fgr <- sort(fgr) files <- list.files(extdir, full.names=TRUE, recursive=TRUE, pattern="FinalReport") views <- ArrayViews(rowRanges=fgr, sourcePaths=files, parsedPath=tempdir()) show(views) ## read the first file dat <- fread(files[1], skip="[Data]") ## information to store on the markers select <- match(c("SNP Name", "Allele1 - AB", "Allele2 - AB", "Log R Ratio", "B Allele Freq"), names(dat)) ## ## which rows to keep in the MAP file. By matching on the sorted GRanges object ## containing the feature annotation, the low-level data for the log R ratios/ ## B allele frequencies will also be sorted ## index_genome <- match(names(fgr), dat[["SNP Name"]]) scan_params <- CopyNumScanParams(index_genome=index_genome, select=select) ## ## parse the source files ## parseSourceFile(views, scan_params) list.files(parsedPath(views)) ## ## Inspecting source data through accessors defined on the views object ## require(oligoClasses) ## log R ratios r <- head(lrr(views)) ## B allele frequencies b <- head(baf(views)) g <- head(genotypes(views))
Accessor for probability filter
probability(object)
probability(object)
object |
a |
Rescale a numeric vector
rescale(x, l, u)
rescale(x, l, u)
x |
numeric vector |
l |
lower limit of rescaled |
u |
upper limit of rescaled |
Compute the column-wide or row-wise mode of numeric matrices
rowModes(x) colModes(x) rowMAD(x, ...)
rowModes(x) colModes(x) rowMAD(x, ...)
x |
matrix |
... |
additional arguments to rowMedians |
numeric vector
X <- matrix(rnorm(100), 10, 10) rowMAD(X)
X <- matrix(rnorm(100), 10, 10) rowMAD(X)
Accessor to obtain all segments from the HMM.
segs(object)
segs(object)
object |
see |
a GRanges
-derived object
Viterbi
Show method for objects of class Viterbi
## S4 method for signature 'Viterbi' show(object)
## S4 method for signature 'Viterbi' show(object)
object |
a |
A container for low-level summaries used for downstream copy number estimation, including log R ratios, B allele frequencies, and genotypes
a SnpArrayExperiment
object
This function is exported primarily for internal use by other BioC packages.
snpArrayAssays(cn = new("matrix"), baf = new("matrix"), ...)
snpArrayAssays(cn = new("matrix"), baf = new("matrix"), ...)
cn |
matrix of log R ratios |
baf |
matrix of B allele frequencies |
... |
additional matrices of the same dimension, such as SNP genotypes. |
data(snp_exp, package="VanillaICE") r <- lrr(snp_exp) b <- baf(snp_exp) sl <- snpArrayAssays(cn=r, baf=b)
data(snp_exp, package="VanillaICE") r <- lrr(snp_exp) b <- baf(snp_exp) sl <- snpArrayAssays(cn=r, baf=b)
Constructor for SnpArrayExperiment
SnpArrayExperiment( cn, baf, rowRanges = GRanges(), colData = DataFrame(), isSnp = logical(), ... ) ## S4 method for signature 'missing' SnpArrayExperiment( cn, baf, rowRanges = GRanges(), colData = DataFrame(), isSnp = logical(), ... ) ## S4 method for signature 'matrix' SnpArrayExperiment( cn, baf, rowRanges = GRanges(), colData = DataFrame(row.names = colnames(cn)), isSnp = logical(), ... )
SnpArrayExperiment( cn, baf, rowRanges = GRanges(), colData = DataFrame(), isSnp = logical(), ... ) ## S4 method for signature 'missing' SnpArrayExperiment( cn, baf, rowRanges = GRanges(), colData = DataFrame(), isSnp = logical(), ... ) ## S4 method for signature 'matrix' SnpArrayExperiment( cn, baf, rowRanges = GRanges(), colData = DataFrame(row.names = colnames(cn)), isSnp = logical(), ... )
cn |
matrix of copy number estimates (e.g., log R ratios) |
baf |
matrix of B allele frequencies |
rowRanges |
GRanges object for SNPs/nonpolymorphic markers |
colData |
DataFrame containing sample-level covariates |
isSnp |
logical vector indicating whether marker is a SNP |
... |
additional arguments passed to |
## empty container library(VanillaICE) data(snp_exp, package="VanillaICE") # example se <- SnpArrayExperiment(cn=lrr(snp_exp), baf=baf(snp_exp), rowRanges=rowRanges(snp_exp))
## empty container library(VanillaICE) data(snp_exp, package="VanillaICE") # example se <- SnpArrayExperiment(cn=lrr(snp_exp), baf=baf(snp_exp), rowRanges=rowRanges(snp_exp))
A single-argument generic function to construct a SnpArrayExperiment.
SnpExperiment(object) ## S4 method for signature 'ArrayViews' SnpExperiment(object)
SnpExperiment(object) ## S4 method for signature 'ArrayViews' SnpExperiment(object)
object |
see |
view <- ArrayViews() SnpExperiment(view)
view <- ArrayViews() SnpExperiment(view)
An extension to GRanges for representing SNPs
Constructor for SnpGRanges class
SnpGRanges(object = GRanges(), isSnp, ...) ## S4 method for signature 'missing' SnpGRanges(object, isSnp) ## S4 method for signature 'GRanges' SnpGRanges(object, isSnp)
SnpGRanges(object = GRanges(), isSnp, ...) ## S4 method for signature 'missing' SnpGRanges(object, isSnp) ## S4 method for signature 'GRanges' SnpGRanges(object, isSnp)
object |
A |
isSnp |
A logical vector. Each genomic interval in the
|
... |
ignored |
elementMetadata
a SnpDataFrame
SnpGRanges() g <- GRanges("chr1", IRanges(15L, 15L)) SnpGRanges(g, isSnp=TRUE)
SnpGRanges() g <- GRanges("chr1", IRanges(15L, 15L)) SnpGRanges(g, isSnp=TRUE)
Files containing SNP-level summaries for log R ratios, B allele frequencies, and genotypes – one sample per subject – are required.
sourcePaths(object)
sourcePaths(object)
object |
an |
sourcePaths(ArrayViews())
sourcePaths(ArrayViews())
Retrieve genomic location of SNPs
## S4 method for signature 'oligoSnpSet' start(x)
## S4 method for signature 'oligoSnpSet' start(x)
x |
a |
The states are represented as integers: 1=homozygous deletion, 2=hemizygous deletion, 3=diploid normal heterozygosity, 4=diploid region of homozygosity, 5=single copy gain, 6=two or more copy gain.
## S4 method for signature 'Viterbi' state(object)
## S4 method for signature 'Viterbi' state(object)
object |
a |
Extract the copy number state for each genomic interval.
## S4 method for signature 'HmmGRanges' state(object)
## S4 method for signature 'HmmGRanges' state(object)
object |
a |
This function simplifies the process of sweeping the modal log R
ratio from the rows or columns of a SnpArrayExperiment
object. It is most useful when a large number of samples (more
than 10) are available and the dataset is a collection of germline
samples. We assume that the samples are from a single batch and
that the modal value will be a robust estimate of the mean log R
ratio for diploid copy number. Variation in the modal estimates
between markers is presumed to be attributable to probe effects
(e.g., differences hybridization efficiency/PCR do to sequence
composition). For sex chromosomes, one should apply this function
separately to men and women and then recenter the resulting matrix
according to the expected copy number.
sweepMode(x, MARGIN) ## S4 method for signature 'SnpArrayExperiment' sweepMode(x, MARGIN)
sweepMode(x, MARGIN) ## S4 method for signature 'SnpArrayExperiment' sweepMode(x, MARGIN)
x |
see |
MARGIN |
integer indicating which margin (1=rows, 2=columns) to sweep the mode |
an object of the same class as x
data(snp_exp) snp_exp_rowcentered <- sweepMode(snp_exp, 1) snp_exp_colcentered <- sweepMode(snp_exp, 2) x <- lrr(snp_exp) x_rowcentered <- sweep(x, 1, rowModes(x)) all.equal(lrr(snp_exp_rowcentered), x_rowcentered)
data(snp_exp) snp_exp_rowcentered <- sweepMode(snp_exp, 1) snp_exp_colcentered <- sweepMode(snp_exp, 2) x <- lrr(snp_exp) x_rowcentered <- sweep(x, 1, rowModes(x)) all.equal(lrr(snp_exp_rowcentered), x_rowcentered)
Threshold numeric values according to user-specific limits. The thresholded values can also be jittered near the limits.
threshold(x, lim = c(-Inf, Inf), amount = 0)
threshold(x, lim = c(-Inf, Inf), amount = 0)
x |
numeric matrix or vector |
lim |
limit at which to threshold entries in |
amount |
see |
x <- rnorm(1000, 0, 3) y <- threshold(x, c(-5,5)) range(y)
x <- rnorm(1000, 0, 3) y <- threshold(x, c(-5,5)) range(y)
Contains parameters for computing transition probabilities
TransitionParam(taup = 1e+10, taumax = 1 - 5e+06) ## S4 method for signature 'TransitionParam' show(object)
TransitionParam(taup = 1e+10, taumax = 1 - 5e+06) ## S4 method for signature 'TransitionParam' show(object)
taup |
length-one numeric vector |
taumax |
The maximum probability that the current state is the same as the preceding state. See details |
object |
a |
Diagonal elements of the transition probability matrix are
computed as e^-2*d/taup, where d is the distance between markers
i and i-1 and taup
is typically in the range of 1xe10. This
probability is constrained to be no larger than taumax
. The
probabilities on the off-diagonal elements are the same and are
subject to the constraint that the rows of the transition
probability matrix sum to 1.
TransitionParam() ## higher values of taup make transitions between states less likely TransitionParam(taup=1e12)
TransitionParam() ## higher values of taup make transitions between states less likely TransitionParam(taup=1e12)
This function is not intended to be called directly by the user. It is exported in the package NAMESPACE for internal use by other BioC packages.
updateHmmParams( object, emission_param = EmissionParam(), transition_param = TransitionParam() )
updateHmmParams( object, emission_param = EmissionParam(), transition_param = TransitionParam() )
object |
a |
emission_param |
a |
transition_param |
a |
Default viewports for plotting CNV data with lattice-style graphics
viewports()
viewports()
list
vps <- viewports()
vps <- viewports()
Data for the graphic is generated by a call to grangesData
.
xyplotList(granges, se, param = HmmTrellisParam()) ## S4 method for signature 'HmmGRanges,SnpArrayExperiment' xyplotList(granges, se, param = HmmTrellisParam()) ## S4 method for signature 'GRangesList,SnpArrayExperiment' xyplotList(granges, se, param = HmmTrellisParam()) xygrid(trellis_plot, viewports, granges)
xyplotList(granges, se, param = HmmTrellisParam()) ## S4 method for signature 'HmmGRanges,SnpArrayExperiment' xyplotList(granges, se, param = HmmTrellisParam()) ## S4 method for signature 'GRangesList,SnpArrayExperiment' xyplotList(granges, se, param = HmmTrellisParam()) xygrid(trellis_plot, viewports, granges)
granges |
a |
se |
a |
param |
trellis parameters for plotting HMM |
trellis_plot |
an object of class |
viewports |
a list of viewports as provided by the |
if(require("BSgenome.Hsapiens.UCSC.hg18")){ bsgenome <- BSgenome.Hsapiens.UCSC.hg18 snp_exp <- getExampleSnpExperiment(bsgenome) seqlevels(snp_exp, pruning.mode="coarse") <- "chr22" fit <- hmm2(snp_exp) g <- reduce(hemizygous(fit), min.gapwidth=500e3) trellis_param <- HmmTrellisParam() fig <- xyplotList(g, snp_exp, trellis_param) vps <- viewports() xygrid(fig[[1]], vps, g) }
if(require("BSgenome.Hsapiens.UCSC.hg18")){ bsgenome <- BSgenome.Hsapiens.UCSC.hg18 snp_exp <- getExampleSnpExperiment(bsgenome) seqlevels(snp_exp, pruning.mode="coarse") <- "chr22" fit <- hmm2(snp_exp) g <- reduce(hemizygous(fit), min.gapwidth=500e3) trellis_param <- HmmTrellisParam() fig <- xyplotList(g, snp_exp, trellis_param) vps <- viewports() xygrid(fig[[1]], vps, g) }