Title: | Pathway enrichment using a regularized regression approach |
---|---|
Description: | Compute pathway enrichment scores while accounting for term-term relations. This package uses a regularized multiple linear regression to regress differential expression p-values obtained from multi-condition experiments on a pathway membership matrix. By doing so, it is able to incorporate additional biological knowledge into the enrichment analysis and to estimate pathway enrichment scores more robustly. |
Authors: | Kim Philipp Jablonski [aut, cre] |
Maintainer: | Kim Philipp Jablonski <[email protected]> |
License: | GPL-3 |
Version: | 1.11.1 |
Built: | 2024-12-18 08:41:32 UTC |
Source: | https://github.com/bioc/pareg |
Convert sparse similarity matrix from package data to a
dense version with 1 on its diagonal.
This matrix can then be used by pareg
.
as_dense_sim(mat_sparse)
as_dense_sim(mat_sparse)
mat_sparse |
Sparse matrix. |
Dense matrix
transform_y(c(0, 0.5, 1))
transform_y(c(0, 0.5, 1))
Convert an object of class pareg to class enrichResult. The resulting object can be passed to any method from the enrichplot package and thus allows for nice visualizations of the enrichment results. Note: term similarities are included if available.
as_enrichplot_object(x, pvalue_threshold = 0.05)
as_enrichplot_object(x, pvalue_threshold = 0.05)
x |
An object of class |
pvalue_threshold |
Threshold to select genes for count statistics. |
Object of class enrichResult
.
df_genes <- data.frame( gene = paste("g", 1:20, sep = ""), pvalue = c( rbeta(10, .1, 1), rbeta(10, 1, 1) ) ) df_terms <- rbind( data.frame( term = "foo", gene = paste("g", 1:10, sep = "") ), data.frame( term = "bar", gene = paste("g", 11:20, sep = "") ) ) fit <- pareg(df_genes, df_terms, max_iterations = 10) as_enrichplot_object(fit)
df_genes <- data.frame( gene = paste("g", 1:20, sep = ""), pvalue = c( rbeta(10, .1, 1), rbeta(10, 1, 1) ) ) df_terms <- rbind( data.frame( term = "foo", gene = paste("g", 1:10, sep = "") ), data.frame( term = "bar", gene = paste("g", 11:20, sep = "") ) ) fit <- pareg(df_genes, df_terms, max_iterations = 10) as_enrichplot_object(fit)
as.data.frame for an object of class pareg. Retrieve dataframe with enrichment information.
## S3 method for class 'pareg' as.data.frame(x, row.names = NULL, optional = FALSE, ...)
## S3 method for class 'pareg' as.data.frame(x, row.names = NULL, optional = FALSE, ...)
x |
An object of class |
row.names |
Optional character vector of rownames. |
optional |
Allow optional arguments. |
... |
Additional arguments. |
Dataframe containing enrichment score and name for each pathway.
df_genes <- data.frame( gene = paste("g", 1:20, sep = ""), pvalue = c( rbeta(10, .1, 1), rbeta(10, 1, 1) ) ) df_terms <- rbind( data.frame( term = "foo", gene = paste("g", 1:10, sep = "") ), data.frame( term = "bar", gene = paste("g", 11:20, sep = "") ) ) fit <- pareg(df_genes, df_terms, max_iterations = 10) as.data.frame(fit)
df_genes <- data.frame( gene = paste("g", 1:20, sep = ""), pvalue = c( rbeta(10, .1, 1), rbeta(10, 1, 1) ) ) df_terms <- rbind( data.frame( term = "foo", gene = paste("g", 1:10, sep = "") ), data.frame( term = "bar", gene = paste("g", 11:20, sep = "") ) ) fit <- pareg(df_genes, df_terms, max_iterations = 10) as.data.frame(fit)
Run function for each row of input dataframe in LSF job.
cluster_apply( df_iter, func, .bsub_params = c("-n", "2", "-W", "24:00", "-R", "rusage[mem=10000]"), .tempdir = ".", .packages = c(), ... )
cluster_apply( df_iter, func, .bsub_params = c("-n", "2", "-W", "24:00", "-R", "rusage[mem=10000]"), .tempdir = ".", .packages = c(), ... )
df_iter |
Dataframe over whose rows to iterate. |
func |
Function to apply to each dataframe row. Its arguments must be all dataframe columns. |
.bsub_params |
Parameters to pass to 'bsub' during job submission. |
.tempdir |
Location to store auxiliary files in. |
.packages |
Packages to import in each job. |
... |
Extra arguments for function. |
Dataframe created by concatenating results of each function call.
## Not run: foo <- 42 cluster_apply( data.frame(i = seq_len(3), group = c("A", "B", "C")), function(i, group) { log_debug("hello") data.frame(group = group, i = i, foo = foo, result = foo + 2 * i) }, .packages = c(logger) ) ## End(Not run)
## Not run: foo <- 42 cluster_apply( data.frame(i = seq_len(3), group = c("A", "B", "C")), function(i, group) { log_debug("hello") data.frame(group = group, i = i, foo = foo, result = foo + 2 * i) }, .packages = c(logger) ) ## End(Not run)
Generate similarity matrix for input terms.
compute_term_similarities( df_terms, similarity_function = jaccard, max_similarity = 1 )
compute_term_similarities( df_terms, similarity_function = jaccard, max_similarity = 1 )
df_terms |
Dataframe storing pathway database. |
similarity_function |
Function to compute similarity between two sets. |
max_similarity |
Value to fill diagonal with. |
Symmetric matrix of similarity scores.
df_terms <- data.frame( term = c("A", "A", "B", "B", "B", "C", "C", "C"), gene = c("a", "b", "a", "b", "c", "a", "c", "d") ) compute_term_similarities(df_terms)
df_terms <- data.frame( term = c("A", "A", "B", "B", "B", "C", "C", "C"), gene = c("a", "b", "a", "b", "c", "a", "c", "d") ) compute_term_similarities(df_terms)
Store term membership for each gene.
create_model_df(df_genes, df_terms, pvalue_threshold = 0.05)
create_model_df(df_genes, df_terms, pvalue_threshold = 0.05)
df_genes |
Dataframe storing gene names and DE p-values. |
df_terms |
Dataframe storing pathway database. |
pvalue_threshold |
P-value threshold to create binary columns 'pvalue_sig' and 'pvalue_notsig'. |
Dataframe.
df_genes <- data.frame( gene = c("g1", "g2"), pvalue = c(0.1, 0.2) ) df_terms <- data.frame( term = c("A", "A", "B", "B", "C"), gene = c("g1", "g2", "g1", "g2", "g2") ) create_model_df(df_genes, df_terms)
df_genes <- data.frame( gene = c("g1", "g2"), pvalue = c(0.1, 0.2) ) df_terms <- data.frame( term = c("A", "A", "B", "B", "C"), gene = c("g1", "g2", "g1", "g2", "g2") ) create_model_df(df_genes, df_terms)
Finds the optimal regularization parameters using cross-validation for edgenet. We use the BOBYQA algorithm to find the optimal regularization parameters in a cross-validation framework.
cv_edgenet( X, Y, G.X = NULL, G.Y = NULL, lambda = NA_real_, psigx = NA_real_, psigy = NA_real_, thresh = 1e-05, maxit = 1e+05, learning.rate = 0.01, family = gaussian, optim.thresh = 0.01, optim.maxit = 100, lambda_range = seq(0, 2, length.out = 10), psigx_range = seq(0, 500, length.out = 10), psigy_range = seq(0, 500, length.out = 10), nfolds = 2, cv_method = c("grid_search", "grid_search_lsf", "optim"), tempdir = "." ) ## S4 method for signature 'matrix,numeric' cv_edgenet( X, Y, G.X = NULL, G.Y = NULL, lambda = NA_real_, psigx = NA_real_, psigy = NA_real_, thresh = 1e-05, maxit = 1e+05, learning.rate = 0.01, family = gaussian, optim.thresh = 0.01, optim.maxit = 100, lambda_range = seq(0, 2, length.out = 10), psigx_range = seq(0, 500, length.out = 10), psigy_range = seq(0, 500, length.out = 10), nfolds = 2, cv_method = c("grid_search", "grid_search_lsf", "optim"), tempdir = "." ) ## S4 method for signature 'matrix,matrix' cv_edgenet( X, Y, G.X = NULL, G.Y = NULL, lambda = NA_real_, psigx = NA_real_, psigy = NA_real_, thresh = 1e-05, maxit = 1e+05, learning.rate = 0.01, family = gaussian, optim.thresh = 0.01, optim.maxit = 100, lambda_range = seq(0, 2, length.out = 10), psigx_range = seq(0, 500, length.out = 10), psigy_range = seq(0, 500, length.out = 10), nfolds = 2, cv_method = c("grid_search", "grid_search_lsf", "optim"), tempdir = "." )
cv_edgenet( X, Y, G.X = NULL, G.Y = NULL, lambda = NA_real_, psigx = NA_real_, psigy = NA_real_, thresh = 1e-05, maxit = 1e+05, learning.rate = 0.01, family = gaussian, optim.thresh = 0.01, optim.maxit = 100, lambda_range = seq(0, 2, length.out = 10), psigx_range = seq(0, 500, length.out = 10), psigy_range = seq(0, 500, length.out = 10), nfolds = 2, cv_method = c("grid_search", "grid_search_lsf", "optim"), tempdir = "." ) ## S4 method for signature 'matrix,numeric' cv_edgenet( X, Y, G.X = NULL, G.Y = NULL, lambda = NA_real_, psigx = NA_real_, psigy = NA_real_, thresh = 1e-05, maxit = 1e+05, learning.rate = 0.01, family = gaussian, optim.thresh = 0.01, optim.maxit = 100, lambda_range = seq(0, 2, length.out = 10), psigx_range = seq(0, 500, length.out = 10), psigy_range = seq(0, 500, length.out = 10), nfolds = 2, cv_method = c("grid_search", "grid_search_lsf", "optim"), tempdir = "." ) ## S4 method for signature 'matrix,matrix' cv_edgenet( X, Y, G.X = NULL, G.Y = NULL, lambda = NA_real_, psigx = NA_real_, psigy = NA_real_, thresh = 1e-05, maxit = 1e+05, learning.rate = 0.01, family = gaussian, optim.thresh = 0.01, optim.maxit = 100, lambda_range = seq(0, 2, length.out = 10), psigx_range = seq(0, 500, length.out = 10), psigy_range = seq(0, 500, length.out = 10), nfolds = 2, cv_method = c("grid_search", "grid_search_lsf", "optim"), tempdir = "." )
X |
input matrix, of dimension ( |
Y |
output matrix, of dimension ( |
G.X |
non-negative affinity matrix for |
G.Y |
non-negative affinity matrix for |
lambda |
|
psigx |
|
psigy |
|
thresh |
|
maxit |
maximum number of iterations for the optimizer
( |
learning.rate |
step size for Adam optimizer ( |
family |
family of response, e.g. gaussian or binomial |
optim.thresh |
|
optim.maxit |
the maximum number of iterations for the optimization
( |
lambda_range |
range of lambda to use in CV grid. |
psigx_range |
range of psigx to use in CV grid. |
psigy_range |
range of psigy to use in CV grid. |
nfolds |
the number of folds to be used - default is 2. |
cv_method |
which cross-validation method to use. |
tempdir |
where to store auxiliary files. |
An object of class cv_edgenet
parameters |
the estimated, optimal regularization parameters |
lambda |
optimal estimated value for regularization parameter lambda (or, if provided as argument, the value of the parameter) |
psigx |
optimal estimated value for regularization parameter psigx (or, if provided as argument, the value of the parameter) |
psigy |
optimal estimated value for regularization parameter psigy (or, if provided as argument, the value of the parameter) |
estimated.parameters |
names of parameters that were estimated |
family |
family used for estimation |
fit |
an |
call |
the call that produced the object |
X <- matrix(rnorm(100 * 10), 100, 10) b <- matrix(rnorm(100), 10) G.X <- abs(rWishart(1, 10, diag(10))[, , 1]) G.Y <- abs(rWishart(1, 10, diag(10))[, , 1]) diag(G.X) <- diag(G.Y) <- 0 # estimate the parameters of a Gaussian model Y <- X %*% b + matrix(rnorm(100 * 10), 100) ## dont use affinity matrices and estimate lambda fit <- cv_edgenet( X = X, Y = Y, family = gaussian, maxit = 1, lambda_range = c(0, 1) ) ## only provide one matrix and estimate lambda fit <- cv_edgenet( X = X, Y = Y, G.X = G.X, psigx = 1, family = gaussian, maxit = 1, lambda_range = c(0, 1) ) ## estimate only lambda with two matrices fit <- cv_edgenet( X = X, Y = Y, G.X = G.X, G.Y, psigx = 1, psigy = 1, family = gaussian, maxit = 1, lambda_range = c(0, 1) ) ## estimate only psigx fit <- cv_edgenet( X = X, Y = Y, G.X = G.X, G.Y, lambda = 1, psigy = 1, family = gaussian, maxit = 1, psigx_range = c(0, 1) ) ## estimate all parameters fit <- cv_edgenet( X = X, Y = Y, G.X = G.X, G.Y, family = gaussian, maxit = 1, lambda_range = c(0, 1), psigx_range = c(0, 1), psigy_range = c(0, 1) ) ## if Y is vectorial, we cannot use an affinity matrix for Y fit <- cv_edgenet( X = X, Y = Y[, 1], G.X = G.X, family = gaussian, maxit = 1, lambda_range = c(0, 1), psigx_range = c(0, 1), )
X <- matrix(rnorm(100 * 10), 100, 10) b <- matrix(rnorm(100), 10) G.X <- abs(rWishart(1, 10, diag(10))[, , 1]) G.Y <- abs(rWishart(1, 10, diag(10))[, , 1]) diag(G.X) <- diag(G.Y) <- 0 # estimate the parameters of a Gaussian model Y <- X %*% b + matrix(rnorm(100 * 10), 100) ## dont use affinity matrices and estimate lambda fit <- cv_edgenet( X = X, Y = Y, family = gaussian, maxit = 1, lambda_range = c(0, 1) ) ## only provide one matrix and estimate lambda fit <- cv_edgenet( X = X, Y = Y, G.X = G.X, psigx = 1, family = gaussian, maxit = 1, lambda_range = c(0, 1) ) ## estimate only lambda with two matrices fit <- cv_edgenet( X = X, Y = Y, G.X = G.X, G.Y, psigx = 1, psigy = 1, family = gaussian, maxit = 1, lambda_range = c(0, 1) ) ## estimate only psigx fit <- cv_edgenet( X = X, Y = Y, G.X = G.X, G.Y, lambda = 1, psigy = 1, family = gaussian, maxit = 1, psigx_range = c(0, 1) ) ## estimate all parameters fit <- cv_edgenet( X = X, Y = Y, G.X = G.X, G.Y, family = gaussian, maxit = 1, lambda_range = c(0, 1), psigx_range = c(0, 1), psigy_range = c(0, 1) ) ## if Y is vectorial, we cannot use an affinity matrix for Y fit <- cv_edgenet( X = X, Y = Y[, 1], G.X = G.X, family = gaussian, maxit = 1, lambda_range = c(0, 1), psigx_range = c(0, 1), )
Fit a graph-regularized linear regression model using
edge-penalization. The coefficients are computed using graph-prior
knowledge in the form of one/two affinity matrices. Graph-regularization is
an extension to previously introduced regularization techniques,
such as the LASSO. See the vignette for details on the objective function of
the model:
vignette("edgenet", package="netReg")
edgenet( X, Y, G.X = NULL, G.Y = NULL, lambda = 0, psigx = 0, psigy = 0, thresh = 1e-05, maxit = 1e+05, learning.rate = 0.01, family = gaussian ) ## S4 method for signature 'matrix,numeric' edgenet( X, Y, G.X = NULL, G.Y = NULL, lambda = 0, psigx = 0, psigy = 0, thresh = 1e-05, maxit = 1e+05, learning.rate = 0.01, family = gaussian ) ## S4 method for signature 'matrix,matrix' edgenet( X, Y, G.X = NULL, G.Y = NULL, lambda = 0, psigx = 0, psigy = 0, thresh = 1e-05, maxit = 1e+05, learning.rate = 0.01, family = gaussian )
edgenet( X, Y, G.X = NULL, G.Y = NULL, lambda = 0, psigx = 0, psigy = 0, thresh = 1e-05, maxit = 1e+05, learning.rate = 0.01, family = gaussian ) ## S4 method for signature 'matrix,numeric' edgenet( X, Y, G.X = NULL, G.Y = NULL, lambda = 0, psigx = 0, psigy = 0, thresh = 1e-05, maxit = 1e+05, learning.rate = 0.01, family = gaussian ) ## S4 method for signature 'matrix,matrix' edgenet( X, Y, G.X = NULL, G.Y = NULL, lambda = 0, psigx = 0, psigy = 0, thresh = 1e-05, maxit = 1e+05, learning.rate = 0.01, family = gaussian )
X |
input matrix, of dimension ( |
Y |
output matrix, of dimension ( |
G.X |
non-negative affinity matrix for |
G.Y |
non-negative affinity matrix for |
lambda |
|
psigx |
|
psigy |
|
thresh |
|
maxit |
maximum number of iterations for optimizer
( |
learning.rate |
step size for Adam optimizer ( |
family |
family of response, e.g. gaussian or binomial |
An object of class edgenet
beta |
the estimated ( |
alpha |
the estimated ( |
parameters |
regularization parameters |
lambda |
regularization parameter lambda |
psigx |
regularization parameter psigx |
psigy |
regularization parameter psigy |
family |
a description of the error distribution and link function
to be used. Can be a |
call |
the call that produced the object |
Cheng, Wei and Zhang, Xiang and Guo, Zhishan and Shi, Yu and Wang, Wei
(2014),
Graph-regularized dual Lasso for robust eQTL mapping.
Bioinformatics
X <- matrix(rnorm(100 * 10), 100, 10) b <- matrix(rnorm(100), 10) G.X <- abs(rWishart(1, 10, diag(10))[, , 1]) G.Y <- abs(rWishart(1, 10, diag(10))[, , 1]) diag(G.X) <- diag(G.Y) <- 0 # estimate the parameters of a Gaussian model Y <- X %*% b + matrix(rnorm(100 * 10), 100) ## dont use affinity matrices fit <- edgenet(X = X, Y = Y, family = gaussian, maxit = 10) ## only provide one matrix fit <- edgenet( X = X, Y = Y, G.X = G.X, psigx = 1, family = gaussian, maxit = 10 ) ## use two matrices fit <- edgenet(X = X, Y = Y, G.X = G.X, G.Y, family = gaussian, maxit = 10) ## if Y is vectorial, we cannot use an affinity matrix for Y fit <- edgenet(X = X, Y = Y[, 1], G.X = G.X, family = gaussian, maxit = 10)
X <- matrix(rnorm(100 * 10), 100, 10) b <- matrix(rnorm(100), 10) G.X <- abs(rWishart(1, 10, diag(10))[, , 1]) G.Y <- abs(rWishart(1, 10, diag(10))[, , 1]) diag(G.X) <- diag(G.Y) <- 0 # estimate the parameters of a Gaussian model Y <- X %*% b + matrix(rnorm(100 * 10), 100) ## dont use affinity matrices fit <- edgenet(X = X, Y = Y, family = gaussian, maxit = 10) ## only provide one matrix fit <- edgenet( X = X, Y = Y, G.X = G.X, psigx = 1, family = gaussian, maxit = 10 ) ## use two matrices fit <- edgenet(X = X, Y = Y, G.X = G.X, G.Y, family = gaussian, maxit = 10) ## if Y is vectorial, we cannot use an affinity matrix for Y fit <- edgenet(X = X, Y = Y[, 1], G.X = G.X, family = gaussian, maxit = 10)
Family objects provide a convenient way to specify the details
of the models used by pareg
.
See also stats::family
for more details.
family(object, ...) gaussian(link = c("identity")) bernoulli(link = c("logit", "probit", "log")) beta(link = c("logit", "probit", "log")) beta_phi_lm(link = c("logit", "probit", "log")) beta_phi_var(link = c("logit", "probit", "log"))
family(object, ...) gaussian(link = c("identity")) bernoulli(link = c("logit", "probit", "log")) beta(link = c("logit", "probit", "log")) beta_phi_lm(link = c("logit", "probit", "log")) beta_phi_var(link = c("logit", "probit", "log"))
object |
an object for which the family should be returned
(e.g. |
... |
further arguments passed to methods |
link |
name of a link function |
An object of class pareg.family
family |
name of the family |
link |
name of the link function |
linkinv |
inverse link function |
loss |
loss function |
gaussian() bernoulli("probit")$link beta()$loss
gaussian() bernoulli("probit")$link beta()$loss
Generate block-structured similarity matrices corresponding to cluster structures.
generate_similarity_matrix(cluster_sizes)
generate_similarity_matrix(cluster_sizes)
cluster_sizes |
List of cluster sizes. |
Similarity matrix with samples as row-/colnames.
generate_similarity_matrix(c(1, 2, 3))
generate_similarity_matrix(c(1, 2, 3))
Compute Jaccard similarity between two sets.
jaccard(x, y)
jaccard(x, y)
x |
First set. |
y |
Second set. |
Jaccard similarity between set x and y.
Other pathway similarity methods:
overlap_coefficient()
jaccard(c(1, 2, 3), c(2, 3, 4))
jaccard(c(1, 2, 3), c(2, 3, 4))
Compute overlap coefficient between two sets.
overlap_coefficient(x, y)
overlap_coefficient(x, y)
x |
First set. |
y |
Second set. |
Overlap coefficient between set x and y.
Other pathway similarity methods:
jaccard()
overlap_coefficient(c(1, 2, 3), c(2, 3, 4))
overlap_coefficient(c(1, 2, 3), c(2, 3, 4))
Run model to compute pathway enrichments. Can model inter-pathway relations, cross-validation and much more.
pareg( df_genes, df_terms, lasso_param = NA_real_, network_param = NA_real_, term_network = NULL, cv = FALSE, cv_cores = NULL, family = beta, response_column_name = "pvalue", max_iterations = 1e+05, lasso_param_range = seq(0, 2, length.out = 10), network_param_range = seq(0, 500, length.out = 10), log_level = NULL, ... )
pareg( df_genes, df_terms, lasso_param = NA_real_, network_param = NA_real_, term_network = NULL, cv = FALSE, cv_cores = NULL, family = beta, response_column_name = "pvalue", max_iterations = 1e+05, lasso_param_range = seq(0, 2, length.out = 10), network_param_range = seq(0, 500, length.out = 10), log_level = NULL, ... )
df_genes |
Dataframe storing gene names and DE p-values. |
df_terms |
Dataframe storing pathway database. |
lasso_param |
Lasso regularization parameter. |
network_param |
Network regularization parameter. |
term_network |
Term similarity network as adjacency matrix. |
cv |
Estimate best regularization parameters using cross-validation. |
cv_cores |
How many cores to use for CV parallelization. |
family |
Distribution family of response. |
response_column_name |
Which column of model dataframe to use as response. |
max_iterations |
How many iterations to maximally run optimizer for. |
lasso_param_range |
LASSO regularization parameter search space in grid search of CV. |
network_param_range |
Network regularization parameter search space in grid search of CV. |
log_level |
Control verbosity (logger::INFO, logger::DEBUG, ...). |
... |
Further arguments to pass to '(cv.)edgenet'. |
An object of class pareg
.
df_genes <- data.frame( gene = paste("g", 1:20, sep = ""), pvalue = c( rbeta(10, .1, 1), rbeta(10, 1, 1) ) ) df_terms <- rbind( data.frame( term = "foo", gene = paste("g", 1:10, sep = "") ), data.frame( term = "bar", gene = paste("g", 11:20, sep = "") ) ) pareg(df_genes, df_terms, max_iterations = 10)
df_genes <- data.frame( gene = paste("g", 1:20, sep = ""), pvalue = c( rbeta(10, .1, 1), rbeta(10, 1, 1) ) ) df_terms <- rbind( data.frame( term = "foo", gene = paste("g", 1:10, sep = "") ), data.frame( term = "bar", gene = paste("g", 11:20, sep = "") ) ) pareg(df_genes, df_terms, max_iterations = 10)
Declare Python packages needed to run this R package.
pareg_env
pareg_env
An object of class BasiliskEnvironment
of length 1.
Contains matrices for various pathway databases and similarity measures.
Note that the matrices are sparse, upper triangular and subsampled to
a maximum size of $1000 \times 1000$ if necessary.
They can be transformed to a dense representation
using pareg::as_dense_sim
.
pathway_similarities
pathway_similarities
A list of lists of matrices. * Pathway database 1 * Similarity measure 1 * Similarity measure 2 * ... * Pathway database 2 * ...
Visualize pathway enrichments as network.
plot_pareg_with_args( x, show_term_names = TRUE, min_similarity = 0, term_subset = NULL )
plot_pareg_with_args( x, show_term_names = TRUE, min_similarity = 0, term_subset = NULL )
x |
An object of class |
show_term_names |
Whether to plot node labels. |
min_similarity |
Don't plot edges for similarities below this value. |
term_subset |
Subset of terms to show. |
ggplot object.
df_genes <- data.frame( gene = paste("g", 1:20, sep = ""), pvalue = c( rbeta(10, .1, 1), rbeta(10, 1, 1) ) ) df_terms <- rbind( data.frame( term = "foo", gene = paste("g", 1:10, sep = "") ), data.frame( term = "bar", gene = paste("g", 11:20, sep = "") ) ) fit <- pareg(df_genes, df_terms, max_iterations = 10) plot(fit)
df_genes <- data.frame( gene = paste("g", 1:20, sep = ""), pvalue = c( rbeta(10, .1, 1), rbeta(10, 1, 1) ) ) df_terms <- rbind( data.frame( term = "foo", gene = paste("g", 1:10, sep = "") ), data.frame( term = "bar", gene = paste("g", 11:20, sep = "") ) ) fit <- pareg(df_genes, df_terms, max_iterations = 10) plot(fit)
Check pareg::plot_pareg_with_args for details. Needed because of WARNING in "checking S3 generic/method consistency"
## S3 method for class 'pareg' plot(x, ...)
## S3 method for class 'pareg' plot(x, ...)
x |
An object of class |
... |
Parameters passed to pareg::plot_pareg_with_args |
ggplot object.
Choose similar objects more often, depending on 'similarity_factor'.
similarity_sample(sim_mat, size, similarity_factor = 1)
similarity_sample(sim_mat, size, similarity_factor = 1)
sim_mat |
Similarity matrix with samples as row/col names. |
size |
How many samples to draw. |
similarity_factor |
Uniform sampling for 0. Weights mixture of uniform and similarity vector for each draw. |
Vector of samples.
similarity_sample(matrix(runif(100), nrow = 10, ncol = 10), 3)
similarity_sample(matrix(runif(100), nrow = 10, ncol = 10), 3)
Make (response) vector conform to Beta assumptions as described in section 2 of the betareg vignette https://cran.r-project.org/web/packages/betareg/vignettes/betareg.pdf.
transform_y(y)
transform_y(y)
y |
Numeric vector in [0, 1]^N |
Numeric vector in (0, 1)^N
transform_y(c(0, 0.5, 1))
transform_y(c(0, 0.5, 1))