Calculate Z-score for motif frequencies
calculate_Z(observed = NULL, nulls = NULL)
calculate_Z(observed = NULL, nulls = NULL)
observed |
A list of observed motif frequencies for each motif type. List elements must be named 'lambda', 'bifan', 'V', 'PPI_V', and 'delta' (not necessarily in that order). |
nulls |
A list of null distributions for each motif type, as returned by generate_nulls(). |
A numeric vector with the Z-score for each motif type.
# Simulating it for test purposes null <- rnorm(1000, mean = 5, sd = 1) nulls <- list( lambda = null, V = null, PPI_V = null, delta = null, bifan = null ) observed <- list(lambda = 7, bifan = 13, delta = 9, V = 5, PPI_V = 10) z <- calculate_Z(observed, nulls) # Check for motif enrichment (Z > 5) z[which(z > 5)]
# Simulating it for test purposes null <- rnorm(1000, mean = 5, sd = 1) nulls <- list( lambda = null, V = null, PPI_V = null, delta = null, bifan = null ) observed <- list(lambda = 7, bifan = 13, delta = 9, V = 5, PPI_V = 10) z <- calculate_Z(observed, nulls) # Check for motif enrichment (Z > 5) z[which(z > 5)]
Find bifan motifs
find_bifan( edgelist = NULL, paralogs = NULL, lambda_vec = NULL, count_only = FALSE )
find_bifan( edgelist = NULL, paralogs = NULL, lambda_vec = NULL, count_only = FALSE )
edgelist |
A 2-column data frame with regulators in column 1 and targets in column 2. It can be ignored if you give lambda motifs to parameter lambda_vec (recommended). |
paralogs |
A 2-column data frame with gene IDs for each paralog in the paralog pair. |
lambda_vec |
A character vector of lambda motifs, as returned by find_lambda(). |
count_only |
Logical indicating whether the function should return only motif counts as a numeric scalar. If FALSE, it will return a character vector of motifs. Default: FALSE. |
A character vector with bifan motifs represented in the format regulator1, regulator2->target1, target2.
data(gma_grn) data(gma_paralogs) edgelist <- gma_grn[1:50000, 1:2] paralogs <- gma_paralogs[gma_paralogs$type == "WGD", 1:2] paralogs <- rbind( paralogs, data.frame(duplicate1 = "Glyma.01G177200", duplicate2 = "Glyma.08G116700") ) lambda_vec <- find_lambda(edgelist, paralogs) bifan <- find_bifan(paralogs = paralogs, lambda_vec = lambda_vec)
data(gma_grn) data(gma_paralogs) edgelist <- gma_grn[1:50000, 1:2] paralogs <- gma_paralogs[gma_paralogs$type == "WGD", 1:2] paralogs <- rbind( paralogs, data.frame(duplicate1 = "Glyma.01G177200", duplicate2 = "Glyma.08G116700") ) lambda_vec <- find_lambda(edgelist, paralogs) bifan <- find_bifan(paralogs = paralogs, lambda_vec = lambda_vec)
Find delta motifs
find_delta( edgelist = NULL, paralogs = NULL, edgelist_ppi = NULL, lambda_vec = NULL, count_only = FALSE )
find_delta( edgelist = NULL, paralogs = NULL, edgelist_ppi = NULL, lambda_vec = NULL, count_only = FALSE )
edgelist |
A 2-column data frame with regulators in column 1 and targets in column 2. It can be ignored if you give lambda motifs to parameter lambda_vec (recommended). |
paralogs |
A 2-column data frame with gene IDs for each paralog in the paralog pair. It can be ignored if you give lambda motifs to parameter lambda_vec (recommended). |
edgelist_ppi |
A 2-column data frame with IDs of genes that encode each protein in the interacting pair. |
lambda_vec |
A character vector of lambda motifs, as returned by find_lambda(). |
count_only |
Logical indicating whether the function should return only motif counts as a numeric scalar. If FALSE, it will return a character vector of motifs. Default: FALSE. |
A character vector with delta motifs represented in the format target1<-regulator->target2.
data(gma_grn) data(gma_paralogs) data(gma_ppi) edgelist <- gma_grn[500:1000, 1:2] # reducing for test purposes edgelist <- gma_grn[1:10000, 1:2] paralogs <- gma_paralogs[gma_paralogs$type == "WGD", 1:2] edgelist_ppi <- gma_ppi lambda_vec <- find_lambda(edgelist, paralogs) motifs <- find_delta(edgelist_ppi = edgelist_ppi, lambda_vec = lambda_vec)
data(gma_grn) data(gma_paralogs) data(gma_ppi) edgelist <- gma_grn[500:1000, 1:2] # reducing for test purposes edgelist <- gma_grn[1:10000, 1:2] paralogs <- gma_paralogs[gma_paralogs$type == "WGD", 1:2] edgelist_ppi <- gma_ppi lambda_vec <- find_lambda(edgelist, paralogs) motifs <- find_delta(edgelist_ppi = edgelist_ppi, lambda_vec = lambda_vec)
Find lambda motifs
find_lambda(edgelist = NULL, paralogs = NULL, count_only = FALSE)
find_lambda(edgelist = NULL, paralogs = NULL, count_only = FALSE)
edgelist |
A 2-column data frame with regulators in column 1 and targets in column 2. |
paralogs |
A 2-column data frame with gene IDs for each paralog in the paralog pair. |
count_only |
Logical indicating whether the function should return only motif counts as a numeric scalar. If FALSE, it will return a character vector of motifs. Default: FALSE. |
A character vector with lambda motifs represented in the format target1<-regulator->target2.
data(gma_grn) data(gma_paralogs) edgelist <- gma_grn[500:1000, 1:2] # reducing for test purposes paralogs <- gma_paralogs[gma_paralogs$type == "WGD", 1:2] motifs <- find_lambda(edgelist, paralogs)
data(gma_grn) data(gma_paralogs) edgelist <- gma_grn[500:1000, 1:2] # reducing for test purposes paralogs <- gma_paralogs[gma_paralogs$type == "WGD", 1:2] motifs <- find_lambda(edgelist, paralogs)
Find V motifs in protein-protein interactions
find_ppi_v(edgelist = NULL, paralogs = NULL, count_only = FALSE)
find_ppi_v(edgelist = NULL, paralogs = NULL, count_only = FALSE)
edgelist |
A 2-column data frame with protein 1 in column 1 and protein 2 in column 2. |
paralogs |
A 2-column data frame with gene IDs for each paralog in the paralog pair. |
count_only |
Logical indicating whether the function should return only motif counts as a numeric scalar. If FALSE, it will return a character vector of motifs. Default: FALSE. |
This function aims to find the number of paralogous gene pairs that share an interaction partner.
A character vector with V motifs represented in the format paralog1-partner-paralog2.
data(gma_ppi) data(gma_paralogs) edgelist <- gma_ppi paralogs <- gma_paralogs[gma_paralogs$type == "WGD", 1:2] motifs <- find_ppi_v(edgelist, paralogs)
data(gma_ppi) data(gma_paralogs) edgelist <- gma_ppi paralogs <- gma_paralogs[gma_paralogs$type == "WGD", 1:2] motifs <- find_ppi_v(edgelist, paralogs)
Find V motifs
find_v(edgelist = NULL, paralogs = NULL, count_only = FALSE)
find_v(edgelist = NULL, paralogs = NULL, count_only = FALSE)
edgelist |
A 2-column data frame with regulators in column 1 and targets in column 2. |
paralogs |
A 2-column data frame with gene IDs for each paralog in the paralog pair. |
count_only |
Logical indicating whether the function should return only motif counts as a numeric scalar. If FALSE, it will return a character vector of motifs. Default: FALSE. |
A character vector with V motifs represented in the format regulator1->target<-regulator2.
data(gma_grn) data(gma_paralogs) edgelist <- gma_grn[2000:4000, 1:2] # reducing for test purposes paralogs <- gma_paralogs[gma_paralogs$type == "WGD", 1:2] motifs <- find_v(edgelist, paralogs)
data(gma_grn) data(gma_paralogs) edgelist <- gma_grn[2000:4000, 1:2] # reducing for test purposes paralogs <- gma_paralogs[gma_paralogs$type == "WGD", 1:2] motifs <- find_v(edgelist, paralogs)
Generate null distributions of motif counts for each motif type
generate_nulls( edgelist = NULL, paralogs = NULL, edgelist_ppi = NULL, n = 1000, bp_param = BiocParallel::SerialParam() )
generate_nulls( edgelist = NULL, paralogs = NULL, edgelist_ppi = NULL, n = 1000, bp_param = BiocParallel::SerialParam() )
edgelist |
A 2-column data frame with regulators in column 1 and targets in column 2. |
paralogs |
A 2-column data frame with gene IDs for each paralog in the paralog pair. |
edgelist_ppi |
A 2-column data frame with IDs of genes that encode each protein in the interacting pair. |
n |
Number of degree-preserving simulated networks to generate. Default: 1000. |
bp_param |
BiocParallel back-end to be used. Default: BiocParallel::SerialParam(). |
A list of numeric vectors named lambda, delta, V, PPI_V, and bifan, containing the null distribution of motif counts for each motif type.
set.seed(123) data(gma_grn) data(gma_paralogs) data(gma_ppi) edgelist <- gma_grn[500:1000, 1:2] # reducing for test purposes paralogs <- gma_paralogs[gma_paralogs$type == "WGD", 1:2] edgelist_ppi <- gma_ppi n <- 2 # small n for demonstration purposes generate_nulls(edgelist, paralogs, edgelist_ppi, n)
set.seed(123) data(gma_grn) data(gma_paralogs) data(gma_ppi) edgelist <- gma_grn[500:1000, 1:2] # reducing for test purposes paralogs <- gma_paralogs[gma_paralogs$type == "WGD", 1:2] edgelist_ppi <- gma_ppi n <- 2 # small n for demonstration purposes generate_nulls(edgelist, paralogs, edgelist_ppi, n)
The GRN was inferred with BioNERO using expression data from Libault et al., 2010, and Severin et al., 2010.
data(gma_grn)
data(gma_grn)
A 3-column data frame with node1, node2, and edge weight.
Severin, A. J., Woody, J. L., Bolon, Y. T., Joseph, B., Diers, B. W., Farmer, A. D., ... & Shoemaker, R. C. (2010). RNA-Seq Atlas of Glycine max: a guide to the soybean transcriptome. BMC plant biology, 10(1), 1-16.
Libault, M., Farmer, A., Joshi, T., Takahashi, K., Langley, R. J., Franklin, L. D., ... & Stacey, G. (2010). An integrated transcriptome atlas of the crop model Glycine max, and its use in comparative analyses in plants. The Plant Journal, 63(1), 86-99.
data(gma_grn)
data(gma_grn)
The repertoire of soybean paralogs was retrieved from Almeida-Silva et al., 2020.
data(gma_paralogs)
data(gma_paralogs)
A 3-column data frame with duplicate 1, duplicate 2, and duplication type.
Almeida-Silva, F., Moharana, K. C., Machado, F. B., & Venancio, T. M. (2020). Exploring the complexity of soybean (Glycine max) transcriptional regulation using global gene co-expression networks. Planta, 252(6), 1-12.
data(gma_paralogs)
data(gma_paralogs)
Protein-protein interactions (PPIs) were retrieved from the STRING database and filtered to keep only medium confidence edges and nodes in the GRN.
data(gma_ppi)
data(gma_ppi)
A 2-column data frame with node1 and node2.
data(gma_ppi)
data(gma_ppi)
Data were filtered exactly as demonstrated in the vignette. Briefly, the top 30k edges from the GRN were kept, and only WGD-derived gene pairs were used.
data(nulls)
data(nulls)
A list of numeric vectors with the motif frequencies in each simulated network. List elements are named lambda, delta, V, PPI_V, and bifan, and each element has length 100.
data(nulls)
data(nulls)
Calculate Sorensen-Dice similarity between paralogous gene pairs
sd_similarity(edgelist = NULL, paralogs = NULL)
sd_similarity(edgelist = NULL, paralogs = NULL)
edgelist |
A 2-column data frame with regulators in column 1 and targets in column 2. |
paralogs |
A 2-column data frame with gene IDs for each paralog in the paralog pair. |
A data frame containing the paralogous gene pairs and their Sorensen-Dice similarity scores.
data(gma_ppi) data(gma_paralogs) edgelist <- gma_ppi paralogs <- gma_paralogs sim <- sd_similarity(edgelist, paralogs)
data(gma_ppi) data(gma_paralogs) edgelist <- gma_ppi paralogs <- gma_paralogs sim <- sd_similarity(edgelist, paralogs)