Title: | TOP Constructs Transferable Model Across Gene Expression Platforms |
---|---|
Description: | TOP constructs a transferable model across gene expression platforms for prospective experiments. Such a transferable model can be trained to make predictions on independent validation data with an accuracy that is similar to a re-substituted model. The TOP procedure also has the flexibility to be adapted to suit the most common clinical response variables, including linear response, binomial and Cox PH models. |
Authors: | Harry Robertson [aut, cre] , Nicholas Robertson [aut] |
Maintainer: | Harry Robertson <[email protected]> |
License: | GPL-3 |
Version: | 1.7.0 |
Built: | 2024-11-30 05:37:25 UTC |
Source: | https://github.com/bioc/TOP |
coefNetworkPlot
coefNetworkPlot(TOP_model, nFeatures = 20, s = "lambda.min")
coefNetworkPlot(TOP_model, nFeatures = 20, s = "lambda.min")
TOP_model |
A Transferable Omics Prediction model. The output from the TOP_model function. |
nFeatures |
The number of features that will be plotted. Default: 20 |
s |
Lambda value for the lasso model. Default is "lambda.min" |
A coefNetwork plot
data(TOP_data_binary, package = "TOP") x1 <- TOP_data_binary$x1 x2 <- TOP_data_binary$x2 x3 <- TOP_data_binary$x3 y1 <- TOP_data_binary$y1 y2 <- TOP_data_binary$y2 y3 <- TOP_data_binary$y3 set.seed(23) x_list <- list(x1, x2) y_list <- list(factor(y1), factor(y2)) model <- TOP_model(x_list, y_list) coefNetworkPlot(model) #' @import ggplot2
data(TOP_data_binary, package = "TOP") x1 <- TOP_data_binary$x1 x2 <- TOP_data_binary$x2 x3 <- TOP_data_binary$x3 y1 <- TOP_data_binary$y1 y2 <- TOP_data_binary$y2 y3 <- TOP_data_binary$y3 set.seed(23) x_list <- list(x1, x2) y_list <- list(factor(y1), factor(y2)) model <- TOP_model(x_list, y_list) coefNetworkPlot(model) #' @import ggplot2
The expit function
expit(x)
expit(x)
x |
numeric |
The expit of x
curve(expit, from = -5, to = 5)
curve(expit, from = -5, to = 5)
A function that implements feature selection, using limma, from a list of data frames with corresponding labels.
filterFeatures( x_list, y_list, contrast = NULL, nFeatures = 50, combinationMethod = "OSP" )
filterFeatures( x_list, y_list, contrast = NULL, nFeatures = 50, combinationMethod = "OSP" )
x_list |
A list of data frames, with columns corresponding to features and rows corresponding to observations. |
y_list |
A list of factor labels. |
contrast |
A character vector describing which order of levels to contrast in y_list ("disease - control"), Default: NULL |
nFeatures |
Number of features to return, Default: 50 |
combinationMethod |
Which p-value combination method to use, Default: 'OSP' Options are 'Stouffer', 'OSP', 'Fisher', 'maxP'. |
contrast must be a character vector of length 1. If contrast is NULL, the first level of the first factor in y_list will be used as the reference level.
A vector of feature names.
data(TOP_data_binary, package = "TOP") x1 <- TOP_data_binary$x1 x2 <- TOP_data_binary$x2 x3 <- TOP_data_binary$x3 x_list <- list(x1, x2, x3) y_list <- list(TOP_data_binary$y1, TOP_data_binary$y2, TOP_data_binary$y3) y_list <- y_list <- lapply(y_list, function(x) { x <- factor(x, levels = c("1", "0"), labels = c("Yes", "No")) }) filterFeatures( x_list, y_list, contrast = "Yes - No", nFeatures = 10, combinationMethod = "OSP" )
data(TOP_data_binary, package = "TOP") x1 <- TOP_data_binary$x1 x2 <- TOP_data_binary$x2 x3 <- TOP_data_binary$x3 x_list <- list(x1, x2, x3) y_list <- list(TOP_data_binary$y1, TOP_data_binary$y2, TOP_data_binary$y3) y_list <- y_list <- lapply(y_list, function(x) { x <- factor(x, levels = c("1", "0"), labels = c("Yes", "No")) }) filterFeatures( x_list, y_list, contrast = "Yes - No", nFeatures = 10, combinationMethod = "OSP" )
Compute pairwise difference between matrix columns
pairwise_col_diff(x)
pairwise_col_diff(x)
x |
A data matrix of size n times p. Where rows are observations and columns are features. |
A matrix of size n times (p choose 2), where each column is the difference between two of the original columns.
n <- 1 p <- 4 x <- matrix(rep(seq_len(p), n), nrow = n, ncol = p, byrow = TRUE) colnames(x) <- paste0("X", seq_len(p)) pairwise_col_diff(x)
n <- 1 p <- 4 x <- matrix(rep(seq_len(p), n), nrow = n, ncol = p, byrow = TRUE) colnames(x) <- paste0("X", seq_len(p)) pairwise_col_diff(x)
A function to calculate the external performance of the Transferable Omics Prediction model.
performance_TOP(TOP_model, newx, newy, covariates = NULL, s = "lambda.min")
performance_TOP(TOP_model, newx, newy, covariates = NULL, s = "lambda.min")
TOP_model |
This is the output of the function TOP_model. |
newx |
A matrix of the new data to be predicted. With the same number of feature columns as the original data. |
newy |
A vector of the true labels that are being predicted. With the same number of samples as newx. |
covariates |
A data.frame of the same covariates as the original TOP model, Default: NULL |
s |
Lambda used in the lasso model, Default: 'lambda.min' |
A confusion matrix that displays the performance of the classifier.
data(TOP_data_binary, package = "TOP") x1 <- TOP_data_binary$x1 x2 <- TOP_data_binary$x2 x_list <- list(x1,x2) y_list <- list(TOP_data_binary$y1, TOP_data_binary$y2) model <- TOP_model(x_list, y_list) x3 <- TOP_data_binary$x3 y3 <- TOP_data_binary$y3 performance_TOP(model$models, newx = x3, newy = y3)
data(TOP_data_binary, package = "TOP") x1 <- TOP_data_binary$x1 x2 <- TOP_data_binary$x2 x_list <- list(x1,x2) y_list <- list(TOP_data_binary$y1, TOP_data_binary$y2) model <- TOP_model(x_list, y_list) x3 <- TOP_data_binary$x3 y3 <- TOP_data_binary$y3 performance_TOP(model$models, newx = x3, newy = y3)
A prediction function for the Transferable Omics Prediction model.
predict_TOP(TOP_model, newx, covariates = NULL, s = "lambda.min")
predict_TOP(TOP_model, newx, covariates = NULL, s = "lambda.min")
TOP_model |
The output from the TOP_model function. |
newx |
A matrix of the new data to be predicted. The columns should be features and the rows should be samples. |
covariates |
A data frame of the same covariates that were used in the TOP model, Default: NULL |
s |
Lambda value for the lasso model, Default: 'lambda.min' |
A vector of predictions for the new data.
data(TOP_data_binary, package = "TOP") x1 <- TOP_data_binary$x1 x2 <- TOP_data_binary$x2 x3 <- TOP_data_binary$x3 y1 <- TOP_data_binary$y1 y2 <- TOP_data_binary$y2 y3 <- TOP_data_binary$y3 set.seed(23) x_list <- list(x1, x2) y_list <- list(factor(y1), factor(y2)) model <- TOP_model(x_list, y_list) predictions <- predict_TOP(model$models, newx = x3)
data(TOP_data_binary, package = "TOP") x1 <- TOP_data_binary$x1 x2 <- TOP_data_binary$x2 x3 <- TOP_data_binary$x3 y1 <- TOP_data_binary$y1 y2 <- TOP_data_binary$y2 y3 <- TOP_data_binary$y3 set.seed(23) x_list <- list(x1, x2) y_list <- list(factor(y1), factor(y2)) model <- TOP_model(x_list, y_list) predictions <- predict_TOP(model$models, newx = x3)
A function that visualizes the performance of a classifier by plotting the Receiver Operating Characteristic (ROC) curve.
ROC_Plot(roc_list)
ROC_Plot(roc_list)
roc_list |
A list of roc objects from the pROC package |
A ROC Plot
data(TOP_data_binary, package = "TOP") x1 <- TOP_data_binary$x1 x2 <- TOP_data_binary$x2 x3 <- TOP_data_binary$x3 y1 <- TOP_data_binary$y1 y2 <- TOP_data_binary$y2 y3 <- TOP_data_binary$y3 set.seed(23) x_list <- list(x1, x2) y_list <- list(factor(y1), factor(y2)) model <- TOP_model(x_list, y_list) pred <- predict_TOP(model$models, newx = x3) roc <- pROC::roc(y3, pred) ROC_Plot(list(roc))
data(TOP_data_binary, package = "TOP") x1 <- TOP_data_binary$x1 x2 <- TOP_data_binary$x2 x3 <- TOP_data_binary$x3 y1 <- TOP_data_binary$y1 y2 <- TOP_data_binary$y2 y3 <- TOP_data_binary$y3 set.seed(23) x_list <- list(x1, x2) y_list <- list(factor(y1), factor(y2)) model <- TOP_model(x_list, y_list) pred <- predict_TOP(model$models, newx = x3) roc <- pROC::roc(y3, pred) ROC_Plot(list(roc))
simplenetworkPlot
simplenetworkPlot(TOP_model, nFeatures = 50, s = "lambda.min")
simplenetworkPlot(TOP_model, nFeatures = 50, s = "lambda.min")
TOP_model |
A Transferable Omics Prediction model. The output from the TOP_model function. |
nFeatures |
The number of features that will be plotted. Default: 50 |
s |
Lambda value for the lasso model. Default is "lambda.min" |
A simple network plot
data(TOP_data_binary, package = "TOP") x1 <- TOP_data_binary$x1 x2 <- TOP_data_binary$x2 x3 <- TOP_data_binary$x3 y1 <- TOP_data_binary$y1 y2 <- TOP_data_binary$y2 y3 <- TOP_data_binary$y3 set.seed(23) x_list <- list(x1, x2) y_list <- list(factor(y1), factor(y2)) model <- TOP_model(x_list, y_list) simplenetworkPlot(model)
data(TOP_data_binary, package = "TOP") x1 <- TOP_data_binary$x1 x2 <- TOP_data_binary$x2 x3 <- TOP_data_binary$x3 y1 <- TOP_data_binary$y1 y2 <- TOP_data_binary$y2 y3 <- TOP_data_binary$y3 set.seed(23) x_list <- list(x1, x2) y_list <- list(factor(y1), factor(y2)) model <- TOP_model(x_list, y_list) simplenetworkPlot(model)
Calculate the concordance index of a TOP survival model on new data.
Surv_TOP_CI(TOP_survival, newx, newy)
Surv_TOP_CI(TOP_survival, newx, newy)
TOP_survival |
A TOP_survival model. See TOP_survival. |
newx |
A new data.frame to predict the survival time. |
newy |
A data.frame, where the first column is the time and the second column is the event status. |
An object of class concordance
data(TOP_data_binary, package = "TOP") time <- rpois(300, c(600, 1000)) surv <- sample(c(0, 1), 300, replace = TRUE) y <- data.frame(time, surv) batch <- rep(paste0("y", 1:3), c(100, 100, 100)) y_list <- y |> split(batch) x_list <- list(TOP_data_binary$x1, TOP_data_binary$x2, TOP_data_binary$x3) surv_model <- TOP_survival(x_list[-3], y_list[-3], nFeatures = 10) Surv_TOP_CI(surv_model, newx = x_list[[3]], newy = y_list[[3]])
data(TOP_data_binary, package = "TOP") time <- rpois(300, c(600, 1000)) surv <- sample(c(0, 1), 300, replace = TRUE) y <- data.frame(time, surv) batch <- rep(paste0("y", 1:3), c(100, 100, 100)) y_list <- y |> split(batch) x_list <- list(TOP_data_binary$x1, TOP_data_binary$x2, TOP_data_binary$x3) surv_model <- TOP_survival(x_list[-3], y_list[-3], nFeatures = 10) Surv_TOP_CI(surv_model, newx = x_list[[3]], newy = y_list[[3]])
TOP_coefPlot
TOP_coefPlot(TOP_model, nFeatures = 20, s = "lambda.min")
TOP_coefPlot(TOP_model, nFeatures = 20, s = "lambda.min")
TOP_model |
A Transferable Omics Prediction model. The output from the TOP_model function. |
nFeatures |
The number of features that will be plotted. Default: 20 |
s |
Lambda value for the lasso model, Default: 'lambda.min' |
A TOP coeff plot
data(TOP_data_binary, package = "TOP") x1 <- TOP_data_binary$x1 x2 <- TOP_data_binary$x2 x3 <- TOP_data_binary$x3 y1 <- TOP_data_binary$y1 y2 <- TOP_data_binary$y2 y3 <- TOP_data_binary$y3 set.seed(23) x_list <- list(x1, x2) y_list <- list(factor(y1), factor(y2)) model <- TOP_model(x_list, y_list) TOP_coefPlot(model)
data(TOP_data_binary, package = "TOP") x1 <- TOP_data_binary$x1 x2 <- TOP_data_binary$x2 x3 <- TOP_data_binary$x3 y1 <- TOP_data_binary$y1 y2 <- TOP_data_binary$y2 y3 <- TOP_data_binary$y3 set.seed(23) x_list <- list(x1, x2) y_list <- list(factor(y1), factor(y2)) model <- TOP_model(x_list, y_list) TOP_coefPlot(model)
A simulated binary data
data("TOP_data_binary")
data("TOP_data_binary")
A list with elements:
x1: A matrix of size 100x20, each column has mean 1 and sd 1
x2: A matrix of size 100x20, each column has mean 2 and sd 1
x3: A matrix of size 100x20, each column has mean 3 and sd 1
y1: A factor vector of 0's and 1's, created by beta and x1
y2: A factor vector of 0's and 1's, created by beta and x2
y3: A factor vector of 0's and 1's, created by beta and x3
beta: A vector with first 10 entries drawn from random unif(-1, 1), otherwise 0's.
The example data.
TOP_lambdaPlot
TOP_lambdaPlot( TOP_model, nFeatures = 20, s = "lambda.min", interactive = FALSE, label = FALSE )
TOP_lambdaPlot( TOP_model, nFeatures = 20, s = "lambda.min", interactive = FALSE, label = FALSE )
TOP_model |
A Transferable Omics Prediction model. The output from the TOP_model function. |
nFeatures |
The number of features to plot; features are ranked by their betas at lambda.min. Default: 20 |
s |
Lambda value for the lasso model. Default is "lambda.min" |
interactive |
A boolean indicating whether the plot should be interactive. Defaults to FALSE. |
label |
A boolean indicating whether the features should be labeled on the plot. Defaults to FALSE. |
A TOP lambda plot
data(TOP_data_binary, package = "TOP") x1 <- TOP_data_binary$x1 x2 <- TOP_data_binary$x2 x3 <- TOP_data_binary$x3 y1 <- TOP_data_binary$y1 y2 <- TOP_data_binary$y2 y3 <- TOP_data_binary$y3 set.seed(23) x_list <- list(x1, x2) y_list <- list(factor(y1), factor(y2)) model <- TOP_model(x_list, y_list) TOP_lambdaPlot(model)
data(TOP_data_binary, package = "TOP") x1 <- TOP_data_binary$x1 x2 <- TOP_data_binary$x2 x3 <- TOP_data_binary$x3 y1 <- TOP_data_binary$y1 y2 <- TOP_data_binary$y2 y3 <- TOP_data_binary$y3 set.seed(23) x_list <- list(x1, x2) y_list <- list(factor(y1), factor(y2)) model <- TOP_model(x_list, y_list) TOP_lambdaPlot(model)
The main function of the TOP package. This function returns a glmnet model.
TOP_model( x_list, y_list, covariates = NULL, dataset_weights = NULL, sample_weights = FALSE, optimiseExponent = FALSE, nCores = 1 )
TOP_model( x_list, y_list, covariates = NULL, dataset_weights = NULL, sample_weights = FALSE, optimiseExponent = FALSE, nCores = 1 )
x_list |
a list of data frames, each containing the data for a single batch or dataset. Columns should be features and rows should be observations. |
y_list |
a list of factors, each containing the labels for a single batch or dataset. The length of this list should be the same as the length of x_list. |
covariates |
a list of data frames with the covariates that should be included in the model, Default: NULL |
dataset_weights |
a list of data frames that refer to any grouping structure in the batches, Default: NULL |
sample_weights |
Should each batch be weighted equally? This is important with unequal sample sizes, Default: FALSE |
optimiseExponent |
Should the exponent used to modify the lasso weights be optimised using resubstitution?, Default: FALSE |
nCores |
A numeric specifying the number of cores used if the user wants to use parallelisation, Default: 1 |
Returns a list with the following elements: models, which is a glmnet object and features, which is a list of the features used in each model.
data(TOP_data_binary, package = "TOP") x1 <- TOP_data_binary$x1 x2 <- TOP_data_binary$x2 x3 <- TOP_data_binary$x3 y1 <- TOP_data_binary$y1 y2 <- TOP_data_binary$y2 y3 <- TOP_data_binary$y3 set.seed(23) x_list <- list(x1, x2) y_list <- list(factor(y1), factor(y2)) model <- TOP_model(x_list, y_list)
data(TOP_data_binary, package = "TOP") x1 <- TOP_data_binary$x1 x2 <- TOP_data_binary$x2 x3 <- TOP_data_binary$x3 y1 <- TOP_data_binary$y1 y2 <- TOP_data_binary$y2 y3 <- TOP_data_binary$y3 set.seed(23) x_list <- list(x1, x2) y_list <- list(factor(y1), factor(y2)) model <- TOP_model(x_list, y_list)
Fit a Transferable Omics Prediction survival model (a Cox net model) across multiple datasets.
TOP_survival( x_list, y_list, nFeatures = 50, dataset_weights = NULL, sample_weights = FALSE, nCores = 1 )
TOP_survival( x_list, y_list, nFeatures = 50, dataset_weights = NULL, sample_weights = FALSE, nCores = 1 )
x_list |
A list of data frames, each containing the data for a single batch or dataset. Columns are features and rows are observations. |
y_list |
A list of data frames, where the first column in each data frame is the time and the second column is the event status. The length of this list should be the same as the length of x_list. |
nFeatures |
Number of features to return, Default: 50 |
dataset_weights |
a list of data frames that refer to any grouping structure in the batches, Default: NULL |
sample_weights |
Should each batch be weighted equally? This is important with unequal sample sizes, Default: FALSE |
nCores |
A numeric specifying the number of cores used if the user wants to use parallelisation, Default: 1 |
DETAILS
A cox net model
data(TOP_data_binary, package = "TOP") time <- rpois(300, c(600, 1000)) surv <- sample(c(0, 1), 300, replace = TRUE) y <- data.frame(time, surv) batch <- rep(paste0("y", 1:3), c(100, 100, 100)) y_list <- y |> split(batch) x_list <- list(TOP_data_binary$x1, TOP_data_binary$x2, TOP_data_binary$x3) TOP_survival(x_list[-3], y_list[-3], nFeatures = 10)
data(TOP_data_binary, package = "TOP") time <- rpois(300, c(600, 1000)) surv <- sample(c(0, 1), 300, replace = TRUE) y <- data.frame(time, surv) batch <- rep(paste0("y", 1:3), c(100, 100, 100)) y_list <- y |> split(batch) x_list <- list(TOP_data_binary$x1, TOP_data_binary$x2, TOP_data_binary$x3) TOP_survival(x_list[-3], y_list[-3], nFeatures = 10)
A prediction function for TOP_survival
TOP_survivalPrediction(TOP_survival, newx)
TOP_survivalPrediction(TOP_survival, newx)
TOP_survival |
A TOP_survival model. See TOP_survival. |
newx |
A new dataset to predict the survival time. |
A vector of predicted survival time.
data(TOP_data_binary, package = "TOP") time <- rpois(300, c(600, 1000)) surv <- sample(c(0, 1), 300, replace = TRUE) y <- data.frame(time, surv) batch <- rep(paste0("y", 1:3), c(100, 100, 100)) y_list <- y |> split(batch) x_list <- list(TOP_data_binary$x1, TOP_data_binary$x2, TOP_data_binary$x3) surv_model <- TOP_survival(x_list[-3], y_list[-3], nFeatures = 10) TOP_survivalPrediction(surv_model, newx = x_list[[3]])
data(TOP_data_binary, package = "TOP") time <- rpois(300, c(600, 1000)) surv <- sample(c(0, 1), 300, replace = TRUE) y <- data.frame(time, surv) batch <- rep(paste0("y", 1:3), c(100, 100, 100)) y_list <- y |> split(batch) x_list <- list(TOP_data_binary$x1, TOP_data_binary$x2, TOP_data_binary$x3) surv_model <- TOP_survival(x_list[-3], y_list[-3], nFeatures = 10) TOP_survivalPrediction(surv_model, newx = x_list[[3]])