Title: | Latent Embedding Multivariate Regression |
---|---|
Description: | Fit a latent embedding multivariate regression (LEMUR) model to multi-condition single-cell data. The model provides a parametric description of single-cell data measured with treatment vs. control or more complex experimental designs. The parametric model is used to (1) align conditions, (2) predict log fold changes between conditions for all cells, and (3) identify cell neighborhoods with consistent log fold changes. For those neighborhoods, a pseudobulked differential expression test is conducted to assess which genes are significantly changed. |
Authors: | Constantin Ahlmann-Eltze [aut, cre] |
Maintainer: | Constantin Ahlmann-Eltze <[email protected]> |
License: | MIT + file LICENSE |
Version: | 1.5.0 |
Built: | 2024-10-30 08:37:39 UTC |
Source: | https://github.com/bioc/lemur |
lemur_fit
Access values from a lemur_fit
## S3 method for class 'lemur_fit' .DollarNames(x, pattern = "") ## S4 method for signature 'lemur_fit' x$name ## S4 replacement method for signature 'lemur_fit' x$name <- value
## S3 method for class 'lemur_fit' .DollarNames(x, pattern = "") ## S4 method for signature 'lemur_fit' x$name ## S4 replacement method for signature 'lemur_fit' x$name <- value
x |
the |
pattern |
the pattern from looking up potential values interactively |
name |
the name of the value behind the dollar |
value |
the replacement value. This only works for |
The respective value stored in the lemur_fit
object.
lemur_fit
for more documentation on the
accessor functions.
Enforce additional alignment of cell clusters beyond the direct differential embedding
align_harmony( fit, design = fit$alignment_design, ridge_penalty = 0.01, max_iter = 10, ..., verbose = TRUE ) align_by_grouping( fit, grouping, design = fit$alignment_design, ridge_penalty = 0.01, preserve_position_of_NAs = FALSE, verbose = TRUE )
align_harmony( fit, design = fit$alignment_design, ridge_penalty = 0.01, max_iter = 10, ..., verbose = TRUE ) align_by_grouping( fit, grouping, design = fit$alignment_design, ridge_penalty = 0.01, preserve_position_of_NAs = FALSE, verbose = TRUE )
fit |
a |
design |
a specification of the design (matrix or formula) that is used
for the transformation. Default: |
ridge_penalty |
specification how much the flexibility of the transformation
should be regularized. Default: |
max_iter |
argument specific for |
... |
additional parameters that are passed on to relevant functions |
verbose |
Should the method print information during the fitting. Default: |
grouping |
argument specific for |
preserve_position_of_NAs |
argument specific for |
The fit
object with the updated fit$embedding
and fit$alignment_coefficients
.
data(glioblastoma_example_data) fit <- lemur(glioblastoma_example_data, design = ~ patient_id + condition, n_emb = 5, verbose = FALSE) # Creating some grouping for illustration cell_types <- sample(c("tumor cell", "neuron", "leukocyte"), size = ncol(fit), replace = TRUE) fit_al1 <- align_by_grouping(fit, grouping = cell_types) # Alternatively, use harmony to automatically group cells fit_al2 <- align_harmony(fit) fit_al2 # The alignment coefficients are a 3D array fit_al2$alignment_coefficients
data(glioblastoma_example_data) fit <- lemur(glioblastoma_example_data, design = ~ patient_id + condition, n_emb = 5, verbose = FALSE) # Creating some grouping for illustration cell_types <- sample(c("tumor cell", "neuron", "leukocyte"), size = ncol(fit), replace = TRUE) fit_al1 <- align_by_grouping(fit, grouping = cell_types) # Alternatively, use harmony to automatically group cells fit_al2 <- align_harmony(fit) fit_al2 # The alignment coefficients are a 3D array fit_al2$alignment_coefficients
Find differential expression neighborhoods
find_de_neighborhoods( fit, group_by, contrast = fit$contrast, selection_procedure = c("zscore", "contrast"), directions = c("random", "contrast", "axis_parallel"), min_neighborhood_size = 50, de_mat = SummarizedExperiment::assays(fit)[["DE"]], test_data = fit$test_data, test_data_col_data = NULL, test_method = c("glmGamPoi", "edgeR", "limma", "none"), continuous_assay_name = fit$use_assay, count_assay_name = "counts", size_factor_method = NULL, design = fit$design, alignment_design = fit$alignment_design, add_diff_in_diff = TRUE, make_neighborhoods_consistent = FALSE, skip_confounded_neighborhoods = FALSE, control_parameters = NULL, verbose = TRUE )
find_de_neighborhoods( fit, group_by, contrast = fit$contrast, selection_procedure = c("zscore", "contrast"), directions = c("random", "contrast", "axis_parallel"), min_neighborhood_size = 50, de_mat = SummarizedExperiment::assays(fit)[["DE"]], test_data = fit$test_data, test_data_col_data = NULL, test_method = c("glmGamPoi", "edgeR", "limma", "none"), continuous_assay_name = fit$use_assay, count_assay_name = "counts", size_factor_method = NULL, design = fit$design, alignment_design = fit$alignment_design, add_diff_in_diff = TRUE, make_neighborhoods_consistent = FALSE, skip_confounded_neighborhoods = FALSE, control_parameters = NULL, verbose = TRUE )
fit |
the |
group_by |
If the |
contrast |
a specification which contrast to fit. This defaults to the
|
selection_procedure |
specify the algorithm that is used to select the
neighborhoods for each gene. Broadly, |
directions |
a string to define the algorithm to select the direction onto
which the cells are projected before searching for the neighborhood.
|
min_neighborhood_size |
the minimum number of cells per neighborhood. Default: |
de_mat |
the matrix with the differential expression values and is only relevant if
|
test_data |
a |
test_data_col_data |
additional column data for the |
test_method |
choice of test for the pseudobulked differential expression. glmGamPoi and edgeR work on an count assay. limma works on the continuous assay. |
continuous_assay_name , count_assay_name
|
the assay or list names of |
size_factor_method |
Set the procedure to calculate the size factor after pseudobulking. This argument
is only relevant if |
design , alignment_design
|
the design to use for the fit. Default: |
add_diff_in_diff |
a boolean to specify if the log-fold change (plus significance) of
the DE in the neighborhood against the DE in the complement of the neighborhood is calculated.
If |
make_neighborhoods_consistent |
Include cells from outside the neighborhood if they are
at least 10 times in the k-nearest neighbors of the cells inside the neighborhood. Secondly,
remove cells from the neighborhood which are less than 10 times in the k-nearest neighbors of the
other cells in the neighborhood. Default |
skip_confounded_neighborhoods |
Sometimes the inferred neighborhoods are not limited to
a single cell state; this becomes problematic if the cells of the conditions compared in the contrast
are unequally distributed between the cell states. Default: |
control_parameters |
named list with additional parameters passed to underlying functions. |
verbose |
Should the method print information during the fitting. Default: |
a data frame with one entry per gene
name
The gene name.
neighborhood
A list column where each element is a vector with the cell names included in that neighborhood.
n_cells
the number of cells in the neighborhood (lengths(neighborhood)
).
sel_statistic
The statistic that is maximized by the selection_procedure
.
pval
, adj_pval
, t_statistic
, lfc
The p-value, Benjamini-Hochberg adjusted p-value (FDR), the
t-statistic, and the log2 fold change of the differential expression test defined by contrast
for the
cells inside the neighborhood (calculated using test_method
). Only present if test_data
is not NULL
.
did_pval
, did_adj_pval
, did_lfc
The measurement if the differential expression of the cells
inside the neighborhood is significantly different from the differential expression of the cells outside
the neighborhood. Only present if add_diff_in_diff = TRUE
.
data(glioblastoma_example_data) fit <- lemur(glioblastoma_example_data, design = ~ patient_id + condition, n_emb = 5, verbose = FALSE) # Optional alignment # fit <- align_harmony(fit) fit <- test_de(fit, contrast = cond(condition = "panobinostat") - cond(condition = "ctrl")) nei <- find_de_neighborhoods(fit, group_by = vars(patient_id)) head(nei)
data(glioblastoma_example_data) fit <- lemur(glioblastoma_example_data, design = ~ patient_id + condition, n_emb = 5, verbose = FALSE) # Optional alignment # fit <- align_harmony(fit) fit <- test_de(fit, contrast = cond(condition = "panobinostat") - cond(condition = "ctrl")) nei <- find_de_neighborhoods(fit, group_by = vars(patient_id)) head(nei)
glioblastoma_example_data
datasetThe dataset is a SingleCellExperiment
object subset to 5,000 cells and
300 genes. The colData
contain an entry for each cell from which patient
it came and to which treatment condition it belonged ("ctrl"
or "panobinostat"
).
The original data was collected by Zhao et al. (2021).
A SingleCellExperiment
object.
Zhao, Wenting, Athanassios Dovas, Eleonora Francesca Spinazzi, Hanna Mendes Levitin, Matei Alexandru Banu, Pavan Upadhyayula, Tejaswi Sudhakar, et al. “Deconvolution of Cell Type-Specific Drug Responses in Human Tumor Tissue with Single-Cell RNA-Seq.” Genome Medicine 13, no. 1 (December 2021): 82. https://doi.org/10.1186/s13073-021-00894-y.
Main function to fit the latent embedding multivariate regression (LEMUR) model
lemur( data, design = ~1, col_data = NULL, n_embedding = 15, linear_coefficient_estimator = c("linear", "mean", "cluster_median", "zero"), use_assay = "logcounts", test_fraction = 0.2, ..., verbose = TRUE )
lemur( data, design = ~1, col_data = NULL, n_embedding = 15, linear_coefficient_estimator = c("linear", "mean", "cluster_median", "zero"), use_assay = "logcounts", test_fraction = 0.2, ..., verbose = TRUE )
data |
a matrix with observations in the columns and features in the rows.
Or a |
design |
a formula referring to global objects or column in the |
col_data |
an optional data frame with |
n_embedding |
the dimension of the $k$-plane that is rotated through space. |
linear_coefficient_estimator |
specify which estimator is used to center the conditions.
|
use_assay |
if |
test_fraction |
the fraction of cells that are split of before the model fit to keep an
independent set of test observations. Alternatively, a logical vector of length |
... |
additional parameters that are passed on to the internal function |
verbose |
Should the method print information during the fitting. Default: |
An object of class lemur_fit
which extends SingleCellExperiment
. Accordingly,
all functions that work for sce
's also work for lemur_fit
's. In addition, we
give easy access to the fitted values using the dollar notation (e.g., fit$embedding
).
For details see the lemur_fit help page.
Ahlmann-Eltze, C. & Huber, W. (2023). Analysis of multi-condition single-cell data with latent embedding multivariate regression. bioRxiv https://doi.org/10.1101/2023.03.06.531268
align_by_grouping
, align_harmony
, test_de
, find_de_neighborhoods
data(glioblastoma_example_data) fit <- lemur(glioblastoma_example_data, design = ~ patient_id + condition, n_emb = 5) fit
data(glioblastoma_example_data) fit <- lemur(glioblastoma_example_data, design = ~ patient_id + condition, n_emb = 5) fit
lemur_fit
classThe lemur_fit
class extends SingleCellExperiment
and provides
additional accessors to get the values of the values produced by lemur
.
## S4 method for signature 'lemur_fit,ANY,ANY,ANY' x[i, j, ..., drop = TRUE] ## S4 method for signature 'lemur_fit' design(object)
## S4 method for signature 'lemur_fit,ANY,ANY,ANY' x[i, j, ..., drop = TRUE] ## S4 method for signature 'lemur_fit' design(object)
x , i , j , ... , drop
|
the |
object |
the |
To access the values produced by lemur
, use the dollar notation ($
):
fit$n_embedding
the number of embedding dimensions.
fit$design
the specification of the design in lemur
. Usually this is a stats::formula
.
fit$base_point
a matrix (nrow(fit) * fit$n_embedding
) with the base point for the Grassmann exponential map.
fit$coefficients
a three-dimensional tensor (nrow(fit) * fit$n_embedding * ncol(fit$design_matrix)
) with the coefficients for
the exponential map.
fit$embedding
a matrix (fit$n_embedding * ncol(fit)
) with the low dimensional position for each cell.
fit$design_matrix
a matrix with covariates for each cell (ncol(fit) * ncol(fit$design_matrix)
).
fit$linear_coefficients
a matrix (nrow(fit) * ncol(fit$design_matrix)
) with the coefficients for the linear regression.
fit$alignment_coefficients
a 3D tensor with the coefficients for the alignment (fit$n_embedding * fit$n_embedding * ncol(fit$design_matrix)
)
fit$alignment_design
an alternative design specification for the alignment. This is typically a stats::formula
.
fit$alignment_design_matrix
an alternative design matrix specification for the alignment.
fit$contrast
a parsed version of the contrast specification from the test_de
function or NULL
.
fit$colData
the column annotation DataFrame
.
fit$rowData
the row annotation DataFrame
.
An object of class lemur_fit
.
# The easiest way to make a lemur_fit object, is to call `lemur` data(glioblastoma_example_data) fit <- lemur(glioblastoma_example_data, design = ~ patient_id + condition, n_emb = 5, verbose = FALSE) fit$n_embedding fit$embedding[,1:10] fit$n_embedding fit$embedding[,1:10] fit$design_matrix[1:10,] fit$coefficients[1:3,,]
# The easiest way to make a lemur_fit object, is to call `lemur` data(glioblastoma_example_data) fit <- lemur(glioblastoma_example_data, design = ~ patient_id + condition, n_emb = 5, verbose = FALSE) fit$n_embedding fit$embedding[,1:10] fit$n_embedding fit$embedding[,1:10] fit$design_matrix[1:10,] fit$coefficients[1:3,,]
lemur_fit
objectPredict values from lemur_fit
object
## S3 method for class 'lemur_fit' predict( object, newdata = NULL, newdesign = NULL, newcondition = NULL, embedding = object$embedding, with_linear_model = TRUE, with_embedding = TRUE, with_alignment = TRUE, ... )
## S3 method for class 'lemur_fit' predict( object, newdata = NULL, newdesign = NULL, newcondition = NULL, embedding = object$embedding, with_linear_model = TRUE, with_embedding = TRUE, with_alignment = TRUE, ... )
object |
an |
newdata |
a data.frame which passed to |
newdesign |
a matrix with the covariates for which the output
is predicted. If |
newcondition |
an unquoted expression with a call to |
embedding |
the low-dimensional cell position for which the output is predicted. |
with_linear_model |
a boolean to indicate if the linear regression offset is included in the prediction. |
with_embedding |
a boolean to indicate if the embedding contributes to the output. |
with_alignment |
a boolean to indicate if the alignment effect is removed from the output. |
... |
additional parameters passed to |
A matrix with the same dimension nrow(object) * nrow(newdesign)
.
data(glioblastoma_example_data) fit <- lemur(glioblastoma_example_data, design = ~ patient_id + condition, n_emb = 5, verbose = FALSE) pred <- predict(fit) pred_ctrl <- predict(fit, newdesign = c(1, 0, 0, 0, 0, 0)) pred_trt <- predict(fit, newdesign = c(1, 0, 0, 0, 0, 1)) # This is the same as the test_de result fit <- test_de(fit, cond(condition = "panobinostat") - cond(condition = "ctrl")) all.equal(SummarizedExperiment::assay(fit, "DE"), pred_trt - pred_ctrl, check.attributes = FALSE)
data(glioblastoma_example_data) fit <- lemur(glioblastoma_example_data, design = ~ patient_id + condition, n_emb = 5, verbose = FALSE) pred <- predict(fit) pred_ctrl <- predict(fit, newdesign = c(1, 0, 0, 0, 0, 0)) pred_trt <- predict(fit, newdesign = c(1, 0, 0, 0, 0, 1)) # This is the same as the test_de result fit <- test_de(fit, cond(condition = "panobinostat") - cond(condition = "ctrl")) all.equal(SummarizedExperiment::assay(fit, "DE"), pred_trt - pred_ctrl, check.attributes = FALSE)
Project new data onto the latent spaces of an existing lemur fit
project_on_lemur_fit( fit, data, col_data = NULL, use_assay = "logcounts", design = fit$design, alignment_design = fit$alignment_design, return = c("matrix", "lemur_fit") )
project_on_lemur_fit( fit, data, col_data = NULL, use_assay = "logcounts", design = fit$design, alignment_design = fit$alignment_design, return = c("matrix", "lemur_fit") )
fit |
an |
data |
a matrix with observations in the columns and features in the rows.
Or a |
col_data |
col_data an optional data frame with |
use_assay |
if |
design , alignment_design
|
the design formulas or design matrices that are used
to project the data on the correct latent subspace. Both default to the designs
from the |
return |
which data structure is returned. |
Either a matrix with the low-dimensional embeddings of the data
or
an object of class lemur_fit
wrapping that embedding.
data(glioblastoma_example_data) subset1 <- glioblastoma_example_data[,1:2500] subset2 <- glioblastoma_example_data[,2501:5000] fit <- lemur(subset1, design = ~ condition, n_emb = 5, test_fraction = 0, verbose = FALSE) # Returns a `lemur_fit` object with the projection of `subset2` fit2 <- project_on_lemur_fit(fit, subset2, return = "lemur_fit") fit2
data(glioblastoma_example_data) subset1 <- glioblastoma_example_data[,1:2500] subset2 <- glioblastoma_example_data[,2501:5000] fit <- lemur(subset1, design = ~ condition, n_emb = 5, test_fraction = 0, verbose = FALSE) # Returns a `lemur_fit` object with the projection of `subset2` fit2 <- project_on_lemur_fit(fit, subset2, return = "lemur_fit") fit2
lemur_fit
objectPredict values from lemur_fit
object
## S4 method for signature 'lemur_fit' residuals(object, with_linear_model = TRUE, with_embedding = TRUE, ...)
## S4 method for signature 'lemur_fit' residuals(object, with_linear_model = TRUE, with_embedding = TRUE, ...)
object |
an |
with_linear_model |
a boolean to indicate if the linear regression offset is included in the prediction. |
with_embedding |
a boolean to indicate if the embedding contributes to the output. |
... |
ignored. |
A matrix with the same dimension dim(object)
.
data(glioblastoma_example_data) fit <- lemur(glioblastoma_example_data, design = ~ patient_id + condition, n_emb = 5, verbose = FALSE) resid <- residuals(fit) dim(resid)
data(glioblastoma_example_data) fit <- lemur(glioblastoma_example_data, design = ~ patient_id + condition, n_emb = 5, verbose = FALSE) resid <- residuals(fit) dim(resid)
Predict log fold changes between conditions for each cell
test_de( fit, contrast, embedding = NULL, consider = c("embedding+linear", "embedding", "linear"), new_assay_name = "DE" )
test_de( fit, contrast, embedding = NULL, consider = c("embedding+linear", "embedding", "linear"), new_assay_name = "DE" )
fit |
the result of calling |
contrast |
Specification of the contrast: a call to |
embedding |
matrix of size |
consider |
specify which part of the model are considered for the differential expression test. |
new_assay_name |
the name of the assay added to the |
If is.null(embedding)
the fit
object with a new assay called "DE"
. Otherwise
return a matrix with the differential expression values.
library(SummarizedExperiment) library(SingleCellExperiment) data(glioblastoma_example_data) fit <- lemur(glioblastoma_example_data, design = ~ patient_id + condition, n_emb = 5, verbose = FALSE) # Optional alignment # fit <- align_harmony(fit) fit <- test_de(fit, contrast = cond(condition = "panobinostat") - cond(condition = "ctrl")) # The fit object contains a new assay called "DE" assayNames(fit) # The DE assay captures differences between conditions is_ctrl_cond <- fit$colData$condition == "ctrl" mean(logcounts(fit)[1,!is_ctrl_cond]) - mean(logcounts(fit)[1,is_ctrl_cond]) mean(assay(fit, "DE")[1,])
library(SummarizedExperiment) library(SingleCellExperiment) data(glioblastoma_example_data) fit <- lemur(glioblastoma_example_data, design = ~ patient_id + condition, n_emb = 5, verbose = FALSE) # Optional alignment # fit <- align_harmony(fit) fit <- test_de(fit, contrast = cond(condition = "panobinostat") - cond(condition = "ctrl")) # The fit object contains a new assay called "DE" assayNames(fit) # The DE assay captures differences between conditions is_ctrl_cond <- fit$colData$condition == "ctrl" mean(logcounts(fit)[1,!is_ctrl_cond]) - mean(logcounts(fit)[1,is_ctrl_cond]) mean(assay(fit, "DE")[1,])
Differential embedding for each condition
test_global( fit, contrast, reduced_design = NULL, consider = c("embedding+linear", "embedding", "linear"), variance_est = c("analytical", "resampling", "none"), verbose = TRUE, ... )
test_global( fit, contrast, reduced_design = NULL, consider = c("embedding+linear", "embedding", "linear"), variance_est = c("analytical", "resampling", "none"), verbose = TRUE, ... )
fit |
the result of calling |
contrast |
Specification of the contrast: a call to |
reduced_design |
an alternative specification of the null hypothesis. |
consider |
specify which part of the model are considered for the differential expression test. |
variance_est |
How or if the variance should be estimated. |
verbose |
should the method print information during the fitting. Default: |
... |
additional arguments. |
a data.frame