Title: | Scoring and Marker Selection Method Based on Modified TF-IDF |
---|---|
Description: | This package enables automated selection of group-specific signatures, especially for rare populations. The package is developed for generating specific lists of signature genes based on Term Frequency-Inverse Document Frequency (TF-IDF) modified methods. It can also be used as a new gene-set scoring method or data transformation method. Multiple visualization functions are implemented in this package. |
Authors: | Jinjin Chen [aut, cre] |
Maintainer: | Jinjin Chen <[email protected]> |
License: | MIT + file LICENSE |
Version: | 1.3.2 |
Built: | 2024-12-13 03:44:11 UTC |
Source: | https://github.com/bioc/smartid |
Compute TF (term/feature frequency), IDF (inverse document/cell frequency) and IAE (inverse average expression of features), and combine them into the final score
cal_score( data, tf = c("logtf", "tf"), idf = "prob", iae = "prob", slot = "counts", new.slot = "score", par.idf = NULL, par.iae = NULL ) ## S4 method for signature 'AnyMatrix' cal_score( data, tf = c("logtf", "tf"), idf = "prob", iae = "prob", par.idf = NULL, par.iae = NULL ) ## S4 method for signature 'SummarizedExperiment' cal_score( data, tf = c("logtf", "tf"), idf = "prob", iae = "prob", slot = "counts", new.slot = "score", par.idf = NULL, par.iae = NULL )
cal_score( data, tf = c("logtf", "tf"), idf = "prob", iae = "prob", slot = "counts", new.slot = "score", par.idf = NULL, par.iae = NULL ) ## S4 method for signature 'AnyMatrix' cal_score( data, tf = c("logtf", "tf"), idf = "prob", iae = "prob", par.idf = NULL, par.iae = NULL ) ## S4 method for signature 'SummarizedExperiment' cal_score( data, tf = c("logtf", "tf"), idf = "prob", iae = "prob", slot = "counts", new.slot = "score", par.idf = NULL, par.iae = NULL )
data |
an expression object, can be matrix or SummarizedExperiment |
tf |
a character, specify the TF method to use, can be "tf" or "logtf" |
idf |
a character, specify the IDF method to use. Available methods can
be accessed using `idf_iae_methods()` |
iae |
a character, specify the IAE method to use. Available methods can
be accessed using `idf_iae_methods()` |
slot |
a character, specify which slot to use when data is se object, optional, default 'counts' |
new.slot |
a character, specify the name of slot to save score in se object, optional, default 'score' |
par.idf |
other parameters for specified IDF methods |
par.iae |
other parameters for specified IAE methods |
A list of matrices or se object containing combined score
data <- matrix(rpois(100, 2), 10, dimnames = list(1:10)) cal_score( data, par.idf = list(label = sample(c("A", "B"), 10, replace = TRUE)), par.iae = list(label = sample(c("A", "B"), 10, replace = TRUE)) )
data <- matrix(rpois(100, 2), 10, dimnames = list(1:10)) cal_score( data, par.idf = list(label = sample(c("A", "B"), 10, replace = TRUE)), par.iae = list(label = sample(c("A", "B"), 10, replace = TRUE)) )
Calculate score for each feature in each cell
cal_score_init( expr, tf = c("logtf", "tf"), idf = "prob", iae = "prob", par.idf = NULL, par.iae = NULL )
cal_score_init( expr, tf = c("logtf", "tf"), idf = "prob", iae = "prob", par.idf = NULL, par.iae = NULL )
expr |
a count matrix, features in row and cells in column |
tf |
a character, specify the TF method to use, can be "tf" or "logtf" |
idf |
a character, specify the IDF method to use. Available methods can
be accessed using `idf_iae_methods()` |
iae |
a character, specify the IAE method to use. Available methods can
be accessed using `idf_iae_methods()` |
par.idf |
other parameters for specified IDF methods |
par.iae |
other parameters for specified IAE methods |
a list of combined score, tf, idf and iae
data <- matrix(rpois(100, 2), 10, dimnames = list(1:10)) label <- sample(c("A", "B"), 10, replace = TRUE) smartid:::cal_score_init(data, par.idf = list(label = label), par.iae = list(label = label) )
data <- matrix(rpois(100, 2), 10, dimnames = list(1:10)) label <- sample(c("A", "B"), 10, replace = TRUE) smartid:::cal_score_init(data, par.idf = list(label = label), par.iae = list(label = label) )
compute overall score based on the given marker list
gs_score(data, features = NULL, slot = "score", suffix = "score") ## S4 method for signature 'AnyMatrix,ANY' gs_score(data, features = NULL) ## S4 method for signature 'AnyMatrix,list' gs_score(data, features = NULL, suffix = "score") ## S4 method for signature 'SummarizedExperiment,ANY' gs_score(data, features = NULL, slot = "score", suffix = "score")
gs_score(data, features = NULL, slot = "score", suffix = "score") ## S4 method for signature 'AnyMatrix,ANY' gs_score(data, features = NULL) ## S4 method for signature 'AnyMatrix,list' gs_score(data, features = NULL, suffix = "score") ## S4 method for signature 'SummarizedExperiment,ANY' gs_score(data, features = NULL, slot = "score", suffix = "score")
data |
an expression object, can be matrix or SummarizedExperiment |
features |
vector or named list, feature names to compute score |
slot |
a character, specify which slot to use when data is se object, optional, default 'score' |
suffix |
a character, specify the name suffix to save score when features is a named list |
A vector of overall score for each sample
data <- matrix(rnorm(100), 10, dimnames = list(seq_len(10))) gs_score(data, features = seq_len(3))
data <- matrix(rnorm(100), 10, dimnames = list(seq_len(10))) gs_score(data, features = seq_len(3))
Calculate scores of each cell on given features
gs_score_init(score, features = NULL)
gs_score_init(score, features = NULL)
score |
matrix, features in row and samples in column |
features |
vector, feature names to compute score |
a vector of score
data <- matrix(rnorm(100), 10, dimnames = list(1:10)) gs_score_init(data, 1:5)
data <- matrix(rnorm(100), 10, dimnames = list(1:10)) gs_score_init(data, 1:5)
standard inverse average expression
iae(expr, features = NULL, thres = 0)
iae(expr, features = NULL, thres = 0)
expr |
a matrix, features in row and cells in column |
features |
vector, feature names or indexes to compute |
thres |
numeric, cell only counts when expr > threshold, default 0 |
where $n$ is the total number of cells, $N_{i,j}$ is the counts of feature $i$ in cell $j$.
a vector of inverse average expression score for each feature
data <- matrix(rpois(100, 2), 10, dimnames = list(1:10)) smartid:::iae(data)
data <- matrix(rpois(100, 2), 10, dimnames = list(1:10)) smartid:::iae(data)
inverse average expression using hdbscan cluster as label
iae_hdb(expr, features = NULL, multi = TRUE, thres = 0, minPts = 2, ...)
iae_hdb(expr, features = NULL, multi = TRUE, thres = 0, minPts = 2, ...)
expr |
a matrix, features in row and cells in column |
features |
vector, feature names or indexes to compute |
multi |
logical, if to compute based on binary (FALSE) or multi-class (TRUE) |
thres |
numeric, cell only counts when expr > threshold, default 0 |
minPts |
integer, minimum size of clusters, default 2.
Details in `dbscan::hdbscan()` |
... |
parameters for `dbscan::hdbscan()` |
Details as in `iae_prob()`.
a matrix of inverse average expression score
set.seed(123) data <- matrix(rpois(100, 2), 10, dimnames = list(1:10)) smartid:::iae_hdb(data)
set.seed(123) data <- matrix(rpois(100, 2), 10, dimnames = list(1:10)) smartid:::iae_hdb(data)
labeled inverse average expression: IGM
iae_igm(expr, features = NULL, label, lambda = 7, thres = 0)
iae_igm(expr, features = NULL, label, lambda = 7, thres = 0)
expr |
a matrix, features in row and cells in column |
features |
vector, feature names or indexes to compute |
label |
vector, group label of each cell |
lambda |
numeric, hyperparameter for IGM |
thres |
numeric, cell only counts when expr > threshold, default 0 |
where $\lambda$ is the hyper parameter, $N_{i,j,k}$ is the counts of feature $i$ in cell $j$ within class $k$, and $r_k$ is the rank of class $k$.
a vector of inverse gravity moment score for each feature
data <- matrix(rpois(100, 2), 10, dimnames = list(1:10)) smartid:::iae_igm(data, label = sample(c("A", "B"), 10, replace = TRUE))
data <- matrix(rpois(100, 2), 10, dimnames = list(1:10)) smartid:::iae_igm(data, label = sample(c("A", "B"), 10, replace = TRUE))
inverse average expression: max
iae_m(expr, features = NULL, thres = 0)
iae_m(expr, features = NULL, thres = 0)
expr |
a matrix, features in row and cells in column |
features |
vector, feature names or indexes to compute |
thres |
numeric, cell only counts when expr > threshold, default 0 |
where $i$ is the feature and $i'$ is the feature except $i$, $N_{i,j}$ is the counts of feature $i$ in cell $j$
, and
is
.
a matrix of inverse average expression score for each feature
data <- matrix(rpois(100, 2), 10, dimnames = list(1:10)) smartid:::iae_m(data)
data <- matrix(rpois(100, 2), 10, dimnames = list(1:10)) smartid:::iae_m(data)
labeled inverse average expression: probability based
iae_prob(expr, features = NULL, label, multi = TRUE, thres = 0)
iae_prob(expr, features = NULL, label, multi = TRUE, thres = 0)
expr |
a matrix, features in row and cells in column |
features |
vector, feature names or indexes to compute |
label |
vector, group label of each cell |
multi |
logical, if to compute based on binary (FALSE) or multi-class (TRUE) |
thres |
numeric, cell only counts when expr > threshold, default 0 |
where $N_{i,j \in D}$ is the counts of feature $i$ in cell $j$ within class $D$, and $\hat D$ is the class except $D$.
a matrix of inverse average expression score
data <- matrix(rpois(100, 2), 10, dimnames = list(1:10)) smartid:::iae_prob(data, label = sample(c("A", "B"), 10, replace = TRUE))
data <- matrix(rpois(100, 2), 10, dimnames = list(1:10)) smartid:::iae_prob(data, label = sample(c("A", "B"), 10, replace = TRUE))
labeled inverse average expression: relative frequency
iae_rf(expr, features = NULL, label, multi = TRUE, thres = 0)
iae_rf(expr, features = NULL, label, multi = TRUE, thres = 0)
expr |
a matrix, features in row and cells in column |
features |
vector, feature names or indexes to compute |
label |
vector, group label of each cell |
multi |
logical, if to compute based on binary (FALSE) or multi-class (TRUE) |
thres |
numeric, cell only counts when expr > threshold, default 0 |
where $N_{i,j \in D}$ is the counts of feature $i$ in cell $j$ within class $D$, and $\hat D$ is the class except $D$.
a matrix of inverse average expression score
data <- matrix(rpois(100, 2), 10, dimnames = list(1:10)) smartid:::iae_rf(data, label = sample(c("A", "B"), 10, replace = TRUE))
data <- matrix(rpois(100, 2), 10, dimnames = list(1:10)) smartid:::iae_rf(data, label = sample(c("A", "B"), 10, replace = TRUE))
inverse average expression using standard deviation (SD)
iae_sd(expr, features = NULL, log = FALSE, thres = 0)
iae_sd(expr, features = NULL, log = FALSE, thres = 0)
expr |
a matrix, features in row and cells in column |
features |
vector, feature names or indexes to compute |
log |
logical, if to do log-transformation |
thres |
numeric, cell only counts when expr > threshold, default 0 |
where $TF_i$ is the term frequency of feature $i$ (see details in `tf()`), $n$ is the total number of cells and $N_{i,j}$ is the counts of feature $i$ in cell $j$.
a vector of inverse average expression score for each feature
data <- matrix(rpois(100, 2), 10, dimnames = list(1:10)) smartid:::iae_sd(data)
data <- matrix(rpois(100, 2), 10, dimnames = list(1:10)) smartid:::iae_sd(data)
standard inverse cell frequency
idf(expr, features = NULL, thres = 0)
idf(expr, features = NULL, thres = 0)
expr |
a matrix, features in row and cells in column |
features |
vector, feature names or indexes to compute |
thres |
numeric, cell only counts when expr > threshold, default 0 |
where $n$ is the total number of cells, $n_i$ is the number of cells containing feature $i$.
a vector of inverse cell frequency score for each feature
data <- matrix(rpois(100, 2), 10, dimnames = list(1:10)) smartid:::idf(data)
data <- matrix(rpois(100, 2), 10, dimnames = list(1:10)) smartid:::idf(data)
inverse document frequency using hdbscan cluster as label
idf_hdb(expr, features = NULL, multi = TRUE, thres = 0, minPts = 2, ...)
idf_hdb(expr, features = NULL, multi = TRUE, thres = 0, minPts = 2, ...)
expr |
a matrix, features in row and cells in column |
features |
vector, feature names or indexes to compute |
multi |
logical, if to compute based on binary (FALSE) or multi-class (TRUE) |
thres |
numeric, cell only counts when expr > threshold, default 0 |
minPts |
integer, minimum size of clusters, default 2.
Details in `dbscan::hdbscan()` |
... |
parameters for `dbscan::hdbscan()` |
Details as in `idf_prob()`.
a matrix of inverse cell frequency score
set.seed(123) data <- matrix(rpois(100, 2), 10, dimnames = list(1:10)) smartid:::idf_hdb(data)
set.seed(123) data <- matrix(rpois(100, 2), 10, dimnames = list(1:10)) smartid:::idf_hdb(data)
Returns a named vector of IDF/IAE methods
idf_iae_methods()
idf_iae_methods()
names of methods implemented
idf_iae_methods()
idf_iae_methods()
labeled inverse cell frequency: IGM
idf_igm(expr, features = NULL, label, lambda = 7, thres = 0)
idf_igm(expr, features = NULL, label, lambda = 7, thres = 0)
expr |
a matrix, features in row and cells in column |
features |
vector, feature names or indexes to compute |
label |
vector, group label of each cell |
lambda |
numeric, hyperparameter for IGM |
thres |
numeric, cell only counts when expr > threshold, default 0 |
where $\lambda$ is the hyper parameter, $n_{i,k}$ is the number of cells containing feature $i$ in class $k$, $r_k$ is the rank of $n_{i,k}$.
a vector of inverse gravity moment score for each feature
data <- matrix(rpois(100, 2), 10, dimnames = list(1:10)) smartid:::idf_igm(data, label = sample(c("A", "B"), 10, replace = TRUE))
data <- matrix(rpois(100, 2), 10, dimnames = list(1:10)) smartid:::idf_igm(data, label = sample(c("A", "B"), 10, replace = TRUE))
inverse cell frequency: max
idf_m(expr, features = NULL, thres = 0)
idf_m(expr, features = NULL, thres = 0)
expr |
a matrix, features in row and cells in column |
features |
vector, feature names or indexes to compute |
thres |
numeric, cell only counts when expr > threshold, default 0 |
where $i$ is the feature and $i'$ is the feature except $i$, $n_i$ is the number of cells containing feature $i$, and $n_{i'}$ is the number of cells containing feature $i'$.
a matrix of inverse cell frequency score for each feature
data <- matrix(rpois(100, 2), 10, dimnames = list(1:10)) smartid:::idf_m(data)
data <- matrix(rpois(100, 2), 10, dimnames = list(1:10)) smartid:::idf_m(data)
labeled inverse cell frequency: probability based
idf_prob(expr, features = NULL, label, multi = TRUE, thres = 0)
idf_prob(expr, features = NULL, label, multi = TRUE, thres = 0)
expr |
a matrix, features in row and cells in column |
features |
vector, feature names or indexes to compute |
label |
vector, group label of each cell |
multi |
logical, if to compute based on binary (FALSE) or multi-class (TRUE) |
thres |
numeric, cell only counts when expr > threshold, default 0 |
where $n_{i,j \in D}$ is the number of cells containing feature $i$ in class $D$, $n_{j \in D}$ is the total number of cells in class $D$, $\hat D$ is the class except $D$.
a matrix of inverse cell frequency score
data <- matrix(rpois(100, 2), 10, dimnames = list(1:10)) smartid:::idf_prob(data, label = sample(c("A", "B"), 10, replace = TRUE))
data <- matrix(rpois(100, 2), 10, dimnames = list(1:10)) smartid:::idf_prob(data, label = sample(c("A", "B"), 10, replace = TRUE))
labeled inverse cell frequency: relative frequency
idf_rf(expr, features = NULL, label, multi = TRUE, thres = 0)
idf_rf(expr, features = NULL, label, multi = TRUE, thres = 0)
expr |
a matrix, features in row and cells in column |
features |
vector, feature names or indexes to compute |
label |
vector, group label of each cell |
multi |
logical, if to compute based on binary (FALSE) or multi-class (TRUE) |
thres |
numeric, cell only counts when expr > threshold, default 0 |
where $n_{i,j \in D}$ is the number of cells containing feature $i$ in class $D$, $n_{j \in D}$ is the total number of cells in class $D$, $\hat D$ is the class except $D$.
a matrix of inverse cell frequency score
data <- matrix(rpois(100, 2), 10, dimnames = list(1:10)) smartid:::idf_rf(data, label = sample(c("A", "B"), 10, replace = TRUE))
data <- matrix(rpois(100, 2), 10, dimnames = list(1:10)) smartid:::idf_rf(data, label = sample(c("A", "B"), 10, replace = TRUE))
inverse cell frequency using standard deviation (SD)
idf_sd(expr, features = NULL, log = FALSE, thres = 0)
idf_sd(expr, features = NULL, log = FALSE, thres = 0)
expr |
a matrix, features in row and cells in column |
features |
vector, feature names or indexes to compute |
log |
logical, if to do log-transformation |
thres |
numeric, cell only counts when expr > threshold, default 0 |
where $TF_i$ is the term frequency of feature $i$ (see details in `tf()`), $n$ is the total number of cells and $n_i$ is the number of cells containing feature $i$.
a vector of inverse cell frequency score for each feature
data <- matrix(rpois(100, 2), 10, dimnames = list(1:10)) smartid:::idf_sd(data)
data <- matrix(rpois(100, 2), 10, dimnames = list(1:10)) smartid:::idf_sd(data)
select markers using HDBSCAN method
markers_hdbscan( top_markers, column = ".dot", s_thres = NULL, method = c("max.one", "remove.min"), minPts = 5, plot = FALSE, ... )
markers_hdbscan( top_markers, column = ".dot", s_thres = NULL, method = c("max.one", "remove.min"), minPts = 5, plot = FALSE, ... )
top_markers |
output of `top_markers()` |
column |
character, specify which column used as group label |
s_thres |
NULL or numeric, only features with score > threshold will be returned, if NULL will use 2 * average probability as threshold |
method |
can be "max.one" or "remove.min", if to only keep features in 1st component or return features not in the last component |
minPts |
integer, minimum size of clusters for `dbscan::hdbscan()` |
plot |
logical, if to plot mixture density and hist |
... |
other params for `dbscan::hdbscan()` |
a list of markers for each group
data <- matrix(rnorm(100), 10, dimnames = list(1:10)) top_n <- top_markers(data, label = rep(c("A", "B"), 5)) markers_hdbscan(top_n, minPts = 2)
data <- matrix(rnorm(100), 10, dimnames = list(1:10)) top_n <- top_markers(data, label = rep(c("A", "B"), 5)) markers_hdbscan(top_n, minPts = 2)
select markers using mclust EM method
markers_mclust( top_markers, column = ".dot", prob = 0.99, s_thres = NULL, method = c("max.one", "remove.min"), plot = FALSE, ... )
markers_mclust( top_markers, column = ".dot", prob = 0.99, s_thres = NULL, method = c("max.one", "remove.min"), plot = FALSE, ... )
top_markers |
output of `top_markers()` |
column |
character, specify which column used as group label |
prob |
numeric, probability cutoff for 1st component classification |
s_thres |
NULL or numeric, only features with score > threshold will be returned, if NULL will use 2 * average probability as threshold |
method |
can be "max.one" or "remove.min", if to only keep features in 1st component or return features not in the last component |
plot |
logical, if to plot mixture density and hist |
... |
other params for `mclust::densityMclust()` |
a list of markers for each group
data <- matrix(rnorm(100), 10, dimnames = list(1:10)) top_n <- top_markers(data, label = rep(c("A", "B"), 5)) markers_mclust(top_n)
data <- matrix(rnorm(100), 10, dimnames = list(1:10)) top_n <- top_markers(data, label = rep(c("A", "B"), 5)) markers_mclust(top_n)
select markers using mixtools EM method
markers_mixmdl( top_markers, column = ".dot", prob = 0.99, k = 3, ratio = 2, dist = c("norm", "gamma"), maxit = 1e+05, plot = FALSE, ... )
markers_mixmdl( top_markers, column = ".dot", prob = 0.99, k = 3, ratio = 2, dist = c("norm", "gamma"), maxit = 1e+05, plot = FALSE, ... )
top_markers |
output of `top_markers()` |
column |
character, specify which column used as group label |
prob |
numeric, probability cutoff for 1st component classification |
k |
integer, number of components of mixtures |
ratio |
numeric, ratio cutoff of 1st component mu to 2nd component mu, only when ratio > cutoff will return markers for the group |
dist |
can be one of "norm" and "gamma", specify if to use
`mixtools::normalmixEM()` or `mixtools::gammamixEM()` |
maxit |
integer, maximum number of iterations for EM |
plot |
logical, if to plot mixture density and hist |
... |
other params for `mixtools::normalmixEM()` or `mixtools::gammamixEM()` |
a list of markers for each group
set.seed(1000) data <- matrix(rnorm(100), 10, dimnames = list(1:10)) top_n <- top_markers(data, label = rep(c("A", "B"), 5)) markers_mixmdl(top_n, k = 3)
set.seed(1000) data <- matrix(rnorm(100), 10, dimnames = list(1:10)) top_n <- top_markers(data, label = rep(c("A", "B"), 5)) markers_mixmdl(top_n, k = 3)
boxplot of features overall score
ova_score_boxplot(data, features, ref.group, label, method = "t.test")
ova_score_boxplot(data, features, ref.group, label, method = "t.test")
data |
matrix, features in row and samples in column |
features |
vector, feature names to plot |
ref.group |
character, reference group name |
label |
vector, group labels |
method |
character, statistical test to use,
details in `ggpubr::stat_compare_means()` |
ggplot object
data <- matrix(rnorm(100), 10, dimnames = list(1:10)) ova_score_boxplot(data, 1:5, ref.group = "A", label = rep(c("A", "B"), 5))
data <- matrix(rnorm(100), 10, dimnames = list(1:10)) ova_score_boxplot(data, 1:5, ref.group = "A", label = rep(c("A", "B"), 5))
scale by mean of group mean for imbalanced data
scale_mgm(expr, label, pooled.sd = FALSE)
scale_mgm(expr, label, pooled.sd = FALSE)
expr |
matrix |
label |
a vector of group label |
pooled.sd |
logical, if to use pooled SD for scaling |
where $\mu_k$ is the mean of x in the $k^{th}$ class, $n_D$ is the number of classes, and $s$ is the standard deviation of x; when `pooled.sd` is set to be TRUE, $s$ will be replaced with the pooled standard deviation $s_{pooled}$.
scaled matrix
scale_mgm(matrix(rnorm(100), 10), label = rep(letters[1:2], 5))
scale_mgm(matrix(rnorm(100), 10), label = rep(letters[1:2], 5))
barplot of processed score
score_barplot(top_markers, column = ".dot", f_list, n = 30)
score_barplot(top_markers, column = ".dot", f_list, n = 30)
top_markers |
output of `top_markers()` |
column |
character, specify which column used as group label |
f_list |
a named list of markers |
n |
numeric, number of returned top genes for each group |
ggplot object
data <- matrix(rnorm(100), 10, dimnames = list(1:10)) top_n <- top_markers(data, label = rep(c("A", "B"), 5)) score_barplot(top_n)
data <- matrix(rnorm(100), 10, dimnames = list(1:10)) top_n <- top_markers(data, label = rep(c("A", "B"), 5)) score_barplot(top_n)
Test data simulated with `splatter`. A SingleCellExperiment object containing 4 groups, with each group's up-regulated DEGs saved in metadata.
data(sim_sce_test)
data(sim_sce_test)
A SingleCellExperiment object of 100 genes * 400 cells.
SingleCellExperiment
boxplot of split single feature score
sin_score_boxplot(data, features = NULL, ref.group, label, method = "t.test")
sin_score_boxplot(data, features = NULL, ref.group, label, method = "t.test")
data |
matrix, features in row and samples in column |
features |
vector, feature names to plot |
ref.group |
character, reference group name |
label |
vector, group labels |
method |
character, statistical test to use,
details in `ggpubr::stat_compare_means()` |
faceted ggplot object
data <- matrix(rnorm(100), 10, dimnames = list(1:10)) sin_score_boxplot(data, 1:2, ref.group = "A", label = rep(c("A", "B"), 5))
data <- matrix(rnorm(100), 10, dimnames = list(1:10)) sin_score_boxplot(data, 1:2, ref.group = "A", label = rep(c("A", "B"), 5))
compute term/feature frequency within each cell
tf(expr, log = FALSE)
tf(expr, log = FALSE)
expr |
a count matrix, features in row and cells in column |
log |
logical, if to do log-transformation |
where $N_{i,j}$ is the counts of feature $i$ in cell $j$.
a matrix of term/gene frequency
data <- matrix(rpois(100, 2), 10, dimnames = list(1:10)) smartid:::tf(data)
data <- matrix(rpois(100, 2), 10, dimnames = list(1:10)) smartid:::tf(data)
scale and transform score and output top markers for groups
top_markers( data, label, n = 10, use.glm = TRUE, batch = NULL, scale = TRUE, use.mgm = TRUE, softmax = TRUE, slot = "score", ... ) ## S4 method for signature 'AnyMatrix' top_markers( data, label, n = 10, use.glm = TRUE, batch = NULL, scale = TRUE, use.mgm = TRUE, softmax = TRUE, slot = "score", ... ) ## S4 method for signature 'SummarizedExperiment' top_markers( data, label, n = 10, use.glm = TRUE, batch = NULL, scale = TRUE, use.mgm = TRUE, softmax = TRUE, slot = "score", ... )
top_markers( data, label, n = 10, use.glm = TRUE, batch = NULL, scale = TRUE, use.mgm = TRUE, softmax = TRUE, slot = "score", ... ) ## S4 method for signature 'AnyMatrix' top_markers( data, label, n = 10, use.glm = TRUE, batch = NULL, scale = TRUE, use.mgm = TRUE, softmax = TRUE, slot = "score", ... ) ## S4 method for signature 'SummarizedExperiment' top_markers( data, label, n = 10, use.glm = TRUE, batch = NULL, scale = TRUE, use.mgm = TRUE, softmax = TRUE, slot = "score", ... )
data |
an expression object, can be matrix or SummarizedExperiment |
label |
a vector of group label |
n |
integer, number of returned top genes for each group |
use.glm |
logical, if to use a GLM to compute group scores (see `top_markers_glm()`) |
batch |
a vector of batch labels, default NULL |
scale |
logical, if to scale data by row |
use.mgm |
logical, if to scale data using `scale_mgm()` |
softmax |
logical, if to apply softmax transformation on output |
slot |
a character, specify which slot to use when data is se object, optional, default 'score' |
... |
params for `top_markers_abs()` or `top_markers_glm()` |
A tibble with top n feature names, group labels and ordered scores
data <- matrix(rgamma(100, 2), 10, dimnames = list(1:10)) top_markers(data, label = rep(c("A", "B"), 5))
data <- matrix(rgamma(100, 2), 10, dimnames = list(1:10)) top_markers(data, label = rep(c("A", "B"), 5))
calculate group median, MAD or mean score and order genes based on scores
top_markers_abs( data, label, n = 10, pooled.sd = FALSE, method = c("median", "mad", "mean"), scale = TRUE, use.mgm = TRUE, softmax = TRUE, tau = 1 )
top_markers_abs( data, label, n = 10, pooled.sd = FALSE, method = c("median", "mad", "mean"), scale = TRUE, use.mgm = TRUE, softmax = TRUE, tau = 1 )
data |
matrix, features in row and samples in column |
label |
a vector of group label |
n |
integer, number of returned top genes for each group |
pooled.sd |
logical, if to use pooled SD for scaling |
method |
character, specify metric to compute, can be one of "median", "mad", "mean" |
scale |
logical, if to scale data by row |
use.mgm |
logical, if to scale data using `scale_mgm()` |
softmax |
logical, if to apply softmax transformation on output |
tau |
numeric, hyper parameter for softmax |
a tibble with feature names, group labels and ordered processed scores
data <- matrix(rgamma(100, 2), 10, dimnames = list(1:10)) top_markers_abs(data, label = rep(c("A", "B"), 5))
data <- matrix(rgamma(100, 2), 10, dimnames = list(1:10)) top_markers_abs(data, label = rep(c("A", "B"), 5))
calculate group mean score using glm and order genes based on scores difference
top_markers_glm( data, label, n = 10, family = gaussian(), batch = NULL, scale = TRUE, use.mgm = TRUE, pooled.sd = FALSE, softmax = TRUE, tau = 1 )
top_markers_glm( data, label, n = 10, family = gaussian(), batch = NULL, scale = TRUE, use.mgm = TRUE, pooled.sd = FALSE, softmax = TRUE, tau = 1 )
data |
matrix, features in row and samples in column |
label |
a vector of group label |
n |
integer, number of returned top genes for each group |
family |
family for glm, details in `stats::glm()` |
batch |
a vector of batch labels, default NULL |
scale |
logical, if to scale data by row |
use.mgm |
logical, if to scale data using `scale_mgm()` |
pooled.sd |
logical, if to use pooled SD for scaling |
softmax |
logical, if to apply softmax transformation on output |
tau |
numeric, hyper parameter for softmax |
a tibble with feature names, group labels and ordered processed scores
data <- matrix(rgamma(100, 2), 10, dimnames = list(1:10)) top_markers_glm(data, label = rep(c("A", "B"), 5))
data <- matrix(rgamma(100, 2), 10, dimnames = list(1:10)) top_markers_glm(data, label = rep(c("A", "B"), 5))
compute group summarized score and order genes based on processed scores
top_markers_init( data, label, n = 10, use.glm = TRUE, batch = NULL, scale = TRUE, use.mgm = TRUE, softmax = TRUE, ... )
top_markers_init( data, label, n = 10, use.glm = TRUE, batch = NULL, scale = TRUE, use.mgm = TRUE, softmax = TRUE, ... )
data |
matrix, features in row and samples in column |
label |
a vector of group label |
n |
integer, number of returned top genes for each group |
use.glm |
logical, if to use a GLM to compute group scores (see `top_markers_glm()`) |
batch |
a vector of batch labels, default NULL |
scale |
logical, if to scale data by row |
use.mgm |
logical, if to scale data using `scale_mgm()` |
softmax |
logical, if to apply softmax transformation on output |
... |
params for `top_markers_abs()` or `top_markers_glm()` |
a tibble with feature names, group labels and ordered processed scores
data <- matrix(rgamma(100, 2), 10, dimnames = list(1:10)) top_markers_init(data, label = rep(c("A", "B"), 5))
data <- matrix(rgamma(100, 2), 10, dimnames = list(1:10)) top_markers_init(data, label = rep(c("A", "B"), 5))