Title: | Functional annotation-driven unsupervised clustering for single-cell data |
---|---|
Description: | ASURAT is a software package for single-cell data analysis. Using ASURAT, one can simultaneously perform unsupervised clustering and biological interpretation in terms of cell type, disease, biological process, and signaling pathway activity. Taking single-cell RNA-seq data and knowledge-based databases, such as Cell Ontology, Gene Ontology, KEGG, etc., as input, ASURAT transforms gene expression tables into original multivariate tables, termed sign-by-sample matrices (SSMs). |
Authors: | Keita Iida [aut, cre] , Johannes Nicolaus Wibisana [ctb] |
Maintainer: | Keita Iida <[email protected]> |
License: | GPL-3 + file LICENSE |
Version: | 1.11.0 |
Built: | 2024-10-30 03:45:37 UTC |
Source: | https://github.com/bioc/ASURAT |
This function adds metadata of variables and samples.
add_metadata(sce = NULL, mitochondria_symbol = NULL)
add_metadata(sce = NULL, mitochondria_symbol = NULL)
sce |
A SingleCellExperiment object. |
mitochondria_symbol |
A string representing mitochondrial genes. This function computes the percentage of reads that map to the mitochondrial genes. Examples are '^MT-', '^mt-', etc. |
A SingleCellExperiment object.
data(pbmc_eg) pbmc <- add_metadata(sce = pbmc_eg, mitochondria_symbol = "^MT-")
data(pbmc_eg) pbmc <- add_metadata(sce = pbmc_eg, mitochondria_symbol = "^MT-")
ASURAT is a software package for single-cell data analysis. Using ASURAT, one can simultaneously perform unsupervised clustering and biological interpretation in terms of cell type, disease, biological process, and signaling pathway activity. Taking single-cell RNA-seq data and knowledge-based databases, such as Cell Ontology, Gene Ontology, KEGG, etc., as input, ASURAT transforms gene expression tables into original multivariate tables, termed sign-by-sample matrices (SSMs).
Perform bubble sorting, counting the number of steps.
bubble_sort(listdata)
bubble_sort(listdata)
listdata |
A list of vector and integer. For example, in R code, listdata = list(vec = c(1, 0, 1, ...), cnt = 0). The integer (cnt = 0) is the initial number of steps for bubble sorting. |
A List.
bubble_sort(list(vec = c(1, 1, 0), cnt = 0))
bubble_sort(list(vec = c(1, 1, 0), cnt = 0))
This function clusters each functional gene set into strongly, variably, and weakly correlated gene sets.
cluster_genesets(sce = NULL, cormat = NULL, th_posi = NULL, th_nega = NULL)
cluster_genesets(sce = NULL, cormat = NULL, th_posi = NULL, th_nega = NULL)
sce |
A SingleCellExperiment object. |
cormat |
A correlation matrix of gene expressions. |
th_posi |
A threshold of positive correlation coefficient. |
th_nega |
A threshold of negative correlation coefficient. |
A SingleCellExperiment object.
data(pbmc_eg) data(human_GO_eg) mat <- t(as.matrix(SummarizedExperiment::assay(pbmc_eg, "centered"))) pbmc_cormat <- cor(mat, method = "spearman") pbmcs <- list(GO = pbmc_eg) S4Vectors::metadata(pbmcs$GO) <- list(sign = human_GO_eg[["BP"]]) pbmcs$GO <- remove_signs(sce = pbmcs$GO, min_ngenes = 2, max_ngenes = 1000) pbmcs$GO <- cluster_genesets(sce = pbmcs$GO, cormat = pbmc_cormat, th_posi = 0.24, th_nega = -0.20) # The results are stored in `metadata(pbmcs$GO)$sign`.
data(pbmc_eg) data(human_GO_eg) mat <- t(as.matrix(SummarizedExperiment::assay(pbmc_eg, "centered"))) pbmc_cormat <- cor(mat, method = "spearman") pbmcs <- list(GO = pbmc_eg) S4Vectors::metadata(pbmcs$GO) <- list(sign = human_GO_eg[["BP"]]) pbmcs$GO <- remove_signs(sce = pbmcs$GO, min_ngenes = 2, max_ngenes = 1000) pbmcs$GO <- cluster_genesets(sce = pbmcs$GO, cormat = pbmc_cormat, th_posi = 0.24, th_nega = -0.20) # The results are stored in `metadata(pbmcs$GO)$sign`.
This function computes separation indices for each cluster versus the others.
compute_sepI_all(sce = NULL, labels = NULL, nrand_samples = NULL)
compute_sepI_all(sce = NULL, labels = NULL, nrand_samples = NULL)
sce |
A SingleCellExperiment object. |
labels |
A vector of labels of all the samples (cells). |
nrand_samples |
An integer for the number of samples used for random sampling, which samples at least one sample per cluster. |
A SingleCellExperiment object.
data(pbmcs_eg) labels <- SummarizedExperiment::colData(pbmcs_eg$GO)$seurat_clusters pbmcs_eg$GO <- compute_sepI_all(sce = pbmcs_eg$GO, labels = labels, nrand_samples = 10) # The results are stored in `metadata(pbmcs_eg$GO)$marker_signs`.
data(pbmcs_eg) labels <- SummarizedExperiment::colData(pbmcs_eg$GO)$seurat_clusters pbmcs_eg$GO <- compute_sepI_all(sce = pbmcs_eg$GO, labels = labels, nrand_samples = 10) # The results are stored in `metadata(pbmcs_eg$GO)$marker_signs`.
This function computes separation indices of sign scores for two given clusters.
compute_sepI_clusters( sce = NULL, labels = NULL, nrand_samples = NULL, ident_1 = NULL, ident_2 = NULL )
compute_sepI_clusters( sce = NULL, labels = NULL, nrand_samples = NULL, ident_1 = NULL, ident_2 = NULL )
sce |
A SingleCellExperiment object. |
labels |
A vector of labels of all the samples. |
nrand_samples |
An integer for the number of samples used for random sampling, which samples at least one sample per cluster. |
ident_1 |
Label names identifying cluster numbers, e.g., ident_1 = 1, ident_1 = c(1, 3). |
ident_2 |
Label names identifying cluster numbers, e.g., ident_2 = 2, ident_2 = c(2, 4). |
A SingleCellExperiment object.
data(pbmcs_eg) labels <- SummarizedExperiment::colData(pbmcs_eg$GO)$seurat_clusters pbmcs_eg$GO <- compute_sepI_clusters(sce = pbmcs_eg$GO, labels = labels, nrand_samples = 10, ident_1 = 1, ident_2 = c(0, 2)) # The results are stored in `metadata(pbmcs_eg$GO)$marker_signs`.
data(pbmcs_eg) labels <- SummarizedExperiment::colData(pbmcs_eg$GO)$seurat_clusters pbmcs_eg$GO <- compute_sepI_clusters(sce = pbmcs_eg$GO, labels = labels, nrand_samples = 10, ident_1 = 1, ident_2 = c(0, 2)) # The results are stored in `metadata(pbmcs_eg$GO)$marker_signs`.
This function defines signs for strongly and variably correlated gene sets.
create_signs(sce = NULL, min_cnt_strg = 2, min_cnt_vari = 2)
create_signs(sce = NULL, min_cnt_strg = 2, min_cnt_vari = 2)
sce |
A SingleCellExperiment object. |
min_cnt_strg |
An integer for the cutoff value for strongly correlated gene sets. |
min_cnt_vari |
An integer for the cutoff value for variably correlated gene sets. |
A SingleCellExperiment object.
data(pbmc_eg) data(human_GO_eg) mat <- t(as.matrix(SummarizedExperiment::assay(pbmc_eg, "centered"))) pbmc_cormat <- cor(mat, method = "spearman") pbmcs <- list(GO = pbmc_eg) S4Vectors::metadata(pbmcs$GO) <- list(sign = human_GO_eg[["BP"]]) pbmcs$GO <- remove_signs(sce = pbmcs$GO, min_ngenes = 2, max_ngenes = 1000) pbmcs$GO <- cluster_genesets(sce = pbmcs$GO, cormat = pbmc_cormat, th_posi = 0.24, th_nega = -0.20) pbmcs$GO <- create_signs(sce = pbmcs$GO, min_cnt_strg = 2, min_cnt_vari = 2) # The results are stored in `metadata(pbmcs$GO)$sign_all`.
data(pbmc_eg) data(human_GO_eg) mat <- t(as.matrix(SummarizedExperiment::assay(pbmc_eg, "centered"))) pbmc_cormat <- cor(mat, method = "spearman") pbmcs <- list(GO = pbmc_eg) S4Vectors::metadata(pbmcs$GO) <- list(sign = human_GO_eg[["BP"]]) pbmcs$GO <- remove_signs(sce = pbmcs$GO, min_ngenes = 2, max_ngenes = 1000) pbmcs$GO <- cluster_genesets(sce = pbmcs$GO, cormat = pbmc_cormat, th_posi = 0.24, th_nega = -0.20) pbmcs$GO <- create_signs(sce = pbmcs$GO, min_cnt_strg = 2, min_cnt_vari = 2) # The results are stored in `metadata(pbmcs$GO)$sign_all`.
A list of small Cell Ontology and MSigDB databases for human.
human_COMSig_eg
human_COMSig_eg
A list of dataframes.
A list of small Gene Ontology database for human.
human_GO_eg
human_GO_eg
A list of dataframes.
A list of small KEGG database for human.
human_KEGG_eg
human_KEGG_eg
A list of dataframes.
This function creates a new SingleCellExperiment object for sign-by-sample matrices (SSM) by concatenating SSMs for strongly and variably correlated gene sets.
makeSignMatrix(sce = NULL, weight_strg = 0.5, weight_vari = 0.5)
makeSignMatrix(sce = NULL, weight_strg = 0.5, weight_vari = 0.5)
sce |
A SingleCellExperiment object. |
weight_strg |
A weight parameter for strongly correlated gene sets. |
weight_vari |
A weight parameter for variably correlated gene sets. |
A SingleCellExperiment object.
data(pbmc_eg) data(human_GO_eg) mat <- t(as.matrix(SummarizedExperiment::assay(pbmc_eg, "centered"))) pbmc_cormat <- cor(mat, method = "spearman") pbmcs <- list(GO = pbmc_eg) S4Vectors::metadata(pbmcs$GO) <- list(sign = human_GO_eg[["BP"]]) pbmcs$GO <- remove_signs(sce = pbmcs$GO, min_ngenes = 2, max_ngenes = 1000) pbmcs$GO <- cluster_genesets(sce = pbmcs$GO, cormat = pbmc_cormat, th_posi = 0.24, th_nega = -0.20) pbmcs$GO <- create_signs(sce = pbmcs$GO, min_cnt_strg = 2, min_cnt_vari = 2) pbmcs$GO <- makeSignMatrix(sce = pbmcs$GO, weight_strg = 0.5, weight_vari = 0.5) # The results can be checked by, e.g., assay(pbmcs$GO, "counts").
data(pbmc_eg) data(human_GO_eg) mat <- t(as.matrix(SummarizedExperiment::assay(pbmc_eg, "centered"))) pbmc_cormat <- cor(mat, method = "spearman") pbmcs <- list(GO = pbmc_eg) S4Vectors::metadata(pbmcs$GO) <- list(sign = human_GO_eg[["BP"]]) pbmcs$GO <- remove_signs(sce = pbmcs$GO, min_ngenes = 2, max_ngenes = 1000) pbmcs$GO <- cluster_genesets(sce = pbmcs$GO, cormat = pbmc_cormat, th_posi = 0.24, th_nega = -0.20) pbmcs$GO <- create_signs(sce = pbmcs$GO, min_cnt_strg = 2, min_cnt_vari = 2) pbmcs$GO <- makeSignMatrix(sce = pbmcs$GO, weight_strg = 0.5, weight_vari = 0.5) # The results can be checked by, e.g., assay(pbmcs$GO, "counts").
A SingleCellExperiment object, including 50 genes and 50 cells. The original data, "4k PBMCs from a Healthy Donor", was downloaded from the 10x Genomics database.
pbmc_eg
pbmc_eg
SingleCellExperiment object.
https://support.10xgenomics.com/single-cell-gene-expression
A list of SingleCellExperiment objects, consisting of small sign-by-sample matrices, pbmcs_eg$CM (using Cell Ontology and MSigDB databases), pbmcs_eg$GO (using Gene Ontology database), and pbmcs_eg$KG (KEGG). Here, pbmcs_eg$CM, pbmcs_eg$GO, and pbmcs_eg$KG include 87, 72, and 64 signs, respectively, and 50 cells.
pbmcs_eg
pbmcs_eg
A list of SingleCellExperiment objects.
This function visualizes three-dimensional data with labels and colors.
plot_dataframe3D( dataframe3D = NULL, labels = NULL, colors = NULL, theta = 30, phi = 30, title = "", xlabel = "", ylabel = "", zlabel = "" )
plot_dataframe3D( dataframe3D = NULL, labels = NULL, colors = NULL, theta = 30, phi = 30, title = "", xlabel = "", ylabel = "", zlabel = "" )
dataframe3D |
A dataframe with three columns. |
labels |
NULL or a vector of labels of all the samples, corresponding to colors. |
colors |
NULL or a vector of colors of all the samples, corresponding to labels. |
theta |
Angle of the plot. |
phi |
Angle of the plot. |
title |
Title. |
xlabel |
x-axis label. |
ylabel |
y-axis label. |
zlabel |
z-axis label. |
A scatter3D object in plot3D package.
data(pbmcs_eg) mat <- SingleCellExperiment::reducedDim(pbmcs_eg$CM, "UMAP")[, 1:3] dataframe3D <- as.data.frame(mat) labels <- SummarizedExperiment::colData(pbmcs_eg$CM)$seurat_clusters plot_dataframe3D(dataframe3D = dataframe3D, labels = labels, colors = NULL, theta = 45, phi = 20, title = "PBMC (CO & MSigDB)", xlabel = "UMAP_1", ylabel = "UMAP_2", zlabel = "UMAP_3")
data(pbmcs_eg) mat <- SingleCellExperiment::reducedDim(pbmcs_eg$CM, "UMAP")[, 1:3] dataframe3D <- as.data.frame(mat) labels <- SummarizedExperiment::colData(pbmcs_eg$CM)$seurat_clusters plot_dataframe3D(dataframe3D = dataframe3D, labels = labels, colors = NULL, theta = 45, phi = 20, title = "PBMC (CO & MSigDB)", xlabel = "UMAP_1", ylabel = "UMAP_2", zlabel = "UMAP_3")
This function visualizes multivariate data by heatmaps.
plot_multiheatmaps( ssm_list = NULL, gem_list = NULL, ssmlabel_list = NULL, gemlabel_list = NULL, nrand_samples = NULL, show_row_names = FALSE, title = NULL )
plot_multiheatmaps( ssm_list = NULL, gem_list = NULL, ssmlabel_list = NULL, gemlabel_list = NULL, nrand_samples = NULL, show_row_names = FALSE, title = NULL )
ssm_list |
A list of sign-by-sample matrices. |
gem_list |
A list of gene-by-sample matrices. |
ssmlabel_list |
NULL or a list of dataframes of sample (cell) labels and colors. The length of the list must be the same as that of ssm_list, and the order of labels in each list must be the same as that in ssm_list. |
gemlabel_list |
NULL or a list of dataframes of sample (cell) annotations and colors. The length of the list must be the same as that of gem_list, and the order of labels in each list must be the same as that in gem_list. |
nrand_samples |
Number of samples (cells) used for random sampling. |
show_row_names |
TRUE or FALSE: if TRUE, row names are shown. |
title |
Title. |
A ComplexHeatmap object.
data(pbmcs_eg) mat_CM <- SummarizedExperiment::assay(pbmcs_eg$CM, "counts") mat_GO <- SummarizedExperiment::assay(pbmcs_eg$GO, "counts") mat_KG <- SummarizedExperiment::assay(pbmcs_eg$KG, "counts") ssm_list <- list(SSM_COMSig = mat_CM, SSM_GO = mat_GO, SSM_KEGG = mat_KG) se <- SingleCellExperiment::altExp(pbmcs_eg$CM, "logcounts") mat <- SummarizedExperiment::assay(se, "counts") se <- SingleCellExperiment::altExp(pbmcs_eg$CM, "logcounts") gem_list <- list(GeneExpr = SummarizedExperiment::assay(se, "counts")) labels <- list() ; ssmlabel_list <- list() for(i in seq_along(pbmcs_eg)){ fa <- SummarizedExperiment::colData(pbmcs_eg[[i]])$seurat_clusters labels[[i]] <- data.frame(label = fa) colors <- rainbow(length(unique(labels[[i]]$label)))[labels[[i]]$label] labels[[i]]$color <- colors ssmlabel_list[[i]] <- labels[[i]] } names(ssmlabel_list) <- c("Label_COMSig", "Label_GO", "Label_KEGG") phases <- SummarizedExperiment::colData(pbmcs_eg$CM)$Phase label_CC <- data.frame(label = phases, color = NA) gemlabel_list <- list(CellCycle = label_CC) plot_multiheatmaps(ssm_list = ssm_list, gem_list = gem_list, ssmlabel_list = ssmlabel_list, gemlabel_list = gemlabel_list, nrand_samples = 50, show_row_names = FALSE, title = "PBMC")
data(pbmcs_eg) mat_CM <- SummarizedExperiment::assay(pbmcs_eg$CM, "counts") mat_GO <- SummarizedExperiment::assay(pbmcs_eg$GO, "counts") mat_KG <- SummarizedExperiment::assay(pbmcs_eg$KG, "counts") ssm_list <- list(SSM_COMSig = mat_CM, SSM_GO = mat_GO, SSM_KEGG = mat_KG) se <- SingleCellExperiment::altExp(pbmcs_eg$CM, "logcounts") mat <- SummarizedExperiment::assay(se, "counts") se <- SingleCellExperiment::altExp(pbmcs_eg$CM, "logcounts") gem_list <- list(GeneExpr = SummarizedExperiment::assay(se, "counts")) labels <- list() ; ssmlabel_list <- list() for(i in seq_along(pbmcs_eg)){ fa <- SummarizedExperiment::colData(pbmcs_eg[[i]])$seurat_clusters labels[[i]] <- data.frame(label = fa) colors <- rainbow(length(unique(labels[[i]]$label)))[labels[[i]]$label] labels[[i]]$color <- colors ssmlabel_list[[i]] <- labels[[i]] } names(ssmlabel_list) <- c("Label_COMSig", "Label_GO", "Label_KEGG") phases <- SummarizedExperiment::colData(pbmcs_eg$CM)$Phase label_CC <- data.frame(label = phases, color = NA) gemlabel_list <- list(CellCycle = label_CC) plot_multiheatmaps(ssm_list = ssm_list, gem_list = gem_list, ssmlabel_list = ssmlabel_list, gemlabel_list = gemlabel_list, nrand_samples = 50, show_row_names = FALSE, title = "PBMC")
This function removes sample data by setting minimum and maximum threshold values for the metadata.
remove_samples( sce = NULL, min_nReads = NULL, max_nReads = NULL, min_nGenes = NULL, max_nGenes = NULL, min_percMT = NULL, max_percMT = NULL )
remove_samples( sce = NULL, min_nReads = NULL, max_nReads = NULL, min_nGenes = NULL, max_nGenes = NULL, min_percMT = NULL, max_percMT = NULL )
sce |
A SingleCellExperiment object. |
min_nReads |
A minimum threshold value of the number of reads. |
max_nReads |
A maximum threshold value of the number of reads. |
min_nGenes |
A minimum threshold value of the number of non-zero expressed genes. |
max_nGenes |
A maximum threshold value of the number of non-zero expressed genes. |
min_percMT |
A minimum threshold value of the percent of reads that map to mitochondrial genes. |
max_percMT |
A maximum threshold value of the percent of reads that map to mitochondrial genes. |
A SingleCellExperiment object.
data(pbmc_eg) pbmc <- add_metadata(sce = pbmc_eg, mitochondria_symbol = "^MT-") pbmc <- remove_samples(sce = pbmc, min_nReads = 0, max_nReads = 1e+10, min_nGenes = 0, max_nGenes = 1e+10, min_percMT = NULL, max_percMT = NULL)
data(pbmc_eg) pbmc <- add_metadata(sce = pbmc_eg, mitochondria_symbol = "^MT-") pbmc <- remove_samples(sce = pbmc, min_nReads = 0, max_nReads = 1e+10, min_nGenes = 0, max_nGenes = 1e+10, min_percMT = NULL, max_percMT = NULL)
This function removes signs including too few or too many genes.
remove_signs(sce = NULL, min_ngenes = 2, max_ngenes = 1000)
remove_signs(sce = NULL, min_ngenes = 2, max_ngenes = 1000)
sce |
A SingleCellExperiment object. |
min_ngenes |
Minimum number of genes, which must be greater than one. |
max_ngenes |
Maximum number of genes, which must be greater than one. |
A SingleCellExperiment object.
data(pbmc_eg) data(human_GO_eg) pbmcs <- list(GO = pbmc_eg) S4Vectors::metadata(pbmcs$GO) <- list(sign = human_GO_eg[["BP"]]) pbmcs$GO <- remove_signs(sce = pbmcs$GO, min_ngenes = 2, max_ngenes = 1000) # The results are stored in `metadata(pbmcs$GO)$sign`.
data(pbmc_eg) data(human_GO_eg) pbmcs <- list(GO = pbmc_eg) S4Vectors::metadata(pbmcs$GO) <- list(sign = human_GO_eg[["BP"]]) pbmcs$GO <- remove_signs(sce = pbmcs$GO, min_ngenes = 2, max_ngenes = 1000) # The results are stored in `metadata(pbmcs$GO)$sign`.
This function removes signs by specifying keywords.
remove_signs_manually(sce = NULL, keywords = NULL)
remove_signs_manually(sce = NULL, keywords = NULL)
sce |
A SingleCellExperiment object. |
keywords |
Keywords separated by pipes '|'. |
A SingleCellExperiment object.
data(pbmc_eg) data(human_GO_eg) mat <- t(as.matrix(SummarizedExperiment::assay(pbmc_eg, "centered"))) pbmc_cormat <- cor(mat, method = "spearman") pbmcs <- list(GO = pbmc_eg) S4Vectors::metadata(pbmcs$GO) <- list(sign = human_GO_eg[["BP"]]) pbmcs$GO <- remove_signs(sce = pbmcs$GO, min_ngenes = 2, max_ngenes = 1000) pbmcs$GO <- cluster_genesets(sce = pbmcs$GO, cormat = pbmc_cormat, th_posi = 0.24, th_nega = -0.20) pbmcs$GO <- create_signs(sce = pbmcs$GO, min_cnt_strg = 2, min_cnt_vari = 2) keywords <- "Covid19|foofoo|hogehoge" pbmcs$GO <- remove_signs_manually(sce = pbmcs$GO, keywords = keywords) # The results are stored in `metadata(pbmcs$GO)$sign_SCG`, # `metadata(pbmcs$GO)$sign_VCG`, and `metadata(pbmcs$GO)$sign_all`.
data(pbmc_eg) data(human_GO_eg) mat <- t(as.matrix(SummarizedExperiment::assay(pbmc_eg, "centered"))) pbmc_cormat <- cor(mat, method = "spearman") pbmcs <- list(GO = pbmc_eg) S4Vectors::metadata(pbmcs$GO) <- list(sign = human_GO_eg[["BP"]]) pbmcs$GO <- remove_signs(sce = pbmcs$GO, min_ngenes = 2, max_ngenes = 1000) pbmcs$GO <- cluster_genesets(sce = pbmcs$GO, cormat = pbmc_cormat, th_posi = 0.24, th_nega = -0.20) pbmcs$GO <- create_signs(sce = pbmcs$GO, min_cnt_strg = 2, min_cnt_vari = 2) keywords <- "Covid19|foofoo|hogehoge" pbmcs$GO <- remove_signs_manually(sce = pbmcs$GO, keywords = keywords) # The results are stored in `metadata(pbmcs$GO)$sign_SCG`, # `metadata(pbmcs$GO)$sign_VCG`, and `metadata(pbmcs$GO)$sign_all`.
This function removes redundant signs using semantic similarity matrices.
remove_signs_redundant( sce = NULL, similarity_matrix = NULL, threshold = NULL, keep_rareID = NULL )
remove_signs_redundant( sce = NULL, similarity_matrix = NULL, threshold = NULL, keep_rareID = NULL )
sce |
A SingleCellExperiment object. |
similarity_matrix |
A semantic similarity matrix. |
threshold |
A threshold value of semantic similarity, above which biological terms are regarded as similar. |
keep_rareID |
If TRUE, biological terms with the larger ICs are kept. |
A SingleCellExperiment object.
data(pbmc_eg) data(human_GO_eg) mat <- t(as.matrix(SummarizedExperiment::assay(pbmc_eg, "centered"))) pbmc_cormat <- cor(mat, method = "spearman") pbmcs <- list(GO = pbmc_eg) S4Vectors::metadata(pbmcs$GO) <- list(sign = human_GO_eg[["BP"]]) pbmcs$GO <- remove_signs(sce = pbmcs$GO, min_ngenes = 2, max_ngenes = 1000) pbmcs$GO <- cluster_genesets(sce = pbmcs$GO, cormat = pbmc_cormat, th_posi = 0.24, th_nega = -0.20) pbmcs$GO <- create_signs(sce = pbmcs$GO, min_cnt_strg = 2, min_cnt_vari = 2) pbmcs$GO <- remove_signs_redundant( sce = pbmcs$GO, similarity_matrix = human_GO_eg$similarity_matrix$BP, threshold = 0.80, keep_rareID = TRUE) # The results are stored in `metadata(pbmcs$GO)$sign_SCG`, # `metadata(pbmcs$GO)$sign_VCG`, `metadata(pbmcs$GO)$sign_all`, # and if there exist, `metadata(pbmcs$GO)$sign_SCG_redundant` and # `metadata(pbmcs$GO)$sign_VCG_redundant`.
data(pbmc_eg) data(human_GO_eg) mat <- t(as.matrix(SummarizedExperiment::assay(pbmc_eg, "centered"))) pbmc_cormat <- cor(mat, method = "spearman") pbmcs <- list(GO = pbmc_eg) S4Vectors::metadata(pbmcs$GO) <- list(sign = human_GO_eg[["BP"]]) pbmcs$GO <- remove_signs(sce = pbmcs$GO, min_ngenes = 2, max_ngenes = 1000) pbmcs$GO <- cluster_genesets(sce = pbmcs$GO, cormat = pbmc_cormat, th_posi = 0.24, th_nega = -0.20) pbmcs$GO <- create_signs(sce = pbmcs$GO, min_cnt_strg = 2, min_cnt_vari = 2) pbmcs$GO <- remove_signs_redundant( sce = pbmcs$GO, similarity_matrix = human_GO_eg$similarity_matrix$BP, threshold = 0.80, keep_rareID = TRUE) # The results are stored in `metadata(pbmcs$GO)$sign_SCG`, # `metadata(pbmcs$GO)$sign_VCG`, `metadata(pbmcs$GO)$sign_all`, # and if there exist, `metadata(pbmcs$GO)$sign_SCG_redundant` and # `metadata(pbmcs$GO)$sign_VCG_redundant`.
This function removes lowly expressed variable data.
remove_variables(sce = NULL, min_nsamples = 0)
remove_variables(sce = NULL, min_nsamples = 0)
sce |
A SingleCellExperiment object. |
min_nsamples |
An integer. This function removes variables for which the numbers of non-zero expressing samples are less than this value. |
A SingleCellExperiment object.
data(pbmc_eg) pbmc <- add_metadata(sce = pbmc_eg, mitochondria_symbol = "^MT-") pbmc <- remove_variables(sce = pbmc, min_nsamples = 10)
data(pbmc_eg) pbmc <- add_metadata(sce = pbmc_eg, mitochondria_symbol = "^MT-") pbmc <- remove_variables(sce = pbmc, min_nsamples = 10)
This function removes variable data such that the mean expression levels across samples are less than 'min_meannReads'.
remove_variables_second(sce = NULL, min_meannReads = 0)
remove_variables_second(sce = NULL, min_meannReads = 0)
sce |
A SingleCellExperiment object. |
min_meannReads |
A numeric value. This function removes variables for which the mean read counts are less than this value. |
A SingleCellExperiment object.
data(pbmc_eg) pbmc <- remove_variables_second(sce = pbmc_eg, min_meannReads = 0.01)
data(pbmc_eg) pbmc <- remove_variables_second(sce = pbmc_eg, min_meannReads = 0.01)
This function selects signs by specifying keywords.
select_signs_manually(sce = NULL, keywords = NULL)
select_signs_manually(sce = NULL, keywords = NULL)
sce |
A SingleCellExperiment object. |
keywords |
Keywords separated by a pipe. |
A SingleCellExperiment object.
data(pbmc_eg) data(human_GO_eg) mat <- t(as.matrix(SummarizedExperiment::assay(pbmc_eg, "centered"))) pbmc_cormat <- cor(mat, method = "spearman") pbmcs <- list(GO = pbmc_eg) S4Vectors::metadata(pbmcs$GO) <- list(sign = human_GO_eg[["BP"]]) pbmcs$GO <- remove_signs(sce = pbmcs$GO, min_ngenes = 2, max_ngenes = 1000) pbmcs$GO <- cluster_genesets(sce = pbmcs$GO, cormat = pbmc_cormat, th_posi = 0.24, th_nega = -0.20) pbmcs$GO <- create_signs(sce = pbmcs$GO, min_cnt_strg = 2, min_cnt_vari = 2) keywords <- "cell|process" pbmcs$GO <- select_signs_manually(sce = pbmcs$GO, keywords = keywords) # The results are stored in `metadata(pbmcs$GO)$sign_SCG`, # `metadata(pbmcs$GO)$sign_VCG`, and `metadata(pbmcs$GO)$sign_all`.
data(pbmc_eg) data(human_GO_eg) mat <- t(as.matrix(SummarizedExperiment::assay(pbmc_eg, "centered"))) pbmc_cormat <- cor(mat, method = "spearman") pbmcs <- list(GO = pbmc_eg) S4Vectors::metadata(pbmcs$GO) <- list(sign = human_GO_eg[["BP"]]) pbmcs$GO <- remove_signs(sce = pbmcs$GO, min_ngenes = 2, max_ngenes = 1000) pbmcs$GO <- cluster_genesets(sce = pbmcs$GO, cormat = pbmc_cormat, th_posi = 0.24, th_nega = -0.20) pbmcs$GO <- create_signs(sce = pbmcs$GO, min_cnt_strg = 2, min_cnt_vari = 2) keywords <- "cell|process" pbmcs$GO <- select_signs_manually(sce = pbmcs$GO, keywords = keywords) # The results are stored in `metadata(pbmcs$GO)$sign_SCG`, # `metadata(pbmcs$GO)$sign_VCG`, and `metadata(pbmcs$GO)$sign_all`.
Perform one-shot adjacent swapping for each element.
swap_pass(listdata)
swap_pass(listdata)
listdata |
A list of vector and integer. |
A List.
swap_pass(list(vec = c(1, 1, 0), cnt = 0))
swap_pass(list(vec = c(1, 1, 0), cnt = 0))