| Title: | Utilities for scRNA-seq data analysis |
|---|---|
| Description: | hammers is a utilities suite for scRNA-seq data analysis compatible with both Seurat and SingleCellExperiment. It provides simple tools to address tasks such as retrieving aggregate gene statistics, finding and removing rare genes, performing representation analysis, computing the center of mass for the expression of a gene of interest in low-dimensional space, and calculating silhouette and cluster-normalized silhouette. |
| Authors: | Andrei-Florian Stoica [aut, cre] (ORCID: <https://orcid.org/0000-0002-5253-0826>) |
| Maintainer: | Andrei-Florian Stoica <[email protected]> |
| License: | MIT + file LICENSE |
| Version: | 1.1.0 |
| Built: | 2026-05-30 08:08:10 UTC |
| Source: | https://github.com/bioc/hammers |
This function adds a categorical column to a data frame based on another column.
addCategory(df, col, newCol, keys, values)addCategory(df, col, newCol, keys, values)
df |
A data frame. |
col |
Column whose values will be used for creating the new column. |
newCol |
Column to be added. |
keys |
A list of keys. If vectors are part of the keys, each of their elements will be assigned the corresponding value. |
values |
A vector of values.
Must have the same length as `keys`. |
A data frame with a new categorical column.
df <- data.frame(fruit = c('apple', 'banana', 'cherry', 'grape')) df <- addCategory(df, 'fruit', 'color', list(c('apple', 'cherry'), 'banana', 'grape'), c('red', 'yellow', 'purple'))df <- data.frame(fruit = c('apple', 'banana', 'cherry', 'grape')) df <- addCategory(df, 'fruit', 'color', list(c('apple', 'cherry'), 'banana', 'grape'), c('red', 'yellow', 'purple'))
Add a categorical column to a Seurat metadata or SingleCellExperiment coldata
addMetadataCategory( scObj, col, newCol, keys, values, newCol2 = NULL, values2 = NULL )addMetadataCategory( scObj, col, newCol, keys, values, newCol2 = NULL, values2 = NULL )
scObj |
A Seurat or SingleCellExperiment object. |
col |
Column whose values will be used for creating the new column. |
newCol |
Column to be added. |
keys |
A list of keys. If vectors are part of the keys, each of their elements will be assigned the corresponding value. |
values |
A vector of values.
Must have the same length as `keys`. |
newCol2 |
A second column to be added based on the same keys.
Default is `NULL`. |
values2 |
A vector of values corresponding to the second
column. Default is `NULL`. |
A Seurat or SingleCellExperiment object with one or
two new categorical column(s) in the metadata/coldata.
scePath <- system.file('extdata', 'sceObj.qs2', package='hammers') sceObj <- qs2::qs_read(scePath) sceObj <- addMetadataCategory(sceObj, 'Cell_Cycle', 'Type', list(c('G0', 'G1'), 'G2M', 'S'), c(2, 3, 1))scePath <- system.file('extdata', 'sceObj.qs2', package='hammers') sceObj <- qs2::qs_read(scePath) sceObj <- addMetadataCategory(sceObj, 'Cell_Cycle', 'Type', list(c('G0', 'G1'), 'G2M', 'S'), c(2, 3, 1))
This function adds a normalized silhouette column to a single-cell expression object.
addNormSilhouette(scObj, normSilDF, normSilCol = "normSilhouette")addNormSilhouette(scObj, normSilDF, normSilCol = "normSilhouette")
scObj |
A Seurat or SingleCellExperiment object. |
normSilDF |
Normalized silhouette data frame. |
normSilCol |
The name of the normalized silhouette column to be added. |
The input object (Seurat or SingleCellExperiment) with
an added metadata normalized silhouette column.
scePath <- system.file('extdata', 'sceObj.qs2', package='hammers') sceObj <- qs2::qs_read(scePath) df <- normalizeSilhouette(sceObj, 'Cluster') sceObj <- addNormSilhouette(sceObj, df)scePath <- system.file('extdata', 'sceObj.qs2', package='hammers') sceObj <- qs2::qs_read(scePath) df <- normalizeSilhouette(sceObj, 'Cluster') sceObj <- addNormSilhouette(sceObj, df)
This function calculates the center of mass based on the columns of a data frame or matrix and a vector of weights.
centerOfMass(obj, weights)centerOfMass(obj, weights)
obj |
A data frame or matrix. |
weights |
A vector of weights. |
A vector containing the center of mass.
obj <- matrix(data=c(2, 3, 1, 3, 6, 8), nrow=3, ncol=2) weights <- c(0.8, 6, 16) centerOfMass(obj, weights)obj <- matrix(data=c(2, 3, 1, 3, 6, 8), nrow=3, ncol=2) weights <- c(0.8, 6, 16) centerOfMass(obj, weights)
This function checks if all genes exist in the single-cell expression object.
checkGenes(scObj, genes)checkGenes(scObj, genes)
scObj |
A Seurat or SingleCellExperiment object. |
genes |
A character vector of genes. |
None. This function is called for its side effect.
scePath <- system.file('extdata', 'sceObj.qs2', package='hammers') sceObj <- qs2::qs_read(scePath) checkGenes(sceObj, c('Gene_0480', 'Gene_0481', 'Gene_0482'))scePath <- system.file('extdata', 'sceObj.qs2', package='hammers') sceObj <- qs2::qs_read(scePath) checkGenes(sceObj, c('Gene_0480', 'Gene_0481', 'Gene_0482'))
This function calculates the centers of mass of selected metadata/coldata
columns from a Seurat or SingleCellExperiment
object.
colCenters(scObj, columns)colCenters(scObj, columns)
scObj |
A Seurat or SingleCellExperiment object. |
columns |
Numeric columns. |
A data frame containing the coordinates of centers of mass.
scePath <- system.file('extdata', 'sceObj.qs2', package='hammers') sceObj <- qs2::qs_read(scePath) colCenters(sceObj, c('sizeFactor'))scePath <- system.file('extdata', 'sceObj.qs2', package='hammers') sceObj <- qs2::qs_read(scePath) colCenters(sceObj, c('sizeFactor'))
This function creates a single-cell dimensionality reduction plot with added labeled points for metadata numeric columns.
colsDimPlot(scObj, cols, ...)colsDimPlot(scObj, cols, ...)
scObj |
A Seurat or SingleCellExperiment object. |
cols |
Columns whose centers of mass will be plotted. |
... |
Additional parameters passed to |
A ggplot object.
scePath <- system.file('extdata', 'sceObj.qs2', package='hammers') sceObj <- qs2::qs_read(scePath) colsDimPlot(sceObj, c('sizeFactor', 'silhouette'))scePath <- system.file('extdata', 'sceObj.qs2', package='hammers') sceObj <- qs2::qs_read(scePath) colsDimPlot(sceObj, c('sizeFactor', 'silhouette'))
This function computes the silhouette for each cell in the Seurat
or SingleCellExperiment object.
computeSilhouette(scObj, idClass, silCol = "silhouette")computeSilhouette(scObj, idClass, silCol = "silhouette")
scObj |
A Seurat or SingleCellExperiment object. |
idClass |
Identity class. Must be present among the metadata columns of the single-cell expression object. |
silCol |
The name of the silhouette column to be added. |
The input object (Seurat or SingleCellExperiment) with
an added metadata silhouette column.
scePath <- system.file('extdata', 'sceObj.qs2', package='hammers') sceObj <- qs2::qs_read(scePath) sceObj <- computeSilhouette(sceObj, 'Cluster')scePath <- system.file('extdata', 'sceObj.qs2', package='hammers') sceObj <- qs2::qs_read(scePath) sceObj <- computeSilhouette(sceObj, 'Cluster')
This function saves a plot or list of plots as a pdf. It can also take as input a function that returns a ggplot object, together with that function's arguments.
## Default S3 method: devPlot(plotObject, ...) ## S3 method for class ''function'' devPlot(plotObject, ...) ## S3 method for class 'ggplot' devPlot(plotObject, ...) ## S3 method for class 'list' devPlot(plotObject, ...) devPlot(plotObject, ...)## Default S3 method: devPlot(plotObject, ...) ## S3 method for class ''function'' devPlot(plotObject, ...) ## S3 method for class 'ggplot' devPlot(plotObject, ...) ## S3 method for class 'list' devPlot(plotObject, ...) devPlot(plotObject, ...)
plotObject |
A function, ggplot object, or list of ggplot objects. |
... |
Additional arguments. |
No value. This function is called for its side effect.
library(ggplot2) df <- data.frame(x = c(1, 2), y = c(3, 5)) p <- ggplot(df) + geom_point(aes(x, y)) devPlot(p) simplePlot <- function(df, title) return(ggplot(df) + geom_point(aes(x, y)) + ggtitle(title)) devPlot(simplePlot, df, 'Plot title') if (file.exists('Rplots.pdf')) file.remove('Rplots.pdf') if (file.exists('Rplots1.pdf')) file.remove('Rplots1.pdf')library(ggplot2) df <- data.frame(x = c(1, 2), y = c(3, 5)) p <- ggplot(df) + geom_point(aes(x, y)) devPlot(p) simplePlot <- function(df, title) return(ggplot(df) + geom_point(aes(x, y)) + ggtitle(title)) devPlot(simplePlot, df, 'Plot title') if (file.exists('Rplots.pdf')) file.remove('Rplots.pdf') if (file.exists('Rplots1.pdf')) file.remove('Rplots1.pdf')
This function plots the distribution of cells across two columns.
distributionPlot( scObj, title = NULL, col1 = "seurat_clusters", col2 = "orig.ident", type = c("counts", "percs"), xLab = col1, yLab = if (type == "counts") "Count" else "Percentage", legendLab = col2, palette = "Spectral", legendPos = "right", legendTextSize = 10, legendTitleSize = 10, axisTextSize = 12, axisTitleSize = 12, sigDigits = 2 )distributionPlot( scObj, title = NULL, col1 = "seurat_clusters", col2 = "orig.ident", type = c("counts", "percs"), xLab = col1, yLab = if (type == "counts") "Count" else "Percentage", legendLab = col2, palette = "Spectral", legendPos = "right", legendTextSize = 10, legendTitleSize = 10, axisTextSize = 12, axisTitleSize = 12, sigDigits = 2 )
scObj |
A Seurat or SingleCellExperiment object. |
title |
Plot title. |
col1 |
Column as string. |
col2 |
Column as string. |
type |
Whether the plot should display counts ('counts', default) or percentages ('percs'). |
xLab |
x axis label. |
yLab |
y axis label. |
legendLab |
Legend label. |
palette |
Color palette. |
legendPos |
Legend position. |
legendTextSize |
Legend text size. |
legendTitleSize |
Legend title size. |
axisTextSize |
Axis text size. |
axisTitleSize |
Axis title size. |
sigDigits |
Number of significant digits used by percentages displayed
on the plot. Ignored if `type` is 'counts'. |
A ggplot object.
scePath <- system.file('extdata', 'sceObj.qs2', package='hammers') sceObj <- qs2::qs_read(scePath) p <- distributionPlot(sceObj, col1='Cluster', col2='Donor')scePath <- system.file('extdata', 'sceObj.qs2', package='hammers') sceObj <- qs2::qs_read(scePath) p <- distributionPlot(sceObj, col1='Cluster', col2='Donor')
This function finds genes expressed in a low number of cells in a Seurat or SingleCellExperiment object.
findRareGenes(scObj, nCells = 10)findRareGenes(scObj, nCells = 10)
scObj |
A Seurat or SingleCellExperiment object. |
nCells |
Minimum number of cells in which a gene must be expressed to be regarded as non-rare. |
A data frame with the rare genes as rownames and a single column representing their frequencies.
scePath <- system.file('extdata', 'sceObj.qs2', package='hammers') sceObj <- qs2::qs_read(scePath) df <- findRareGenes(sceObj)scePath <- system.file('extdata', 'sceObj.qs2', package='hammers') sceObj <- qs2::qs_read(scePath) df <- findRareGenes(sceObj)
This function constructs, for each input gene, the set of cells expressing the gene.
geneCellSets(scObj, genes = NULL)geneCellSets(scObj, genes = NULL)
scObj |
A Seurat or SingleCellExperiment object, or an expression matrix. |
genes |
A character vector of genes. |
A named list of character vectors of cell names.
mat <- matrix(0, 1000, 500) rownames(mat) <- paste0('G', seq(1000)) colnames(mat) <- paste0('C', seq(500)) mat[sample(length(mat), 70000)] <- sample(50, 70000, TRUE) mat <- mat[paste0('G', sample(1000, 3)), ] geneCellSets(mat)mat <- matrix(0, 1000, 500) rownames(mat) <- paste0('G', seq(1000)) colnames(mat) <- paste0('C', seq(500)) mat[sample(length(mat), 70000)] <- sample(50, 70000, TRUE) mat <- mat[paste0('G', sample(1000, 3)), ] geneCellSets(mat)
This function calculates the centers of mass of the expression of input genes.
geneCenters(scObj, genes)geneCenters(scObj, genes)
scObj |
A Seurat or SingleCellExperiment object. |
genes |
A character vector of genes. |
A data frame containing the centers of mass.
scePath <- system.file('extdata', 'sceObj.qs2', package='hammers') sceObj <- qs2::qs_read(scePath) geneCenters(sceObj, c('Gene_0480', 'Gene_0481', 'Gene_0482'))scePath <- system.file('extdata', 'sceObj.qs2', package='hammers') sceObj <- qs2::qs_read(scePath) geneCenters(sceObj, c('Gene_0480', 'Gene_0481', 'Gene_0482'))
This function extracts the number of cells in which a gene from a Seurat or SingleCellExperiment object is expressed.
genePresence(scObj, genes = NULL, minCutoff = NULL, maxCutoff = NULL)genePresence(scObj, genes = NULL, minCutoff = NULL, maxCutoff = NULL)
scObj |
A Seurat or SingleCellExperiment object. |
genes |
Genes for which the number of cells in which the gene is
expressed will be computed. If `NULL` (default), all genes are used. |
minCutoff |
Minimum cutoff for gene counts. Genes with counts below this value will be omitted. |
maxCutoff |
Maximum cutoff for gene counts. Genes with counts above this value will be omitted. |
A data frame with two columns. The first column lists the genes ordered decreasingly by the number of cells in which they appear, the second lists the corresponding numbers of cells.
scePath <- system.file('extdata', 'sceObj.qs2', package='hammers') sceObj <- qs2::qs_read(scePath) df <- genePresence(sceObj)scePath <- system.file('extdata', 'sceObj.qs2', package='hammers') sceObj <- qs2::qs_read(scePath) df <- genePresence(sceObj)
This function creates a single-cell dimensionality reduction plot with added labeled points for genes.
genesDimPlot(scObj, genes, ...)genesDimPlot(scObj, genes, ...)
scObj |
A Seurat or SingleCellExperiment object. |
genes |
Genes whose centers of mass will be plotted. |
... |
Additional parameters passed to |
A ggplot object.
scePath <- system.file('extdata', 'sceObj.qs2', package='hammers') sceObj <- qs2::qs_read(scePath) genesDimPlot(sceObj, c('Gene_0364', 'Gene_0388', 'Gene_0477'))scePath <- system.file('extdata', 'sceObj.qs2', package='hammers') sceObj <- qs2::qs_read(scePath) genesDimPlot(sceObj, c('Gene_0364', 'Gene_0388', 'Gene_0477'))
This function joins all combinations of elements from character vectors with a separating character.
joinCharCombs(..., joinChar = "_")joinCharCombs(..., joinChar = "_")
... |
Vectors passed to |
joinChar |
Character used to join combinations. |
A character vector.
joinCharCombs(c('a', 'b', 'c', 'd'), c('eee', 'ff'), c(1, 2, 3))joinCharCombs(c('a', 'b', 'c', 'd'), c('eee', 'ff'), c(1, 2, 3))
This function creates a map from keys to values.
keyvalMap(keys, values)keyvalMap(keys, values)
keys |
A list of keys. If vectors are part of the keys, each of their elements will be assigned the corresponding value. |
values |
A vector of values.
Must have the same length as `keys`. |
A named vector.
keyvalMap(list(2, c(3, 4, 5), 6, 8), c('a', 'b', 'c', 'd'))keyvalMap(list(2, c(3, 4, 5), 6, 8), c('a', 'b', 'c', 'd'))
This function normalizes the already computed silhouette for each identity class in the single-cell expression object.
normalizeSilhouette(scObj, idClass, silCol = "silhouette")normalizeSilhouette(scObj, idClass, silCol = "silhouette")
scObj |
A Seurat or SingleCellExperiment object. |
idClass |
Identity class. Must be present among the metadata columns of the single-cell expression object. |
silCol |
The name of the silhouette column. |
A data frame with normalized silhouettes for each unique element in the identity class.
scePath <- system.file('extdata', 'sceObj.qs2', package='hammers') sceObj <- qs2::qs_read(scePath) df <- normalizeSilhouette(sceObj, 'Cluster')scePath <- system.file('extdata', 'sceObj.qs2', package='hammers') sceObj <- qs2::qs_read(scePath) df <- normalizeSilhouette(sceObj, 'Cluster')
This function creates a single-cell dimensionality reduction plot with added labeled points.
pointsDimPlot( scObj, title = NULL, pointsObj = NULL, alpha = 1, pointShape = 4, pointSize = 2, pointColor = "black", labelSize = 2.5, maxOverlaps = 30, ... )pointsDimPlot( scObj, title = NULL, pointsObj = NULL, alpha = 1, pointShape = 4, pointSize = 2, pointColor = "black", labelSize = 2.5, maxOverlaps = 30, ... )
scObj |
A Seurat or SingleCellExperiment object. |
title |
Plot title. |
pointsObj |
A data frame or matrix of points with two columns representing x and y coordinates. |
alpha |
Opaqueness level. |
pointShape |
Point shape. |
pointSize |
Point size. |
pointColor |
Point color. |
labelSize |
Label size. If |
maxOverlaps |
Maximum overlaps. |
... |
Additional parameters passed to |
A wrapper around scLang::dimPlot.
A ggplot object.
scePath <- system.file('extdata', 'sceObj.qs2', package='hammers') sceObj <- qs2::qs_read(scePath) pointsObj <- data.frame(x = c(2, 3), y = c(1, 0), row.names = c('P1', 'P2')) pointsDimPlot(sceObj, pointsObj=pointsObj)scePath <- system.file('extdata', 'sceObj.qs2', package='hammers') sceObj <- qs2::qs_read(scePath) pointsObj <- data.frame(x = c(2, 3), y = c(1, 0), row.names = c('P1', 'P2')) pointsDimPlot(sceObj, pointsObj=pointsObj)
This function extracts the relevant information from a data frame and adjusts p-values to be used as weights for the alluvia.
prepAlluvial( df, pvalCol = "pvalAdj", colIndices = c(1, 2), weightExp = 1/2, pvalOffset = 1.00000023069254e-317 )prepAlluvial( df, pvalCol = "pvalAdj", colIndices = c(1, 2), weightExp = 1/2, pvalOffset = 1.00000023069254e-317 )
df |
A data frame. |
pvalCol |
Name of p-value column to be used by the alluvial plot. |
colIndices |
A vector representing the indices of the two categorical columns from the data frame that will be used. |
weightExp |
Exponent used in constructing weight from p-values. |
pvalOffset |
Offset used to avoid zeros inside the logarithm function. |
A data frame with weight scores in lieu of p-values.
df <- data.frame(A = c('a1', 'a2', 'a3', 'a4'), B = c('b1', 'b2', 'b3', 'b4'), pvalAdj = c(0.81, 1e-6, 1e-3, 0.022)) prepAlluvial(df)df <- data.frame(A = c('a1', 'a2', 'a3', 'a4'), B = c('b1', 'b2', 'b3', 'b4'), pvalAdj = c(0.81, 1e-6, 1e-3, 0.022)) prepAlluvial(df)
This function plots a representation data frame as an alluvial plot.
pvalRiverPlot( df, pvalCol = "pvalAdj", colIndices = c(1, 2), weightExp = 1/2, pvalOffset = 1.00000023069254e-317, ... )pvalRiverPlot( df, pvalCol = "pvalAdj", colIndices = c(1, 2), weightExp = 1/2, pvalOffset = 1.00000023069254e-317, ... )
df |
A data frame. |
pvalCol |
Name of p-value column to be used by the alluvial plot. |
colIndices |
A vector representing the indices of the two categorical columns from the data frame that will be used. |
weightExp |
Exponent used in constructing weight from p-values. |
pvalOffset |
Offset used to avoid zeros inside the logarithm function. |
... |
Additional parameters passed to |
A ggplot object.
scePath <- system.file('extdata', 'sceObj.qs2', package='hammers') sceObj <- qs2::qs_read(scePath) df <- repAnalysis(sceObj, 'Cluster', 'Donor') pvalRiverPlot(df)scePath <- system.file('extdata', 'sceObj.qs2', package='hammers') sceObj <- qs2::qs_read(scePath) df <- repAnalysis(sceObj, 'Cluster', 'Donor') pvalRiverPlot(df)
This function removes genes expressed in a low number of cells in a
Seurat or SingleCellExperiment object.
removeRareGenes(scObj, nCells = 10, verbose = TRUE)removeRareGenes(scObj, nCells = 10, verbose = TRUE)
scObj |
A Seurat or SingleCellExperiment object. |
nCells |
Minimum number of cells in which a gene must be expressed to be retained. |
verbose |
Logical; whether the output should be verbose. |
A Seurat or SingleCellExperiment object with
rare genes removed.
scePath <- system.file('extdata', 'sceObj.qs2', package='hammers') sceObj <- qs2::qs_read(scePath) sceObj <- removeRareGenes(sceObj, 30)scePath <- system.file('extdata', 'sceObj.qs2', package='hammers') sceObj <- qs2::qs_read(scePath) sceObj <- removeRareGenes(sceObj, 30)
This function finds the differential representation of two
Seurat or SingleCellExperiment columns.
repAnalysis( scObj, col1 = "seurat_clusters", col2 = "orig.ident", doOverrep = TRUE, mtMethod = c("BY", "holm", "hochberg", "hommel", "bonferroni", "BH", "fdr", "none"), ... )repAnalysis( scObj, col1 = "seurat_clusters", col2 = "orig.ident", doOverrep = TRUE, mtMethod = c("BY", "holm", "hochberg", "hommel", "bonferroni", "BH", "fdr", "none"), ... )
scObj |
A Seurat or SingleCellExperiment object. |
col1 |
Column as string. |
col2 |
Column as string. |
doOverrep |
Whether to perform overrepresentation analysis (`TRUE`, default) or underrepresentation analysis (`FALSE`). |
mtMethod |
Multiple testing correction method. Choices are 'BY' (default), 'holm', 'hochberg', 'hommel', 'bonferroni', 'BH', 'fdr' and 'none'. |
... |
Additional parameters passed to |
An overrepresentation or underrepresentation data frame.
scePath <- system.file('extdata', 'sceObj.qs2', package='hammers') sceObj <- qs2::qs_read(scePath) repAnalysis(sceObj, 'Cluster', 'Donor')scePath <- system.file('extdata', 'sceObj.qs2', package='hammers') sceObj <- qs2::qs_read(scePath) repAnalysis(sceObj, 'Cluster', 'Donor')
This function messages an input if verbose is set to TRUE.
safeMessage(msg, verbose = TRUE)safeMessage(msg, verbose = TRUE)
msg |
Message |
verbose |
Whether the message should be displayed. |
No return value. This function is called for its side effect
(messaging the input if verbose is set to TRUE).
safeMessage('message')safeMessage('message')
This function min-max-normalizes a vector when possible, and otherwise returns a single-value vector.
safeMinmax(scores, safeVal = 0)safeMinmax(scores, safeVal = 0)
scores |
Numeric vector. |
safeVal |
Value to replace all values with when all values in the vector are the same. |
Min-max-normalized scores or a single-value vector.
safeMinmax(c(0, 3, 2, 1, 4, 5.5, 6.32, 8, 1.1))safeMinmax(c(0, 3, 2, 1, 4, 5.5, 6.32, 8, 1.1))