Heatmap for showing clustering results and more
Description
Make a heatmap with the color scale from one matrix and the hierarchical
clustering of samples/features from another. Also has built-in functionality
for showing the clusterings with the heatmap. Builds on the
aheatmap
function of the NMF
package.
Usage
## S4 method for signature 'SingleCellExperiment'
plotHeatmap(data, isCount = FALSE, transFun = NULL, ...)
## S4 method for signature 'SummarizedExperiment'
plotHeatmap(data, isCount = FALSE, transFun = NULL, ...)
## S4 method for signature 'table'
plotHeatmap(data, ...)
## S4 method for signature 'ClusterExperiment'
plotHeatmap(
data,
clusterSamplesData = c("dendrogramValue", "hclust", "orderSamplesValue",
"primaryCluster"),
clusterFeaturesData = "var",
nFeatures = NA,
visualizeData = c("transformed", "centeredAndScaled", "original"),
whichClusters = c("primary", "workflow", "all", "none"),
colData = NULL,
clusterFeatures = TRUE,
nBlankLines = 2,
colorScale,
whichAssay = 1,
...
)
## S4 method for signature 'data.frame'
plotHeatmap(data, ...)
## S4 method for signature 'ExpressionSet'
plotHeatmap(data, ...)
## S4 method for signature 'matrixOrHDF5'
plotHeatmap(
data,
colData = NULL,
clusterSamplesData = NULL,
clusterFeaturesData = NULL,
whColDataCont = NULL,
clusterSamples = TRUE,
showSampleNames = FALSE,
clusterFeatures = TRUE,
showFeatureNames = FALSE,
colorScale = seqPal5,
clusterLegend = NULL,
alignColData = FALSE,
unassignedColor = "white",
missingColor = "grey",
breaks = NA,
symmetricBreaks = FALSE,
capBreaksLegend = FALSE,
isSymmetric = FALSE,
overRideClusterLimit = FALSE,
plot = TRUE,
labelTracks = TRUE,
...
)
## S4 method for signature 'ClusterExperiment'
plotCoClustering(data, invert, saveDistance = FALSE, ...)
plotHeatmap(data, isCount = FALSE, transFun = NULL, ...)
plotHeatmap(data, isCount = FALSE, transFun = NULL, ...)
plotHeatmap(data, ...)
plotHeatmap(
data,
clusterSamplesData = c("dendrogramValue", "hclust", "orderSamplesValue",
"primaryCluster"),
clusterFeaturesData = "var",
nFeatures = NA,
visualizeData = c("transformed", "centeredAndScaled", "original"),
whichClusters = c("primary", "workflow", "all", "none"),
colData = NULL,
clusterFeatures = TRUE,
nBlankLines = 2,
colorScale,
whichAssay = 1,
...
)
plotHeatmap(data, ...)
plotHeatmap(data, ...)
plotHeatmap(
data,
colData = NULL,
clusterSamplesData = NULL,
clusterFeaturesData = NULL,
whColDataCont = NULL,
clusterSamples = TRUE,
showSampleNames = FALSE,
clusterFeatures = TRUE,
showFeatureNames = FALSE,
colorScale = seqPal5,
clusterLegend = NULL,
alignColData = FALSE,
unassignedColor = "white",
missingColor = "grey",
breaks = NA,
symmetricBreaks = FALSE,
capBreaksLegend = FALSE,
isSymmetric = FALSE,
overRideClusterLimit = FALSE,
plot = TRUE,
labelTracks = TRUE,
...
)
plotCoClustering(data, invert, saveDistance = FALSE, ...)
Arguments
data |
data to use to determine the heatmap. Can be a matrix,
ClusterExperiment ,
SingleCellExperiment or
SummarizedExperiment object. The
interpretation of parameters depends on the type of the input to
data .
|
isCount |
if transFun=NULL , then isCount=TRUE will
determine the transformation as defined by function(x){log2(x+1)} ,
and isCount=FALSE will give a transformation function
function(x){x} . Ignored if transFun is not NULL . If object is of
class ClusterExperiment , the stored transformation will be used and
giving this parameter will result in an error.
|
transFun |
a transformation function to be applied to the data. If the
transformation applied to the data creates an error or NA values, then the
function will throw an error. If object is of class
ClusterExperiment , the stored transformation will be used and giving
this parameter will result in an error.
|
... |
for signature matrix , arguments passed to aheatmap .
For the other signatures, passed to the method for signature matrix .
Not all arguments can be passed to aheatmap effectively, see
details.
|
clusterSamplesData |
If data is a matrix,
clusterSamplesData is either a matrix that will be used by
hclust to define the hierarchical clustering of samples (e.g.
normalized data) or a pre-existing dendrogram (of class
dendrogram ) that clusters the samples. If
data is a ClusterExperiment object, clusterSamplesData
should be either character or integers or logical which indicates how (and
whether) the samples should be clustered (or gives indices of the order for
the samples). See details.
|
clusterFeaturesData |
If data is a matrix, either a matrix that
will be used in hclust to define the hierarchical clustering of
features (e.g. normalized data) or a pre-existing dendrogram that clusters
the features. If data is a ClusterExperiment object, the
input should be either character or integers indicating which features
should be used (see details).
|
nFeatures |
integer indicating how many features should be used (if
clusterFeaturesData is 'var' or 'PCA').
|
visualizeData |
either a character string, indicating what form of the
data should be used for visualizing the data (i.e. for making the
color-scale), or a data.frame/matrix with same number of samples as
assay(data) . If a new data.frame/matrix, any character arguments to
clusterFeaturesData will be ignored.
|
whichClusters |
argument that can be either numeric or character vector
indicating the clusterings to be used. See details of getClusterIndex .
|
colData |
If input to data is a
ClusterExperiment , SummarizedExperiment , or
SingleCellExperiment object, then colData must index the
colData stored as a DataFrame in colData slot of the
object. Whether that data is continuous or not will be determined by the
properties of colData (no user input is needed). If input to
data is matrix, colData is a matrix of additional data on
the samples to show above heatmap. In this case, unless indicated by
whColDataCont , colData will be converted into factors,
even if numeric. “-1” indicates the sample was not assigned to a cluster
and gets color ‘unassignedColor’ and “-2” gets the color ‘missingColor’.
|
clusterFeatures |
Logical as to whether to do hierarchical clustering of
features (if FALSE, any input to clusterFeaturesData is ignored).
|
nBlankLines |
Only applicable if input is ClusterExperiment object. Indicates the number of lines to put between groups of features if clusterFeaturesData gives groups of genes (see details and makeBlankData ).
|
colorScale |
palette of colors for the color scale of the heatmap.
|
whichAssay |
numeric or character specifying which assay to use. See
assay for details.
|
whColDataCont |
Which of the colData columns are continuous
and should not be converted to factors. NULL indicates no additional
colData . Only used if data input is matrix.
|
clusterSamples |
Logical as to whether to do hierarchical clustering of
cells (if FALSE, any input to clusterSamplesData is ignored).
|
showSampleNames |
Logical as to whether show sample names.
|
showFeatureNames |
Logical as to whether show feature names.
|
clusterLegend |
Assignment of colors to the clusters. If NULL ,
colData columns will be assigned colors internally. See details
for more.
|
alignColData |
Logical as to whether should align the colors of the
colData (only if clusterLegend not given and
colData is not NULL ).
|
unassignedColor |
color assigned to cluster values of '-1'
("unassigned").
|
missingColor |
color assigned to cluster values of '-2' ("missing").
|
breaks |
Either a vector of breaks (should be equal to length 52), or a
number between 0 and 1, indicating that the breaks should be equally spaced
(based on the range in the data) up to the ‘breaks’ quantile, see
setBreaks
|
symmetricBreaks |
logical as to whether the breaks created for the color
scale should be symmetrical around 0
|
capBreaksLegend |
logical as to whether the legend for the breaks should
be capped. Only relevant if breaks is a value < 1, in which case if
capBreaksLegend=TRUE , only the values between the quantiles
requested will show in the color scale legend.
|
isSymmetric |
logical. if TRUE indicates that the input matrix is
symmetric. Useful when plotting a co-clustering matrix or other sample by
sample matrices (e.g., correlation).
|
overRideClusterLimit |
logical. Whether to override the internal limit
that only allows 10 clusterings/annotations. If overridden, may result in
incomprehensible errors from aheatmap . Only override this if you
have a very large plotting device and want to see if aheatmap can
render it.
|
plot |
logical indicating whether to plot the heatmap. Mainly useful for
package maintenance to avoid calls to aheatmap on unit tests that take a long
time.
|
labelTracks |
logical, whether to put labels next to the color tracks
corresponding to the colData.
|
invert |
logical determining whether the coClustering matrix should be
inverted to be 1-coClustering for plotting. By default, if the diagonal
elements are all zero, invert=TRUE, and otherwise invert=FALSE. If
coClustering matrix is not a 0-1 matrix (e.g. if equal to a distance matrix
output from clusterSingle ), then the user should manually set
this parameter to FALSE.
|
saveDistance |
logical. When the coClustering slot contains
indices of the clusterings or a NxB set of clusterings, the hamming
distance will be calculated before running the plot. This argument
determines whether the ClusterExperiment object with that distance
in coClustering slot should be returned (so as to avoid
re-calculating it in the future) or not.
|
Details
The plotHeatmap function calls aheatmap
to draw
the heatmap. The main points of plotHeatmap
are to 1) allow for
different matrix inputs, separating out the color scale visualization and
the clustering of the samples/features. 2) to visualize the clusters and
meta data with the heatmap. The intended use case is to allow the user to
visualize the original count scale of the data (on the log-scale), but
create the hierarchical clustering on another, more appropriate dataset for
clustering, such as normalized data. Similarly, some of the palettes in the
package were developed assuming that the visualization might be on
unscaled/uncentered data, rather than the residual from the mean of the
gene, and thus palettes need to take on a greater range of relevant values
so as to show meaningful comparisons with genes on very different scales.
If data
is a ClusterExperiment
object,
visualizeData
indicates what kind of transformation should be done
to assay(data)
for calculating the color scale. The features will be
clustered based on these data as well. A different data.frame or matrix can
be given for the visualization. For example, if the
ClusterExperiment
object contains normalized data, but the user
wishes that the color scale be based on the log-counts for easier
interpretation, visualizeData
could be set to be the
log2(counts + 1)
.
If data
is a ClusterExperiment
object,
clusterSamplesData
can be used to indicate the type of clustering
for the samples. If equal to 'dendrogramValue' the dendrogram stored in
data
will be used; if dendrogram is missing, a new one will be
created based on the primaryCluster
of data using
makeDendrogram
, assuming no errors are created (if errors are
created, then clusterSamplesData
will be set to "primaryCluster").
If clusterSamplesData
is equal to "hclust", then standard
hierarchical clustering of the transformed data will be used. If
clusterSamplesData
is equal to 'orderSamplesValue' no clustering of
the samples will be done, and instead the samples will be ordered as in the
slot orderSamples
of data
. If clusterSamplesData
is
equal to 'primaryCluster', again no clustering will be done, and instead
the samples will be ordered based on grouping the samples to match the
primaryCluster of data
; however, if the primaryCluster of
data
is only one cluster or consists solely of -1/-2 values,
clusterSamplesData
will be set to "hclust". If
clusterSamplesData
is not a character value,
clusterSamplesData
can be an integer-valued vector giving the order
of the samples.
If data
is a matrix, then colData
is a data.frame
of annotation data to be plotted above the heatmap and
whColDataCont
gives the index of the column(s) of this dataset
that should be considered continuous. Otherwise the annotation data for
colData
will be forced into a factor (which will be nonsensical
for continuous data). If data
is a ClusterExperiment
object,
colData
should refer to an index or column name of the
colData
slot of data
. In this case colData
will be
added to any choices of clusterings chosen by the whichClusters
argument (if any). If both clusterings and sample data are chosen, the
clusterings will be shown closest to data (i.e. on bottom).
If data
is a ClusterExperiment
object,
clusterFeaturesData
is not a dataset, but instead indicates which
features should be shown in the heatmap. In this case
clusterFeaturesData
can be one of the following:
-
"all"
All rows/genes will be shown
-
character giving dimensionality
reduction: Should match one of the values saved in the reducedDims
slot or a
builtin function in listBuiltInReducedDims()
. nFeatures
then
gives the number of dimensions to show. The heatmap will then be of the
dimension reduction vectors
-
character giving filtering: Should
match one of values saved in filterStats
slot or a builtin function
in listBuiltInFilterStats()
. nFeatures
gives the number of
genes to keep after filtering.
-
character giving gene/row names
-
vector of integers giving row indices
-
a list of indices or
rownames: This is used to indicate that the features should be grouped
according to the elements of the list, with blank (white) space between
them (see makeBlankData
for more details). In this case, no
clustering is done of the features.
If breaks
is a numeric value between 0 and 1, then
breaks
is assumed to indicate the upper quantile (on the log scale)
at which the heatmap color scale should stop. For example, if
breaks=0.9
, then the breaks will be evenly spaced up until the 0.9
upper quantile of data
, and then all values after the 0.9 quantile
will be absorbed by the upper-most color bin. This can help to reduce the
visual impact of a few highly expressed genes (features).
Note that plotHeatmap calls aheatmap
under the
hood. This allows you to plot multiple heatmaps via
par(mfrow=c(2,2))
, etc. However, the dendrograms do not resize if
you change the size of your plot window in an interactive session of R
(this might be a problem for RStudio if you want to pop it out into a large
window...). Also, plotting to a pdf adds a blank page; see help pages of
aheatmap
for how to turn this off.
clusterLegend
takes the place of argument annColors
from aheatmap
for giving colors to the annotation on the heatmap.
clusterLegend
should be a list of length equal to
ncol(colData)
with names equal to the colnames of
colData
. Each element of the list should be either the format
requested by aheatmap
(a vector of colors with names
corresponding to the levels of the column of colData
), or should
be in the format of the clusterLegend
slot in a ClusterExperiment
object. Color assignments to the rows/genes should also be passed via
clusterLegend
(assuming annRow
is an argument passed to
...
). If clusterFeaturesData
is a named list
describing groupings of genes then the colors for those groups can be given
in clusterLegend
under the name "Gene Group".
If you have a factor with many levels, it is important to note that
aheatmap
does not recycle colors across factors in the
colData
, and in fact runs out of colors and the remaining levels
get the color white. Thus if you have many factors or many levels in those
factors, you should set their colors via clusterLegend
.
Many arguments can be passed on to aheatmap
, however, some are set
internally by plotHeatmap.
In particular, setting the values of
Rowv
or Colv
will cause errors. color
in
aheatmap
is replaced by colorScale
in plotHeatmap.
The
annCol
to give annotation to the samples is replaced by the
colData
; moreover, the annColors
option in aheatmap
will also be set internally to give more vibrant colors than the default in
aheatmap
(for ClusterExperiment
objects, these values can
also be set in the clusterLegend
slot ). Other options should be
passed on to aheatmap
, though they have not been all tested. Useful options
include treeheight=0
to suppress plotting of the dendrograms,
annLegend=FALSE
to suppress the legend of factors shown beside columns/rows,
and cexRow=0
or cexCol=0
to suppress plotting of row/column labels.
plotCoClustering
is a convenience function to plot the
heatmap of the co-clustering distance matrix from the coClustering
slot of a ClusterExperiment
object (either by calculating the
hamming distance of the clusterings stored in the coClustering
slot,
or the distance stored in the coClustering
slot if it has already
been calculated).
Value
Returns (invisibly) a list with elements
-
aheatmapOut
The output from the final call of
aheatmap
.
-
colData
the annotation data.frame given to the argument
annCol
in aheatmap
.
-
clusterLegend
the annotation colors given to the argument
annColors
in aheatmap
.
-
breaks
The breaks used for aheatmap
, after adjusting
for quantile.
Author(s)
Elizabeth Purdom
See Also
aheatmap
, makeBlankData
, showHeatmapPalettes
, makeDendrogram
, dendrogram
Examples
## Not run:
data(simData)
cl <- rep(1:3,each=100)
cl2 <- cl
changeAssign <- sample(1:length(cl), 80)
cl2[changeAssign] <- sample(cl[changeAssign])
ce <- ClusterExperiment(simCount, cl2, transformation=function(x){log2(x+1)})
#simple, minimal, example. Show counts, but cluster on underlying means
plotHeatmap(ce)
#assign cluster colors
colors <- bigPalette[20:23]
names(colors) <- 1:3
plotHeatmap(data=simCount, clusterSamplesData=simData,
colData=data.frame(cl), clusterLegend=list(colors))
#show two different clusters
anno <- data.frame(cluster1=cl, cluster2=cl2)
out <- plotHeatmap(simData, colData=anno)
#return the values to see format for giving colors to the annotations
out$clusterLegend
#assign colors to the clusters based on plotClusters algorithm
plotHeatmap(simData, colData=anno, alignColData=TRUE)
#assign colors manually
annoColors <- list(cluster1=c("black", "red", "green"),
cluster2=c("blue","purple","yellow"))
plotHeatmap(simData, colData=anno, clusterLegend=annoColors)
#give a continuous valued -- need to indicate columns
anno2 <- cbind(anno, Cont=c(rnorm(100, 0), rnorm(100, 2), rnorm(100, 3)))
plotHeatmap(simData, colData=anno2, whColDataCont=3)
#compare changing breaks quantile on visual effect
par(mfrow=c(2,2))
plotHeatmap(simData, colorScale=seqPal1, breaks=1, main="Full length")
plotHeatmap(simData,colorScale=seqPal1, breaks=.99, main="0.99 Quantile Upper
Limit")
plotHeatmap(simData,colorScale=seqPal1, breaks=.95, main="0.95 Quantile Upper
Limit")
plotHeatmap(simData, colorScale=seqPal1, breaks=.90, main="0.90 Quantile
Upper Limit")
## End(Not run)
data(simData)
cl <- rep(1:3,each=100)
cl2 <- cl
changeAssign <- sample(1:length(cl), 80)
cl2[changeAssign] <- sample(cl[changeAssign])
ce <- ClusterExperiment(simCount, cl2, transformation=function(x){log2(x+1)})
plotHeatmap(ce)
colors <- bigPalette[20:23]
names(colors) <- 1:3
plotHeatmap(data=simCount, clusterSamplesData=simData,
colData=data.frame(cl), clusterLegend=list(colors))
anno <- data.frame(cluster1=cl, cluster2=cl2)
out <- plotHeatmap(simData, colData=anno)
out$clusterLegend
plotHeatmap(simData, colData=anno, alignColData=TRUE)
annoColors <- list(cluster1=c("black", "red", "green"),
cluster2=c("blue","purple","yellow"))
plotHeatmap(simData, colData=anno, clusterLegend=annoColors)
anno2 <- cbind(anno, Cont=c(rnorm(100, 0), rnorm(100, 2), rnorm(100, 3)))
plotHeatmap(simData, colData=anno2, whColDataCont=3)
par(mfrow=c(2,2))
plotHeatmap(simData, colorScale=seqPal1, breaks=1, main="Full length")
plotHeatmap(simData,colorScale=seqPal1, breaks=.99, main="0.99 Quantile Upper
Limit")
plotHeatmap(simData,colorScale=seqPal1, breaks=.95, main="0.95 Quantile Upper
Limit")
plotHeatmap(simData, colorScale=seqPal1, breaks=.90, main="0.90 Quantile
Upper Limit")