Domain segmentation (STARmap PLUS mouse brain)

Here, we demonstrate BANKSY domain segmentation on a STARmap PLUS dataset of the mouse brain from Shi et al. (2022).

library(Banksy)

library(data.table)
library(SummarizedExperiment)
library(SpatialExperiment)

library(scater)
library(cowplot)
library(ggplot2)

Data preprocessing

Data from the study is available from the Single Cell Portal. We analyze data from well11. The data comprise 1,022 genes profiled at subcellular resolution in 43,341 cells.

#' Change paths accordingly
gcm_path <- "../data/well11processed_expression_pd.csv.gz"
mdata_path <- "../data/well11_spatial.csv.gz"

#' Gene cell matrix
gcm <- fread(gcm_path)
genes <- gcm$GENE
gcm <- as.matrix(gcm[, -1])
rownames(gcm) <- genes

#' Spatial coordinates and metadata
mdata <- fread(mdata_path, skip = 1)
headers <- names(fread(mdata_path, nrows = 0))
colnames(mdata) <- headers
#' Orient spatial coordinates
xx <- mdata$X
yy <- mdata$Y
mdata$X <- max(yy) - yy
mdata$Y <- max(xx) - xx
mdata <- data.frame(mdata)
rownames(mdata) <- colnames(gcm)

locs <- as.matrix(mdata[, c("X", "Y", "Z")])

#' Create SpatialExperiment
se <- SpatialExperiment(
    assay = list(processedExp = gcm),
    spatialCoords = locs,
    colData = mdata
)

Running BANKSY

Run BANKSY in domain segmentation mode with lambda=0.8. This places larger weights on the mean neighborhood expression and azimuthal Gabor filter in constructing the BANKSY matrix. We adjust the resolution to yield 23 clusters based on the results from Maher et al. (2023) (see Fig. 1, 2).

lambda <- 0.8
k_geom <- 30
npcs <- 50
aname <- "processedExp"
se <- Banksy::computeBanksy(se, assay_name = aname, k_geom = k_geom)

set.seed(1000)
se <- Banksy::runBanksyPCA(se, lambda = lambda, npcs = npcs)

set.seed(1000)
se <- Banksy::clusterBanksy(se, lambda = lambda, npcs = npcs, resolution = 0.8)

Cluster labels are stored in the colData slot:

head(colData(se))
#> DataFrame with 6 rows and 4 columns
#>           X         Y clust_M1_lam0.8_k50_res0.8   sample_id
#>   <numeric> <numeric>                   <factor> <character>
#> 1   24225.5   23984.2                         10    sample01
#> 2   24849.2   22679.1                         10    sample01
#> 3   24488.3   22970.3                         10    sample01
#> 4   24371.4   23727.5                         10    sample01
#> 5   24362.2   23300.6                         10    sample01
#> 6   24644.5   23112.8                         10    sample01

Visualize clustering results:

cnames <- colnames(colData(se))
cnames <- cnames[grep("^clust", cnames)]

plotColData(se, x = "X", y = "Y", point_size = 0.01, colour_by = cnames[1]) +
    scale_color_manual(values = pals::glasbey()) +
    coord_equal() +
    theme(legend.position = "none")

Session information

sessionInfo()
#> R version 4.4.1 (2024-06-14)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 24.04.1 LTS
#> 
#> Matrix products: default
#> BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so;  LAPACK version 3.12.0
#> 
#> locale:
#>  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
#>  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=C              
#>  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
#>  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
#>  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
#> 
#> time zone: Etc/UTC
#> tzcode source: system (glibc)
#> 
#> attached base packages:
#> [1] stats4    stats     graphics  grDevices utils     datasets  methods  
#> [8] base     
#> 
#> other attached packages:
#>  [1] ExperimentHub_2.13.1        AnnotationHub_3.13.3       
#>  [3] BiocFileCache_2.13.2        dbplyr_2.5.0               
#>  [5] spatialLIBD_1.17.8          cowplot_1.1.3              
#>  [7] scater_1.33.4               ggplot2_3.5.1              
#>  [9] harmony_1.2.1               Rcpp_1.0.13                
#> [11] data.table_1.16.2           scran_1.33.2               
#> [13] scuttle_1.15.5              Seurat_5.1.0               
#> [15] SeuratObject_5.0.2          sp_2.1-4                   
#> [17] SpatialExperiment_1.15.1    SingleCellExperiment_1.27.2
#> [19] SummarizedExperiment_1.35.5 Biobase_2.65.1             
#> [21] GenomicRanges_1.57.2        GenomeInfoDb_1.41.2        
#> [23] IRanges_2.39.2              S4Vectors_0.43.2           
#> [25] BiocGenerics_0.51.3         MatrixGenerics_1.17.1      
#> [27] matrixStats_1.4.1           Banksy_1.3.0               
#> [29] BiocStyle_2.33.1           
#> 
#> loaded via a namespace (and not attached):
#>   [1] bitops_1.0-9             spatstat.sparse_3.1-0    doParallel_1.0.17       
#>   [4] httr_1.4.7               RColorBrewer_1.1-3       tools_4.4.1             
#>   [7] sctransform_0.4.1        DT_0.33                  utf8_1.2.4              
#>  [10] R6_2.5.1                 lazyeval_0.2.2           uwot_0.2.2              
#>  [13] withr_3.0.2              gridExtra_2.3            progressr_0.15.0        
#>  [16] cli_3.6.3                spatstat.explore_3.3-3   fastDummies_1.7.4       
#>  [19] labeling_0.4.3           sass_0.4.9               spatstat.data_3.1-2     
#>  [22] ggridges_0.5.6           pbapply_1.7-2            Rsamtools_2.21.2        
#>  [25] dbscan_1.2-0             aricode_1.0.3            dichromat_2.0-0.1       
#>  [28] sessioninfo_1.2.2        parallelly_1.38.0        attempt_0.3.1           
#>  [31] maps_3.4.2               limma_3.61.12            pals_1.9                
#>  [34] RSQLite_2.3.7            BiocIO_1.15.2            generics_0.1.3          
#>  [37] ica_1.0-3                spatstat.random_3.3-2    dplyr_1.1.4             
#>  [40] Matrix_1.7-1             ggbeeswarm_0.7.2         fansi_1.0.6             
#>  [43] abind_1.4-8              lifecycle_1.0.4          yaml_2.3.10             
#>  [46] edgeR_4.3.21             SparseArray_1.5.45       Rtsne_0.17              
#>  [49] paletteer_1.6.0          grid_4.4.1               blob_1.2.4              
#>  [52] promises_1.3.0           dqrng_0.4.1              crayon_1.5.3            
#>  [55] miniUI_0.1.1.1           lattice_0.22-6           beachmat_2.21.9         
#>  [58] mapproj_1.2.11           KEGGREST_1.45.1          magick_2.8.5            
#>  [61] sys_3.4.3                maketools_1.3.1          pillar_1.9.0            
#>  [64] knitr_1.48               metapod_1.13.0           rjson_0.2.23            
#>  [67] future.apply_1.11.3      codetools_0.2-20         leiden_0.4.3.1          
#>  [70] glue_1.8.0               spatstat.univar_3.0-1    vctrs_0.6.5             
#>  [73] png_0.1-8                spam_2.11-0              gtable_0.3.6            
#>  [76] rematch2_2.1.2           cachem_1.1.0             xfun_0.48               
#>  [79] S4Arrays_1.5.11          mime_0.12                survival_3.7-0          
#>  [82] RcppHungarian_0.3        iterators_1.0.14         fields_16.3             
#>  [85] statmod_1.5.0            bluster_1.15.1           fitdistrplus_1.2-1      
#>  [88] ROCR_1.0-11              nlme_3.1-166             bit64_4.5.2             
#>  [91] filelock_1.0.3           RcppAnnoy_0.0.22         bslib_0.8.0             
#>  [94] irlba_2.3.5.1            vipor_0.4.7              KernSmooth_2.23-24      
#>  [97] colorspace_2.1-1         DBI_1.2.3                tidyselect_1.2.1        
#> [100] bit_4.5.0                compiler_4.4.1           curl_5.2.3              
#> [103] BiocNeighbors_1.99.3     DelayedArray_0.31.14     plotly_4.10.4           
#> [106] rtracklayer_1.65.0       scales_1.3.0             lmtest_0.9-40           
#> [109] rappdirs_0.3.3           stringr_1.5.1            digest_0.6.37           
#> [112] goftest_1.2-3            spatstat.utils_3.1-0     rmarkdown_2.28          
#> [115] benchmarkmeData_1.0.4    RhpcBLASctl_0.23-42      XVector_0.45.0          
#> [118] htmltools_0.5.8.1        pkgconfig_2.0.3          highr_0.11              
#> [121] fastmap_1.2.0            rlang_1.1.4              htmlwidgets_1.6.4       
#> [124] UCSC.utils_1.1.0         shiny_1.9.1              farver_2.1.2            
#> [127] jquerylib_0.1.4          zoo_1.8-12               jsonlite_1.8.9          
#> [130] BiocParallel_1.39.0      mclust_6.1.1             config_0.3.2            
#> [133] RCurl_1.98-1.16          BiocSingular_1.21.4      magrittr_2.0.3          
#> [136] GenomeInfoDbData_1.2.13  dotCall64_1.2            patchwork_1.3.0         
#> [139] munsell_0.5.1            viridis_0.6.5            reticulate_1.39.0       
#> [142] leidenAlg_1.1.4          stringi_1.8.4            zlibbioc_1.51.2         
#> [145] MASS_7.3-61              plyr_1.8.9               parallel_4.4.1          
#> [148] listenv_0.9.1            ggrepel_0.9.6            deldir_2.0-4            
#> [151] Biostrings_2.73.2        sccore_1.0.5             splines_4.4.1           
#> [154] tensor_1.5               locfit_1.5-9.10          igraph_2.1.1            
#> [157] spatstat.geom_3.3-3      RcppHNSW_0.6.0           buildtools_1.0.0        
#> [160] reshape2_1.4.4           ScaledMatrix_1.13.0      XML_3.99-0.17           
#> [163] BiocVersion_3.20.0       evaluate_1.0.1           golem_0.5.1             
#> [166] BiocManager_1.30.25      foreach_1.5.2            httpuv_1.6.15           
#> [169] RANN_2.6.2               tidyr_1.3.1              purrr_1.0.2             
#> [172] polyclip_1.10-7          benchmarkme_1.0.8        future_1.34.0           
#> [175] scattermore_1.2          rsvd_1.0.5               xtable_1.8-4            
#> [178] restfulr_0.0.15          RSpectra_0.16-2          later_1.3.2             
#> [181] viridisLite_0.4.2        tibble_3.2.1             GenomicAlignments_1.41.0
#> [184] memoise_2.0.1            beeswarm_0.4.0           AnnotationDbi_1.67.0    
#> [187] cluster_2.1.6            shinyWidgets_0.8.7       globals_0.16.3