Introduction to spatialDE

Introduction

SpatialDE by Svensson et al., 2018, is a method to identify spatially variable genes (SVGs) in spatially resolved transcriptomics data.

Installation

You can install spatialDE from Bioconductor with the following code:

if (!requireNamespace("BiocManager", quietly = TRUE)) {
    install.packages("BiocManager")
}
BiocManager::install("spatialDE")

Example: Mouse Olfactory Bulb

Reproducing the example analysis from the original SpatialDE Python package.

library(spatialDE)
library(ggplot2)

Load data

Files originally retrieved from SpatialDE GitHub repository from the following links: https://github.com/Teichlab/SpatialDE/blob/master/Analysis/MouseOB/data/Rep11_MOB_0.csv https://github.com/Teichlab/SpatialDE/blob/master/Analysis/MouseOB/MOB_sample_info.csv

# Expression file used in python SpatialDE. 
data("Rep11_MOB_0")

# Sample Info file used in python SpatialDE
data("MOB_sample_info")

The Rep11_MOB_0 object contains spatial expression data for 16218 genes on 262 spots, with genes as rows and spots as columns.

Rep11_MOB_0[1:5, 1:5]
#>         16.92x9.015 16.945x11.075 16.97x10.118 16.939x12.132 16.949x13.055
#> Nrf1              1             0            0             1             0
#> Zbtb5             1             0            1             0             0
#> Ccnl1             1             3            1             1             0
#> Lrrfip1           2             2            0             0             3
#> Bbs1              1             2            0             4             0
dim(Rep11_MOB_0)
#> [1] 16218   262

The MOB_sample_info object contains a data.frame with coordinates for each spot.

head(MOB_sample_info)
#>                    x      y total_counts
#> 16.92x9.015   16.920  9.015        18790
#> 16.945x11.075 16.945 11.075        36990
#> 16.97x10.118  16.970 10.118        12471
#> 16.939x12.132 16.939 12.132        22703
#> 16.949x13.055 16.949 13.055        18641
#> 16.942x15.088 16.942 15.088        23408

Filter out pratically unobserved genes

Rep11_MOB_0 <- Rep11_MOB_0[rowSums(Rep11_MOB_0) >= 3, ]

Get total_counts for every spot

Rep11_MOB_0 <- Rep11_MOB_0[, row.names(MOB_sample_info)]
MOB_sample_info$total_counts <- colSums(Rep11_MOB_0)
head(MOB_sample_info)
#>                    x      y total_counts
#> 16.92x9.015   16.920  9.015        18790
#> 16.945x11.075 16.945 11.075        36990
#> 16.97x10.118  16.970 10.118        12471
#> 16.939x12.132 16.939 12.132        22703
#> 16.949x13.055 16.949 13.055        18641
#> 16.942x15.088 16.942 15.088        23408

Get coordinates from MOB_sample_info

X <- MOB_sample_info[, c("x", "y")]
head(X)
#>                    x      y
#> 16.92x9.015   16.920  9.015
#> 16.945x11.075 16.945 11.075
#> 16.97x10.118  16.970 10.118
#> 16.939x12.132 16.939 12.132
#> 16.949x13.055 16.949 13.055
#> 16.942x15.088 16.942 15.088

stabilize

The SpatialDE method assumes normally distributed data, so we stabilize the variance of the negative binomial distributed counts data using Anscombe’s approximation. The stabilize() function takes as input a data.frame of expression values with samples in columns and genes in rows. Thus, in this case, we have to transpose the data.

norm_expr <- stabilize(Rep11_MOB_0)
#> + /github/home/.cache/R/basilisk/1.19.0/0/bin/conda create --yes --prefix /github/home/.cache/R/basilisk/1.19.0/spatialDE/1.13.0/env 'python=3.10.14' --quiet -c conda-forge --override-channels
#> + /github/home/.cache/R/basilisk/1.19.0/0/bin/conda install --yes --prefix /github/home/.cache/R/basilisk/1.19.0/spatialDE/1.13.0/env 'python=3.10.14' -c conda-forge --override-channels
#> + /github/home/.cache/R/basilisk/1.19.0/0/bin/conda install --yes --prefix /github/home/.cache/R/basilisk/1.19.0/spatialDE/1.13.0/env -c conda-forge 'python=3.10.14' 'numpy=1.23.5' 'scipy=1.9.3' 'patsy=0.5.3' 'pandas=1.5.2' --override-channels
norm_expr[1:5, 1:5]
#>         16.92x9.015 16.945x11.075 16.97x10.118 16.939x12.132 16.949x13.055
#> Nrf1       1.227749     0.8810934    0.8810934     1.2277491     0.8810934
#> Zbtb5      1.227749     0.8810934    1.2277491     0.8810934     0.8810934
#> Ccnl1      1.227749     1.6889027    1.2277491     1.2277491     0.8810934
#> Lrrfip1    1.484676     1.4846765    0.8810934     0.8810934     1.6889027
#> Bbs1       1.227749     1.4846765    0.8810934     1.8584110     0.8810934

regress_out

Next, we account for differences in library size between the samples by regressing out the effect of the total counts for each gene using linear regression.

resid_expr <- regress_out(norm_expr, sample_info = MOB_sample_info)
resid_expr[1:5, 1:5]
#>         16.92x9.015 16.945x11.075 16.97x10.118 16.939x12.132 16.949x13.055
#> Nrf1    -0.75226761    -1.2352000   -1.0164479    -0.7903289    -1.0973214
#> Zbtb5    0.09242373    -0.3323719    0.1397144    -0.2760560    -0.2533134
#> Ccnl1   -2.77597164    -2.5903783   -2.6092013    -2.8529340    -3.1193883
#> Lrrfip1 -1.92331333    -2.1578718   -2.3849405    -2.5924072    -1.7163300
#> Bbs1    -1.12186064    -1.0266476   -1.3706460    -0.5363646    -1.4666155

run

To reduce running time, the SpatialDE test is run on a subset of 1000 genes. Running it on the complete data set takes about 10 minutes.

# For this example, run spatialDE on the first 1000 genes
sample_resid_expr <- head(resid_expr, 1000)

results <- spatialDE::run(sample_resid_expr, coordinates = X)
head(results[order(results$qval), ])
#>           FSV M     g        l max_delta    max_ll max_mu_hat max_s2_t_hat
#> 752 0.5869020 4 Fabp7 1.135190 0.6872704 -154.7691   5.036316    3.6039400
#> 671 0.5276876 4   Apc 1.135190 0.8739619 -146.9456  -3.922771    2.1552117
#> 884 0.3651030 4 Kcnh3 1.907609 1.6225593 -177.5658  -8.066532    3.7081686
#> 668 0.3837007 4  Pcp4 1.135190 1.5683362 -127.1354 -11.866414   16.9252387
#> 722 0.3345161 4  Penk 1.135190 1.9424986 -163.0520  -9.491798   10.3640716
#> 725 0.3931224 4  Frzb 1.135190 1.5073475 -217.1284   1.180284    0.3109513
#>     model   n       s2_FSV s2_logdelta        time      BIC max_ll_null
#> 752    SE 260 0.0004068815 0.007999172 0.001467228 331.7809   -196.7353
#> 671    SE 260 0.0004218510 0.007939872 0.001475334 316.1339   -178.4892
#> 884    SE 260 0.0006305732 0.013314547 0.001681328 377.3743   -200.6982
#> 668    SE 260 0.0007326393 0.014985240 0.001700640 276.5136   -144.3654
#> 722    SE 260 0.0006993170 0.015755817 0.001932621 348.3468   -179.1260
#> 725    SE 260 0.0009077964 0.018308366 0.001434803 456.4996   -233.0538
#>          LLR         pval         qval
#> 752 41.96623 9.286349e-11 9.286349e-08
#> 671 31.54359 1.950129e-08 9.750644e-06
#> 884 23.13242 1.512193e-06 5.040644e-04
#> 668 17.22998 3.311683e-05 8.279207e-03
#> 722 16.07398 6.091542e-05 1.098165e-02
#> 725 15.92536 6.588988e-05 1.098165e-02

spatial_patterns

Furthermore, we can group spatially variable genes (SVGs) into spatial patterns using automatic expression histology (AEH).

sp <- spatial_patterns(
    sample_resid_expr,
    coordinates = X,
    de_results = de_results,
    n_patterns = 4L, length = 1.5
)
sp$pattern_results
#>                  g pattern membership
#> 595 X2900097C17Rik       2          1
#> 660         Eef1a1       0          1
#> 667         Pmepa1       1          1
#> 668           Pcp4       3          1
#> 671            Apc       2          1
#> 696         Rbfox3       3          1
#> 722           Penk       3          1
#> 725           Frzb       1          1
#> 752          Fabp7       1          1
#> 791        Gm13889       3          1
#> 808           Sez6       3          1
#> 829          Gng13       1          1
#> 833           Gng4       3          1
#> 884          Kcnh3       3          1
#> 906         Camk2b       3          1

Plots

Visualizing one of the most significant genes.

gene <- "Pcp4"

ggplot(data = MOB_sample_info, aes(x = x, y = y, color = norm_expr[gene, ])) +
    geom_point(size = 7) +
    ggtitle(gene) +
    scale_color_viridis_c() +
    labs(color = gene)

Plot Spatial Patterns of Multiple Genes

As an alternative to the previous figure, we can plot multiple genes using the normalized expression values.

ordered_de_results <- de_results[order(de_results$qval), ]

multiGenePlots(norm_expr,
    coordinates = X,
    ordered_de_results[1:6, ]$g,
    point_size = 4,
    viridis_option = "D",
    dark_theme = FALSE
)

Plot Fraction Spatial Variance vs Q-value

FSV_sig(results, ms_results)
#> Warning: ggrepel: 8 unlabeled data points (too many overlaps). Consider
#> increasing max.overlaps

SpatialExperiment integration

The SpatialDE workflow can also be executed with a SpatialExperiment object as input.

library(SpatialExperiment)

# For SpatialExperiment object, we neeed to transpose the counts matrix in order
# to have genes on rows and spot on columns. 
# For this example, run spatialDE on the first 1000 genes

partial_counts <- head(Rep11_MOB_0, 1000)

spe <- SpatialExperiment(
  assays = list(counts = partial_counts),
  spatialData = DataFrame(MOB_sample_info[, c("x", "y")]),
  spatialCoordsNames = c("x", "y")
)

out <- spatialDE(spe, assay_type = "counts", verbose = FALSE)
head(out[order(out$qval), ])
#>           FSV M     g        l max_delta    max_ll max_mu_hat max_s2_t_hat
#> 759 0.5853287 4 Fabp7 1.135190 0.6917423 -157.8641   4.805770    3.2961235
#> 684 0.5215592 4   Apc 1.135190 0.8957046 -152.4253  -2.852122    1.1971964
#> 886 0.3425473 4 Kcnh3 1.907609 1.7908397 -201.1959  -7.162902    2.9230555
#> 681 0.3671680 4  Pcp4 1.135190 1.6829213 -124.8416  -9.209852   10.0688118
#> 735 0.3820350 4  Frzb 1.135190 1.5794319 -239.8355   1.156500    0.3240445
#> 832 0.2378540 4 Gng13 1.907609 2.9897888 -204.1019   3.648425    0.7506023
#>     model   n       s2_FSV s2_logdelta        time      BIC max_ll_null
#> 759    SE 260 0.0004113112 0.008071972 0.001441717 337.9709   -199.4754
#> 684    SE 260 0.0004271087 0.008023534 0.001456261 327.0933   -183.5046
#> 886    SE 260 0.0006874927 0.015203449 0.001703501 424.6346   -221.4201
#> 681    SE 260 0.0008102845 0.017044171 0.001708746 271.9259   -140.0619
#> 735    SE 260 0.0009344367 0.019163063 0.001443863 501.9136   -255.0426
#> 832    SE 260 0.0008862383 0.027973295 0.001982689 430.4466   -218.4267
#>          LLR         pval         qval
#> 759 41.61134 1.113458e-10 1.113458e-07
#> 684 31.07932 2.476965e-08 1.238482e-05
#> 886 20.22416 6.887758e-06 2.295919e-03
#> 681 15.22028 9.567033e-05 1.926777e-02
#> 735 15.20713 9.633886e-05 1.926777e-02
#> 832 14.32476 1.538286e-04 2.563810e-02

Plot Spatial Patterns of Multiple Genes (using SpatialExperiment object)

We can plot spatial patterns of multiples genes using the spe object.

spe_results <- out[out$qval < 0.05, ]

ordered_spe_results <- spe_results[order(spe_results$qval), ]

multiGenePlots(spe,
    assay_type = "counts",
    ordered_spe_results[1:6, ]$g,
    point_size = 4,
    viridis_option = "D",
    dark_theme = FALSE
)

Classify spatially variable genes with model_search and spatial_patterns

msearch <- modelSearch(spe,
    de_results = out, qval_thresh = 0.05,
    verbose = FALSE
)

head(msearch)
#>        BIC        FSV      LLR M              g        l max_delta    max_ll
#> 0 287.6679 0.06406448      NaN 4 X2900097C17Rik 5.386796 14.914618 -132.7126
#> 1 263.5043 0.09590705      NaN 4           Pcp4 9.052138  9.860377 -120.6308
#> 2 370.8671 0.09681127      NaN 4         Rbfox3 9.052138  9.758510 -174.3122
#> 3 428.9359 0.08063297      NaN 4          Gng13 9.052138 11.926341 -203.3466
#> 4 401.7737 0.14867725      NaN 4          Kcnh3 9.052138  5.989364 -189.7655
#> 5 479.4804 0.36943862 14.01272 4         Pmepa1 1.135190  1.666576 -228.6188
#>   max_mu_hat max_s2_t_hat model   n       s2_FSV s2_logdelta        time
#> 0 -3.6572048    0.8870009   PER 260 0.0001665651  0.03805707 0.007651329
#> 1 -9.1839788    8.0150340   PER 260 0.0004344069  0.05008671 0.001957417
#> 2 -7.7847610    5.8291532   PER 260 0.0004545203  0.05160913 0.001939058
#> 3  3.6145343    1.0492566   PER 260 0.0004240747  0.06523479 0.001945972
#> 4 -7.1054452    7.8745493   PER 260 0.0006681386  0.03907065 0.001951933
#> 5  0.1238686    0.1463362    SE 260 0.0007025967  0.01471845 0.001713753
#>     PER_prob      SE_prob  linear_prob         pval        qval max_ll_null
#> 0 0.55766485 4.423351e-01 5.814904e-10 3.104656e-04 0.032188921   -145.8346
#> 1 0.99977999 2.200120e-04 3.452128e-15 9.567033e-05 0.019267772   -140.0619
#> 2 0.99999990 1.031935e-07 1.553124e-16 3.218892e-04 0.032188921   -195.2940
#> 3 0.81916872 1.808313e-01 1.701216e-11 1.538286e-04 0.025638100   -218.4267
#> 4 1.00000000 1.179426e-10 8.336815e-26 6.887758e-06 0.002295919   -221.4201
#> 5 0.03133507 9.686649e-01 1.700936e-10 1.815779e-04 0.025717859   -242.6315
spatterns <- spatialPatterns(spe,
    de_results = de_results, qval_thresh = 0.05,
    n_patterns = 4L, length = 1.5, verbose = FALSE
)

spatterns$pattern_results
#>                  g pattern membership
#> 595 X2900097C17Rik       2          1
#> 660         Eef1a1       3          1
#> 667         Pmepa1       0          1
#> 668           Pcp4       3          1
#> 671            Apc       0          1
#> 696         Rbfox3       3          1
#> 722           Penk       3          1
#> 725           Frzb       0          1
#> 752          Fabp7       0          1
#> 791        Gm13889       3          1
#> 808           Sez6       3          1
#> 829          Gng13       0          1
#> 833           Gng4       3          1
#> 884          Kcnh3       3          1
#> 906         Camk2b       3          1

sessionInfo

Session info
#> [1] "2024-11-30 04:51:58 UTC"
#> R version 4.4.2 (2024-10-31)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 24.04.1 LTS
#> 
#> Matrix products: default
#> BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so;  LAPACK version 3.12.0
#> 
#> locale:
#>  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
#>  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=C              
#>  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
#>  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
#>  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
#> 
#> time zone: Etc/UTC
#> tzcode source: system (glibc)
#> 
#> attached base packages:
#> [1] stats4    stats     graphics  grDevices utils     datasets  methods  
#> [8] base     
#> 
#> other attached packages:
#>  [1] SpatialExperiment_1.17.0    SingleCellExperiment_1.29.1
#>  [3] SummarizedExperiment_1.37.0 Biobase_2.67.0             
#>  [5] GenomicRanges_1.59.1        GenomeInfoDb_1.43.2        
#>  [7] IRanges_2.41.1              S4Vectors_0.45.2           
#>  [9] BiocGenerics_0.53.3         generics_0.1.3             
#> [11] MatrixGenerics_1.19.0       matrixStats_1.4.1          
#> [13] ggplot2_3.5.1               spatialDE_1.13.0           
#> [15] BiocStyle_2.35.0           
#> 
#> loaded via a namespace (and not attached):
#>  [1] gtable_0.3.6            dir.expiry_1.15.0       rjson_0.2.23           
#>  [4] xfun_0.49               bslib_0.8.0             ggrepel_0.9.6          
#>  [7] lattice_0.22-6          vctrs_0.6.5             tools_4.4.2            
#> [10] parallel_4.4.2          tibble_3.2.1            fansi_1.0.6            
#> [13] pkgconfig_2.0.3         Matrix_1.7-1            checkmate_2.3.2        
#> [16] lifecycle_1.0.4         GenomeInfoDbData_1.2.13 farver_2.1.2           
#> [19] compiler_4.4.2          munsell_0.5.1           htmltools_0.5.8.1      
#> [22] sys_3.4.3               buildtools_1.0.0        sass_0.4.9             
#> [25] yaml_2.3.10             pillar_1.9.0            crayon_1.5.3           
#> [28] jquerylib_0.1.4         cachem_1.1.0            DelayedArray_0.33.2    
#> [31] magick_2.8.5            abind_1.4-8             basilisk_1.19.0        
#> [34] tidyselect_1.2.1        digest_0.6.37           dplyr_1.1.4            
#> [37] labeling_0.4.3          maketools_1.3.1         fastmap_1.2.0          
#> [40] grid_4.4.2              colorspace_2.1-1        cli_3.6.3              
#> [43] SparseArray_1.7.2       magrittr_2.0.3          S4Arrays_1.7.1         
#> [46] utf8_1.2.4              withr_3.0.2             backports_1.5.0        
#> [49] filelock_1.0.3          scales_1.3.0            UCSC.utils_1.3.0       
#> [52] rmarkdown_2.29          XVector_0.47.0          httr_1.4.7             
#> [55] gridExtra_2.3           reticulate_1.40.0       png_0.1-8              
#> [58] evaluate_1.0.1          knitr_1.49              basilisk.utils_1.19.0  
#> [61] viridisLite_0.4.2       rlang_1.1.4             Rcpp_1.0.13-1          
#> [64] glue_1.8.0              BiocManager_1.30.25     jsonlite_1.8.9         
#> [67] R6_2.5.1                zlibbioc_1.52.0