Introduction

mistyR can be used to analyze spatial transcriptomics data sets stored in SeuratObject with just a couple of functions. In this vignette we demonstrate how to build a user friendly workflow starting from data preprocessing, through running mistyR, to analysis of results, i.e., the spatial interactions between markers stored in alternative assays and specific locations.

In principle mistyR can be used with data extracted from a SeuratObject only. For the sake of completeness of the process of modeling with mistyR we demonstrate a complete workflow, including data preprocessing with Seurat. The functions provided in this notebook can be adapted to the user preference but the main objective is to exploit as much as possible the flexibility of workflow creation from mistyR and object manipulation from Seurat.

# MISTy
library(mistyR)
library(future)

# Seurat
library(Seurat)

# data manipulation
library(Matrix)
library(tibble)
library(dplyr)
library(purrr)

# normalization
library(sctransform)

# resource
library(progeny)

# setup parallel execution
options(future.globals.maxSize = 1024^3)
plan(multisession)

The skeleton of mistyR pipelines

For user convenience and to facilitate the use of mistyR and Seurat, run_misty_seurat() is a function describing a general skeleton of a mistyR workflow for analysing a 10x Visium slide given in a Seurat object. The function allows for:

  1. Defining a number of assays/views to be used in the model.
  2. Defining the type of spatial context for each view and their parameters.
  3. Defining the specific assay and features to be used for the view creation of each view.
  4. Defining the specific spots where the model will be built.
run_misty_seurat <- function(visium.slide,
                             # Seurat object with spatial transcriptomics data.
                             view.assays,
                             # Named list of assays for each view.
                             view.features = NULL,
                             # Named list of features/markers to use.
                             # Use all by default.
                             view.types,
                             # Named list of the type of view to construct
                             # from the assay.
                             view.params,
                             # Named list with parameters (NULL or value)
                             # for each view.
                             spot.ids = NULL,
                             # spot IDs to use. Use all by default.
                             out.alias = "results"
                             # folder name for output
) {

  # Extracting geometry
  geometry <- GetTissueCoordinates(visium.slide,
    cols = c("row", "col"), scale = NULL
  )

  # Extracting data
  view.data <- map(view.assays,
    extract_seurat_data,
    geometry = geometry,
    visium.slide = visium.slide
  )

  # Constructing and running a workflow
  build_misty_pipeline(
    view.data = view.data,
    view.features = view.features,
    view.types = view.types,
    view.params = view.params,
    geometry = geometry,
    spot.ids = spot.ids,
    out.alias = out.alias
  )
}

Extracting specific information from Seurat objects

These are helper functions that allow to extract from Seurat objects specific assays and transform them into tibble which is a preferred format for mistyR.

# Extracts data from an specific assay from a Seurat object
# and aligns the IDs to the geometry
extract_seurat_data <- function(visium.slide,
                                assay,
                                geometry) {
  data <- GetAssayData(visium.slide, assay = assay) %>%
    t() %>%
    as_tibble(rownames = NA)

  return(data %>% slice(match(rownames(.), rownames(geometry))))
}

# Filters data to contain only features of interest
filter_data_features <- function(data,
                                 features) {
  if (is.null(features)) features <- colnames(data)

  return(data %>% rownames_to_column() %>%
    select(rowname, all_of(features)) %>% rename_with(make.names) %>%
    column_to_rownames())
}

View creation

This helper function wraps the three options available by default for view creation in mistyR with additional features that allow for creating views for specific spots.

# Builds views depending on the paramaters defined
create_default_views <- function(data,
                                 view.type,
                                 view.param,
                                 view.name,
                                 spot.ids,
                                 geometry) {
  view.data.init <- create_initial_view(data)

  if (!(view.type %in% c("intra", "para", "juxta"))) {
    view.type <- "intra"
  }

  if (view.type == "intra") {
    data.red <- view.data.tmp$data %>%
      rownames_to_column() %>%
      filter(rowname %in% spot.ids) %>%
      select(-rowname)
  } else if (view.type == "para") {
    view.data.tmp <- view.data.init %>%
      add_paraview(geometry, l = view.param)

    data.ix <- paste0("paraview.", view.param)
    data.red <- view.data.tmp[[data.ix]]$data %>%
      mutate(rowname = rownames(data)) %>%
      filter(rowname %in% spot.ids) %>%
      select(-rowname)
  } else if (view.type == "juxta") {
    view.data.tmp <- view.data.init %>%
      add_juxtaview(
        positions = geometry,
        neighbor.thr = view.param
      )

    data.ix <- paste0("juxtaview.", view.param)
    data.red <- view.data.tmp[[data.ix]]$data %>%
      mutate(rowname = rownames(data)) %>%
      filter(rowname %in% spot.ids) %>%
      select(-rowname)
  }

  if (is.null(view.param) == TRUE) {
    misty.view <- create_view(
      paste0(view.name),
      data.red
    )
  } else {
    misty.view <- create_view(
      paste0(view.name, "_", view.param),
      data.red
    )
  }

  return(misty.view)
}

Building a mistyR pipeline and running the model

Finally, we show a wrapper function build_misty_pipeline() that allows you to build automatically a mistyR workflow from a list of data frames, with specified spatial context, parameters, features and locations.

# Builds automatic MISTy workflow and runs it
build_misty_pipeline <- function(view.data,
                                 view.features,
                                 view.types,
                                 view.params,
                                 geometry,
                                 spot.ids = NULL,
                                 out.alias = "default") {

  # Adding all spots ids in case they are not defined
  if (is.null(spot.ids)) {
    spot.ids <- rownames(view.data[[1]])
  }

  # First filter the features from the data
  view.data.filt <- map2(view.data, view.features, filter_data_features)

  # Create initial view
  views.main <- create_initial_view(view.data.filt[[1]] %>%
    rownames_to_column() %>%
    filter(rowname %in% spot.ids) %>%
    select(-rowname))

  # Create other views
  view.names <- names(view.data.filt)

  all.views <- pmap(list(
    view.data.filt[-1],
    view.types[-1],
    view.params[-1],
    view.names[-1]
  ),
  create_default_views,
  spot.ids = spot.ids,
  geometry = geometry
  )

  pline.views <- add_views(
    views.main,
    unlist(all.views, recursive = FALSE)
  )


  # Run MISTy
  run_misty(pline.views, out.alias)
}

Use case

As an example, we will analyze a 10X Visium spatial gene expression dataset of one breast cancer section (Block A Section 1) available here [https://support.10xgenomics.com/spatial-gene-expression/datasets]. We assume that the required files Feature/cell matrix HDF5 (filtered) and the Spatial imaging data (extracted) are in a folder named ‘breast_A_1’ in the current working directory.

We will explore the spatial interactions of the Hypoxia pathway responsive genes with the Estrogen pathway responsive genes. To this end we will use the model matrix with top significant genes for each pathway from the package progeny and the previously described functions.

Loading and normalizing the data sets

folder <- "breast_A_1"

# Load the HDF5 object and normalize the expression
seurat.vs <-
  Load10X_Spatial(
    data.dir = folder,
    filename = "V1_Breast_Cancer_Block_A_Section_1_filtered_feature_bc_matrix.h5"
  )

sct.data <- vst(GetAssayData(
  object = seurat.vs,
  slot = "counts",
  assay = "Spatial"
),
verbosity = 0
)

seurat.vs[["SCT"]] <- CreateAssayObject(data = sct.data$y)

Filtering genes that are expressed in at least 5% of spots

gene.expression <- GetAssayData(seurat.vs, assay = "SCT")
coverage <- rowSums(gene.expression > 0) / ncol(gene.expression)
slide.markers <- names(which(coverage >= 0.05))

Defining Hypoxia and Estrogen responsive genes

For this simple example we will pick the top 15 most significantly responsive genes of each pathway from the model matrix from the package progeny.

estrogen.footprints <- getModel(top = 15) %>%
  rownames_to_column("gene") %>%
  filter(Estrogen != 0, gene %in% slide.markers) %>%
  pull(gene)

hypoxia.footprints <- getModel(top = 15) %>%
  rownames_to_column("gene") %>%
  filter(Hypoxia != 0, gene %in% slide.markers) %>%
  pull(gene)

Defining the parameters of the workflow

In this example we will explain the expression of hypoxia responsive genes in terms of three different views:

  1. Main (intrinsic) view (containing genes to be predicted): intrinsic expression of hypoxia.footprints.
  2. Paraview - hypoxia genes: expression of hypoxia markers (hypoxia.footprints) in a significance radius of 10 spots.
  3. Paraview - estrogen genes: expression of estrogen markers (estrogen.footprints) in a significance radius of 10 spots.
# Define assay for each view
view.assays <- list(
  "main" = "SCT",
  "para.hypoxia" = "SCT",
  "para.estrogen" = "SCT"
)

# Define features for each view
view.features <- list(
  "main" = hypoxia.footprints,
  "para.hypoxia" = hypoxia.footprints,
  "para.estrogen" = estrogen.footprints
)

# Define spatial context for each view
view.types <- list(
  "main" = "intra",
  "para.hypoxia" = "para",
  "para.estrogen" = "para"
)

# Define additional parameters (l in the case of paraview)
view.params <- list(
  "main" = NULL,
  "para.hypoxia" = 10,
  "para.estrogen" = 10
)

misty.out <- "vignette_model_seurat"

Run MISTy pipeline and collect results

Now that we have preprocessed the data and have decided on a question to analyze, we can create and run a mistyR workflow.

misty.results <- run_misty_seurat(
  visium.slide = seurat.vs,
  view.assays = view.assays,
  view.features = view.features,
  view.types = view.types,
  view.params = view.params,
  spot.ids = NULL, # Using the whole slide
  out.alias = misty.out
) %>%
  collect_results()
#> 
#> Generating paraview
#> 
#> Generating paraview
#> Training models
#> 
#> Collecting improvements
#> 
#> Collecting contributions
#> 
#> Collecting importances
#> 
#> Aggregating

Interpretation and downstream analysis

MISTy gives answers to three general questions:

1. How much can the broader spatial context explain the expression of markers (in contrast to the intraview)?

This can be observed in the gain in R2 (or RMSE) of using the multiview model in contrast to the single main view model.

misty.results %>%
  plot_improvement_stats("gain.R2") %>%
  plot_improvement_stats("gain.RMSE")

In this example, PGK1 is a marker whose expression can be explained better by modeling the broader spatial context around each spot.

We can further inspect the significance of the gain in variance explained, by the assigned p-value of improvement based on cross-validation.

misty.results$improvements %>%
  filter(measure == "p.R2") %>%
  arrange(value)
#> Registered S3 method overwritten by 'cli':
#>   method     from         
#>   print.boxx spatstat.geom
#> # A tibble: 15 × 4
#>    target  sample                                                measure   value
#>    <chr>   <chr>                                                 <chr>     <dbl>
#>  1 PGK1    /home/runner/work/mistyR/mistyR/vignettes/vignette_m… p.R2    1.11e-4
#>  2 ANKZF1  /home/runner/work/mistyR/mistyR/vignettes/vignette_m… p.R2    4.42e-3
#>  3 INSIG2  /home/runner/work/mistyR/mistyR/vignettes/vignette_m… p.R2    6.26e-3
#>  4 FUT11   /home/runner/work/mistyR/mistyR/vignettes/vignette_m… p.R2    1.50e-2
#>  5 NDRG1   /home/runner/work/mistyR/mistyR/vignettes/vignette_m… p.R2    2.20e-2
#>  6 PDK1    /home/runner/work/mistyR/mistyR/vignettes/vignette_m… p.R2    2.66e-2
#>  7 EGLN1   /home/runner/work/mistyR/mistyR/vignettes/vignette_m… p.R2    2.89e-2
#>  8 HILPDA  /home/runner/work/mistyR/mistyR/vignettes/vignette_m… p.R2    3.68e-2
#>  9 ENO2    /home/runner/work/mistyR/mistyR/vignettes/vignette_m… p.R2    1.05e-1
#> 10 PFKFB4  /home/runner/work/mistyR/mistyR/vignettes/vignette_m… p.R2    1.07e-1
#> 11 FAM162A /home/runner/work/mistyR/mistyR/vignettes/vignette_m… p.R2    1.12e-1
#> 12 BNIP3L  /home/runner/work/mistyR/mistyR/vignettes/vignette_m… p.R2    1.19e-1
#> 13 ANKRD37 /home/runner/work/mistyR/mistyR/vignettes/vignette_m… p.R2    1.57e-1
#> 14 KDM3A   /home/runner/work/mistyR/mistyR/vignettes/vignette_m… p.R2    1.89e-1
#> 15 GBE1    /home/runner/work/mistyR/mistyR/vignettes/vignette_m… p.R2    6.89e-1

In general, the significant gain in R2 can be interpreted as the following:

“We can better explain the expression of marker X, when we consider additional views, other than the intrinsic view.”

2.How much do different view components contribute to explaining the expression?

misty.results %>% plot_view_contributions()


misty.results$contributions.stats %>% filter(target == "PGK1")
#> # A tibble: 3 × 6
#>   target view              mean fraction   p.mean  p.sd
#>   <chr>  <chr>            <dbl>    <dbl>    <dbl> <dbl>
#> 1 PGK1   intra            0.485    0.369 9.10e-70    NA
#> 2 PGK1   para.estrogen_10 0.623    0.475 8.41e-58    NA
#> 3 PGK1   para.hypoxia_10  0.204    0.156 1.55e- 7    NA

In the case of PGK1, we observe that around 50% of the contribution in the final model comes from the expression of other markers of hypoxia intrinsically or from the broader tissue structure. The rest comes from the expression of estrogen responsive genes from the broader tissue structure.

3.What are the specific relations that can explain the contributions?

To explain the contributions, we can visualize the importances of markers coming from each view separately as predictors of the expression of the intrinsic markers of hypoxia.

First, the intrinsic importances of the hypoxia markers.

misty.results %>% plot_interaction_heatmap(view = "intra")

These importances are associated to the relationship between markers in the same spot. Let’s pick the best predictor of PGK1 to confirm this:

misty.results$importances.aggregated[["intra"]] %>%
  select(Predictor, PGK1) %>%
  arrange(-PGK1)
#> # A tibble: 15 × 2
#>    Predictor    PGK1
#>    <chr>       <dbl>
#>  1 NDRG1      2.62  
#>  2 FAM162A    1.82  
#>  3 HILPDA     0.207 
#>  4 PFKFB4     0.198 
#>  5 ENO2       0.0514
#>  6 EGLN1     -0.327 
#>  7 ANKRD37   -0.449 
#>  8 BNIP3L    -0.477 
#>  9 GBE1      -0.501 
#> 10 INSIG2    -0.542 
#> 11 KDM3A     -0.577 
#> 12 ANKZF1    -0.604 
#> 13 PDK1      -0.699 
#> 14 FUT11     -0.711 
#> 15 PGK1      NA

SpatialFeaturePlot(seurat.vs, features = c("PGK1", "NDRG1"))
#> Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
#> "none")` instead.

#> Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
#> "none")` instead.

Second, the paraview importances of the hypoxia markers.

misty.results %>% plot_interaction_heatmap(view = "para.hypoxia_10")

These importances are associated to the relationship between markers in the spot and markers in the neighborhood (controlled by our parameter l).

SpatialFeaturePlot(seurat.vs, features = c("PGK1", "PFKFB4"))
#> Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
#> "none")` instead.

#> Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
#> "none")` instead.

As expected, the expression of PFKFB4 (the best predictor from this view) in the neighborhood of each spot allows to explain the expression of PGK1.

Finally, the paraview importances of the estrogen markers. We will inspect the best predictor in this view.

misty.results %>% plot_interaction_heatmap(view = "para.estrogen_10")

SpatialFeaturePlot(seurat.vs, features = c("PGK1", "TPD52L1"))
#> Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
#> "none")` instead.

#> Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
#> "none")` instead.

It is visible that in some areas the local expression of TPD52L1 overlaps with the areas with the highest expression of PGK1.

Important notes

  • The relationships captured in the importances are not to assumed or interpreted as linear or casual.

  • 1-to-1 importances between predictor and markers should always be interpreted in the context of the other predictors, since training MISTy models is multivariate predictive task.

Other use cases

The shown example is not the only way to use mistyR to analyze spatial transcriptomics data. Similar and complementary workflows can be constructed to describe different aspects of biology, for example:

  • Spatial interactions between pathway activities and putative ligands, as shown here.

  • Spatial interactions between cell-state lineage markers and putative ligands, as shown here.

  • Spatial interactions between cell-type abundances leveraging deconvolution methods and creating descriptions of cell colocalization and tissue architecture.

Additionally, mistyR through the function collect_results() allows you to group the results of multiple slides, allowing for a more robust, integrative or comparative analysis of spatial interactions.


See also

Publication

Jovan Tanevski, Attila Gabor, Ricardo Omar Ramirez Flores, Denis Schapiro, Julio Saez-Rodriguez (2020). Explainable multi-view framework for dissecting inter-cellular signaling from highly multiplexed spatial data. bioRxiv. doi: 10.1101/2020.05.08.084145

Session info

Here is the output of sessionInfo() at the point when this document was compiled:

#> R version 4.1.0 (2021-05-18)
#> Platform: x86_64-pc-linux-gnu (64-bit)
#> Running under: Ubuntu 20.04.2 LTS
#> 
#> Matrix products: default
#> BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
#> LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
#> 
#> locale:
#>  [1] LC_CTYPE=C.UTF-8       LC_NUMERIC=C           LC_TIME=C.UTF-8       
#>  [4] LC_COLLATE=C.UTF-8     LC_MONETARY=C.UTF-8    LC_MESSAGES=C.UTF-8   
#>  [7] LC_PAPER=C.UTF-8       LC_NAME=C              LC_ADDRESS=C          
#> [10] LC_TELEPHONE=C         LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C   
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#>  [1] progeny_1.14.0     sctransform_0.3.2  purrr_0.3.4        dplyr_1.0.7       
#>  [5] tibble_3.1.3       Matrix_1.3-3       SeuratObject_4.0.2 Seurat_4.0.3      
#>  [9] future_1.21.0      mistyR_1.0.3       BiocStyle_2.20.2  
#> 
#> loaded via a namespace (and not attached):
#>   [1] systemfonts_1.0.2     plyr_1.8.6            igraph_1.2.6         
#>   [4] lazyeval_0.2.2        splines_4.1.0         listenv_0.8.0        
#>   [7] scattermore_0.7       ggplot2_3.3.5         digest_0.6.27        
#>  [10] htmltools_0.5.1.1     fansi_0.5.0           magrittr_2.0.1       
#>  [13] memoise_2.0.0         tensor_1.5            cluster_2.1.2        
#>  [16] ROCR_1.0-11           globals_0.14.0        matrixStats_0.60.0   
#>  [19] R.utils_2.10.1        pkgdown_1.6.1         spatstat.sparse_2.0-0
#>  [22] colorspace_2.0-2      ggrepel_0.9.1         textshaping_0.3.5    
#>  [25] xfun_0.25             crayon_1.4.1          jsonlite_1.7.2       
#>  [28] spatstat.data_2.1-0   survival_3.2-11       zoo_1.8-9            
#>  [31] glue_1.4.2            polyclip_1.10-0       gtable_0.3.0         
#>  [34] leiden_0.3.9          future.apply_1.7.0    abind_1.4-5          
#>  [37] scales_1.1.1          DBI_1.1.1             miniUI_0.1.1.1       
#>  [40] Rcpp_1.0.7            viridisLite_0.4.0     xtable_1.8-4         
#>  [43] reticulate_1.20       spatstat.core_2.3-0   bit_4.0.4            
#>  [46] htmlwidgets_1.5.3     httr_1.4.2            RColorBrewer_1.1-2   
#>  [49] ellipsis_0.3.2        ica_1.0-2             pkgconfig_2.0.3      
#>  [52] R.methodsS3_1.8.1     farver_2.1.0          sass_0.4.0           
#>  [55] uwot_0.1.10           deldir_0.2-10         utf8_1.2.2           
#>  [58] tidyselect_1.1.1      labeling_0.4.2        rlang_0.4.11         
#>  [61] reshape2_1.4.4        later_1.2.0           munsell_0.5.0        
#>  [64] tools_4.1.0           cachem_1.0.5          cli_3.0.1            
#>  [67] generics_0.1.0        ggridges_0.5.3        evaluate_0.14        
#>  [70] stringr_1.4.0         fastmap_1.1.0         yaml_2.2.1           
#>  [73] ragg_1.1.3            goftest_1.2-2         knitr_1.33           
#>  [76] bit64_4.0.5           fs_1.5.0              fitdistrplus_1.1-5   
#>  [79] RANN_2.6.1            pbapply_1.4-3         nlme_3.1-152         
#>  [82] mime_0.11             R.oo_1.24.0           hdf5r_1.3.3          
#>  [85] compiler_4.1.0        plotly_4.9.4.1        filelock_1.0.2       
#>  [88] png_0.1-7             spatstat.utils_2.2-0  bslib_0.2.5.1        
#>  [91] stringi_1.7.3         highr_0.9             desc_1.3.0           
#>  [94] lattice_0.20-44       vctrs_0.3.8           pillar_1.6.2         
#>  [97] lifecycle_1.0.0       furrr_0.2.3           BiocManager_1.30.16  
#> [100] spatstat.geom_2.2-2   lmtest_0.9-38         jquerylib_0.1.4      
#> [103] RcppAnnoy_0.0.19      data.table_1.14.0     cowplot_1.1.1        
#> [106] irlba_2.3.3           httpuv_1.6.1          patchwork_1.1.1      
#> [109] R6_2.5.0              bookdown_0.22         promises_1.2.0.1     
#> [112] KernSmooth_2.23-20    gridExtra_2.3         parallelly_1.27.0    
#> [115] codetools_0.2-18      MASS_7.3-54           assertthat_0.2.1     
#> [118] rprojroot_2.0.2       rlist_0.4.6.1         mgcv_1.8-35          
#> [121] parallel_4.1.0        grid_4.1.0            rpart_4.1-15         
#> [124] tidyr_1.1.3           rmarkdown_2.10        distances_0.1.8      
#> [127] Rtsne_0.15            shiny_1.6.0