knitr::opts_chunk$set(echo = TRUE)

See the source code of this vignette for the script used to compute the Moran's I values analyzed in 2. Tumor spatial autocorrelation and clinical prognosis.

library(sf)
library(stars)
library(raster)
library(adespatial)
library(maptools)
library(spdep)
library(stringr)
library(readr)
library(dplyr)

Image loading and spatial processing

Download the mibiTOF data from the Angelo Lab website. Move the datasets into the data directory.

This portion of the script converts each image into a more analysis-friendly format, and computes Gabriel graphs/neighborhoods, which is used as the spatial weighting matrix (SWM) in the Moran's I calculation below.

Some parts of this image processing steps of this script are adapted from code by Pratheepa Jeganathan and Kris Sankaran. Github: @PratheepaJ @krisrs1128

DATAPATH <- "../data"
SAVEPATH <- "../data/spatial_proc"

data_dir <- file.path(DATAPATH, "TNBC_shareCellData")

process_tiffs <- function(dirname){
  fns <- list.files(data_dir, str_interp(".tiff"))
  moran_list <- list()

  for(fn in fns){
    samp_list <- list()

    f <- paste(data_dir, fn, sep = '/')

    samp_id <- str_split_fixed(fn,'_',2)[1] # with p
    sample_id <- as.numeric(str_split_fixed(str_split_fixed(fn,'_',2)[1],'p',2)[2]) # without p as numeric
    samp_list[['samp']] <- samp_id

    polys <- read_stars(f) %>%
      st_as_sf(merge = TRUE) %>%
      st_cast("POLYGON")
    colnames(polys)[1] <- colnames(cell_data)[2]

    cell_data <- read_csv(file.path(data_dir, "cellData.csv")) %>%
      filter(SampleID == sample_id)

    polys <- polys %>%
      inner_join(cell_data) %>%
      group_by(cellLabelInImage) %>% # some regions get split into two adjacent polys --> merge
      summarise_all(first)

    samp_list[['polys']] <- polys

    nb.maf <- poly2nb(polys)
    xy <- polys %>% st_cast("POINT") %>% st_coordinates()
    nbgab <- graph2nb(gabrielneigh(xy), sym = TRUE)

    samp_list[['nb.maf']] <- nb.maf
    samp_list[['xy']] <- xy
    samp_list[['nbgab']] <- nbgab

    nblgab <- nb2listw(nbgab)
    distgab <- nbdists(nbgab, xy)

    samp_list[['nblgab']] <- nblgab
    samp_list[['distgab']] <- distgab

    saveRDS(samp_list, file = paste0(SAVEPATH,'/', samp_id,'.rds'))
  }
}

process_tiffs(dirname = data_dir)

Computing Moran's Index

This portion reads in the processed files generated above and computes Moran's I for each variable, which includes attributes like protein expression values (Moran's I is computed for each), cell type dummy variable, and cell size.

The output of this code is the resMoranTest.csv file included in the data folder.

rdsfolder <- SAVEPATH
pFiles <- file.path(rdsfolder, dir(rdsfolder))

runMoranScript <- function(pFiles){
  pFilesNames <-  reshape2::colsplit(reshape2::colsplit(pFiles,"/",1:4)[,4],"\\.",1:2)[,1]

  res<-lapply(pFiles,sc)
  cols <- unique(unlist(sapply(res, names)))

  resM <- matrix(NA, ncol=length(cols), nrow=length(pFilesNames), dimnames = list(pFilesNames,cols))
  for (i in seq_along(res)) resM[i,names(res[[i]])]<-res[[i]]
  return(resM)
}

sc<-function(pFile) {
  print(pFile)
  p<-readRDS(pFile)
  if (!exists("p")) Sys.sleep(5)
  print(ls())
  mp<- moranS(sf=p)
  rm(p)
  return(mp)
}

moranS<-function(sf){
  # Exclude non number cols or where max-min =0
  to_drop <- c("cellLabelInImage" ,"SampleID" , "geometry", # non numeric
               'Na','Si','P','Ca','Fe','Background', 'dsDNA','Ta','Au',  # elemental / irrelevant
               'tumorCluster', 'immuneCluster','immuneGroup','Group') # factors
  p <- sf$polys
  nbl <- sf$nblgab
  n <- colnames(p) [! colnames(p) %in% to_drop]
  n <- n[!apply(p[, n,drop=TRUE],2, function(x)max(x)-min(x))==0]
  tt <- lapply(n, function(x) (moran.test(p[,x, drop=TRUE],nbl)))
  ms <- sapply(tt, function(x) x$estimate[1])
  names(ms) <- n
  return(ms)
}

resMoranTest <- runMoranScript(pFiles)
write.csv(resMoranTest, file=file.path(SAVEPATH,"resMoranTest.csv"))


laurenhsu1/BIRSBIO2020.scProteomics.exploratory documentation built on July 26, 2020, 7:23 a.m.