In lulizou/spASE: Detecting Allele-Specific Expression in Spatial Transcriptomics

library(dplyr)
library(spASE)
set.seed(params$seed)

expit <- function(x) {
  return(exp(x)/(1+exp(x)))
}

# Slide-seq data
# puck 4 needs to be rotated and shifted
load('pucks.3-4.spatial.RData')
puck3.spatial <- puck3.spatial %>%
  mutate(total = CAST+X129) %>%
  mutate(X = xcoord/1000, Y = ycoord/1000)
puck4.spatial <- puck4.spatial %>%
  mutate(total = CAST+X129) %>%
  mutate(X = xcoord/1000, Y = ycoord/1000)


angle <- 261*pi/180
x.shift <- 0.37
y.shift <- -0.16
x.center <- 3.25
y.center <- 3.025
# rotate and shift puck 4
new.X <- (puck4.spatial$X-x.center)*cos(angle) - (puck4.spatial$Y-y.center)*sin(angle)+x.center + x.shift
new.Y <- (puck4.spatial$X-x.center)*sin(angle) + (puck4.spatial$Y-y.center)*cos(angle)+y.center + y.shift
puck4.spatial$X <- new.X
puck4.spatial$Y <- new.Y

# load in RCTD results
rctd.results.puck3 <- readRDS('rctd_puck_3.rds')
coords <- puck3.spatial %>%
  dplyr::select(X, Y, bead, total) %>%
  group_by(X, Y, bead) %>%
  summarise(nUMI = sum(total))
puck3.rctd <- rctd.results.puck3$results_df %>%
  mutate(bead = rownames(rctd.results.puck3$results_df)) %>%
  left_join(coords) %>%
  filter(spot_class == 'singlet')
rctd.results.puck4 <- readRDS('rctd_puck_4.rds')
coords <- puck4.spatial %>%
  dplyr::select(X, Y, bead, total) %>%
  group_by(X, Y, bead) %>%
  summarise(nUMI = sum(total))
puck4.rctd <- rctd.results.puck4$results_df %>%
  mutate(bead = rownames(rctd.results.puck4$results_df)) %>%
  left_join(coords) %>%
  filter(spot_class == 'singlet')

# combine RCTD annotations and both pucks
puck3.spatial <- left_join(puck3.spatial, puck3.rctd %>% dplyr::select(bead, first_type, spot_class), by = 'bead')
puck4.spatial <- left_join(puck4.spatial, puck4.rctd %>% dplyr::select(bead, first_type, spot_class), by = 'bead')

joint <- bind_rows(puck3.spatial, puck4.spatial)
joint <- joint %>%
  filter(!grepl('mt-',gene)) %>% 
  filter(bead!='TTTTTTTTTTTTTT') %>%
  filter(bead!='GGGGGGGGGGGGGG') %>%
  filter(bead!='TTTGGAATTTAACT') %>% # duplicated
  mutate(gene = as.factor(as.character(gene)), bead = as.factor(as.character(bead)))

# get locations at a grid of points
xcoords <- seq(min(joint$X), max(joint$X), by = 0.1)
ycoords <- seq(min(joint$Y), max(joint$Y), by = 0.1)
predictCoords <- expand.grid(xcoords, ycoords)
colnames(predictCoords) <- c('X', 'Y')
predictCoords <- data.frame(predictCoords) %>%
  filter(((X-3.2)^2+(Y-3.1)^2<=2.3^2) | ((X-x.center-x.shift)^2+(Y-y.center-y.shift)^2<=2.3^2))

singlets <- joint %>% 
  filter(spot_class == 'singlet') %>%
  mutate(gene = as.factor(as.character(gene)), bead = as.factor(as.character(bead)))

get_spline_matrix <- function(coords, df = 15) {
  center_coords <- coords
  center_coords <- sweep(center_coords, 2, apply(center_coords,2, mean), '-')
  center_coords <- center_coords / sd(as.matrix(center_coords))
  sm <- smoothCon(s(X,Y,k=df,fx=T,bs='tp'),data=center_coords)[[1]]
  mm <- as.matrix(data.frame(sm$X))
  X2 <- cbind(mm[,(df - 2):df], mm[,1:(df-3)]) #swap intercept, x, and y
  X2[,2:df] <- sweep(X2[,2:df], 2, apply(X2[,2:df],2, mean), '-')
  X2[,2:df] <- sweep(X2[,2:df], 2, apply(X2[,2:df],2, sd), '/') #standardize
  return(X2)
}

pphi_to_ab <- function(p, phi) {
  a <- as.numeric(p*(1-phi)/phi)
  b <- as.numeric((1-phi)*(1-p)/phi)
  return(list(a=a,b=b))
}

# Simulation 1: spatial pattern across all pixels
# simulate a random spatial pattern by taking a random linear combination
# of the basis functions
unique_coords <- joint %>%
  dplyr::select(bead,X,Y,first_type,spot_class) %>%
  distinct()
df <- 15
pval.thresh <- 0.01
X.basis <- get_spline_matrix(unique_coords[,2:3], df)
phis <- c(0.01, 0.1, 0.3, 0.5, 0.8)
#ncells <- c(50,100,250,500,1000,10000)
ncells <- params$ncells
numis <- c(1,2,3,5,10,20,30,40,50,60,70,80,90,100,125,150)
niter <- 100
total.entries <- length(phis)*length(numis)
total.pval.entries <- total.entries*niter
if (params$nullsim) {
  bb.results <- data.frame(numi = numeric(total.entries),
                           ncell = numeric(total.entries),
                           phi = numeric(total.entries),
                           false.pos = numeric(total.entries),
                           true.neg = numeric(total.entries),
                           total.neg = numeric(total.entries))
  pval.results <- numeric(total.pval.entries)
} else {
  bb.results <- data.frame(numi = numeric(total.entries),
                           ncell = numeric(total.entries),
                           phi = numeric(total.entries),
                           true.pos = numeric(total.entries),
                           false.neg = numeric(total.entries),
                           total.pos = numeric(total.entries))
}

idx.bb <- 1
idx.pval <- 1
for (ncell in ncells) {
  for (numi in numis) {
    for (phi in phis) {
      mat1 <- matrix(nrow = niter, ncol = nrow(unique_coords))
      mat2 <- matrix(nrow = niter, ncol = nrow(unique_coords))
      rownames(mat1) <- rownames(mat2) <- paste0('fakegene', seq(1:niter))
      colnames(mat1) <- colnames(mat2) <- unique_coords$bead
      for (i in 1:niter) {
        # sample ncells
        sample.idx <- sample(seq(1,nrow(X.basis)), ncell, replace=F)
        if (params$nullsim) {
          p <- runif(ncell, 0.0001, 0.9999)
        } else {
          X.samp <- X.basis[sample.idx,]
          # draw random normally distributed coefficients
          b.coef <- rnorm(df, 0, 1)
          logit.p <- X.samp%*%b.coef
          p <- expit(logit.p)
        }
        # convert to beta distribution parameters
        ab.list <- pphi_to_ab(p, phi)
        a <- ab.list$a
        b <- ab.list$b
        # draw lambda from the beta distribution
        lambda.p <- rbeta(length(a), a, b)
        # draw binomial for each spot
        y <- rbinom(length(lambda.p), size=numi, prob=lambda.p)
        y2 <- numi-y
        mat1[i,sample.idx] <- y
        mat2[i,sample.idx] <- y2
      }
      bb.result <- spase(mat1, mat2, covariates = unique_coords[,1:3], method='betabinomial',
                         min.pixels = 10, min.umi = 10, cores=1)
      bb.results$numi[idx.bb] <- numi
      bb.results$ncell[idx.bb] <- ncell
      bb.results$phi[idx.bb] <- phi
      not.na.idx <- which(!is.na(bb.result$result$chisq.p))
      if (params$nullsim) {
        bb.results$false.pos[idx.bb] <- sum(bb.result$result$chisq.p<=pval.thresh, na.rm=T)
        bb.results$true.neg[idx.bb] <- sum(bb.result$result$chisq.p>pval.thresh, na.rm=T)
        bb.results$total.neg[idx.bb] <- length(not.na.idx)
        pval.results[idx.pval:(idx.pval+length(not.na.idx)-1)] <- bb.result$result$chisq.p[not.na.idx]
        idx.pval <- idx.pval+length(not.na.idx)
      } else {
        bb.results$true.pos[idx.bb] <- sum(bb.result$result$chisq.p<=pval.thresh, na.rm=T)
        bb.results$false.neg[idx.bb] <- sum(bb.result$result$chisq.p>pval.thresh, na.rm=T)
        bb.results$total.pos[idx.bb] <- length(which(!is.na(bb.result$result$chisq.p)))
      }
      idx.bb <- idx.bb + 1
    }
  }
}
if (params$nullsim) {
  save(bb.results, pval.results, file=paste0('simulations-spatial-gradient-null-params-',params$ncells, '-', params$seed, '.RData'))
} else {
  save(bb.results, file=paste0('simulations-spatial-gradient-params-',params$ncells, '-', params$seed, '.RData'))
}