In lulizou/spASE: Detecting Allele-Specific Expression in Spatial Transcriptomics

Code for reproducing Figures 2-4.

library(dplyr)
library(tidyr)
library(reshape2)
library(ggplot2)
library(spASE)
library(viridis)
library(Matrix)
library(latex2exp)
library(RColorBrewer)
library(xtable)
library(RCurl)
library(scales)
set.seed(1337)

expit <- function(x) {
  return(exp(x)/(1+exp(x)))
}

# xchr annotations for filtering out xchr genes in single-cell
xchr <- read.delim('../inst/extdata/MGImarkerQuery_20190611_114216.txt')$Symbol

# Slide-seq data
# puck 4 needs to be rotated and shifted
load('../inst/extdata/pucks.3-4.spatial.RData')
puck3.spatial <- puck3.spatial %>%
  mutate(total = CAST+X129) %>%
  mutate(X = xcoord/1000, Y = ycoord/1000)
puck4.spatial <- puck4.spatial %>%
  mutate(total = CAST+X129) %>%
  mutate(X = xcoord/1000, Y = ycoord/1000)


angle <- 261*pi/180
x.shift <- 0.37
y.shift <- -0.16
x.center <- 3.25
y.center <- 3.025
# rotate and shift puck 4
new.X <- (puck4.spatial$X-x.center)*cos(angle) - (puck4.spatial$Y-y.center)*sin(angle)+x.center + x.shift
new.Y <- (puck4.spatial$X-x.center)*sin(angle) + (puck4.spatial$Y-y.center)*cos(angle)+y.center + y.shift
puck4.spatial$X <- new.X
puck4.spatial$Y <- new.Y

# load in RCTD results
# use lower threshold for calling a singlet
# doublet_uncertain and doublet_certains will be reclassified as singlets
# if they meet threshold
SINGLET_THRESH <- 100
rctd.results.puck3 <- readRDS('../inst/extdata/rctd_puck_3.rds')
test <- rctd.results.puck3$results_df
test %>%
  mutate(diff = singlet_score-min_score) %>%
  filter(spot_class != 'reject') %>%
  ggplot(aes(x = diff)) +
  geom_histogram(aes(fill = spot_class), color='white') +
  theme_bw() +
  geom_vline(xintercept=100, lty='dashed') +
  xlab('singlet score - min score') +
  ylab('num pixels') +
  ggtitle('singlet threshold - default of 25, changing to 100')

coords <- puck3.spatial %>%
  dplyr::select(X, Y, bead, total) %>%
  group_by(X, Y, bead) %>%
  summarise(nUMI = sum(total))
puck3.rctd <- rctd.results.puck3$results_df %>%
  mutate(bead = rownames(rctd.results.puck3$results_df)) %>%
  left_join(coords) %>%
  mutate(spot_class = ifelse((spot_class != 'reject') & (singlet_score-min_score<=SINGLET_THRESH), 'singlet', 'reject or doublet'))
rctd.results.puck4 <- readRDS('../inst/extdata/rctd_puck_4.rds')
coords <- puck4.spatial %>%
  dplyr::select(X, Y, bead, total) %>%
  group_by(X, Y, bead) %>%
  summarise(nUMI = sum(total))
puck4.rctd <- rctd.results.puck4$results_df %>%
  mutate(bead = rownames(rctd.results.puck4$results_df)) %>%
  left_join(coords)  %>%
  mutate(spot_class = ifelse((spot_class != 'reject') & (singlet_score-min_score<=SINGLET_THRESH), 'singlet', 'reject or doublet'))

# combine RCTD annotations and both pucks
puck3.spatial <- left_join(puck3.spatial, puck3.rctd %>% dplyr::select(bead, first_type, spot_class), by = 'bead')
puck4.spatial <- left_join(puck4.spatial, puck4.rctd %>% dplyr::select(bead, first_type, spot_class), by = 'bead')

joint <- bind_rows(puck3.spatial, puck4.spatial)
joint <- joint %>%
  filter(!grepl('mt-',gene)) %>% 
  filter(bead!='TTTTTTTTTTTTTT') %>%
  filter(bead!='GGGGGGGGGGGGGG') %>%
  filter(bead!='TTTGGAATTTAACT') %>% # duplicated bead
  mutate(gene = as.factor(as.character(gene)), bead = as.factor(as.character(bead))) %>%
  group_by(bead, gene, xcoord, ycoord, X, Y, first_type, spot_class) %>%
  summarise(CAST = sum(CAST), X129 = sum(X129), total = sum(total)) %>% # since the pucks have the same bead barcodes which have the same coordinates
  ungroup()

# get locations at a grid of points
xcoords <- seq(min(joint$X), max(joint$X), by = 0.1)
ycoords <- seq(min(joint$Y), max(joint$Y), by = 0.1)
predictCoords <- expand.grid(xcoords, ycoords)
colnames(predictCoords) <- c('X', 'Y')
predictCoords <- data.frame(predictCoords) %>%
  filter(((X-3.2)^2+(Y-3.1)^2<=2.3^2) | ((X-x.center-x.shift)^2+(Y-y.center-y.shift)^2<=2.3^2))

singlets <- joint %>% 
  filter(spot_class == 'singlet') %>%
  mutate(gene = as.factor(as.character(gene)), bead = as.factor(as.character(bead)))

# only use the following abundant singlet cell types:
# Astrocyte, CA1, CA3, Dentate, Interneuron, Microglia/Macrophages, Oligo, Endothelial tip
# (the RCTD reference also includes more, but for our purposes we are only
# interested in these)
# RCTD reference also misspelled Dentate as Denate

singlets <- singlets %>%
  filter(first_type %in% c('Astrocyte','CA1','CA3','Denate','Interneuron',
                           'Microglia_Macrophages', 'Oligodendrocyte', 'Endothelial_Tip')) %>%
  mutate(first_type = factor(first_type))

remove.beads <- readRDS('../inst/extdata/CA1-CA3-Dentate-remove-these-beads.rds')
singlets <- singlets %>%
  filter(!(bead %in% remove.beads))

# manually refine CA1, CA3, and Dentate labels
# i ran this in the console to get rid of some misc calls for CA1, CA3, and Dentate
# then i saved the beads that were truly CA1.
library(gatepoints)
ca1 <- singlets %>% filter(first_type=='CA1') %>%
  dplyr::select(bead,X,Y) %>%
  distinct()
test <- as.matrix(ca1[,c(2,3)])
X11()
plot(test)
selectedPoints <- fhs(test, mark=T)
remove.ca1.beads <- ca1$bead[-as.numeric(selectedPoints)]
ca3 <- singlets %>% filter(first_type=='CA3') %>%
  dplyr::select(bead,X,Y) %>%
  distinct()
test <- as.matrix(ca3[,c(2,3)])
X11()
plot(test)
selectedPoints <- fhs(test, mark=T)
remove.ca3.beads <- ca3$bead[-as.numeric(selectedPoints)]
dentate <- singlets %>% filter(first_type=='Denate') %>%
  dplyr::select(bead,X,Y) %>%
  distinct()
test <- as.matrix(dentate[,c(2,3)])
X11()
plot(test)
selectedPoints <- fhs(test, mark=T)
remove.dentate.beads <- dentate$bead[-as.numeric(selectedPoints)]
remove.beads <- c(as.character(remove.ca1.beads), as.character(remove.ca3.beads), as.character(remove.dentate.beads))
saveRDS(remove.beads, file='../inst/extdata/CA1-CA3-Dentate-remove-these-beads.rds')

# plot a map of cell types called from RCTD
# make a palette out of Set1 and 2
mypal <- data.frame(first_type=unique(singlets$first_type) %>% sort(),fill=c("#4DAF4A","#FF7F00","#377EB8","#984EA3","#E41A1C","#FFD700","#F781BF","#A65628")) %>%
  arrange(first_type)
mypal$fill <- factor(mypal$fill, levels=unique(mypal$fill))
singlets %>%
  dplyr::select(X,Y,first_type) %>%
  distinct() %>%
  left_join(mypal, by='first_type') %>%
  # arrange(first_type) %>%
  # mutate(first_type = factor(first_type, levels=unique(mypal$first_type))) %>%
  ggplot(aes(x = X, y= Y)) +
  geom_point(aes(color=fill),size=0.2) +
  scale_color_identity(guide='legend', name='', labels=(mypal%>%arrange(fill)%>%
                                                          mutate(first_type = gsub('_',' ',first_type)) %>%
                                                          mutate(first_type = gsub('Denate','Dentate',first_type))%>%mutate(first_type = gsub('Microglia Macrophages','Microglia/Macrophages',first_type))%>%mutate(gsub('Entorihinal','Entorhinal',first_type))%>%pull(first_type))) +
  theme_classic() +
  guides(colour = guide_legend(ncol=1,override.aes = list(size=3))) +
  #theme(legend.position='none') +
  xlab('x1') +
  ylab('x2')
ggsave('figure2a.png', width=4,height=4,units='in')

# singlets, with and without using cell type
mcast <- with(singlets, sparseMatrix(i = as.numeric(gene), j = as.numeric(bead), x=CAST, dimnames=list(levels(gene),levels(bead))))
m129 <- with(singlets, sparseMatrix(i = as.numeric(gene), j = as.numeric(bead), x=X129, dimnames=list(levels(gene),levels(bead))))
covariates <- singlets%>% dplyr::select(bead, X, Y, first_type) %>% distinct() 
res10_singlets_nocelltype<-spase(mcast, m129, covariates %>% dplyr::select(-first_type), cores=1, df=10, min.umi=100)
res10_singlets_celltype<-spase(mcast, m129, covariates, cores=2, df=10, min.umi=100)

singlets %>%
  group_by(gene) %>%
  summarise(totalSpots = n()) %>%
  ggplot(aes(x = log2(totalSpots))) +
  geom_histogram(bins=40) +
  geom_vline(xintercept=log2(100), lty='dashed') +
  theme_classic() +
  scale_y_continuous(expand=c(0,0)) +
  xlab('log2(total pixels)') +
  ylab('Number of genes')
ggsave('fig3-totalpixels-singlets.png', height=2, width=2)

# summary of number of pixels for each gene
summary(res10_singlets_nocelltype$result$totalSpots)
# summary of number of UMI per pixel for each gene
summary(res10_singlets_nocelltype$result$totalUMI/res10_singlets_nocelltype$result$totalSpots)
# summary of baseline overdispersion for each gene
summary(res10_singlets_nocelltype$result$phi.baseline)
# summary of spatial model overdispersion for each gene
summary(res10_singlets_nocelltype$result$phi.full)

# summary of number of pixels for each gene
summary(res10_singlets_celltype$result$totalSpots)
# summary of number of UMI per pixel for each gene
summary(res10_singlets_celltype$result$totalUMI/res10_singlets_nocelltype$result$totalSpots)
# summary of baseline overdispersion for each gene
summary(res10_singlets_celltype$result$phi.baseline)
# summary of spatial model overdispersion for each gene
summary(res10_singlets_celltype$result$phi.full)

res10_singlets_nocelltype$result$xchr <- ifelse(res10_singlets_nocelltype$result$gene %in% xchr, 'xchr', 'autosome')
res10_singlets_nocelltype$result %>%
  ggplot(aes(x = phi.baseline, y = phi.full)) +
  geom_point(aes(color=log2(totalSpots)),alpha=1, size=0.05) +
  scale_color_viridis(name = 'log2(total pixels)') +
  geom_abline(intercept=0, slope=1, lty='dashed', color='red',size=0.5) +
  geom_rug(alpha=0.01) +
  theme_classic() +
  theme(legend.position = 'none') +
  xlab(TeX(r'(Estimated $\phi$ null)')) +
  ylab(TeX(r'(Estimated $\phi$ spatial)'))
ggsave('overdispersions-singlets.png', height=2, width=2)

res10_singlets_nocelltype$result$xchr <- ifelse(res10_singlets_nocelltype$result$gene %in% xchr, 'X - no cell type', 'A - no cell type')
res10_singlets_celltype$result$xchr <- ifelse(res10_singlets_celltype$result$gene %in% xchr, 'X - cell type', 'A - cell type')
dd <- res10_singlets_nocelltype$result %>%
  bind_rows(res10_singlets_celltype$result) %>%
  mutate(xchr = factor(xchr, levels=c('A - no cell type', 'A - cell type',
                                      'X - no cell type', 'X - cell type'))) 
dd %>%
  filter(xchr=='A - no cell type') %>%
  pull(chisq.p) %>%
  summary()
dd %>%
  filter(xchr=='A - cell type') %>%
  pull(chisq.p) %>%
  summary()
dd %>%
  filter(xchr=='X - no cell type') %>%
  pull(chisq.p) %>%
  summary()
dd %>%
  filter(xchr=='X - cell type') %>%
  pull(chisq.p) %>%
  summary()

dd %>%
  mutate(p.value.bin = cut(chisq.p, breaks=seq(0,1,0.25))) %>%
  group_by(xchr, p.value.bin) %>%
  summarise(num.genes = n()) %>%
  filter(!is.na(p.value.bin)) %>%
  group_by(xchr) %>%
  mutate(total = sum(num.genes)) %>%
  mutate(prop = num.genes/total) %>%
  ggplot(aes(x = p.value.bin, y = prop)) +
  geom_bar(stat='identity') +
  theme_classic() +
  scale_y_continuous(expand=c(0,0)) +
  facet_wrap(xchr ~ ., nrow=1) +
  xlab('p-value') +
  ylab('Fraction of genes') +
  theme_bw() +
  theme(axis.text.x = element_text(angle=45, hjust=1))
ggsave('fig2b-pvals-singlets.png', height=3, width=6)

ellipseX <- 3
ellipseY <- 3.2
ellipseWidth <- 1.9
ellipseHeight <- 1.1
ellipseAngle <- pi/2.3
predictCoordsEllipse <- predictCoords %>%
  filter(((X-ellipseX)*cos(ellipseAngle)+(Y-ellipseY)*sin(ellipseAngle))^2/ellipseWidth^2 + ((X-ellipseX)*sin(ellipseAngle)+(Y-ellipseY)*cos(ellipseAngle))^2/ellipseHeight^2 <= 1)

#plotSpase(mcast, m129, covariates, res10_singlets_nocelltype, coords = predictCoords, size.scale = F, point.size=1, point.outline=T, crosshairs=T, cross.x1 = 3, cross.x2 = 3, genes=c('Hpca'), save='hpca')
plotSpase(mcast, m129, covariates, res10_singlets_nocelltype, coords = predictCoords, size.scale = F, point.size=1, point.outline=T, crosshairs=T, cross.x1 = 3, cross.x2 = 3, genes=c('Hpca'))

result.nocelltype <- res10_singlets_nocelltype$result %>%
  filter(qval <= 0.01) %>%
  arrange(qval)
result.nocelltype$xchr <- ifelse(result.nocelltype$gene %in% xchr, T, F)
# total number of significant genes with min pixels 400:
nrow(result.nocelltype)
# total number of x chromosome genes 
sum(result.nocelltype$xchr)
xtable(result.nocelltype %>% dplyr::select(gene,totalUMI,chisq.p,qval,xchr),display=c('d', 's', 'd', 'e', 'e', 's')) #supp table 2

result.celltype <- res10_singlets_celltype$result %>%
  filter(qval <= 0.01) %>%
  arrange(qval)
result.celltype$xchr <- ifelse(result.celltype$gene %in% xchr, T, F)
# total number of significant genes with min pixels 400:
nrow(result.celltype)
# total number of x chromosome genes 
sum(result.celltype$xchr)
xtable(result.celltype %>% dplyr::select(gene,totalUMI,chisq.p,qval,xchr),display=c('d', 's', 'd', 'e', 'e', 's')) #supp table 2

# repeat above two blocks for k=5, 15, 20 to see if reproducible
res5.nocelltype <- spase(mcast, m129, covariates %>% dplyr::select(-first_type), cores=2, df=5, min.umi=100)
res10.nocelltype <- spase(mcast, m129, covariates %>% dplyr::select(-first_type), cores=2, df=15, min.umi=100)
res20.nocelltype <- spase(mcast, m129, covariates %>% dplyr::select(-first_type), cores=2, df=20, min.umi=100)

result.nocelltype.5 <- res5.nocelltype$result %>%
  filter(qval <= 0.01) %>%
  arrange(qval)
result.nocelltype.5$xchr <- ifelse(result.nocelltype.5$gene %in% xchr, T, F)
# total number of significant genes with min total UMI 1000, min pixels 100:
nrow(result.nocelltype.5)
# total number of x chromosome genes 
sum(result.nocelltype.5$xchr)
xtable(result.nocelltype.5 %>% dplyr::select(gene,totalUMI,chisq.p,qval,xchr),display=c('d', 's', 'd', 'e', 'e', 's'))

result.nocelltype.10 <- res10.nocelltype$result %>%
  filter(qval <= 0.01) %>%
  arrange(qval)
result.nocelltype.10$xchr <- ifelse(result.nocelltype.10$gene %in% xchr, T, F)
# total number of significant genes with min total UMI 1000, min pixels 100:
nrow(result.nocelltype.10)
# total number of x chromosome genes 
sum(result.nocelltype.10$xchr)
xtable(result.nocelltype.10 %>% dplyr::select(gene,totalUMI,chisq.p,qval,xchr),display=c('d', 's', 'd', 'e', 'e', 's'))

result.nocelltype.20 <- res20.nocelltype$result %>%
  filter(qval <= 0.01) %>%
  arrange(qval)
result.nocelltype.20$xchr <- ifelse(result.nocelltype.20$gene %in% xchr, T, F)
# total number of significant genes with min total UMI 1000, min pixels 100:
nrow(result.nocelltype.20)
# total number of x chromosome genes 
sum(result.nocelltype.20$xchr)
xtable(result.nocelltype.20 %>% dplyr::select(gene,totalUMI,chisq.p,qval,xchr),display=c('d', 's', 'd', 'e', 'e', 's'))

# use all pixels, spatial coordinates, to increase resolution for estimated p
mcast <- with(joint, sparseMatrix(i = as.numeric(gene), j = as.numeric(bead), x=CAST, dimnames=list(levels(gene),levels(bead))))
m129 <- with(joint, sparseMatrix(i = as.numeric(gene), j = as.numeric(bead), x=X129, dimnames=list(levels(gene),levels(bead))))
covariates <- joint%>% dplyr::select(bead, X, Y) %>% distinct() 
# filter out beads with more than two locations
dupebeads <- covariates %>% group_by(bead) %>% summarise(n=n()) %>% filter(n>1) %>% pull(bead)
covariates <- covariates %>% filter(!(bead %in% dupebeads))
res15.nocelltype <- spase(mcast, m129, covariates, cores=2, df=15, min.umi=400)

#plotSpase(mcast, m129, covariates, res15.nocelltype, coords=predictCoords, genes=c('Tspan7', 'Plp1', 'Xist', 'Tceal3'), crosshairs=F, save='fig3-')
result.nocelltype <- res15.nocelltype$result
result.nocelltype$xchr <- ifelse(result.nocelltype$gene %in% xchr, T, F)
plotSpase(mcast, m129, covariates, res15.nocelltype, coords=predictCoords, genes=c('Tspan7', 'Plp1', 'Xist', 'Tceal3'), crosshairs=F)

#plotSpase(mcast, m129, covariates, res15.nocelltype, coords=predictCoords, genes=c('Ptgds', 'Nrip3', 'Sst'), save='fig4-autosome')
plotSpase(mcast, m129, covariates, res15.nocelltype, coords=predictCoords, genes=c('Ptgds', 'Nrip3', 'Sst'))

#plotSpase(mcast, m129, covariates, res15.nocelltype, coords=predictCoords, genes=result.nocelltype %>% filter(xchr) %>% pull(gene), save='SuppFig-xchrom')
plotSpase(mcast, m129, covariates, res15.nocelltype, coords=predictCoords, genes=result.nocelltype %>% filter(xchr) %>% pull(gene))

#plotSpase(mcast, m129, covariates, res15.nocelltype, coords=predictCoords, genes=res15.nocelltype$result %>% filter(xchr!='autosome', !is.na(phi.baseline)) %>% pull(gene), save='all-xchrom',cross.x1=3, cross.x2=3)
plotSpase(mcast, m129, covariates, res15.nocelltype, coords=predictCoords, genes=(res15.nocelltype$result %>% dplyr::filter(xchr!='autosome', !is.na(phi.baseline)) %>% pull(gene)),cross.x1=3, cross.x2=3)

# merge the xchr genes except for xist 
xchr.idx <- which(rownames(mcast) %in% xchr)
xchr.idx <- xchr.idx[-which(xchr.idx == (which(rownames(mcast)=='Xist')))]
mcast.xchr <- colSums(mcast[xchr.idx,])
m129.xchr <- colSums(m129[xchr.idx,])
mcast.xchr <- t(as.matrix(mcast.xchr))
m129.xchr <- t(as.matrix(m129.xchr))
rownames(mcast.xchr) <- 'merged xchr'
rownames(m129.xchr) <- 'merged xchr'
res.xchr <- spase(mcast.xchr, m129.xchr, covariates, cores=1, df=15)

#plotSpase(mcast.xchr, m129.xchr, covariates, res.xchr, coords=predictCoords, cross.x1=3, cross.x2=3, save='figure3xchrmerged')
plotSpase(mcast.xchr, m129.xchr, covariates, res.xchr, coords=predictCoords, cross.x1=3, cross.x2=3)

#plotSpase(mcast, m129, covariates, res15.nocelltype, genes=c('Xist', 'Tspan7', 'Plp1', 'Tceal6'), coords=predictCoords,cross.x1=3, cross.x2=3, save='figure3xchr')
plotSpase(mcast, m129, covariates, res15.nocelltype, genes=c('Xist', 'Tspan7', 'Plp1', 'Tceal3'), coords=predictCoords,cross.x1=3, cross.x2=3)

mcast <- with(singlets, sparseMatrix(i = as.numeric(gene), j = as.numeric(bead), x=CAST, dimnames=list(levels(gene),levels(bead))))
m129 <- with(singlets, sparseMatrix(i = as.numeric(gene), j = as.numeric(bead), x=X129, dimnames=list(levels(gene),levels(bead))))
covariates <- singlets%>% dplyr::select(bead, first_type) %>% distinct() 
ss <- scase(mcast, m129, min.cells=300, cores=2)

# test for gradient within cell type for gene/cell type combos with high
# enough cells and total UMI
# THIS LOOP TAKES A WHILE WHICH IS WHY I SAVED THE RESULTS
# singlets.celltype <- singlets %>%
#   group_by(gene, first_type) %>%
#   summarise(nbeads=n(), numis = sum(CAST+X129)) %>%
#   arrange(desc(nbeads, numis)) %>%
#   filter(nbeads > 100, numis > 100)
# singlets.celltype$chisq.p <- NA
# for (i in 1:nrow(singlets.celltype)) {
#   print(i)
#   singlets.subset <- singlets %>%
#     filter(gene==singlets.celltype$gene[i], first_type==singlets.celltype$first_type[i]) %>%
#     mutate(gene=factor(gene), first_type=factor(first_type), bead=factor(bead))
#   mcast <- with(singlets.subset, sparseMatrix(i=as.numeric(gene), j=as.numeric(bead), x=CAST,dimnames=list(levels(gene),levels(bead))))
#   m129 <- with(singlets.subset, sparseMatrix(i=as.numeric(gene), j=as.numeric(bead), x=X129,dimnames=list(levels(gene),levels(bead))))
#   covariates <- singlets.subset%>% dplyr::select(bead, X, Y) %>% distinct()
#   res.subset <- spase(mcast, m129, covariates, cores=1, df=5, min.umi = 100)
#   singlets.celltype$chisq.p[i] <- res.subset$result$chisq.p
# }
# saveRDS(singlets.celltype, file='../inst/extdata/ASE_paper_singlets_celltype_results.rds')
singlets.celltype <- readRDS('../inst/extdata/ASE_paper_singlets_celltype_results.rds')
singlets.celltype$qval <- p.adjust(singlets.celltype$chisq.p, method='BH')
singlets.celltype <- singlets.celltype %>% arrange(qval) 
colnames(singlets.celltype) <- c('Gene', 'Cell type', 'Beads', 'UMIs', 'pval', 'qval')
xtable(singlets.celltype %>% filter(qval<0.01),display=c('d','s', 's', 'd', 'd', 'e', 'e'))

# investigate/plot cell type specific gradients
df<-5
singlets.subset <- singlets %>%
  filter(gene=='Tspan7', first_type=='Astrocyte') %>%
  mutate(gene=factor(gene), first_type=factor(first_type), bead=factor(bead)) 
mcast <- with(singlets.subset, sparseMatrix(i=as.numeric(gene), j=as.numeric(bead), x=CAST,dimnames=list(levels(gene),levels(bead))))
m129 <- with(singlets.subset, sparseMatrix(i=as.numeric(gene), j=as.numeric(bead), x=X129,dimnames=list(levels(gene),levels(bead))))
covariates <- singlets.subset%>% dplyr::select(bead, X, Y) %>% distinct()
res.subset <- spase(mcast, m129, covariates, cores=1, df=df, min.umi = 100)
#plotSpase(mcast, m129, covariates, res.subset, coords=predictCoords, crosshairs=F, save=paste0('tspan7-Astrocyte','-df-',df))
plotSpase(mcast, m129, covariates, res.subset, coords=predictCoords, crosshairs=F)

# investigate/plot cell type specific gradients
df<-15
singlets.subset <- singlets %>%
  filter(gene=='Plp1', first_type=='Oligodendrocyte') %>%
  mutate(gene=factor(gene), first_type=factor(first_type), bead=factor(bead)) 
mcast <- with(singlets.subset, sparseMatrix(i=as.numeric(gene), j=as.numeric(bead), x=CAST,dimnames=list(levels(gene),levels(bead))))
m129 <- with(singlets.subset, sparseMatrix(i=as.numeric(gene), j=as.numeric(bead), x=X129,dimnames=list(levels(gene),levels(bead))))
covariates <- singlets.subset%>% dplyr::select(bead, X, Y) %>% distinct()
res.subset <- spase(mcast, m129, covariates, cores=1, df=df, min.umi = 100)
#plotSpase(mcast, m129, covariates, res.subset, coords=predictCoords, crosshairs=F, save=paste0('plp1-oligodendrocyte-df-',df))
plotSpase(mcast, m129, covariates, res.subset, coords=predictCoords, crosshairs=F)

# linearize CA1, CA3, Dentate
CA1.linear <- singlets %>% filter(first_type=='CA1') %>%
  dplyr::select(bead, X, Y) %>%
  distinct()
plot(CA1.linear$X, CA1.linear$Y)
# find the start and end using min and max
#v = c(5000 - 2000,2000-4000)
v = c(max(CA1.linear$X)-min(CA1.linear$X),min(CA1.linear$Y)-max(CA1.linear$Y))
CA1.linear$Z <- CA1.linear$X*v[1] + CA1.linear$Y*v[2]
CA1.linear <- CA1.linear %>%
  arrange(Z)
CA1.linear$Z.scaled <- rescale(CA1.linear$Z, to=c(0,5))
# 
df<-3
singlets.subset <- singlets %>%
  filter(gene=='Syt11', first_type=='CA1') %>%
  mutate(gene=factor(gene), first_type=factor(first_type), bead=factor(bead)) 
mcast <- with(singlets.subset, sparseMatrix(i=as.numeric(gene), j=as.numeric(bead), x=CAST,dimnames=list(levels(gene),levels(bead))))
m129 <- with(singlets.subset, sparseMatrix(i=as.numeric(gene), j=as.numeric(bead), x=X129,dimnames=list(levels(gene),levels(bead))))
covariates <- CA1.linear%>% dplyr::select(bead, Z.scaled) %>% distinct()
colnames(covariates)[2] <- 'x1'
res.subset <- spase(mcast, m129, covariates, cores=1, df=3, min.umi = 100)
sm <- smoothCon(s(x1,k=df,fx=T,bs='tp'),data=covariates)[[1]]
smooth.model <- res.subset$fits[[1]]
smooth.coef <- coef(smooth.model)
Xp <- PredictMat(sm, covariates)
covariates$fit <- Xp%*%smooth.coef
left_join(singlets.subset, covariates, by=c('bead')) %>%
  ggplot(aes(x = X, y = Y)) +
  geom_point(aes(color=expit(fit)))

# redo the CA1 but with linearized
singlets.CA1 <- singlets %>%
  group_by(gene, first_type) %>%
  summarise(nbeads=n(), numis = sum(CAST+X129)) %>%
  arrange(desc(nbeads, numis)) %>%
  filter(nbeads > 100, numis > 100) %>%
  filter(first_type=='CA1')
singlets.CA1$chisq.p <- NA
for (i in 1:nrow(singlets.CA1)) {
  print(i)
  singlets.subset <- singlets %>%
    filter(gene==singlets.CA1$gene[i], first_type==singlets.CA1$first_type[i]) %>%
    mutate(gene=factor(gene), first_type=factor(first_type), bead=factor(bead))
  mcast <- with(singlets.subset, sparseMatrix(i=as.numeric(gene), j=as.numeric(bead), x=CAST,dimnames=list(levels(gene),levels(bead))))
  m129 <- with(singlets.subset, sparseMatrix(i=as.numeric(gene), j=as.numeric(bead), x=X129,dimnames=list(levels(gene),levels(bead))))
  covariates <- CA1.linear%>% dplyr::select(bead, Z.scaled) %>% distinct()
  colnames(covariates)[2] <- 'x1'
  res.subset <- spase(mcast, m129, covariates, cores=1, df=3, min.umi = 100)
  singlets.CA1$chisq.p[i] <- res.subset$result$chisq.p
}

# get individual CI results for each cell type using scase
# which ones are significant at qval<0.01
signif0.01 <- res10.nocelltype$result %>% arrange(qval) %>% filter(qval<0.01)
signif0.01$xchr <- ifelse(signif0.01$gene%in%xchr, T, F)
mcast <- with(singlets, sparseMatrix(i = as.numeric(gene), j = as.numeric(bead), x=CAST, dimnames=list(levels(gene),levels(bead))))
m129 <- with(singlets, sparseMatrix(i = as.numeric(gene), j = as.numeric(bead), x=X129, dimnames=list(levels(gene),levels(bead))))
covariates <- singlets%>% dplyr::select(bead, X, Y, first_type) %>% distinct() 
sccov <- covariates[,c(1,4)]
sccov$first_type <- as.factor(sccov$first_type)
res_celltype_sc <- scase(mcast, m129, sccov, cores=2, genes = signif0.01$gene)

# for each gene, plot the estimated p, CI, and total UMI
res_celltype_sc <- res_celltype_sc %>%
  mutate(p.low = expit(logit.p-2*logit.p.sd),
         p.high = expit(logit.p+2*logit.p.sd),
         p = expit(logit.p)) %>%
  left_join(mypal, by =c('lvl'='first_type'))

res_celltype_sc %>%
  ggplot(aes(y = totalUMI)) +
  geom_point(aes(x = p, color=fill)) +
  geom_errorbarh(aes(xmin = p.low, xmax=p.high, color=fill)) +
  scale_color_identity() +
  geom_vline(xintercept=0.5, lty='dashed', color='grey') +
  facet_wrap(gene ~ ., scales='free') +
  scale_x_continuous(limits=c(0,1),breaks=c(0,0.25,0.50,0.75,1)) +
  theme_bw() +
  theme(axis.text.x = element_text(angle=45, hjust=1)) +
  xlab('Estimated p maternal') +
  ylab('Total UMI')
#ggsave('figure4-celltype-cis.png', height=8, width=7)

res_celltype_sc %>%
  filter(gene %in% c('Nrip3','Ptgds','Sst')) %>%
  ggplot(aes(y = totalUMI)) +
  geom_point(aes(x = p, color=fill),size=3) +
  geom_errorbarh(aes(xmin = p.low, xmax=p.high, color=fill)) +
  scale_color_identity() +
  geom_vline(xintercept=0.5, lty='dashed', color='grey') +
  facet_wrap(gene ~ ., nrow=1) +
  scale_x_continuous(limits=c(0,1),breaks=c(0,0.25,0.50,0.75,1)) +
  theme_bw() +
  theme(axis.text.x = element_text(angle=45, hjust=1))+
  xlab('Estimated p maternal') +
  ylab('Total UMI')
#ggsave('figure4-celltype-cis-top.png', height=3, width=5)

# compare hippocampal formation to everything else
HPF <- c('CA1', 'CA3', 'Dentate')
hpfcov <- sccov
hpfcov <- hpfcov %>% mutate(first_type = ifelse(first_type %in% HPF,
                                                'HPF', 'other'))
hpfcov$first_type <- as.factor(hpfcov$first_type)
res_celltype_hpf <- scase(mcast, m129, hpfcov, cores=2, genes = signif0.01$gene[!signif0.01$xchr])

res_celltype_hpf %>%
  mutate(p.low = expit(logit.p-2*logit.p.sd),
         p.high = expit(logit.p+2*logit.p.sd),
         p = expit(logit.p)) %>%
  ggplot(aes(y = totalUMI)) +
  geom_point(aes(x = p, color=lvl)) +
  geom_errorbarh(aes(xmin = p.low, xmax=p.high, color=lvl)) +
  geom_vline(xintercept=0.5, lty='dashed', color='grey') +
  facet_wrap(gene ~ ., scales='free') +
  scale_x_continuous(limits=c(0,1),breaks=c(0,0.25,0.50,0.75,1)) +
  theme_bw()

# plotting the non-xchr cell type points with CIs
res_celltype_sc %>%
  ggplot(aes(y = gene)) +
  geom_errorbarh(aes(xmin = p.low, xmax = p.high, color = fill),
                 position=position_dodge(width=0.9)) +
  geom_point(aes(x=p, color=fill), position=position_dodge(width=0.9), size=0.5) +
  geom_vline(xintercept=0.5, lty='dashed') +
  geom_hline(yintercept=1.5) +
  geom_hline(yintercept=2.5) +
  geom_hline(yintercept=3.5) +
  geom_hline(yintercept=4.5) +
  geom_hline(yintercept=5.5) +
  geom_hline(yintercept=6.5) +
  scale_color_identity() +
  scale_x_continuous(limits=c(0,1),breaks=c(0,0.25,0.50,0.75,1)) +
  theme_classic() +
  theme(axis.text.x = element_text(angle=90, hjust=1)) +
  ylab('')
ggsave(filename='figure4b.png', height=4, width=1.5, units='in')

# plot only oligodendrocytes for Ptgds
both.idx <- which(covariates$first_type%in%c('Oligodendrocyte','Endothelial_Tip'))
#both.idx <- c(both.idx, grep('Endothelial', covariates$first_type))
both.beads <- covariates$bead[both.idx]
y <- mcast['Ptgds',both.beads]
total <- y+m129['Ptgds',both.beads]
idx <- which(total>0)
y<-y[idx]; total<-total[idx]; covari<-covariates[both.idx,]
covari <- covari[idx,]
covari$shape <- ifelse(covari$first_type=='Oligodendrocyte',21,24)
ggplot(cbind(data.frame(y=y, total=total), covari),
       aes(x=X,y=Y)) +
  geom_point(aes(fill=y/total,size=total,shape=shape),  color='black') +
  scale_fill_gradient2(name='CAST/total', low='blue', mid='white', high='red',
                       midpoint=0.5, limits=c(0,1)) +
  scale_shape_identity(labels=c('Oligo', 'Endo')) +
  scale_size_continuous(limits=c(1,43), breaks=c(1,10,20,30,40)) +
  ylim(c(0.5,5.5)) +
  xlim(c(1,6)) +
  xlab('x1') +
  ylab('x2') +
  theme_classic() 
#ggsave('bottom legend.png', height=3,width=7)

# plot astrocytes for Nrip3
inter.idx <- which(covariates$first_type%in%c('CA1', 'CA3', 'Denate', 'Interneuron','Astrocyte'))
inter.beads <- covariates$bead[inter.idx]
y <- mcast['Nrip3',inter.beads]
total <- y+m129['Nrip3',inter.beads]
covari <- covariates[inter.idx,]
idx <- which(total>0)
y<-y[idx]; total<-total[idx]; covari<-covari[idx,]
ggplot(cbind(data.frame(y=y, total=total), covari),
       aes(x=X,y=Y)) +
  geom_point(aes(fill=y/total,size=total), shape=21, color='black') +
  scale_fill_gradient2(name='CAST/total', low='blue', mid='white', high='red',
                       midpoint=0.5, limits=c(0,1)) +
  scale_size_continuous(limits=c(1,43), breaks=c(1,10,20,30,40)) +
  ylim(c(0.5,5.5)) +
  xlim(c(1,6)) +
  xlab('x1') +
  ylab('x2') +
  theme_classic() 
ggsave('figure4nrip-astrocyte.png', height=3,width=4)

# plot interneurons for Sst
inter.idx <- which(grepl('Interneuron', covariates$first_type))
inter.beads <- covariates$bead[inter.idx]
y <- mcast['Sst',inter.beads]
total <- y+m129['Sst',inter.beads]
covari <- covariates[inter.idx,]
idx <- which(total>0)
y<-y[idx]; total<-total[idx]; covari<-covari[idx,]
ggplot(cbind(data.frame(y=y, total=total), covari),
       aes(x=X,y=Y)) +
  geom_point(aes(fill=y/total,size=total), shape=21, color='black') +
  scale_fill_gradient2(name='CAST/total', low='blue', mid='white', high='red',
                       midpoint=0.5, limits=c(0,1)) +
  scale_size_continuous(limits=c(1,43), breaks=c(1,10,20,30,40)) +
  ylim(c(0.5,5.5)) +
  xlim(c(1,6)) +
  xlab('x1') +
  ylab('x2') +
  theme_classic() 
ggsave('figure4sst-inter.png', height=3,width=4)