Code for reproducing Figures 2-4.
library(dplyr) library(tidyr) library(reshape2) library(ggplot2) library(spASE) library(viridis) library(Matrix) library(latex2exp) library(RColorBrewer) library(xtable) library(RCurl) library(scales) set.seed(1337) expit <- function(x) { return(exp(x)/(1+exp(x))) }
# xchr annotations for filtering out xchr genes in single-cell xchr <- read.delim('../inst/extdata/MGImarkerQuery_20190611_114216.txt')$Symbol
# Slide-seq data # puck 4 needs to be rotated and shifted load('../inst/extdata/pucks.3-4.spatial.RData') puck3.spatial <- puck3.spatial %>% mutate(total = CAST+X129) %>% mutate(X = xcoord/1000, Y = ycoord/1000) puck4.spatial <- puck4.spatial %>% mutate(total = CAST+X129) %>% mutate(X = xcoord/1000, Y = ycoord/1000) angle <- 261*pi/180 x.shift <- 0.37 y.shift <- -0.16 x.center <- 3.25 y.center <- 3.025 # rotate and shift puck 4 new.X <- (puck4.spatial$X-x.center)*cos(angle) - (puck4.spatial$Y-y.center)*sin(angle)+x.center + x.shift new.Y <- (puck4.spatial$X-x.center)*sin(angle) + (puck4.spatial$Y-y.center)*cos(angle)+y.center + y.shift puck4.spatial$X <- new.X puck4.spatial$Y <- new.Y # load in RCTD results # use lower threshold for calling a singlet # doublet_uncertain and doublet_certains will be reclassified as singlets # if they meet threshold SINGLET_THRESH <- 100 rctd.results.puck3 <- readRDS('../inst/extdata/rctd_puck_3.rds') test <- rctd.results.puck3$results_df test %>% mutate(diff = singlet_score-min_score) %>% filter(spot_class != 'reject') %>% ggplot(aes(x = diff)) + geom_histogram(aes(fill = spot_class), color='white') + theme_bw() + geom_vline(xintercept=100, lty='dashed') + xlab('singlet score - min score') + ylab('num pixels') + ggtitle('singlet threshold - default of 25, changing to 100') coords <- puck3.spatial %>% dplyr::select(X, Y, bead, total) %>% group_by(X, Y, bead) %>% summarise(nUMI = sum(total)) puck3.rctd <- rctd.results.puck3$results_df %>% mutate(bead = rownames(rctd.results.puck3$results_df)) %>% left_join(coords) %>% mutate(spot_class = ifelse((spot_class != 'reject') & (singlet_score-min_score<=SINGLET_THRESH), 'singlet', 'reject or doublet')) rctd.results.puck4 <- readRDS('../inst/extdata/rctd_puck_4.rds') coords <- puck4.spatial %>% dplyr::select(X, Y, bead, total) %>% group_by(X, Y, bead) %>% summarise(nUMI = sum(total)) puck4.rctd <- rctd.results.puck4$results_df %>% mutate(bead = rownames(rctd.results.puck4$results_df)) %>% left_join(coords) %>% mutate(spot_class = ifelse((spot_class != 'reject') & (singlet_score-min_score<=SINGLET_THRESH), 'singlet', 'reject or doublet')) # combine RCTD annotations and both pucks puck3.spatial <- left_join(puck3.spatial, puck3.rctd %>% dplyr::select(bead, first_type, spot_class), by = 'bead') puck4.spatial <- left_join(puck4.spatial, puck4.rctd %>% dplyr::select(bead, first_type, spot_class), by = 'bead') joint <- bind_rows(puck3.spatial, puck4.spatial) joint <- joint %>% filter(!grepl('mt-',gene)) %>% filter(bead!='TTTTTTTTTTTTTT') %>% filter(bead!='GGGGGGGGGGGGGG') %>% filter(bead!='TTTGGAATTTAACT') %>% # duplicated bead mutate(gene = as.factor(as.character(gene)), bead = as.factor(as.character(bead))) %>% group_by(bead, gene, xcoord, ycoord, X, Y, first_type, spot_class) %>% summarise(CAST = sum(CAST), X129 = sum(X129), total = sum(total)) %>% # since the pucks have the same bead barcodes which have the same coordinates ungroup() # get locations at a grid of points xcoords <- seq(min(joint$X), max(joint$X), by = 0.1) ycoords <- seq(min(joint$Y), max(joint$Y), by = 0.1) predictCoords <- expand.grid(xcoords, ycoords) colnames(predictCoords) <- c('X', 'Y') predictCoords <- data.frame(predictCoords) %>% filter(((X-3.2)^2+(Y-3.1)^2<=2.3^2) | ((X-x.center-x.shift)^2+(Y-y.center-y.shift)^2<=2.3^2)) singlets <- joint %>% filter(spot_class == 'singlet') %>% mutate(gene = as.factor(as.character(gene)), bead = as.factor(as.character(bead))) # only use the following abundant singlet cell types: # Astrocyte, CA1, CA3, Dentate, Interneuron, Microglia/Macrophages, Oligo, Endothelial tip # (the RCTD reference also includes more, but for our purposes we are only # interested in these) # RCTD reference also misspelled Dentate as Denate singlets <- singlets %>% filter(first_type %in% c('Astrocyte','CA1','CA3','Denate','Interneuron', 'Microglia_Macrophages', 'Oligodendrocyte', 'Endothelial_Tip')) %>% mutate(first_type = factor(first_type)) remove.beads <- readRDS('../inst/extdata/CA1-CA3-Dentate-remove-these-beads.rds') singlets <- singlets %>% filter(!(bead %in% remove.beads))
# manually refine CA1, CA3, and Dentate labels # i ran this in the console to get rid of some misc calls for CA1, CA3, and Dentate # then i saved the beads that were truly CA1. library(gatepoints) ca1 <- singlets %>% filter(first_type=='CA1') %>% dplyr::select(bead,X,Y) %>% distinct() test <- as.matrix(ca1[,c(2,3)]) X11() plot(test) selectedPoints <- fhs(test, mark=T) remove.ca1.beads <- ca1$bead[-as.numeric(selectedPoints)] ca3 <- singlets %>% filter(first_type=='CA3') %>% dplyr::select(bead,X,Y) %>% distinct() test <- as.matrix(ca3[,c(2,3)]) X11() plot(test) selectedPoints <- fhs(test, mark=T) remove.ca3.beads <- ca3$bead[-as.numeric(selectedPoints)] dentate <- singlets %>% filter(first_type=='Denate') %>% dplyr::select(bead,X,Y) %>% distinct() test <- as.matrix(dentate[,c(2,3)]) X11() plot(test) selectedPoints <- fhs(test, mark=T) remove.dentate.beads <- dentate$bead[-as.numeric(selectedPoints)] remove.beads <- c(as.character(remove.ca1.beads), as.character(remove.ca3.beads), as.character(remove.dentate.beads)) saveRDS(remove.beads, file='../inst/extdata/CA1-CA3-Dentate-remove-these-beads.rds')
# plot a map of cell types called from RCTD # make a palette out of Set1 and 2 mypal <- data.frame(first_type=unique(singlets$first_type) %>% sort(),fill=c("#4DAF4A","#FF7F00","#377EB8","#984EA3","#E41A1C","#FFD700","#F781BF","#A65628")) %>% arrange(first_type) mypal$fill <- factor(mypal$fill, levels=unique(mypal$fill)) singlets %>% dplyr::select(X,Y,first_type) %>% distinct() %>% left_join(mypal, by='first_type') %>% # arrange(first_type) %>% # mutate(first_type = factor(first_type, levels=unique(mypal$first_type))) %>% ggplot(aes(x = X, y= Y)) + geom_point(aes(color=fill),size=0.2) + scale_color_identity(guide='legend', name='', labels=(mypal%>%arrange(fill)%>% mutate(first_type = gsub('_',' ',first_type)) %>% mutate(first_type = gsub('Denate','Dentate',first_type))%>%mutate(first_type = gsub('Microglia Macrophages','Microglia/Macrophages',first_type))%>%mutate(gsub('Entorihinal','Entorhinal',first_type))%>%pull(first_type))) + theme_classic() + guides(colour = guide_legend(ncol=1,override.aes = list(size=3))) + #theme(legend.position='none') + xlab('x1') + ylab('x2') ggsave('figure2a.png', width=4,height=4,units='in')
# singlets, with and without using cell type mcast <- with(singlets, sparseMatrix(i = as.numeric(gene), j = as.numeric(bead), x=CAST, dimnames=list(levels(gene),levels(bead)))) m129 <- with(singlets, sparseMatrix(i = as.numeric(gene), j = as.numeric(bead), x=X129, dimnames=list(levels(gene),levels(bead)))) covariates <- singlets%>% dplyr::select(bead, X, Y, first_type) %>% distinct() res10_singlets_nocelltype<-spase(mcast, m129, covariates %>% dplyr::select(-first_type), cores=1, df=10, min.umi=100) res10_singlets_celltype<-spase(mcast, m129, covariates, cores=2, df=10, min.umi=100)
singlets %>% group_by(gene) %>% summarise(totalSpots = n()) %>% ggplot(aes(x = log2(totalSpots))) + geom_histogram(bins=40) + geom_vline(xintercept=log2(100), lty='dashed') + theme_classic() + scale_y_continuous(expand=c(0,0)) + xlab('log2(total pixels)') + ylab('Number of genes') ggsave('fig3-totalpixels-singlets.png', height=2, width=2)
# summary of number of pixels for each gene summary(res10_singlets_nocelltype$result$totalSpots) # summary of number of UMI per pixel for each gene summary(res10_singlets_nocelltype$result$totalUMI/res10_singlets_nocelltype$result$totalSpots) # summary of baseline overdispersion for each gene summary(res10_singlets_nocelltype$result$phi.baseline) # summary of spatial model overdispersion for each gene summary(res10_singlets_nocelltype$result$phi.full)
# summary of number of pixels for each gene summary(res10_singlets_celltype$result$totalSpots) # summary of number of UMI per pixel for each gene summary(res10_singlets_celltype$result$totalUMI/res10_singlets_nocelltype$result$totalSpots) # summary of baseline overdispersion for each gene summary(res10_singlets_celltype$result$phi.baseline) # summary of spatial model overdispersion for each gene summary(res10_singlets_celltype$result$phi.full)
res10_singlets_nocelltype$result$xchr <- ifelse(res10_singlets_nocelltype$result$gene %in% xchr, 'xchr', 'autosome') res10_singlets_nocelltype$result %>% ggplot(aes(x = phi.baseline, y = phi.full)) + geom_point(aes(color=log2(totalSpots)),alpha=1, size=0.05) + scale_color_viridis(name = 'log2(total pixels)') + geom_abline(intercept=0, slope=1, lty='dashed', color='red',size=0.5) + geom_rug(alpha=0.01) + theme_classic() + theme(legend.position = 'none') + xlab(TeX(r'(Estimated $\phi$ null)')) + ylab(TeX(r'(Estimated $\phi$ spatial)')) ggsave('overdispersions-singlets.png', height=2, width=2)
res10_singlets_nocelltype$result$xchr <- ifelse(res10_singlets_nocelltype$result$gene %in% xchr, 'X - no cell type', 'A - no cell type') res10_singlets_celltype$result$xchr <- ifelse(res10_singlets_celltype$result$gene %in% xchr, 'X - cell type', 'A - cell type') dd <- res10_singlets_nocelltype$result %>% bind_rows(res10_singlets_celltype$result) %>% mutate(xchr = factor(xchr, levels=c('A - no cell type', 'A - cell type', 'X - no cell type', 'X - cell type'))) dd %>% filter(xchr=='A - no cell type') %>% pull(chisq.p) %>% summary() dd %>% filter(xchr=='A - cell type') %>% pull(chisq.p) %>% summary() dd %>% filter(xchr=='X - no cell type') %>% pull(chisq.p) %>% summary() dd %>% filter(xchr=='X - cell type') %>% pull(chisq.p) %>% summary()
dd %>% mutate(p.value.bin = cut(chisq.p, breaks=seq(0,1,0.25))) %>% group_by(xchr, p.value.bin) %>% summarise(num.genes = n()) %>% filter(!is.na(p.value.bin)) %>% group_by(xchr) %>% mutate(total = sum(num.genes)) %>% mutate(prop = num.genes/total) %>% ggplot(aes(x = p.value.bin, y = prop)) + geom_bar(stat='identity') + theme_classic() + scale_y_continuous(expand=c(0,0)) + facet_wrap(xchr ~ ., nrow=1) + xlab('p-value') + ylab('Fraction of genes') + theme_bw() + theme(axis.text.x = element_text(angle=45, hjust=1)) ggsave('fig2b-pvals-singlets.png', height=3, width=6)
ellipseX <- 3 ellipseY <- 3.2 ellipseWidth <- 1.9 ellipseHeight <- 1.1 ellipseAngle <- pi/2.3 predictCoordsEllipse <- predictCoords %>% filter(((X-ellipseX)*cos(ellipseAngle)+(Y-ellipseY)*sin(ellipseAngle))^2/ellipseWidth^2 + ((X-ellipseX)*sin(ellipseAngle)+(Y-ellipseY)*cos(ellipseAngle))^2/ellipseHeight^2 <= 1)
#plotSpase(mcast, m129, covariates, res10_singlets_nocelltype, coords = predictCoords, size.scale = F, point.size=1, point.outline=T, crosshairs=T, cross.x1 = 3, cross.x2 = 3, genes=c('Hpca'), save='hpca') plotSpase(mcast, m129, covariates, res10_singlets_nocelltype, coords = predictCoords, size.scale = F, point.size=1, point.outline=T, crosshairs=T, cross.x1 = 3, cross.x2 = 3, genes=c('Hpca'))
result.nocelltype <- res10_singlets_nocelltype$result %>% filter(qval <= 0.01) %>% arrange(qval) result.nocelltype$xchr <- ifelse(result.nocelltype$gene %in% xchr, T, F) # total number of significant genes with min pixels 400: nrow(result.nocelltype) # total number of x chromosome genes sum(result.nocelltype$xchr) xtable(result.nocelltype %>% dplyr::select(gene,totalUMI,chisq.p,qval,xchr),display=c('d', 's', 'd', 'e', 'e', 's')) #supp table 2
result.celltype <- res10_singlets_celltype$result %>% filter(qval <= 0.01) %>% arrange(qval) result.celltype$xchr <- ifelse(result.celltype$gene %in% xchr, T, F) # total number of significant genes with min pixels 400: nrow(result.celltype) # total number of x chromosome genes sum(result.celltype$xchr) xtable(result.celltype %>% dplyr::select(gene,totalUMI,chisq.p,qval,xchr),display=c('d', 's', 'd', 'e', 'e', 's')) #supp table 2
# repeat above two blocks for k=5, 15, 20 to see if reproducible res5.nocelltype <- spase(mcast, m129, covariates %>% dplyr::select(-first_type), cores=2, df=5, min.umi=100) res10.nocelltype <- spase(mcast, m129, covariates %>% dplyr::select(-first_type), cores=2, df=15, min.umi=100) res20.nocelltype <- spase(mcast, m129, covariates %>% dplyr::select(-first_type), cores=2, df=20, min.umi=100)
result.nocelltype.5 <- res5.nocelltype$result %>% filter(qval <= 0.01) %>% arrange(qval) result.nocelltype.5$xchr <- ifelse(result.nocelltype.5$gene %in% xchr, T, F) # total number of significant genes with min total UMI 1000, min pixels 100: nrow(result.nocelltype.5) # total number of x chromosome genes sum(result.nocelltype.5$xchr) xtable(result.nocelltype.5 %>% dplyr::select(gene,totalUMI,chisq.p,qval,xchr),display=c('d', 's', 'd', 'e', 'e', 's'))
result.nocelltype.10 <- res10.nocelltype$result %>% filter(qval <= 0.01) %>% arrange(qval) result.nocelltype.10$xchr <- ifelse(result.nocelltype.10$gene %in% xchr, T, F) # total number of significant genes with min total UMI 1000, min pixels 100: nrow(result.nocelltype.10) # total number of x chromosome genes sum(result.nocelltype.10$xchr) xtable(result.nocelltype.10 %>% dplyr::select(gene,totalUMI,chisq.p,qval,xchr),display=c('d', 's', 'd', 'e', 'e', 's'))
result.nocelltype.20 <- res20.nocelltype$result %>% filter(qval <= 0.01) %>% arrange(qval) result.nocelltype.20$xchr <- ifelse(result.nocelltype.20$gene %in% xchr, T, F) # total number of significant genes with min total UMI 1000, min pixels 100: nrow(result.nocelltype.20) # total number of x chromosome genes sum(result.nocelltype.20$xchr) xtable(result.nocelltype.20 %>% dplyr::select(gene,totalUMI,chisq.p,qval,xchr),display=c('d', 's', 'd', 'e', 'e', 's'))
# use all pixels, spatial coordinates, to increase resolution for estimated p mcast <- with(joint, sparseMatrix(i = as.numeric(gene), j = as.numeric(bead), x=CAST, dimnames=list(levels(gene),levels(bead)))) m129 <- with(joint, sparseMatrix(i = as.numeric(gene), j = as.numeric(bead), x=X129, dimnames=list(levels(gene),levels(bead)))) covariates <- joint%>% dplyr::select(bead, X, Y) %>% distinct() # filter out beads with more than two locations dupebeads <- covariates %>% group_by(bead) %>% summarise(n=n()) %>% filter(n>1) %>% pull(bead) covariates <- covariates %>% filter(!(bead %in% dupebeads)) res15.nocelltype <- spase(mcast, m129, covariates, cores=2, df=15, min.umi=400)
#plotSpase(mcast, m129, covariates, res15.nocelltype, coords=predictCoords, genes=c('Tspan7', 'Plp1', 'Xist', 'Tceal3'), crosshairs=F, save='fig3-') result.nocelltype <- res15.nocelltype$result result.nocelltype$xchr <- ifelse(result.nocelltype$gene %in% xchr, T, F) plotSpase(mcast, m129, covariates, res15.nocelltype, coords=predictCoords, genes=c('Tspan7', 'Plp1', 'Xist', 'Tceal3'), crosshairs=F)
#plotSpase(mcast, m129, covariates, res15.nocelltype, coords=predictCoords, genes=c('Ptgds', 'Nrip3', 'Sst'), save='fig4-autosome') plotSpase(mcast, m129, covariates, res15.nocelltype, coords=predictCoords, genes=c('Ptgds', 'Nrip3', 'Sst'))
#plotSpase(mcast, m129, covariates, res15.nocelltype, coords=predictCoords, genes=result.nocelltype %>% filter(xchr) %>% pull(gene), save='SuppFig-xchrom') plotSpase(mcast, m129, covariates, res15.nocelltype, coords=predictCoords, genes=result.nocelltype %>% filter(xchr) %>% pull(gene))
#plotSpase(mcast, m129, covariates, res15.nocelltype, coords=predictCoords, genes=res15.nocelltype$result %>% filter(xchr!='autosome', !is.na(phi.baseline)) %>% pull(gene), save='all-xchrom',cross.x1=3, cross.x2=3) plotSpase(mcast, m129, covariates, res15.nocelltype, coords=predictCoords, genes=(res15.nocelltype$result %>% dplyr::filter(xchr!='autosome', !is.na(phi.baseline)) %>% pull(gene)),cross.x1=3, cross.x2=3)
# merge the xchr genes except for xist xchr.idx <- which(rownames(mcast) %in% xchr) xchr.idx <- xchr.idx[-which(xchr.idx == (which(rownames(mcast)=='Xist')))] mcast.xchr <- colSums(mcast[xchr.idx,]) m129.xchr <- colSums(m129[xchr.idx,]) mcast.xchr <- t(as.matrix(mcast.xchr)) m129.xchr <- t(as.matrix(m129.xchr)) rownames(mcast.xchr) <- 'merged xchr' rownames(m129.xchr) <- 'merged xchr' res.xchr <- spase(mcast.xchr, m129.xchr, covariates, cores=1, df=15)
#plotSpase(mcast.xchr, m129.xchr, covariates, res.xchr, coords=predictCoords, cross.x1=3, cross.x2=3, save='figure3xchrmerged') plotSpase(mcast.xchr, m129.xchr, covariates, res.xchr, coords=predictCoords, cross.x1=3, cross.x2=3)
#plotSpase(mcast, m129, covariates, res15.nocelltype, genes=c('Xist', 'Tspan7', 'Plp1', 'Tceal6'), coords=predictCoords,cross.x1=3, cross.x2=3, save='figure3xchr') plotSpase(mcast, m129, covariates, res15.nocelltype, genes=c('Xist', 'Tspan7', 'Plp1', 'Tceal3'), coords=predictCoords,cross.x1=3, cross.x2=3)
mcast <- with(singlets, sparseMatrix(i = as.numeric(gene), j = as.numeric(bead), x=CAST, dimnames=list(levels(gene),levels(bead)))) m129 <- with(singlets, sparseMatrix(i = as.numeric(gene), j = as.numeric(bead), x=X129, dimnames=list(levels(gene),levels(bead)))) covariates <- singlets%>% dplyr::select(bead, first_type) %>% distinct() ss <- scase(mcast, m129, min.cells=300, cores=2)
# test for gradient within cell type for gene/cell type combos with high # enough cells and total UMI # THIS LOOP TAKES A WHILE WHICH IS WHY I SAVED THE RESULTS # singlets.celltype <- singlets %>% # group_by(gene, first_type) %>% # summarise(nbeads=n(), numis = sum(CAST+X129)) %>% # arrange(desc(nbeads, numis)) %>% # filter(nbeads > 100, numis > 100) # singlets.celltype$chisq.p <- NA # for (i in 1:nrow(singlets.celltype)) { # print(i) # singlets.subset <- singlets %>% # filter(gene==singlets.celltype$gene[i], first_type==singlets.celltype$first_type[i]) %>% # mutate(gene=factor(gene), first_type=factor(first_type), bead=factor(bead)) # mcast <- with(singlets.subset, sparseMatrix(i=as.numeric(gene), j=as.numeric(bead), x=CAST,dimnames=list(levels(gene),levels(bead)))) # m129 <- with(singlets.subset, sparseMatrix(i=as.numeric(gene), j=as.numeric(bead), x=X129,dimnames=list(levels(gene),levels(bead)))) # covariates <- singlets.subset%>% dplyr::select(bead, X, Y) %>% distinct() # res.subset <- spase(mcast, m129, covariates, cores=1, df=5, min.umi = 100) # singlets.celltype$chisq.p[i] <- res.subset$result$chisq.p # } # saveRDS(singlets.celltype, file='../inst/extdata/ASE_paper_singlets_celltype_results.rds') singlets.celltype <- readRDS('../inst/extdata/ASE_paper_singlets_celltype_results.rds') singlets.celltype$qval <- p.adjust(singlets.celltype$chisq.p, method='BH') singlets.celltype <- singlets.celltype %>% arrange(qval) colnames(singlets.celltype) <- c('Gene', 'Cell type', 'Beads', 'UMIs', 'pval', 'qval') xtable(singlets.celltype %>% filter(qval<0.01),display=c('d','s', 's', 'd', 'd', 'e', 'e'))
# investigate/plot cell type specific gradients df<-5 singlets.subset <- singlets %>% filter(gene=='Tspan7', first_type=='Astrocyte') %>% mutate(gene=factor(gene), first_type=factor(first_type), bead=factor(bead)) mcast <- with(singlets.subset, sparseMatrix(i=as.numeric(gene), j=as.numeric(bead), x=CAST,dimnames=list(levels(gene),levels(bead)))) m129 <- with(singlets.subset, sparseMatrix(i=as.numeric(gene), j=as.numeric(bead), x=X129,dimnames=list(levels(gene),levels(bead)))) covariates <- singlets.subset%>% dplyr::select(bead, X, Y) %>% distinct() res.subset <- spase(mcast, m129, covariates, cores=1, df=df, min.umi = 100) #plotSpase(mcast, m129, covariates, res.subset, coords=predictCoords, crosshairs=F, save=paste0('tspan7-Astrocyte','-df-',df)) plotSpase(mcast, m129, covariates, res.subset, coords=predictCoords, crosshairs=F)
# investigate/plot cell type specific gradients df<-15 singlets.subset <- singlets %>% filter(gene=='Plp1', first_type=='Oligodendrocyte') %>% mutate(gene=factor(gene), first_type=factor(first_type), bead=factor(bead)) mcast <- with(singlets.subset, sparseMatrix(i=as.numeric(gene), j=as.numeric(bead), x=CAST,dimnames=list(levels(gene),levels(bead)))) m129 <- with(singlets.subset, sparseMatrix(i=as.numeric(gene), j=as.numeric(bead), x=X129,dimnames=list(levels(gene),levels(bead)))) covariates <- singlets.subset%>% dplyr::select(bead, X, Y) %>% distinct() res.subset <- spase(mcast, m129, covariates, cores=1, df=df, min.umi = 100) #plotSpase(mcast, m129, covariates, res.subset, coords=predictCoords, crosshairs=F, save=paste0('plp1-oligodendrocyte-df-',df)) plotSpase(mcast, m129, covariates, res.subset, coords=predictCoords, crosshairs=F)
# linearize CA1, CA3, Dentate CA1.linear <- singlets %>% filter(first_type=='CA1') %>% dplyr::select(bead, X, Y) %>% distinct() plot(CA1.linear$X, CA1.linear$Y) # find the start and end using min and max #v = c(5000 - 2000,2000-4000) v = c(max(CA1.linear$X)-min(CA1.linear$X),min(CA1.linear$Y)-max(CA1.linear$Y)) CA1.linear$Z <- CA1.linear$X*v[1] + CA1.linear$Y*v[2] CA1.linear <- CA1.linear %>% arrange(Z) CA1.linear$Z.scaled <- rescale(CA1.linear$Z, to=c(0,5)) # df<-3 singlets.subset <- singlets %>% filter(gene=='Syt11', first_type=='CA1') %>% mutate(gene=factor(gene), first_type=factor(first_type), bead=factor(bead)) mcast <- with(singlets.subset, sparseMatrix(i=as.numeric(gene), j=as.numeric(bead), x=CAST,dimnames=list(levels(gene),levels(bead)))) m129 <- with(singlets.subset, sparseMatrix(i=as.numeric(gene), j=as.numeric(bead), x=X129,dimnames=list(levels(gene),levels(bead)))) covariates <- CA1.linear%>% dplyr::select(bead, Z.scaled) %>% distinct() colnames(covariates)[2] <- 'x1' res.subset <- spase(mcast, m129, covariates, cores=1, df=3, min.umi = 100) sm <- smoothCon(s(x1,k=df,fx=T,bs='tp'),data=covariates)[[1]] smooth.model <- res.subset$fits[[1]] smooth.coef <- coef(smooth.model) Xp <- PredictMat(sm, covariates) covariates$fit <- Xp%*%smooth.coef left_join(singlets.subset, covariates, by=c('bead')) %>% ggplot(aes(x = X, y = Y)) + geom_point(aes(color=expit(fit)))
# redo the CA1 but with linearized singlets.CA1 <- singlets %>% group_by(gene, first_type) %>% summarise(nbeads=n(), numis = sum(CAST+X129)) %>% arrange(desc(nbeads, numis)) %>% filter(nbeads > 100, numis > 100) %>% filter(first_type=='CA1') singlets.CA1$chisq.p <- NA for (i in 1:nrow(singlets.CA1)) { print(i) singlets.subset <- singlets %>% filter(gene==singlets.CA1$gene[i], first_type==singlets.CA1$first_type[i]) %>% mutate(gene=factor(gene), first_type=factor(first_type), bead=factor(bead)) mcast <- with(singlets.subset, sparseMatrix(i=as.numeric(gene), j=as.numeric(bead), x=CAST,dimnames=list(levels(gene),levels(bead)))) m129 <- with(singlets.subset, sparseMatrix(i=as.numeric(gene), j=as.numeric(bead), x=X129,dimnames=list(levels(gene),levels(bead)))) covariates <- CA1.linear%>% dplyr::select(bead, Z.scaled) %>% distinct() colnames(covariates)[2] <- 'x1' res.subset <- spase(mcast, m129, covariates, cores=1, df=3, min.umi = 100) singlets.CA1$chisq.p[i] <- res.subset$result$chisq.p }
# get individual CI results for each cell type using scase # which ones are significant at qval<0.01 signif0.01 <- res10.nocelltype$result %>% arrange(qval) %>% filter(qval<0.01) signif0.01$xchr <- ifelse(signif0.01$gene%in%xchr, T, F) mcast <- with(singlets, sparseMatrix(i = as.numeric(gene), j = as.numeric(bead), x=CAST, dimnames=list(levels(gene),levels(bead)))) m129 <- with(singlets, sparseMatrix(i = as.numeric(gene), j = as.numeric(bead), x=X129, dimnames=list(levels(gene),levels(bead)))) covariates <- singlets%>% dplyr::select(bead, X, Y, first_type) %>% distinct() sccov <- covariates[,c(1,4)] sccov$first_type <- as.factor(sccov$first_type) res_celltype_sc <- scase(mcast, m129, sccov, cores=2, genes = signif0.01$gene)
# for each gene, plot the estimated p, CI, and total UMI res_celltype_sc <- res_celltype_sc %>% mutate(p.low = expit(logit.p-2*logit.p.sd), p.high = expit(logit.p+2*logit.p.sd), p = expit(logit.p)) %>% left_join(mypal, by =c('lvl'='first_type'))
res_celltype_sc %>% ggplot(aes(y = totalUMI)) + geom_point(aes(x = p, color=fill)) + geom_errorbarh(aes(xmin = p.low, xmax=p.high, color=fill)) + scale_color_identity() + geom_vline(xintercept=0.5, lty='dashed', color='grey') + facet_wrap(gene ~ ., scales='free') + scale_x_continuous(limits=c(0,1),breaks=c(0,0.25,0.50,0.75,1)) + theme_bw() + theme(axis.text.x = element_text(angle=45, hjust=1)) + xlab('Estimated p maternal') + ylab('Total UMI') #ggsave('figure4-celltype-cis.png', height=8, width=7)
res_celltype_sc %>% filter(gene %in% c('Nrip3','Ptgds','Sst')) %>% ggplot(aes(y = totalUMI)) + geom_point(aes(x = p, color=fill),size=3) + geom_errorbarh(aes(xmin = p.low, xmax=p.high, color=fill)) + scale_color_identity() + geom_vline(xintercept=0.5, lty='dashed', color='grey') + facet_wrap(gene ~ ., nrow=1) + scale_x_continuous(limits=c(0,1),breaks=c(0,0.25,0.50,0.75,1)) + theme_bw() + theme(axis.text.x = element_text(angle=45, hjust=1))+ xlab('Estimated p maternal') + ylab('Total UMI') #ggsave('figure4-celltype-cis-top.png', height=3, width=5)
# compare hippocampal formation to everything else HPF <- c('CA1', 'CA3', 'Dentate') hpfcov <- sccov hpfcov <- hpfcov %>% mutate(first_type = ifelse(first_type %in% HPF, 'HPF', 'other')) hpfcov$first_type <- as.factor(hpfcov$first_type) res_celltype_hpf <- scase(mcast, m129, hpfcov, cores=2, genes = signif0.01$gene[!signif0.01$xchr])
res_celltype_hpf %>% mutate(p.low = expit(logit.p-2*logit.p.sd), p.high = expit(logit.p+2*logit.p.sd), p = expit(logit.p)) %>% ggplot(aes(y = totalUMI)) + geom_point(aes(x = p, color=lvl)) + geom_errorbarh(aes(xmin = p.low, xmax=p.high, color=lvl)) + geom_vline(xintercept=0.5, lty='dashed', color='grey') + facet_wrap(gene ~ ., scales='free') + scale_x_continuous(limits=c(0,1),breaks=c(0,0.25,0.50,0.75,1)) + theme_bw()
# plotting the non-xchr cell type points with CIs res_celltype_sc %>% ggplot(aes(y = gene)) + geom_errorbarh(aes(xmin = p.low, xmax = p.high, color = fill), position=position_dodge(width=0.9)) + geom_point(aes(x=p, color=fill), position=position_dodge(width=0.9), size=0.5) + geom_vline(xintercept=0.5, lty='dashed') + geom_hline(yintercept=1.5) + geom_hline(yintercept=2.5) + geom_hline(yintercept=3.5) + geom_hline(yintercept=4.5) + geom_hline(yintercept=5.5) + geom_hline(yintercept=6.5) + scale_color_identity() + scale_x_continuous(limits=c(0,1),breaks=c(0,0.25,0.50,0.75,1)) + theme_classic() + theme(axis.text.x = element_text(angle=90, hjust=1)) + ylab('') ggsave(filename='figure4b.png', height=4, width=1.5, units='in')
# plot only oligodendrocytes for Ptgds both.idx <- which(covariates$first_type%in%c('Oligodendrocyte','Endothelial_Tip')) #both.idx <- c(both.idx, grep('Endothelial', covariates$first_type)) both.beads <- covariates$bead[both.idx] y <- mcast['Ptgds',both.beads] total <- y+m129['Ptgds',both.beads] idx <- which(total>0) y<-y[idx]; total<-total[idx]; covari<-covariates[both.idx,] covari <- covari[idx,] covari$shape <- ifelse(covari$first_type=='Oligodendrocyte',21,24) ggplot(cbind(data.frame(y=y, total=total), covari), aes(x=X,y=Y)) + geom_point(aes(fill=y/total,size=total,shape=shape), color='black') + scale_fill_gradient2(name='CAST/total', low='blue', mid='white', high='red', midpoint=0.5, limits=c(0,1)) + scale_shape_identity(labels=c('Oligo', 'Endo')) + scale_size_continuous(limits=c(1,43), breaks=c(1,10,20,30,40)) + ylim(c(0.5,5.5)) + xlim(c(1,6)) + xlab('x1') + ylab('x2') + theme_classic() #ggsave('bottom legend.png', height=3,width=7)
# plot astrocytes for Nrip3 inter.idx <- which(covariates$first_type%in%c('CA1', 'CA3', 'Denate', 'Interneuron','Astrocyte')) inter.beads <- covariates$bead[inter.idx] y <- mcast['Nrip3',inter.beads] total <- y+m129['Nrip3',inter.beads] covari <- covariates[inter.idx,] idx <- which(total>0) y<-y[idx]; total<-total[idx]; covari<-covari[idx,] ggplot(cbind(data.frame(y=y, total=total), covari), aes(x=X,y=Y)) + geom_point(aes(fill=y/total,size=total), shape=21, color='black') + scale_fill_gradient2(name='CAST/total', low='blue', mid='white', high='red', midpoint=0.5, limits=c(0,1)) + scale_size_continuous(limits=c(1,43), breaks=c(1,10,20,30,40)) + ylim(c(0.5,5.5)) + xlim(c(1,6)) + xlab('x1') + ylab('x2') + theme_classic() ggsave('figure4nrip-astrocyte.png', height=3,width=4)
# plot interneurons for Sst inter.idx <- which(grepl('Interneuron', covariates$first_type)) inter.beads <- covariates$bead[inter.idx] y <- mcast['Sst',inter.beads] total <- y+m129['Sst',inter.beads] covari <- covariates[inter.idx,] idx <- which(total>0) y<-y[idx]; total<-total[idx]; covari<-covari[idx,] ggplot(cbind(data.frame(y=y, total=total), covari), aes(x=X,y=Y)) + geom_point(aes(fill=y/total,size=total), shape=21, color='black') + scale_fill_gradient2(name='CAST/total', low='blue', mid='white', high='red', midpoint=0.5, limits=c(0,1)) + scale_size_continuous(limits=c(1,43), breaks=c(1,10,20,30,40)) + ylim(c(0.5,5.5)) + xlim(c(1,6)) + xlab('x1') + ylab('x2') + theme_classic() ggsave('figure4sst-inter.png', height=3,width=4)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.