load packages and data
library(conos) library(parallel) library(ggplot2) library(Matrix) library(data.table) library(pagoda2) library(cowplot) library(dplyr) library(abind) library(tidyr) require(Rtsne) source('/home/larsc/SecretUtils/R/asdf.R') con <- readRDS(file.path('/home/larsc/data/10x_preproced_graphed.rds')) #con <- Conos$new(con) annot <- readRDS(file.path('/home/demharters/R/projects/UPF9_14_17_19_22_23_24_32_33/metadata_10x_final.rds')) # make annotation as peter typef_eps <- setNames(annot$subtype, rownames(annot)) %>% as.factor()
Initiate the analysis. We start with some annotation changing and palette thingies for compatibility
fraction_palette_eps <- c(epilepsy='hotpink', healthy='seagreen') type_palette_eps <- setNames(rainbow(length(levels(typef_eps))),levels(typef_eps)) samplef_eps <- lapply(con$samples,function(x) rownames(x$counts)) samplef_eps <- as.factor(setNames(rep(names(samplef_eps),unlist(lapply(samplef_eps,length))),unlist(samplef_eps))) #sample factor samplef_eps2 <- setNames(annot$sample, rownames(annot)) %>% as.factor # same thing some_fraction <- rep('Brain', dim(annot)[1]) fractionf_eps <- setNames(some_fraction, rownames(annot)) %>% as.factor # tissue type factor
Comment 5 fractional changes, we have no fractions so we use disease instead
# how about fractional changes? InitFractionDF <- function(type.factor, sample.factor){ # at least save a couple of lines xt <- table(type.factor, sample.factor) xf <- t(t(xt)/colSums(xt)) # this is what we need to change for different background df <- melt(xf) } dfe <- InitFractionDF(typef_eps, samplef_eps) dfe$patient <- dfe$samplef.eps # since patients and samples are the same for our data dfe$fraction <- NA dfe[grep('c_',dfe$sample.factor),]$fraction <- 'healthy' dfe[grep('ep_',dfe$sample.factor),]$fraction <- 'epilepsy' dfe <- dfe %>% arrange(type.factor,fraction) # 140 vals PlotFractional <- function(fractional.df, fraction.palette){ # assumes colnames are the same p <- ggplot(na.omit(fractional.df),aes(x=type.factor,y=value,dodge=fraction,fill=fraction))+geom_boxplot(notch=FALSE,outlier.shape=NA) +scale_fill_manual(values=fraction.palette) + geom_point(position = position_jitterdodge(jitter.width=0.1),color=adjustcolor(1,alpha=0.3),aes(pch=fraction),size=0.8)+ theme(axis.text.x = element_text(angle = 90, hjust = 1), axis.text.y = element_text(angle = 90, hjust = 0.5)) +xlab("") +ylab("fraction of total cells")+ theme(legend.position="top") } fraction_plot <- PlotFractional(dfe, fraction_palette_eps) fraction_plot
Comment 2 Distance matrix tsne plot ctdm.eps should contain weighted js-distances between samples for each subtype or something, not exactly sure how all this works together. Somehow the values are averaged to produce the values (3 dimensional array -> 2 dimensional)
cm_eps <- con$getClusterCountMatrix(groups=typef_eps) cct_eps <- table(typef_eps,samplef_eps) # without tumor signature correction ctdm.eps <- mclapply(setNames(colnames(cm_eps[[1]]), colnames(cm_eps[[1]])),function(ct) { tcm <- do.call(rbind,lapply(cm_eps,function(x) x[,ct])) tcm <- t(tcm/pmax(1,rowSums(tcm))) tcd <- pagoda2:::jsDist(tcm); dimnames(tcd) <- list(colnames(tcm),colnames(tcm)); # calls a c function # calculate how many cells there are attr(tcd,'cc') <- cct_eps[ct,colnames(tcm)] tcd },mc.cores=1) x <- abind(lapply(ctdm.eps,function(x) { # comment 1, kinda nc <- attr(x,'cc'); #wm <- (outer(nc,nc,FUN='pmin')) wm <- sqrt(outer(nc,nc,FUN='pmin')) return( x*wm ) }),along=3) y <- abind(lapply(ctdm.eps,function(x) { nc <- attr(x,'cc'); sqrt(outer(nc,nc,FUN='pmin')) }),along=3) xd.nc <- apply(x,c(1,2),sum)/apply(y,c(1,2),sum) # first weighting by min count then dividing by all the values to 're-normalize' I guess xde <- Rtsne(xd.nc,is_distance=TRUE, perplexity=2,max_iter=1e3)$Y df <- data.frame(xde); rownames(df) <- rownames(xd.nc); colnames(df) <- c("x","y"); df$fraction <- NA df[grep('c_',df %>% rownames),]$fraction <- 'healthy' df[grep('ep_',df %>% rownames),]$fraction <- 'epilepsy' # need better way of doing this df$patient <- rownames(df) df$ncells <- colSums(cct_eps)[rownames(df)] ts.sample.tsne.nc <- ggplot(df,aes(x,y,color=patient,shape=fraction,size=log10(ncells))) + geom_point() + theme_bw() + xlab("") + ylab("") + theme(axis.title=element_blank(), axis.text=element_blank(), axis.ticks=element_blank()) + guides(color=guide_legend(ncol=2)); ts.sample.tsne.nc # why are we plotting tsne of js distances?
Comment 4, kind of. Distribution of distances per cell type, supposed to be inter-patient for a specific fraction, but we don't have that so we can try healthy vs eps instead or something
min_cells=0 InterPatDF <- function(cell.type, min.cells=0, between.conditions=TRUE) { x <- ctdm.eps[[cell.type]] # jsdists for subtype xn nc <- attr(x,'cc'); # count of cells wm <- outer(nc,nc,FUN='pmin') # min cells for the comparison x[upper.tri(x)] <- NA; diag(x) <- NA; wm[upper.tri(wm)] <- NA; diag(wm) <- NA; df2 <- melt(x); df2$ncells <- melt(wm)$value df2 <- na.omit(df2) df2 <- df2[df2$ncells>=min.cells,]; df2$patient1 <- df2$Var1 df2$patient2 <- df2$Var2 inds.c <- grep('c_',df2$Var1) inds.e <- grep('ep_',df2$Var2) if (between.conditions){ inds.mixed <- intersect(inds.c, inds.e) } else { inds.mixed <- setdiff(union(inds.c, inds.e), intersect(inds.c, inds.e)) } df2 <- df2[inds.mixed,] df2$cell <- cell.type df2 } xl <- do.call(rbind,lapply(names(ctdm.eps), InterPatDF, min_cells, between.conditions=TRUE)) xl$comparison <- paste(xl$patient1, xl$patient2, sep='.') ggplot(na.omit(xl),aes(x=cell,y=value))+geom_boxplot(notch=FALSE)+geom_jitter(aes(col=comparison))+theme(axis.text.x = element_text(angle = 90, hjust = 1), axis.text.y = element_text(angle = 90, hjust = 0.5)) +xlab("") +ylab("inter-patient expression distance")+ theme(legend.position="top")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.