In githubz0r/SecretUtils: Some utils for Conos

load packages and data

library(conos)
library(parallel)
library(ggplot2)
library(Matrix)
library(data.table)
library(pagoda2)
library(cowplot)
library(dplyr)
library(abind)
library(tidyr)
require(Rtsne)
source('/home/larsc/SecretUtils/R/asdf.R')

con <- readRDS(file.path('/home/larsc/data/10x_preproced_graphed.rds'))
#con <- Conos$new(con)
annot <- readRDS(file.path('/home/demharters/R/projects/UPF9_14_17_19_22_23_24_32_33/metadata_10x_final.rds'))

# make annotation as peter
typef_eps <- setNames(annot$subtype, rownames(annot)) %>% as.factor()

Initiate the analysis. We start with some annotation changing and palette thingies for compatibility

  fraction_palette_eps <- c(epilepsy='hotpink', healthy='seagreen') 

  type_palette_eps <- setNames(rainbow(length(levels(typef_eps))),levels(typef_eps))


  samplef_eps <- lapply(con$samples,function(x) rownames(x$counts))
  samplef_eps <- as.factor(setNames(rep(names(samplef_eps),unlist(lapply(samplef_eps,length))),unlist(samplef_eps))) #sample factor
  samplef_eps2 <- setNames(annot$sample, rownames(annot)) %>% as.factor # same thing
  some_fraction <- rep('Brain', dim(annot)[1])
  fractionf_eps <- setNames(some_fraction, rownames(annot)) %>% as.factor # tissue type factor

Comment 5 fractional changes, we have no fractions so we use disease instead

  # how about fractional changes?
  InitFractionDF <- function(type.factor, sample.factor){ # at least save a couple of lines
    xt <- table(type.factor, sample.factor)
    xf <- t(t(xt)/colSums(xt)) # this is what we need to change for different background
    df <- melt(xf)
  }

  dfe <- InitFractionDF(typef_eps, samplef_eps)
  dfe$patient <- dfe$samplef.eps # since patients and samples are the same for our data
  dfe$fraction <- NA
  dfe[grep('c_',dfe$sample.factor),]$fraction <- 'healthy'
  dfe[grep('ep_',dfe$sample.factor),]$fraction <- 'epilepsy'
  dfe <- dfe %>% arrange(type.factor,fraction) # 140 vals

  PlotFractional <- function(fractional.df, fraction.palette){ # assumes colnames are the same
    p <- ggplot(na.omit(fractional.df),aes(x=type.factor,y=value,dodge=fraction,fill=fraction))+geom_boxplot(notch=FALSE,outlier.shape=NA) +scale_fill_manual(values=fraction.palette) +  geom_point(position = position_jitterdodge(jitter.width=0.1),color=adjustcolor(1,alpha=0.3),aes(pch=fraction),size=0.8)+ theme(axis.text.x = element_text(angle = 90, hjust = 1), axis.text.y = element_text(angle = 90, hjust = 0.5))  +xlab("") +ylab("fraction of total cells")+ theme(legend.position="top")

  }
  fraction_plot <- PlotFractional(dfe, fraction_palette_eps)
  fraction_plot

Comment 2 Distance matrix tsne plot ctdm.eps should contain weighted js-distances between samples for each subtype or something, not exactly sure how all this works together. Somehow the values are averaged to produce the values (3 dimensional array -> 2 dimensional)

  cm_eps <- con$getClusterCountMatrix(groups=typef_eps)
  cct_eps <- table(typef_eps,samplef_eps)

  # without tumor signature correction
  ctdm.eps <- mclapply(setNames(colnames(cm_eps[[1]]), colnames(cm_eps[[1]])),function(ct) {
    tcm <- do.call(rbind,lapply(cm_eps,function(x) x[,ct]))
    tcm <- t(tcm/pmax(1,rowSums(tcm)))
    tcd <- pagoda2:::jsDist(tcm); dimnames(tcd) <- list(colnames(tcm),colnames(tcm)); # calls a c function
    # calculate how many cells there are
    attr(tcd,'cc') <- cct_eps[ct,colnames(tcm)]
    tcd
  },mc.cores=1)

  x <- abind(lapply(ctdm.eps,function(x) { # comment 1, kinda
    nc <- attr(x,'cc');
    #wm <- (outer(nc,nc,FUN='pmin'))
    wm <- sqrt(outer(nc,nc,FUN='pmin'))
    return( x*wm )
  }),along=3)
  y <- abind(lapply(ctdm.eps,function(x) {
    nc <- attr(x,'cc');
    sqrt(outer(nc,nc,FUN='pmin'))
  }),along=3)
  xd.nc <- apply(x,c(1,2),sum)/apply(y,c(1,2),sum) # first weighting by min count then dividing by all the values to 're-normalize' I guess

  xde <- Rtsne(xd.nc,is_distance=TRUE, perplexity=2,max_iter=1e3)$Y
  df <- data.frame(xde); rownames(df) <- rownames(xd.nc); colnames(df) <- c("x","y");
  df$fraction <- NA
  df[grep('c_',df %>% rownames),]$fraction <- 'healthy'
  df[grep('ep_',df %>% rownames),]$fraction <- 'epilepsy' # need better way of doing this

  df$patient <- rownames(df)
  df$ncells <- colSums(cct_eps)[rownames(df)]

  ts.sample.tsne.nc <- ggplot(df,aes(x,y,color=patient,shape=fraction,size=log10(ncells))) + geom_point() + theme_bw() + xlab("") + ylab("") + theme(axis.title=element_blank(),  axis.text=element_blank(), axis.ticks=element_blank()) + guides(color=guide_legend(ncol=2));
  ts.sample.tsne.nc # why are we plotting tsne of js distances?

Comment 4, kind of. Distribution of distances per cell type, supposed to be inter-patient for a specific fraction, but we don't have that so we can try healthy vs eps instead or something

min_cells=0
InterPatDF <- function(cell.type, min.cells=0, between.conditions=TRUE) {
    x <- ctdm.eps[[cell.type]] # jsdists for subtype xn
    nc <- attr(x,'cc'); # count of cells
    wm <- outer(nc,nc,FUN='pmin') # min cells for the comparison

    x[upper.tri(x)] <- NA; diag(x) <- NA;
    wm[upper.tri(wm)] <- NA; diag(wm) <- NA;
    df2 <- melt(x);
    df2$ncells <- melt(wm)$value
    df2 <- na.omit(df2)
    df2 <- df2[df2$ncells>=min.cells,];

    df2$patient1 <- df2$Var1
    df2$patient2 <- df2$Var2
    inds.c <- grep('c_',df2$Var1)
    inds.e <- grep('ep_',df2$Var2)
    if (between.conditions){
      inds.mixed <- intersect(inds.c, inds.e)
    } else {
      inds.mixed <- setdiff(union(inds.c, inds.e), intersect(inds.c, inds.e))
    }
    df2 <- df2[inds.mixed,]
    df2$cell <- cell.type
    df2
}
xl <- do.call(rbind,lapply(names(ctdm.eps), InterPatDF, min_cells, between.conditions=TRUE))
xl$comparison <- paste(xl$patient1, xl$patient2, sep='.')
ggplot(na.omit(xl),aes(x=cell,y=value))+geom_boxplot(notch=FALSE)+geom_jitter(aes(col=comparison))+theme(axis.text.x = element_text(angle = 90, hjust = 1), axis.text.y = element_text(angle = 90, hjust = 0.5))  +xlab("") +ylab("inter-patient expression distance")+ theme(legend.position="top")