library(reshape2) library(dtplyr) library(ggplot2) library(plyr) library(GGally) library(igraph) #library(GSEABase) library(DT) library(HurdleNormal) library(Mus.musculus) library(magrittr) library(stringr) ## contains fitting/comparison function source('common.R') knitr::opts_chunk$set(cache=TRUE, autodep=TRUE, echo=FALSE, error=FALSE, warning=FALSE) #Set to false if Tfh data is not present EVAL_TFH <- FALSE
if(!file.exists('alexNetworks_noattributes.rds')){ alex_fits_fl <- do.call(c, alex_fits) saveRDS(alex_fits_fl, 'alexNetworks_noattributes.rds') } rm(alex_fits)
ll <- load('alexNetworks.RData') alex_fits_fl <- do.call(c, alex_fits) ngo_names = names(alex_fits_fl)[names(alex_fits_fl) %like% 'ngo'] for(i in ngo_names){ genNetwork(alex_fits_fl[[i]], 1400, min.components=3, min.degree.label=1, main=i) } degrees <- rbindlist(lapply(alex_fits_fl[ngo_names], function(afit){ gn <- genNetwork(afit, 1400, min.components=0, plot=F) data.table(degree=igraph::degree(gn), gene=names(V(gn))) }), idcol='method') setkey(degrees, method, degree) setorder(degrees, method, -degree)
degrees_sub = degrees[method %in% c('ngoLPS.gaussian', 'ngoLPS.hurdle', 'ngoLPS.logistic')][,Method := (str_replace(method, 'ngoLPS.', '') %>% str_to_title())] topdegree <- degrees_sub[,.SD[1:10,],keyby=list(Method)] topdegrees_all = degrees_sub[topdegree[,gene],,on = 'gene'] ggplot(topdegrees_all, aes(y = reorder(gene, degree, max), x = degree, color = Method))+ geom_point() + ylab('Gene') + theme_minimal() + theme(legend.position = c(.8, .4))
degrees[,rank:=.I, keyby=list(method)] degrees[,dsum:=cumsum(degree), keyby=list(method)] ggplot(degrees, aes(x=rank, y=dsum, color=method)) + geom_line() godb[,gene:=toupper(ALIAS)] merge(godb, topdegree, by='gene')
For mouse, these are derived from the go assocdb database, time stamp 20150919.
ea <- lapply(alex_fits_fl, edgeGoAnno, n=1400, goAlias=godb, goTerm=goTerm, background=nullgo, nulldist=nedge1400perm) ealist <- setNames(lapply(ea, '[[', 1), names(alex_fits_fl)) ealist <- rbindlist(ealist, use.names=TRUE, idcol='L1') ealist[,rank:=NULL] setkey(ealist, L1, phyper) ealist[,rank:=rank(phyper, ties.method='average'), key=L1] setkey(ealist, L1, rank) ealist.sig <- ealist[, { fail <- min(min(which(pperm>.1)-1), .N) print(fail) .SD[seq_len(fail),] }, key=L1] ##ealist[,fdrThres:=rank/.N*.1, keyby=L1] ealist2 <- split(ealist.sig, ealist.sig$L1, drop=TRUE) go_id_term_table <- unique(rbind(ealist.sig[,.(GOID = GOID.i, TERM = TERM.i)], ealist.sig[,.(GOID = GOID.j, TERM = TERM.j)])) write.csv(cbind(go_id_term_table, human_name = NA), file = 'processShalek_go_terms_table.csv', row.names = FALSE)
Write out all terms/GOIDs that are significantly connected
hub_genes = c('MX1', 'CCL17', 'TAX1BP3', 'CCL3', 'MGL2', 'H2-AB1', 'H2-EB1', 'H2-AA', 'FABP5') anno_terms = read.csv('processShalek_go_terms_table2.csv') par(oma=c(0,0,0,0)+.1, mar=c(0,0,3, 0)+.1) shalek_to_plot = c(Hurdle = "ngoLPS.hurdle", Aracne = "ngoLPS.aracne", Gaussian = "ngoLPS.gaussian", 'Gaussian(raw)' = "ngoLPS.gaussianRaw") genesee_graphs = lapply(shalek_to_plot, function(i) plotgenesee(ealist2[[i]], godb, goTerm, network=alex_fits_fl[[i]], data.table(GOID=anno_terms$GOID, category=anno_terms$human_name), additionalGenes=hub_genes))
names(genesee_graphs) = names(shalek_to_plot) V_union = lapply(genesee_graphs, function(g) simplify_components(select = hub_genes, min.components = 5000, g) %>% V() %>% names()) %>% unlist() %>% unique() V_delete = genesee_graphs[[1]] %>% V() %>% names() %>% setdiff(V_union) genesee_simple = lapply(genesee_graphs, function(g) delete.vertices(g, V_delete) %>% igraph::simplify()) degrees = lapply(genesee_simple, degree) %>% do.call(cbind, .) median_degree = apply(degrees, 1, median) lay = layout_with_drl(genesee_simple[[1]], options = drl_defaults$refine) lapply(names(genesee_simple), function(i) plotNetwork(genesee_simple[[i]], layout = lay, main = i))
degreeGoAnno <- function(ginterp, n, godb, termdb, ...){ graph <- getNedgeInterp(ginterp, n) degree <- rowSums(abs(graph)>0) degdf <- data.table(deg=degree, i=seq_along(degree)) alias <- getGraphAlias(graph) degGo <- mergeWithAliasAndGo(degdf, 'i', alias, godb, ...)[N_GOID>5] degGo[,maxDeg:=max(deg), keyby=GOID] degGo[,idx:=.I] glmGo <- degGo[, { if(maxDeg[1]>2){ #bl <- arm::bayesglm(deg ~ I(idx %in% .I), family='quasipoisson', data=degGo) bl <- glm(deg ~ I(idx %in% .I), family='quasipoisson', data=degGo) rate <- coef(bl)[2] se <- sqrt(diag(vcov(bl)))[2] } else{ rate <- 0 se <- Inf } .(rate=rate, se=se, pval=2*pnorm(abs(rate/se), lower.tail=FALSE)) }, keyby=GOID] glmGo.sig <- glmGo[pval<.05/nrow(glmGo)] merge(glmGo.sig, termdb, by='GOID', all.x=TRUE) } #da <- lapply(alex_fits_fl, degreeGoAnno, n=1400, godb=godb, termdb=goTerm) #names(da) <- names(alex_fits_fl) #dalist <- rbindlist(da, use.names=TRUE, idcol='L1') #ggplot(dalist, aes(x=stringr::str_wrap(TERM, 30), y=rate, ymin=rate-se, ymax=rate+se, col=stringr::str_extract(L1, 'hurdle|gaussian|logistic'), shape=stringr::str_extract(L1, 'ngo|reg')))+geom_pointrange() + coord_flip() + theme(legend.position='bottom')
loadLausanne()
dcast(as.data.frame(colData(fl)), PatientID+run~SafeCS+ncells)
library(lme4) concord <- getConcordance(samp10, samp1, groups=c('SafeCS')) ggplot(concord, aes(y=et.ref, x=et.comp))+geom_point()+geom_abline(slope=1, intercept=0) + xlab("log(1 cell average)") + ylab("log(10 cell average)") + theme_bw() + theme(legend.position=c(.8, .2)) + coord_cartesian(xlim=c(5, 23), ylim=c(5, 23))#+geom_text(aes(label=substr(primerid, 1, 2)), size=3) muLS <- lmer(et.ref~et.comp+ (1|primerid), data=concord) samp1HIV <- subset(samp1, SafeCS=='pp' & run=='HIV') samp10HIV <- subset(samp10, SafeCS=='pp' & run=='HIV') frqs <- data.table(freq1= freq(samp1HIV), freq10=freq(samp10HIV)) #lm(log(1.01-freq10) ~ log(1.01-freq1), data=frqs) freqls <- nls(freq10 ~ 1-(1- B*freq1+A)^(10), data=frqs, start=list(B=1, A=0)) summary(freqls) frqs[,resid:=resid(freqls, 'pearson')] frqs[,fitted:=predict(freqls)] frqs[,expected:= 1-(1- freq1)^(10)] ggplot(frqs, aes(x=freq1, y=freq10)) + geom_point()+ geom_line(aes(y=fitted, color='nls-fitted')) + geom_line(aes(y=expected, color='plugin')) + theme_bw() + xlab('Frequency 1 Cell')+ ylab('Frequency 10 Cell') + theme(legend.position=c(.8, .2))
opts_chunk$set(dev='CairoPNG')#, dev.args=list(useDingbats = FALSE)) fl.sub <- samp1[c('CXCR4','CCR7', 'IL21R'),] ggpairs(as.data.frame(t(assay(fl.sub))), alpha=.5, upper=list(continuous='blank'))
## manyfits: fit on all data, using 10 cell too ## mf2: concatenation of manyfits ## outFlat: matricized mf2 ## manystab ll <- load('lausanneStability.RData') names(manystab) = paste(fsubset$model, fsubset$subset, sep = '.') i <- "patient.Healthy" # set the names of the internal methods manystab = lapply(manystab, setNames, c('hurdle', 'guassian', 'logistic', 'gaussian10')) percentiles = c(.01, .5, .6, .7, .8, .9) stability_frame = as.data.frame(lapply(unlist(manystab, recursive=F), function(x) sapply(percentiles, function(p) sum(x>p)))) stability_frame = cbind(percentiles, stability_frame)
At 60% stability, we have networks ranging from 11-32 edges (edges are double counted in the symmetrized matrix) above.
renameDT <- data.table(method=c('hurdle', 'gaussian', 'logistic', 'aracne', 'gaussian10', 'gaussianRaw' ), Method=c('Hurdle\n(Anisometric)', 'Gaussian', 'Logistic', 'Aracne', 'Gaussian(10)', 'Gaussian(raw)')) renameDT[,Method:=factor(Method)] get_BIC = function(fit_list){ BIC_all <- rbindlist(lapply(fit_list, function(x){ data.frame(edges=x$trueEdges, BIC=x$BIC) }), idcol='method') BIC_all[,BIC0 := (BIC-min(BIC)),keyby = 'method'] BIC_all[,.(BICedges = edges[which.min(BIC)], maxEdges=max(edges)),keyby=list(method)] BIC_all } BIC_alex = get_BIC(alex_fits_fl)[method %like% 'ngoLPS' & !(method %like% 'aracne')][,method := stringr::str_extract(method, '[^.]+$')] %>% merge(renameDT, by = 'method') bic_alex_plot = ggplot(BIC_alex, aes(x=edges, y=BIC0, color=Method))+geom_path() + theme_minimal() + scale_color_discrete(drop = FALSE) + facet_wrap(~Method, scales ='free_y', ncol = 1) + theme(legend.position = 'none', axis.text.x = element_text(angle = 45)) + ylab('BIC Score') bic_alex_plot+ facet_wrap(~Method, scales ='free_y', ncol = 2)
BIC_tfh = get_BIC(mf2[,'patient:Healthy'])[method != 'aracne'] %>% merge(renameDT, by = 'method') bic_tfh_plot = bic_alex_plot %+% BIC_tfh cowplot::plot_grid(bic_alex_plot , bic_tfh_plot + coord_cartesian(xlim=c(0, 200), ylim=c(0, 750))) knitr::kable(BIC_tfh[,.(BICedges = edges[which.min(BIC)], maxEdges=max(edges)),keyby=list(Method)])
knitr::kable(BIC_alex[,.(BICedges = edges[which.min(BIC)], maxEdges=max(edges)),keyby=list(method)])
matplotNetwork(mf2[,'ngo:Healthy'], 66) matplotNetwork(mf2[,'patient:Healthy'], 48) matplotNetwork(mf2[,'ngo:HIV'], 66)
matplotNetwork(c(mf2[,'patient:HIV'], mf2['hurdle', 'patient:Healthy']), matplot=FALSE, colorbar=F)
matplotNetwork(mf2[,'patient:Healthy'], nedge=48, matplot=FALSE, colorbar=F) gn <- genNetwork(mf2[['gaussian', 'patient:Healthy']], nedge=44, plot=F)
## comp <- 'patient:HIV' ## outmat <- do.call(cBind, outFlat[names(outFlat) %like% comp]) ## ham <- crossprod(abs(outmat)>0, abs(outmat)<=0)+crossprod(abs(outmat)<=0, abs(outmat)>0) ## heat2(ham, main=paste(names(outFlat)[names(outFlat) %like% comp], collapse=',')) pairwiseHamming <- function(outli){ lout <- length(outli) pairs <- vector('list') for(i in seq_along(outli)){ for(j in seq_along(outli)){ hamming <- (abs(outli[[i]])>0) *(abs(outli[[j]])<=0) + (abs(outli[[i]])<=0) *(abs(outli[[j]])>0) pairs[[i*lout+j]] <- data.table(L1=names(outli)[i], L2=names(outli)[j], hamming=Matrix::colSums(hamming, na.rm=TRUE), edges=Matrix::colSums(abs(outli[[j]])>0)+Matrix::colSums(abs(outli[[i]])>0)) } } pairs } pairwiseStab <- function(outli){ lout <- length(outli) pairs <- vector('list') for(i in seq_along(outli)){ for(j in seq_along(outli)){ jacc <- (outli[[i]] - outli[[j]]) pairs[[i*lout+j]] <- data.table(L1=names(outli)[i], L2=names(outli)[j], jaccIncl=sum(jacc>.8, na.rm=TRUE), jaccDiff=sum(jacc, na.rm=TRUE)) } } pairs } subsets <- list('patient:Healthy', 'ngo:Healthy') pwh <- lapply(subsets, function(x){ rbindlist(pairwiseHamming(outFlat[names(outFlat) %like% x])) }) ggplot(pwh[[1]][L1 != L2,], aes(x=edges, y=hamming))+geom_line(aes(col=L1)) + geom_abline(slope=2, color='black') + facet_grid(L2~ .) pwh <- rbindlist(pwh) pwh400 <- pwh[edges<60 & edges > 40 & L1 %like% 'patient:Healthy', list(dissimilarity=mean(hamming/edges)), keyby=list(L1, L2)] pwh400[dissimilarity<.001, dissimilarity:=NA_real_] ggplot(pwh400, aes(x=L1, y=L2, fill=dissimilarity))+geom_tile()
Dissimilarities
library(xtable) print(xtable(dcast(pwh400, L1 ~L2)), file = 'processShalekMAITNetworks_files/figure-html/hamming-dist.tex', include.rownames = FALSE)
genes <- c('Lck', 'NFATC1', 'ANP32B', 'IL21', 'ITGAL') ee <- exprs(samp1HIV[genes,]) ggpairs(as.data.frame(ee), lower=list(continuous='hurdle'), upper=list(continuous='hmosaic'))+theme_bw() samp1HltPP <- subset(samp1, run=='Healthy' & SafeCS%in% c('nn', 'pp')) samp10HltPP <- subset(samp10, run=='Healthy' & SafeCS %in% c('pp', 'nn')) samp1HltP <- subset(samp1, run=='Healthy' & SafeCS=='CXCR5p')[c('BcL6', 'CXCR5', 'PDCD1'),] #ggpairs(exprs(samp1HltPP[,c('BcL6', 'PDCD1', 'CXCR5')]), lower=list(continuous='hurdle'), upper=list(continuous='hmosaic'))+theme_bw() #ggpairs(exprs(samp10HltPP), lower=list(continuous='hurdle'), upper=list(continuous='hmosaic'))+theme_bw() ggpairs(as.data.frame(exprs(samp1HltP)), lower=list(continuous='hurdle'), upper=list(continuous='hmosaic'))+theme_bw() mbcl6 <- glm(I(BcL6>0) ~ I(CASP1>0) + I(NFATC1>0) +I(Blimp1>0) + I(CD154>0), data=data.frame(exprs(samp1HltPP), colData(samp1HltPP)), family='binomial') mil21 <- glm(I(IL21>0) ~ I(CASP1>0) + I(NFATC1>0) + I(BcL6>0), data=data.frame(exprs(samp1HltPP)), family='binomial') cd <- colData(samp1HltPP) cd$PatientID <- factor(cd$PatientID) contrasts(cd$PatientID) <- 'contr.sum' cd$SafeCS <- relevel(factor(cd$SafeCS), 'nn') cd -> colData(samp1HltPP) zz1 <- zlm(~PatientID + SafeCS+ngeneson, samp1HltPP, method='bayesglm', ebayes=T) zzR <- zlm(~PatientID, samp1HltPP, hook=MAST:::residualsHook, method='bayesglm', ebayes=T) zs <- summary(zz1, doLRT='SafeCSpp') sigGenes <- zs$datatable[component=='H' & contrast=='SafeCSpp' & `Pr(>Chisq)`<.001/80] setorder(sigGenes, `Pr(>Chisq)`) sigPrimerid <- setdiff(as.character(sigGenes[1:5,primerid]), '')#c('CXCR5', 'PDCD1')) resids <- cbind(as(samp1HltPP[sigPrimerid,], 'data.table'), resid=unlist(zzR@hookOut[sigPrimerid])) ggplot(resids, aes(y=value, x=primerid, color=SafeCS))+geom_violin(scale='width') + geom_jitter(position=position_jitterdodge(jitter.width=.5)) + theme_bw() + scale_color_brewer('Cell subtype', labels=c('CXCR5-PD1-', 'CXCR5+PD1+'), type='qual') + xlab(NULL) + ylab('Log expression') resid_mat <- dcast.data.table(resids, wellKey +CellSubset ~ primerid, value.var='value') ggp <- ggpairs(resid_mat, columns=3:6, mapping=aes(fill=CellSubset, color=CellSubset), lower=list(continuous='smooth')) + theme_bw() scales <- list(scale_fill_brewer('Cell subtype', type='qual'), scale_color_brewer('Cell subtype', type='qual')) for (i in seq_len(ggp$nrow)){ for(j in seq_len(ggp$ncol)){ ggp[i,j] <- ggp[i,j] + scales } } ggp ggpairs(resid_mat[CellSubset=='CXCR5+PD1+'], columns=3:6, lower=list(continuous='smooth')) + theme_bw() ggpairs(resid_mat, columns=3:6, lower=list(continuous='hurdle'), upper=list(continuous='hmosaic'), mapping=aes(color=CellSubset)) + theme_bw() cor(resid_mat[,3:6,with=F])
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.