Process Fitted networks to MAIT and Shalek data sets

library(reshape2)
library(dtplyr)
library(ggplot2)
library(plyr)
library(GGally)
library(igraph)
#library(GSEABase)
library(DT)
library(HurdleNormal)
library(Mus.musculus)
library(magrittr)
library(stringr)
## contains fitting/comparison function
source('common.R') 
knitr::opts_chunk$set(cache=TRUE, autodep=TRUE, echo=FALSE, error=FALSE, warning=FALSE)

#Set to false if Tfh data is not present
EVAL_TFH <- FALSE
if(!file.exists('alexNetworks_noattributes.rds')){
    alex_fits_fl <- do.call(c, alex_fits)
    saveRDS(alex_fits_fl, 'alexNetworks_noattributes.rds')
}
rm(alex_fits)
ll <- load('alexNetworks.RData')
alex_fits_fl <- do.call(c, alex_fits)

ngo_names = names(alex_fits_fl)[names(alex_fits_fl) %like% 'ngo']

for(i in ngo_names){
    genNetwork(alex_fits_fl[[i]], 1400, min.components=3, min.degree.label=1, main=i)
} 


degrees <- rbindlist(lapply(alex_fits_fl[ngo_names], function(afit){
    gn <- genNetwork(afit, 1400, min.components=0, plot=F)
    data.table(degree=igraph::degree(gn), gene=names(V(gn)))
}), idcol='method')

setkey(degrees, method, degree)
setorder(degrees, method, -degree)
degrees_sub = degrees[method %in% c('ngoLPS.gaussian', 'ngoLPS.hurdle', 'ngoLPS.logistic')][,Method := (str_replace(method, 'ngoLPS.', '') %>% str_to_title())]
topdegree <- degrees_sub[,.SD[1:10,],keyby=list(Method)]
topdegrees_all = degrees_sub[topdegree[,gene],,on = 'gene']
ggplot(topdegrees_all, aes(y = reorder(gene, degree, max), x = degree, color = Method))+ geom_point() + ylab('Gene') + theme_minimal() + theme(legend.position = c(.8, .4))
degrees[,rank:=.I, keyby=list(method)]
degrees[,dsum:=cumsum(degree), keyby=list(method)]
ggplot(degrees, aes(x=rank, y=dsum, color=method)) + geom_line()
godb[,gene:=toupper(ALIAS)]
merge(godb, topdegree, by='gene')

For mouse, these are derived from the go assocdb database, time stamp 20150919.

ea <- lapply(alex_fits_fl, edgeGoAnno, n=1400, goAlias=godb, goTerm=goTerm, background=nullgo, nulldist=nedge1400perm)
ealist <- setNames(lapply(ea, '[[', 1), names(alex_fits_fl))
ealist <- rbindlist(ealist, use.names=TRUE, idcol='L1')
ealist[,rank:=NULL]
setkey(ealist, L1, phyper)
ealist[,rank:=rank(phyper, ties.method='average'), key=L1]
setkey(ealist, L1, rank) 

ealist.sig <- ealist[, {
    fail <- min(min(which(pperm>.1)-1), .N)
    print(fail)
    .SD[seq_len(fail),]
}, key=L1]

##ealist[,fdrThres:=rank/.N*.1, keyby=L1]
ealist2 <- split(ealist.sig, ealist.sig$L1, drop=TRUE)

go_id_term_table <- unique(rbind(ealist.sig[,.(GOID = GOID.i, TERM  = TERM.i)], ealist.sig[,.(GOID = GOID.j, TERM  = TERM.j)]))
write.csv(cbind(go_id_term_table, human_name = NA), file = 'processShalek_go_terms_table.csv', row.names = FALSE)

Write out all terms/GOIDs that are significantly connected

hub_genes = c('MX1', 'CCL17', 'TAX1BP3', 'CCL3', 'MGL2', 'H2-AB1', 'H2-EB1', 'H2-AA', 'FABP5')
anno_terms = read.csv('processShalek_go_terms_table2.csv')
par(oma=c(0,0,0,0)+.1, mar=c(0,0,3, 0)+.1)
shalek_to_plot = c(Hurdle = "ngoLPS.hurdle", Aracne = "ngoLPS.aracne",      Gaussian = "ngoLPS.gaussian",    'Gaussian(raw)' = "ngoLPS.gaussianRaw")
genesee_graphs = lapply(shalek_to_plot, function(i) plotgenesee(ealist2[[i]], godb, goTerm, network=alex_fits_fl[[i]], data.table(GOID=anno_terms$GOID, category=anno_terms$human_name), additionalGenes=hub_genes))
names(genesee_graphs) = names(shalek_to_plot)
V_union = lapply(genesee_graphs, function(g) simplify_components(select = hub_genes, min.components = 5000, g) %>% V() %>% names()) %>% unlist() %>% unique()
V_delete = genesee_graphs[[1]] %>% V() %>% names() %>% setdiff(V_union)
genesee_simple = lapply(genesee_graphs, function(g) delete.vertices(g, V_delete) %>% igraph::simplify())
degrees = lapply(genesee_simple, degree) %>% do.call(cbind, .)
median_degree = apply(degrees, 1, median)
lay = layout_with_drl(genesee_simple[[1]], options = drl_defaults$refine)
lapply(names(genesee_simple), function(i) plotNetwork(genesee_simple[[i]], layout = lay, main = i))
degreeGoAnno <- function(ginterp, n, godb, termdb, ...){
    graph <- getNedgeInterp(ginterp, n)
    degree <- rowSums(abs(graph)>0)
    degdf <- data.table(deg=degree, i=seq_along(degree))
    alias <- getGraphAlias(graph)
    degGo <- mergeWithAliasAndGo(degdf, 'i', alias, godb, ...)[N_GOID>5]
    degGo[,maxDeg:=max(deg), keyby=GOID]
    degGo[,idx:=.I]
    glmGo <- degGo[, {
        if(maxDeg[1]>2){
            #bl <- arm::bayesglm(deg ~ I(idx %in% .I), family='quasipoisson', data=degGo)
            bl <- glm(deg ~ I(idx %in% .I), family='quasipoisson', data=degGo)
            rate <- coef(bl)[2]
            se <- sqrt(diag(vcov(bl)))[2]
        } else{
            rate <- 0
            se <- Inf
        }
        .(rate=rate, se=se, pval=2*pnorm(abs(rate/se), lower.tail=FALSE))
    }, keyby=GOID]
    glmGo.sig <- glmGo[pval<.05/nrow(glmGo)]
    merge(glmGo.sig, termdb, by='GOID', all.x=TRUE)
}

#da <- lapply(alex_fits_fl, degreeGoAnno, n=1400, godb=godb, termdb=goTerm)
#names(da) <-  names(alex_fits_fl)
#dalist <- rbindlist(da, use.names=TRUE, idcol='L1')

#ggplot(dalist, aes(x=stringr::str_wrap(TERM, 30), y=rate, ymin=rate-se, ymax=rate+se, col=stringr::str_extract(L1, 'hurdle|gaussian|logistic'), shape=stringr::str_extract(L1, 'ngo|reg')))+geom_pointrange() + coord_flip() + theme(legend.position='bottom')

TFH data from Lausanne

loadLausanne()
dcast(as.data.frame(colData(fl)), PatientID+run~SafeCS+ncells)
library(lme4)
concord <- getConcordance(samp10, samp1, groups=c('SafeCS'))
ggplot(concord, aes(y=et.ref, x=et.comp))+geom_point()+geom_abline(slope=1, intercept=0) + xlab("log(1 cell average)") + ylab("log(10 cell average)") + theme_bw() + theme(legend.position=c(.8, .2)) + coord_cartesian(xlim=c(5, 23), ylim=c(5, 23))#+geom_text(aes(label=substr(primerid, 1, 2)), size=3)

muLS <- lmer(et.ref~et.comp+ (1|primerid), data=concord)

samp1HIV <- subset(samp1, SafeCS=='pp' & run=='HIV')
samp10HIV <- subset(samp10, SafeCS=='pp' & run=='HIV')
frqs <- data.table(freq1= freq(samp1HIV), freq10=freq(samp10HIV))

#lm(log(1.01-freq10) ~ log(1.01-freq1), data=frqs)

freqls <- nls(freq10 ~ 1-(1- B*freq1+A)^(10), data=frqs, start=list(B=1, A=0))

summary(freqls)

frqs[,resid:=resid(freqls, 'pearson')]
frqs[,fitted:=predict(freqls)]
frqs[,expected:= 1-(1- freq1)^(10)]

ggplot(frqs, aes(x=freq1, y=freq10)) + geom_point()+ geom_line(aes(y=fitted, color='nls-fitted')) + geom_line(aes(y=expected, color='plugin')) + theme_bw() + xlab('Frequency 1 Cell')+ ylab('Frequency 10 Cell') + theme(legend.position=c(.8, .2))
opts_chunk$set(dev='CairoPNG')#, dev.args=list(useDingbats = FALSE))
fl.sub <- samp1[c('CXCR4','CCR7', 'IL21R'),]
ggpairs(as.data.frame(t(assay(fl.sub))), alpha=.5, upper=list(continuous='blank'))
## manyfits: fit on all data, using 10 cell too
## mf2: concatenation of manyfits
## outFlat: matricized mf2
## manystab
ll <- load('lausanneStability.RData') 
names(manystab) = paste(fsubset$model, fsubset$subset, sep = '.')
i <- "patient.Healthy"

# set the names of the internal methods
manystab = lapply(manystab, setNames, c('hurdle', 'guassian', 'logistic', 'gaussian10'))

percentiles = c(.01, .5, .6, .7, .8, .9)
stability_frame = as.data.frame(lapply(unlist(manystab, recursive=F), function(x) sapply(percentiles, function(p) sum(x>p))))
stability_frame = cbind(percentiles, stability_frame)

At 60% stability, we have networks ranging from 11-32 edges (edges are double counted in the symmetrized matrix) above.

BIC Comparison

renameDT <- data.table(method=c('hurdle', 
                            'gaussian', 
                            'logistic',
                            'aracne',
                            'gaussian10',
                            'gaussianRaw'
                            ), 
                       Method=c('Hurdle\n(Anisometric)',
                                'Gaussian',
                                'Logistic',
                            'Aracne',
                            'Gaussian(10)',
                            'Gaussian(raw)'))
renameDT[,Method:=factor(Method)]

get_BIC = function(fit_list){
   BIC_all <-  rbindlist(lapply(fit_list, function(x){
           data.frame(edges=x$trueEdges, BIC=x$BIC)
}), idcol='method')
    BIC_all[,BIC0 := (BIC-min(BIC)),keyby = 'method']
    BIC_all[,.(BICedges = edges[which.min(BIC)], maxEdges=max(edges)),keyby=list(method)]
    BIC_all
}
BIC_alex = get_BIC(alex_fits_fl)[method %like% 'ngoLPS' & !(method %like% 'aracne')][,method := stringr::str_extract(method, '[^.]+$')] %>% merge(renameDT, by = 'method')


bic_alex_plot = ggplot(BIC_alex, aes(x=edges, y=BIC0, color=Method))+geom_path() + theme_minimal() + scale_color_discrete(drop = FALSE) + facet_wrap(~Method, scales ='free_y', ncol = 1) + theme(legend.position = 'none', axis.text.x = element_text(angle = 45)) + ylab('BIC Score')
bic_alex_plot+ facet_wrap(~Method, scales ='free_y', ncol = 2)
BIC_tfh = get_BIC(mf2[,'patient:Healthy'])[method != 'aracne'] %>% merge(renameDT, by = 'method')
bic_tfh_plot = bic_alex_plot %+% BIC_tfh 
cowplot::plot_grid(bic_alex_plot , bic_tfh_plot + coord_cartesian(xlim=c(0, 200), ylim=c(0, 750)))
knitr::kable(BIC_tfh[,.(BICedges = edges[which.min(BIC)], maxEdges=max(edges)),keyby=list(Method)])
knitr::kable(BIC_alex[,.(BICedges = edges[which.min(BIC)], maxEdges=max(edges)),keyby=list(method)])
matplotNetwork(mf2[,'ngo:Healthy'], 66) 
matplotNetwork(mf2[,'patient:Healthy'], 48)
matplotNetwork(mf2[,'ngo:HIV'], 66)
matplotNetwork(c(mf2[,'patient:HIV'], mf2['hurdle', 'patient:Healthy']), matplot=FALSE, colorbar=F)
matplotNetwork(mf2[,'patient:Healthy'], nedge=48, matplot=FALSE, colorbar=F)

gn <- genNetwork(mf2[['gaussian', 'patient:Healthy']], nedge=44, plot=F)
## comp <- 'patient:HIV'
## outmat <- do.call(cBind, outFlat[names(outFlat) %like% comp])

## ham <- crossprod(abs(outmat)>0, abs(outmat)<=0)+crossprod(abs(outmat)<=0, abs(outmat)>0)
## heat2(ham, main=paste(names(outFlat)[names(outFlat) %like% comp], collapse=','))

pairwiseHamming <- function(outli){
    lout <- length(outli)
    pairs <- vector('list')
    for(i in seq_along(outli)){
        for(j in seq_along(outli)){
            hamming <- (abs(outli[[i]])>0) *(abs(outli[[j]])<=0) + (abs(outli[[i]])<=0) *(abs(outli[[j]])>0)
            pairs[[i*lout+j]] <- data.table(L1=names(outli)[i], L2=names(outli)[j],
                                            hamming=Matrix::colSums(hamming, na.rm=TRUE),
                                            edges=Matrix::colSums(abs(outli[[j]])>0)+Matrix::colSums(abs(outli[[i]])>0))
        }
    }
    pairs
}

pairwiseStab <- function(outli){
    lout <- length(outli)
    pairs <- vector('list')
    for(i in seq_along(outli)){
        for(j in seq_along(outli)){
            jacc <- (outli[[i]] - outli[[j]])
            pairs[[i*lout+j]] <- data.table(L1=names(outli)[i], L2=names(outli)[j],
                                            jaccIncl=sum(jacc>.8, na.rm=TRUE),
                                            jaccDiff=sum(jacc, na.rm=TRUE))
        }
    }
    pairs

}

subsets <- list('patient:Healthy', 'ngo:Healthy')

pwh <- lapply(subsets, function(x){
    rbindlist(pairwiseHamming(outFlat[names(outFlat) %like% x]))
})

ggplot(pwh[[1]][L1 != L2,], aes(x=edges, y=hamming))+geom_line(aes(col=L1)) + geom_abline(slope=2, color='black') + facet_grid(L2~ .)
pwh <- rbindlist(pwh)
pwh400 <- pwh[edges<60 & edges > 40 & L1 %like% 'patient:Healthy', list(dissimilarity=mean(hamming/edges)), keyby=list(L1, L2)]
pwh400[dissimilarity<.001, dissimilarity:=NA_real_]
ggplot(pwh400, aes(x=L1, y=L2, fill=dissimilarity))+geom_tile()

Dissimilarities

library(xtable)
print(xtable(dcast(pwh400, L1 ~L2)), file = 'processShalekMAITNetworks_files/figure-html/hamming-dist.tex', include.rownames = FALSE)
genes <- c('Lck', 'NFATC1',
           'ANP32B', 'IL21', 'ITGAL')
ee <- exprs(samp1HIV[genes,])
ggpairs(as.data.frame(ee), lower=list(continuous='hurdle'), upper=list(continuous='hmosaic'))+theme_bw()

samp1HltPP <- subset(samp1, run=='Healthy' & SafeCS%in% c('nn', 'pp'))
samp10HltPP <- subset(samp10, run=='Healthy' & SafeCS %in% c('pp', 'nn'))
samp1HltP <- subset(samp1, run=='Healthy' & SafeCS=='CXCR5p')[c('BcL6', 'CXCR5', 'PDCD1'),]

#ggpairs(exprs(samp1HltPP[,c('BcL6', 'PDCD1', 'CXCR5')]),  lower=list(continuous='hurdle'), upper=list(continuous='hmosaic'))+theme_bw()
#ggpairs(exprs(samp10HltPP),  lower=list(continuous='hurdle'), upper=list(continuous='hmosaic'))+theme_bw()
ggpairs(as.data.frame(exprs(samp1HltP)),  lower=list(continuous='hurdle'), upper=list(continuous='hmosaic'))+theme_bw()

mbcl6 <- glm(I(BcL6>0) ~ I(CASP1>0) + I(NFATC1>0) +I(Blimp1>0) + I(CD154>0), data=data.frame(exprs(samp1HltPP), colData(samp1HltPP)), family='binomial')
mil21 <- glm(I(IL21>0) ~ I(CASP1>0) + I(NFATC1>0) + I(BcL6>0), data=data.frame(exprs(samp1HltPP)), family='binomial')

cd <- colData(samp1HltPP)
cd$PatientID <- factor(cd$PatientID)
contrasts(cd$PatientID) <- 'contr.sum'
cd$SafeCS <- relevel(factor(cd$SafeCS), 'nn')
cd -> colData(samp1HltPP)

zz1 <- zlm(~PatientID + SafeCS+ngeneson, samp1HltPP, method='bayesglm', ebayes=T)
zzR <- zlm(~PatientID, samp1HltPP, hook=MAST:::residualsHook, method='bayesglm', ebayes=T)
zs <- summary(zz1, doLRT='SafeCSpp')
sigGenes <- zs$datatable[component=='H' & contrast=='SafeCSpp' & `Pr(>Chisq)`<.001/80]
setorder(sigGenes, `Pr(>Chisq)`)
sigPrimerid <- setdiff(as.character(sigGenes[1:5,primerid]), '')#c('CXCR5', 'PDCD1'))

resids <- cbind(as(samp1HltPP[sigPrimerid,], 'data.table'),
                resid=unlist(zzR@hookOut[sigPrimerid]))

ggplot(resids, aes(y=value, x=primerid, color=SafeCS))+geom_violin(scale='width') + geom_jitter(position=position_jitterdodge(jitter.width=.5)) + theme_bw() + scale_color_brewer('Cell subtype', labels=c('CXCR5-PD1-', 'CXCR5+PD1+'), type='qual') + xlab(NULL) + ylab('Log expression')
resid_mat <- dcast.data.table(resids, wellKey +CellSubset  ~ primerid, value.var='value')

ggp <- ggpairs(resid_mat, columns=3:6, mapping=aes(fill=CellSubset, color=CellSubset), lower=list(continuous='smooth')) + theme_bw()
scales <- list(scale_fill_brewer('Cell subtype', type='qual'),  scale_color_brewer('Cell subtype', type='qual'))

for (i in seq_len(ggp$nrow)){
    for(j in seq_len(ggp$ncol)){
        ggp[i,j] <- ggp[i,j] + scales
        }
}

ggp


 ggpairs(resid_mat[CellSubset=='CXCR5+PD1+'], columns=3:6, lower=list(continuous='smooth')) + theme_bw()

ggpairs(resid_mat, columns=3:6,  lower=list(continuous='hurdle'), upper=list(continuous='hmosaic'), mapping=aes(color=CellSubset)) + theme_bw()
cor(resid_mat[,3:6,with=F])


amcdavid/HurdleNormal documentation built on May 14, 2022, 11:12 p.m.