source('simulation_library.R')
knitr::opts_chunk$set(error=FALSE, warning=FALSE, results='hide', cache=FALSE, dev=c('png', 'pdf'))
load('simulation_graphs.RData')
library(ggplot2)
library(directlabels)
library(data.table)
library(GGally)
library(HurdleNormal)
library(cowplot)
library(stringr)
theme_set(theme_bw())# + theme(legend.position=c(.8, .2)))

label_singleline <- function(x) label_value(x, multi_line = FALSE)

sims_files <- list.files('sim_chkpoint', pattern='*.rds', full.names=TRUE)
fittedmodels <- lapply(sims_files, readRDS)

Example of data distributions

plot_data_dist = function(i){
    ggpairs(as.data.table(modelList[[i]]$gibbs[,1:3]), lower=list(continuous=HurdleNormal:::ggally_hurdle), upper=list(continuous=HurdleNormal:::ggally_hmosaic)) +ggtitle(paste(modelArgs[i,], collapse = '/'))
}

Ecoli network

library(igraph)
ec_net = readRDS('ecoli_network.rds')
adjc = abs(ec_net$true$G)>0 | abs(ec_net$true$H)>0 | abs(ec_net$true$K)>0
diag(adjc) = FALSE
adjc = adjc | t(adjc)
adj_net = graph.adjacency(adjc, mode = 'undirected')
coords = layout_with_kk(adj_net)
plot(adj_net, vertex.size = 3, layout = coords, vertex.label = NA)
plot_data_dist(19)

Model 1

plot_data_dist(20)

Model 2

plot_data_dist(34)

Model 4

modeledges <- sapply(modelList, function(x) sum((abs(x$true$G) + abs(x$true$H) + abs(x$true$K))>0) - ncol(x$true$G))/2
collectFDR <- list()
setDT(modelArgs)
modelArgs[is.na(contam), contam:='none']
modelArgs[is.na(P), P:=500]
modelArgs[kcell==1 & (P==24  | type=='ecoli') , aspect1:='consistency']
modelArgs[P==32 | type=='ecoli', aspect2:='kcell']
modelArgs[kcell == 1 & contam =='none' & P!=24 & (N==100  | type=='ecoli'), aspect3:='overall']
modelArgs[,edges:=modeledges]

renameDT <- data.table(L1=c('full', 
                            'reg',
                            'Gaussian', 
                            'Logistic',
                            'NPN', 
                            'Aracne'
                            ), 
                       Method=c('Hurdle\n(Anisometric)', 
                                'Hurdle\n(Isometric)',
                                'Gaussian',
                                'Logistic',
                            'NPN', 
                            'Aracne'))
renameDT[,Method:=factor(Method, levels = Method[order(L1)])]
for(i in seq_along(fittedmodels)){
    fittedmodels[[i]] <- rbindlist(fittedmodels[[i]], idcol='i')[!L1 %in% c('Cfull', 'Creg')]
}
allmodels <- rbindlist(fittedmodels, idcol='rep')[,Method:=NULL]
allmodels[L1=='Aracne' & oracle=='BIC', FDR:=NA]
allmodels = merge(allmodels, renameDT, by = 'L1')

oracles <- allmodels[!is.na(oracle),.(sensitivity=mean(sensitivity), fdr=mean(FDR), sem_sens = sd(sensitivity)/sqrt(.N), sem_fdr = sd(FDR)/sqrt(.N)),keyby=list(L1, oracle, i)][modelArgs,,on='i']
oracles[, Nfactor:=as.numeric(factor(N))]
oracles[is.na(contam), contam:='none']
oracles[oracle=='BIC' & L1=='Aracne', ':='(FDR=NA, sensitivity=NA)]
setkey(oracles, type, L1, Nfactor)

Load processed simulations.

Monte Carlo error

oracles[,list(err_pct_sens = median(sem_sens/sensitivity, na.rm = TRUE)*100, err_pct_fdr = median(sem_fdr/fdr, na.rm = TRUE)*100), by = list(L1)]

Median monte carlo error

Consistency by BIC

bic_plt <- ggplot(oracles[oracle=='BIC' & aspect1=='consistency'], aes(x=fdr, y=sensitivity, label=Nfactor, color=L1))+ geom_point(size=.1) + geom_text(vjust=0, hjust=0, size=3) + geom_path(aes(group=L1)) + facet_wrap(~type + contam, labeller=label_singleline) + ggtitle("Consistency and BIC tuning") + ylab('Sensitivity') + xlab('FDR')  + scale_color_discrete('Method')+ theme(legend.position=c(.8, .2))

Timings

timings = allmodels[,list(time = max(timing, na.rm = TRUE)),keyby=list(rep,i, Method)]
timings = merge(timings, modelArgs, by = 'i')
ggplot(timings[kcell==1 & contam =='none' & N <3000,], aes(x = P, y = time, color = Method))  + geom_smooth(se = FALSE) + scale_y_continuous(trans = 'log2') + scale_x_continuous(trans = 'log2') + ylab('Time(s)') + xlab('Number of nodes') + theme(legend.position = 'bottom', legend.text = element_text(size = 8), legend.key.size = unit(8, 'points')) + scale_color_discrete('')
#legend.position = c(.2, .5), legend.background = element_rect(fill = "#00000000"))
kcell_cast <- dcast(oracles[oracle=='FDR' & aspect2=='kcell'], P+L1 + N + type + contam ~ kcell, value.var='sensitivity')
plt <- ggplot(kcell_cast, aes(x=`1`, y=`10`, color=L1)) + geom_point() + facet_wrap(~type+N+P, labeller=label_both)
plt
overall_plot <- ggplot(oracles[oracle=='FDR' & N==100 & P!=24 & type !='ecoli'], aes(y=sensitivity, color=L1, lty=factor(kcell))) + facet_wrap(~type, scales='free_y') + ylab('Sensitivity') +geom_line() + scale_linetype('# Cells')  + guides(colour = "none")
dim_scale_plt <- overall_plot+aes(x=P)+ xlab('M')+ ggtitle("FDR oracle tuning: dimensional scaling")
ecoli_plt <- overall_plot %+% oracles[oracle=='FDR' & type =='ecoli'& contam=='none']+ aes(x=N) + geom_line() + ggtitle("Sample size") +ylab(NULL) 
modelI <- modelArgs[P %in% c(24, 500)&  ( N<2000 | type=='ecoli') & kcell==1  & contam %in% c('none', 'selection'), .(i, P, type, edges, kcell, N, contam)]
fi <- allmodels[modelI,,on='i']
## fi <- merge(fi, renameDT, by='L1', all.x=TRUE)
## fi[,Method:= ifelse(is.na(Method), L1, Method)]
#fi[is.na(oracle),':='(sensitivity=tpI/edges, FDR=fpI/(fpI+tpI), totalI=floor(fpI+tpI))]
setkey(fi, rep, i, L1, totalEdges)

roclike <- fi[is.na(oracle) & totalEdges<1000,.(FDR=mean(fpI/totalEdges), sensitivity=mean(tpI/edges), sdFDR=sd(fpI/totalEdges)/sqrt(.N), sdSens=sd(tpI/edges)/(sqrt(.N))), keyby=list(L1, totalEdges, i, N, type, contam)]
roclike[,':='(FDR=caTools::runmean(FDR, 2), sensitivity=caTools::runmean(sensitivity, k=2)), keyby=list(L1, N, type, contam, i)]
roclike[,fN:=factor(N)]
roclike[,.(medianSD_FDR=median(sdFDR, na.rm=TRUE),
          medianSD_sens=median(sdSens, na.rm=TRUE))]

bic <- fi[oracle=='BIC',.(FDR=median(FDR), sensitivity=median(sensitivity)), key=key(roclike)]


roclike_plt <- ggplot(roclike, aes(x=FDR, y=sensitivity, color=L1))+geom_path() + facet_wrap(~type + contam+N, labeller=label_singleline, scales='free_y') + scale_color_discrete('Method') + ggtitle('Sensitivity vs FDR')
roclike_plt+ geom_point(data=bic, pch=9)
## roclike <- mfi[,.(vbar=mean(value), sdv=sd(value)/sqrt(.N)), keyby=list(L1, totalEdges, i, variable)]
## roclike[,':='(vlo=vbar-sdv,
##               vhi=vbar+sdv)]

roclike_wide <- dcast(roclike, L1 + totalEdges + i ~ variable, value.var='vbar')

p <- ggplot(roclike, aes(x=totalEdges, y=vbar, col=L1))+geom_line() + xlab("Total Edges") + ylab("Measure") + facet_wrap(~i+variable, scales='free') + xlim(0, 100)



roclike <- fi[is.na(oracle),.(tpbar=mean(tpI), sdtp=sd(tpI)/sqrt(.N)), keyby=list(L1, fpI, i)]
roclike[,':='(tplo=tpbar-sdtp,
         tphi=tpbar+sdtp)]
    p <- ggplot(roclike, aes(x=fpI, y=tpbar, col=L1))+geom_line() + xlab("False Edges") + ylab("True Edges") + xlim(0, 20) + facet_wrap(~i)
    print(direct.label(p + geom_ribbon(aes(ymin=tplo, ymax=tphi, fill=L1), alpha=.3), 'angled.boxes'))

p <- ggplot(collectFDR[Method %in% c('Anisometric', 'Isometric', 'Logistic', 'Gaussian', 'NPN') & N==100 & !(type %like% '2')], aes(y=sensbar, x=P, color=Method)) + geom_l2ine()+ facet_wrap(~type) + theme_bw() + xlab('M') + ylab('Sensitivity')
direct.label(p, 'angled.boxes')

p <- ggplot(collectFDR[Method %in% c('Anisometric', 'Isometric', 'Logistic', 'Gaussian', 'NPN') & N==10000 & !(type %like% '2')], aes(y=sensbar, x=P, color=Method)) + geom_line()+ facet_wrap(~type) + theme_bw() + xlab('M') + ylab('Sensitivity')
ggdraw() + draw_plot(dim_scale_plt+ guides(colour='none', linetype='none'), y=.65, height=.35, width=.6)+ draw_plot(ecoli_plt, y=.65, x=.6, height=.35, width=.4) + draw_plot(bic_plt, height=.65) + draw_plot_label(c('A', 'B', 'C'), c(0, .6, 0), c(1, 1, .65))
ggsave('./simulations_multipanel.pdf', height=10, width=8)
dim_scale_plt %+% oracles[oracle=='FDR' & N==100 & P!=24 & type  != 'ecoli'] + scale_color_discrete('Method') + guides(colour = 'legend') + theme(legend.position='bottom')
roclike_plt %+% roclike[contam=='none'] + geom_point(data=bic[contam=='none'], pch=9) + theme(legend.position='bottom')


amcdavid/HurdleNormal documentation built on May 14, 2022, 11:12 p.m.