source('simulation_library.R') knitr::opts_chunk$set(error=FALSE, warning=FALSE, results='hide', cache=FALSE, dev=c('png', 'pdf')) load('simulation_graphs.RData') library(ggplot2) library(directlabels) library(data.table) library(GGally) library(HurdleNormal) library(cowplot) library(stringr) theme_set(theme_bw())# + theme(legend.position=c(.8, .2))) label_singleline <- function(x) label_value(x, multi_line = FALSE) sims_files <- list.files('sim_chkpoint', pattern='*.rds', full.names=TRUE) fittedmodels <- lapply(sims_files, readRDS)
plot_data_dist = function(i){ ggpairs(as.data.table(modelList[[i]]$gibbs[,1:3]), lower=list(continuous=HurdleNormal:::ggally_hurdle), upper=list(continuous=HurdleNormal:::ggally_hmosaic)) +ggtitle(paste(modelArgs[i,], collapse = '/')) }
library(igraph) ec_net = readRDS('ecoli_network.rds') adjc = abs(ec_net$true$G)>0 | abs(ec_net$true$H)>0 | abs(ec_net$true$K)>0 diag(adjc) = FALSE adjc = adjc | t(adjc) adj_net = graph.adjacency(adjc, mode = 'undirected') coords = layout_with_kk(adj_net) plot(adj_net, vertex.size = 3, layout = coords, vertex.label = NA)
plot_data_dist(19)
Model 1
plot_data_dist(20)
Model 2
plot_data_dist(34)
Model 4
modeledges <- sapply(modelList, function(x) sum((abs(x$true$G) + abs(x$true$H) + abs(x$true$K))>0) - ncol(x$true$G))/2 collectFDR <- list() setDT(modelArgs) modelArgs[is.na(contam), contam:='none'] modelArgs[is.na(P), P:=500] modelArgs[kcell==1 & (P==24 | type=='ecoli') , aspect1:='consistency'] modelArgs[P==32 | type=='ecoli', aspect2:='kcell'] modelArgs[kcell == 1 & contam =='none' & P!=24 & (N==100 | type=='ecoli'), aspect3:='overall'] modelArgs[,edges:=modeledges] renameDT <- data.table(L1=c('full', 'reg', 'Gaussian', 'Logistic', 'NPN', 'Aracne' ), Method=c('Hurdle\n(Anisometric)', 'Hurdle\n(Isometric)', 'Gaussian', 'Logistic', 'NPN', 'Aracne')) renameDT[,Method:=factor(Method, levels = Method[order(L1)])] for(i in seq_along(fittedmodels)){ fittedmodels[[i]] <- rbindlist(fittedmodels[[i]], idcol='i')[!L1 %in% c('Cfull', 'Creg')] } allmodels <- rbindlist(fittedmodels, idcol='rep')[,Method:=NULL] allmodels[L1=='Aracne' & oracle=='BIC', FDR:=NA] allmodels = merge(allmodels, renameDT, by = 'L1') oracles <- allmodels[!is.na(oracle),.(sensitivity=mean(sensitivity), fdr=mean(FDR), sem_sens = sd(sensitivity)/sqrt(.N), sem_fdr = sd(FDR)/sqrt(.N)),keyby=list(L1, oracle, i)][modelArgs,,on='i'] oracles[, Nfactor:=as.numeric(factor(N))] oracles[is.na(contam), contam:='none'] oracles[oracle=='BIC' & L1=='Aracne', ':='(FDR=NA, sensitivity=NA)] setkey(oracles, type, L1, Nfactor)
Load processed simulations.
oracles[,list(err_pct_sens = median(sem_sens/sensitivity, na.rm = TRUE)*100, err_pct_fdr = median(sem_fdr/fdr, na.rm = TRUE)*100), by = list(L1)]
Median monte carlo error
bic_plt <- ggplot(oracles[oracle=='BIC' & aspect1=='consistency'], aes(x=fdr, y=sensitivity, label=Nfactor, color=L1))+ geom_point(size=.1) + geom_text(vjust=0, hjust=0, size=3) + geom_path(aes(group=L1)) + facet_wrap(~type + contam, labeller=label_singleline) + ggtitle("Consistency and BIC tuning") + ylab('Sensitivity') + xlab('FDR') + scale_color_discrete('Method')+ theme(legend.position=c(.8, .2))
timings = allmodels[,list(time = max(timing, na.rm = TRUE)),keyby=list(rep,i, Method)] timings = merge(timings, modelArgs, by = 'i') ggplot(timings[kcell==1 & contam =='none' & N <3000,], aes(x = P, y = time, color = Method)) + geom_smooth(se = FALSE) + scale_y_continuous(trans = 'log2') + scale_x_continuous(trans = 'log2') + ylab('Time(s)') + xlab('Number of nodes') + theme(legend.position = 'bottom', legend.text = element_text(size = 8), legend.key.size = unit(8, 'points')) + scale_color_discrete('') #legend.position = c(.2, .5), legend.background = element_rect(fill = "#00000000"))
kcell_cast <- dcast(oracles[oracle=='FDR' & aspect2=='kcell'], P+L1 + N + type + contam ~ kcell, value.var='sensitivity') plt <- ggplot(kcell_cast, aes(x=`1`, y=`10`, color=L1)) + geom_point() + facet_wrap(~type+N+P, labeller=label_both) plt
overall_plot <- ggplot(oracles[oracle=='FDR' & N==100 & P!=24 & type !='ecoli'], aes(y=sensitivity, color=L1, lty=factor(kcell))) + facet_wrap(~type, scales='free_y') + ylab('Sensitivity') +geom_line() + scale_linetype('# Cells') + guides(colour = "none") dim_scale_plt <- overall_plot+aes(x=P)+ xlab('M')+ ggtitle("FDR oracle tuning: dimensional scaling") ecoli_plt <- overall_plot %+% oracles[oracle=='FDR' & type =='ecoli'& contam=='none']+ aes(x=N) + geom_line() + ggtitle("Sample size") +ylab(NULL)
modelI <- modelArgs[P %in% c(24, 500)& ( N<2000 | type=='ecoli') & kcell==1 & contam %in% c('none', 'selection'), .(i, P, type, edges, kcell, N, contam)] fi <- allmodels[modelI,,on='i'] ## fi <- merge(fi, renameDT, by='L1', all.x=TRUE) ## fi[,Method:= ifelse(is.na(Method), L1, Method)] #fi[is.na(oracle),':='(sensitivity=tpI/edges, FDR=fpI/(fpI+tpI), totalI=floor(fpI+tpI))] setkey(fi, rep, i, L1, totalEdges) roclike <- fi[is.na(oracle) & totalEdges<1000,.(FDR=mean(fpI/totalEdges), sensitivity=mean(tpI/edges), sdFDR=sd(fpI/totalEdges)/sqrt(.N), sdSens=sd(tpI/edges)/(sqrt(.N))), keyby=list(L1, totalEdges, i, N, type, contam)] roclike[,':='(FDR=caTools::runmean(FDR, 2), sensitivity=caTools::runmean(sensitivity, k=2)), keyby=list(L1, N, type, contam, i)] roclike[,fN:=factor(N)] roclike[,.(medianSD_FDR=median(sdFDR, na.rm=TRUE), medianSD_sens=median(sdSens, na.rm=TRUE))] bic <- fi[oracle=='BIC',.(FDR=median(FDR), sensitivity=median(sensitivity)), key=key(roclike)] roclike_plt <- ggplot(roclike, aes(x=FDR, y=sensitivity, color=L1))+geom_path() + facet_wrap(~type + contam+N, labeller=label_singleline, scales='free_y') + scale_color_discrete('Method') + ggtitle('Sensitivity vs FDR') roclike_plt+ geom_point(data=bic, pch=9)
## roclike <- mfi[,.(vbar=mean(value), sdv=sd(value)/sqrt(.N)), keyby=list(L1, totalEdges, i, variable)] ## roclike[,':='(vlo=vbar-sdv, ## vhi=vbar+sdv)] roclike_wide <- dcast(roclike, L1 + totalEdges + i ~ variable, value.var='vbar') p <- ggplot(roclike, aes(x=totalEdges, y=vbar, col=L1))+geom_line() + xlab("Total Edges") + ylab("Measure") + facet_wrap(~i+variable, scales='free') + xlim(0, 100) roclike <- fi[is.na(oracle),.(tpbar=mean(tpI), sdtp=sd(tpI)/sqrt(.N)), keyby=list(L1, fpI, i)] roclike[,':='(tplo=tpbar-sdtp, tphi=tpbar+sdtp)] p <- ggplot(roclike, aes(x=fpI, y=tpbar, col=L1))+geom_line() + xlab("False Edges") + ylab("True Edges") + xlim(0, 20) + facet_wrap(~i) print(direct.label(p + geom_ribbon(aes(ymin=tplo, ymax=tphi, fill=L1), alpha=.3), 'angled.boxes')) p <- ggplot(collectFDR[Method %in% c('Anisometric', 'Isometric', 'Logistic', 'Gaussian', 'NPN') & N==100 & !(type %like% '2')], aes(y=sensbar, x=P, color=Method)) + geom_l2ine()+ facet_wrap(~type) + theme_bw() + xlab('M') + ylab('Sensitivity') direct.label(p, 'angled.boxes') p <- ggplot(collectFDR[Method %in% c('Anisometric', 'Isometric', 'Logistic', 'Gaussian', 'NPN') & N==10000 & !(type %like% '2')], aes(y=sensbar, x=P, color=Method)) + geom_line()+ facet_wrap(~type) + theme_bw() + xlab('M') + ylab('Sensitivity')
ggdraw() + draw_plot(dim_scale_plt+ guides(colour='none', linetype='none'), y=.65, height=.35, width=.6)+ draw_plot(ecoli_plt, y=.65, x=.6, height=.35, width=.4) + draw_plot(bic_plt, height=.65) + draw_plot_label(c('A', 'B', 'C'), c(0, .6, 0), c(1, 1, .65)) ggsave('./simulations_multipanel.pdf', height=10, width=8)
dim_scale_plt %+% oracles[oracle=='FDR' & N==100 & P!=24 & type != 'ecoli'] + scale_color_discrete('Method') + guides(colour = 'legend') + theme(legend.position='bottom')
roclike_plt %+% roclike[contam=='none'] + geom_point(data=bic[contam=='none'], pch=9) + theme(legend.position='bottom')
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.