work in progress
This document shows and explains how to use the dogss package and how to reproduce Figures 9 to 13 from our paper Sparse-Group Bayesian Feature Selection Using Expectation Propagation for Signal Recovery and Network Reconstruction.
First we need to load some packages that are required for comparisons and plotting (please install if not available on your machine):
library(dogss) # our method for sparse-group Bayesian feature selection with EP library(glmnet) # standard lasso library(gglasso) # group lasso library(SGL) # sparse-group lasso library(MBSGS) # Bayesian feature selection with Gibbs sampling library(ggplot2) # for nice plots library(ggthemes) # for even nicer plots library(grid); library(gridExtra) # to arrange plots pleasantly library(reshape2) # to melt data into "tidy" long-format library(DescTools) # for area computations (AUROC, AUPR)
Furthermore we need to load three R files with additional code:
source("../auxiliary_rfunctions/my_cvSGL.R") # proper cross validation for SGL package source("../auxiliary_rfunctions/my_theme.R") # functions to adjust ggplots
Finally, we provide all of the results on our simulated data to reconstruct the plots from the publication. If you wish to re-do all of the simulations/calculations, change the following parameter to TRUE
(only do this if you have access to multiple cores):
selfcompute <- FALSE B <- 100 # number of simulations ncores <- 50 # number of cores used for parallelization
example network
# load("~/Programme/dogss/testing/sim2/simnet_onetwothree_96_2.RData") par(oma=c(0,0,0,0), mar=c(0,0,0,0)) plot.igraph(graph_from_adjacency_matrix(t(truth_matrix)), vertex.size=5, vertex.label=NA, vertex.color="white", vertex.frame.color=Ed_palette[1], vertex.shape=ifelse(row.names(truth_matrix) %in% names(G), "square", "circle"), edge.width=1, edge.arrow.mode=0, edge.curved=FALSE, vertex.label.family="CM Roman", asp=3/5) # layout=layout.big
library(mvtnorm) library(qpgraph) library(sparseMVN) library(psych) library(SGL) library(gglasso) library(glmnet) library(dogss2) library(DescTools) library(methods) library(Matrix) library(foreach) library(doParallel) source("/project/dogss/my_cvSGL.R") ##### simulation of kind-of scale-free network graph cK <- c(100, 1000, 5000) # 100 # number of genes total cTF <- c(10, 100, 500) # number of transcription factors/hubs cBG <- c(3, 20, 50) # 10 # number of "big groups" cq <- c(0.01, 0.001, 0.0002) # 20/((S*BG)*(K-1)) # sigma_0 <- 0.1 B <- 100 ncores <- 50 # generate the network graphs and data ncl <- detectCores() ncl <- min(ncl, ncores) myCluster1 <- makeCluster(ncl, outfile="/project/dogss/tests_thesis/log_02_networkreconstruction_generate.txt") registerDoParallel(myCluster1) networks <- foreach (b=1:B, .packages=c("sparseMVN", "psych", "Matrix", "mvtnorm", "qpgraph"), .errorhandling='pass') %dopar% { cat(b, "\n") for (i in 1:2) { K <- cK[i] BG <- cBG[i] q <- cq[i] ntfs <- cTF[i] genes <- paste0("G", 1:K) TFs <- sample(genes, ntfs) partitions <- vector("integer", K) names(partitions) <- genes partitions[TFs[1:BG]] <- 1:BG partitions[TFs[(BG+1):ntfs]] <- sample(1:BG, ntfs-BG, replace=TRUE) probsBG <- runif(BG) # probs for big groups to get different sizes partitions[!(genes%in%TFs)] <- sample(1:BG, K-ntfs, replace=TRUE, prob=probsBG) # every node gets assigned a big group connectlist <- c(0, 0) for (g in genes) { potentialTFs <- names(partitions[TFs][partitions[TFs]==partitions[g]]) if (sum(potentialTFs!=g)!=0) { firstconnect <- sample(potentialTFs[potentialTFs!=g], 1) connectlist <- rbind(connectlist, c(firstconnect, g)) for (tf in potentialTFs[!potentialTFs%in%c(firstconnect, g)]) { if (runif(1)<0.5) connectlist <- rbind(connectlist, c(tf, g)) } } for (h in TFs) { # here we add random edges if (runif(1) <= q & h != g) connectlist <- rbind(connectlist, c(h, g)) # c(fromTF, toGene) } } connectlist <- connectlist[-1, ] dat.sort <- t(apply(connectlist, 1, sort)) newconnectlist <- connectlist[!duplicated(dat.sort), ] # remove duplicated. don't ask me why this works, stackoverflow truth_matrix <- matrix(0, nrow=length(partitions), ncol=length(partitions), dimnames=list(from=names(partitions), to=names(partitions))) # BETA <- truth_matrix # prec_matrix <- truth_matrix for (j in 1:dim(newconnectlist)[1]) { truth_matrix[newconnectlist[j, 1], newconnectlist[j, 2]] <- 1 truth_matrix[newconnectlist[j, 2], newconnectlist[j, 1]] <- 1 # thisminusbeta <- rnorm(1) # runif(1, -5, 5)# sample(c(runif(1,-5,-1), runif(1, 1, 5)), 1) # prec_matrix[newconnectlist[j, 1], newconnectlist[j, 2]] <- thisminusbeta # prec_matrix[newconnectlist[j, 2], newconnectlist[j, 1]] <- thisminusbeta } # diag(prec_matrix) <- 1000 # rnorm(length(diag(prec_matrix))) # runif(length(diag(prec_matrix)), -5, 5) # H2 <- glb.algebraic(prec_matrix)$solution # diag(prec_matrix) <- H2 + 0.01 # for (j in seq(dim(BETA)[2])) BETA[, j] <- -prec_matrix[, j]/prec_matrix[j, j] # diag(BETA) <- 0 truth_matrix <- as.matrix(as(truth_matrix, "symmetricMatrix")) mygraph <- rUGgmm(truth_matrix) prec_matrix <- solve(mygraph$sigma) geneexpr <- rmvnorm(200, mygraph) dimnames(prec_matrix) <- list(from=colnames(geneexpr), to=colnames(geneexpr)) geneexpr <- geneexpr[, genes] prec_matrix <- prec_matrix[, genes]; prec_matrix <- prec_matrix[genes, ] pm_nz <- truth_matrix diag(pm_nz) <- 1 prec_matrix[pm_nz!=1] <- 0 # prec_matrix <- Matrix(as(prec_matrix, "symmetricMatrix"), sparse=TRUE) # geneexpr <- rmvn.sparse(200, rep(0, dim(prec_matrix)[1]), Cholesky(prec_matrix)) # + rnorm(200*dim(prec_matrix)[1], 0, sigma_0) # colnames(geneexpr) <- genes train <- 1:100 test <- 101:200 geneexpr_train <- geneexpr[train, ] geneexpr_test <- geneexpr[test, ] G <- partitions[TFs] ngenes <- dim(geneexpr)[2] nobs <- 100 # dim(geneexpr)[1] # ntfs <- length(tfs) neffs <- sum(truth_matrix[TFs, ]) save(mygraph, geneexpr, geneexpr_train, geneexpr_test, prec_matrix, genes, TFs, G, ngenes, nobs, ntfs, truth_matrix, neffs, partitions, file=paste0("/project/dogss/tests_thesis/02_networkreconstruction/sim_network_", b, "_", i, ".RData")) # save.image funktioniert mit foreach nicht... # Graph, exps, lambdapath } b } stopCluster(myCluster1) save.image(file="/project/dogss/tests_thesis/02_networkreconstruction_generate.RData") # reconstruct the networks! ncl <- detectCores() ncl <- min(ncl, ncores) myCluster <- makeCluster(ncl, outfile="/project/dogss/tests_thesis/log_02_networkreconstruction.txt") registerDoParallel(myCluster) results <- foreach (b=1:B, .packages=c("dogss2", "gglasso", "SGL", "glmnet", "DescTools"), .errorhandling='pass') %dopar% { cat("b=", b, "\n") onetwothree <- vector("list", 3) gr_cl_rd <- vector("list", 3) for (i in 1:2) { # different scenarios for number of observations, groups etc cat("b=", b, ", i=", i, "\n") load(file=paste0("/project/dogss/tests_thesis/02_networkreconstruction/sim_network_", b, "_", i, ".RData")) results_simnetwork <- list() for (g in genes) { # cat("\nGene ", g, "\n") Y <- as.vector(geneexpr_train[, g]) # as.vector(geneexpr_center_train[, g]) index <- as.integer(G)[!(names(G) %in% g)] names(index) <- names(G)[!(names(G) %in% g)] x <- geneexpr_train[, names(index)] # geneexpr_scale_train[, names(index)] groups <- unique(index) ngroups <- length(groups) nG <- ngroups nngroups <- sapply(groups, function(g) sum(index==g)) while (max(groups) != ngroups) { # this is for gglasso which needs consecutively numbered groups :-/ del <- max(groups) index[index==del] <- which.min(1:nG %in% groups) # this is superweird :D groups <- unique(index) ngroups <- length(groups) nngroups <- sapply(groups, function(g) sum(index==g)) } if (is.vector(x)) x <- as.matrix(x, ncol=1) res_dogss <- cv_dogss(x, Y, G=index, tol=1e-03) res_ssep <- cv_dogss(x, Y, G=NULL, tol=1e-03) res_sgl <- tryCatch(my_cvSGL(data=list(x=x, y=Y), index=index, standardize=FALSE, thresh=1e-03, maxit=1000), error=function(e) NULL) res_gglasso <- cv.gglasso(x=x, y=Y, group=index, nfolds=10, intercept=FALSE, eps=1e-03, maxit=1000) res_lasso <- tryCatch(cv.glmnet(x, Y, standardize=FALSE, intercept=FALSE, thresh=1e-03, maxit=1000), error=function(e) NULL) res_sgl$lambda <- res_sgl$fit$lambdas if (is.null(res_sgl)) res_sgl <- res_gglasso # in this case (only one feature) the three methods are equivalent, but only gglasso gives results if (is.null(res_lasso)) res_lasso <- res_gglasso res_sgl$mylambda <- apply(res_sgl$fit$beta, 1, function(b) res_sgl$lambda[which(b!=0)[1]]) res_gglasso$mylambda <- apply(res_gglasso$gglasso.fit$beta, 1, function(b) res_gglasso$lambda[which(b!=0)][1]) res_lasso$mylambda <- apply(res_lasso$glmnet.fit$beta, 1, function(b) res_lasso$lambda[which(b!=0)[1]]) scores <- cbind(dogss=res_dogss$p_final, ssep=res_ssep$p_final, sgl=res_sgl$mylambda, gglasso=res_gglasso$mylambda, lasso=res_lasso$mylambda) # ...$areascore # ...$p_final # , row.names(scores) <- names(index) results_simnetwork[[g]] <- list(scores=scores, results=list(dogss=res_dogss, ssep=res_ssep, sgl=res_sgl, gglasso=res_gglasso, lasso=res_lasso)) } save(results_simnetwork, file=paste0("/project/dogss/tests_thesis/02_networkreconstruction/results_network_onetwothree_", i, "_", b, ".RData")) scorelist <- data.frame(to=character(), from=character(), dogss=numeric(), ssep=numeric(), sgl=numeric(), gglasso=numeric(), lasso=numeric(), stringsAsFactors=FALSE) # , for (g in genes) { scorelist <- rbind(scorelist, data.frame(to=as.character(rep(g, dim(results_simnetwork[[g]]$scores)[1])), from=as.character(rownames(results_simnetwork[[g]]$scores)), dogss=results_simnetwork[[g]]$scores[, "dogss"], ssep=results_simnetwork[[g]]$scores[, "ssep"], sgl=results_simnetwork[[g]]$scores[, "sgl"], gglasso=results_simnetwork[[g]]$scores[, "gglasso"], lasso=results_simnetwork[[g]]$scores[, "lasso"],stringsAsFactors=FALSE)) # } ranks <- apply(scorelist[, 3:7], 2, function(r) rank(1-r, ties.method="min")) pairs <- scorelist[, c(1,2)] truth_pairs <- rep(0, dim(pairs)[1]) recov_matrices <- list() prec_matrices <- list() methods <- c("dogss", "ssep", "sgl", "gglasso", "lasso") # for (m in methods) recov_matrices[[m]] <- matrix(0, nrow=ntfs, ncol=dim(truth_matrix)[2], dimnames=list(from=TFs, to=colnames(truth_matrix))) for (m in methods) prec_matrices[[m]] <- matrix(0, nrow=dim(truth_matrix)[1], ncol=dim(truth_matrix)[2], dimnames=list(from=rownames(truth_matrix), to=colnames(truth_matrix))) for (j in 1:length(truth_pairs)) { truth_pairs[j] <- truth_matrix[pairs[j,2], pairs[j,1]] for (m in methods) { recov_matrices[[m]][pairs[j,2], pairs[j,1]] <- ranks[j, m] } } # rocstuff K <- length(truth_pairs) P <- sum(truth_matrix[TFs, ]) N <- ntfs*ngenes-ntfs-P M <- length(methods) tpr <- matrix(0, K, M) fpr <- matrix(0, K, M) precision <- matrix(0, K, M) for (m in 1:M) { truth_ordered <- truth_matrix[TFs, ][order(recov_matrices[[m]])][-c(1:ntfs)] tpr[, m] <- cumsum(truth_ordered)/P fpr[, m] <- cumsum(!truth_ordered)/N precision[, m] <- tpr[, m]*P/(tpr[, m]*P + fpr[, m]*N) } AUROC <- sapply(1:length(methods), function(m) AUC(c(0,fpr[, m],1), c(0,tpr[, m],1))) AUPR <- sapply(1:length(methods), function(m) AUC(c(0,tpr[, m],1), c(1,precision[, m],0))) # prediction stuff X_test <- geneexpr_test[, TFs] # geneexpr_scale_test[, tfs] prederror <- rep(0, 5) names(prederror) <- c("dogss", "ssep", "sgl", "gglasso", "lasso") precm_errors <- prederror normPM <- norm(prec_matrix, "1") BETA_dogss <- truth_matrix[TFs, ] BETA_dogss[, ] <- 0 BETA_ssep <- BETA_dogss BETA_sgl <- BETA_dogss BETA_gglasso <- BETA_dogss BETA_lasso <- BETA_dogss X <- X_test[, TFs] for (g in genes) { Y <- geneexpr_test[, g] # geneexpr_center_test[, g] sumY2 <- sum(Y^2) res_dogss <- results_simnetwork[[g]]$results$dogss res_ssep <- results_simnetwork[[g]]$results$ssep res_sgl <- results_simnetwork[[g]]$results$sgl res_lasso <- results_simnetwork[[g]]$results$lasso res_gglasso <- results_simnetwork[[g]]$results$gglasso names(res_dogss$m_cv) <- TFs[TFs!=g] names(res_ssep$m_cv) <- TFs[TFs!=g] names(res_sgl$beta) <- TFs[TFs!=g] BETA_dogss[TFs[!(TFs %in% g)], g] <- res_dogss$m_cv[TFs[!(TFs %in% g)]] BETA_ssep[TFs[!(TFs %in% g)], g] <- res_ssep$m_cv[TFs[!(TFs %in% g)]] BETA_sgl[TFs[!(TFs %in% g)], g] <- res_sgl$beta[TFs[!(TFs %in% g)]] BETA_gglasso[TFs[!(TFs %in% g)], g] <- res_gglasso$gglasso.fit$beta[, which(res_gglasso$lambda==res_gglasso$lambda.1se)][TFs[!(TFs %in% g)]] BETA_lasso[TFs[!(TFs %in% g)], g] <- res_lasso$glmnet.fit$beta[, which(res_lasso$lambda==res_lasso$lambda.1se)][TFs[!(TFs %in% g)]] if (!(g%in%TFs)) { prederror["dogss"] <- prederror["dogss"] + sum((Y-X%*%BETA_dogss[, g])^2)/sumY2 prederror["ssep"] <- prederror["ssep"] + sum((Y-X%*%BETA_ssep[, g])^2)/sumY2 prederror["sgl"] <- prederror["sgl"] + sum((Y-X%*%BETA_sgl[, g])^2)/sumY2 prederror["gglasso"] <- prederror["gglasso"] + sum((Y-X%*%BETA_gglasso[, g])^2)/sumY2 prederror["lasso"] <- prederror["lasso"] + sum((Y-X%*%BETA_lasso[, g])^2)/sumY2 } prec_matrices[["dogss"]][g, g] <- nobs/sum((geneexpr_train[, g] - geneexpr_train[, TFs[!(TFs %in% g)]] %*% BETA_dogss[, g][TFs[!(TFs %in% g)]])^2) prec_matrices[["dogss"]][TFs[!(TFs %in% g)], g] <- -prec_matrices[["dogss"]][g, g] * BETA_dogss[, g][TFs[!(TFs %in% g)]] # minus is important :) prec_matrices[["dogss"]][g, TFs[!(TFs %in% g)]] <- prec_matrices[["dogss"]][TFs[!(TFs %in% g)], g] prec_matrices[["ssep"]][g, g] <- nobs/sum((geneexpr_train[, g] - geneexpr_train[, TFs[!(TFs %in% g)]] %*% BETA_ssep[, g][TFs[!(TFs %in% g)]])^2) prec_matrices[["ssep"]][TFs[!(TFs %in% g)], g] <- -prec_matrices[["ssep"]][g, g] * BETA_ssep[, g][TFs[!(TFs %in% g)]] # minus is important :) prec_matrices[["ssep"]][g, TFs[!(TFs %in% g)]] <- prec_matrices[["ssep"]][TFs[!(TFs %in% g)], g] prec_matrices[["sgl"]][g, g] <- nobs/sum((geneexpr_train[, g] - geneexpr_train[, TFs[!(TFs %in% g)]] %*% BETA_sgl[, g][TFs[!(TFs %in% g)]])^2) prec_matrices[["sgl"]][TFs[!(TFs %in% g)], g] <- -prec_matrices[["sgl"]][g, g] * BETA_sgl[, g][TFs[!(TFs %in% g)]] # minus is important :) prec_matrices[["sgl"]][g, TFs[!(TFs %in% g)]] <- prec_matrices[["sgl"]][TFs[!(TFs %in% g)], g] prec_matrices[["gglasso"]][g, g] <- nobs/sum((geneexpr_train[, g] - geneexpr_train[, TFs[!(TFs %in% g)]] %*% BETA_gglasso[, g][TFs[!(TFs %in% g)]])^2) prec_matrices[["gglasso"]][TFs[!(TFs %in% g)], g] <- -prec_matrices[["gglasso"]][g, g] * BETA_gglasso[, g][TFs[!(TFs %in% g)]] # minus is important :) prec_matrices[["gglasso"]][g, TFs[!(TFs %in% g)]] <- prec_matrices[["gglasso"]][TFs[!(TFs %in% g)], g] prec_matrices[["lasso"]][g, g] <- nobs/sum((geneexpr_train[, g] - geneexpr_train[, TFs[!(TFs %in% g)]] %*% BETA_lasso[, g][TFs[!(TFs %in% g)]])^2) prec_matrices[["lasso"]][TFs[!(TFs %in% g)], g] <- -prec_matrices[["lasso"]][g, g] * BETA_lasso[, g][TFs[!(TFs %in% g)]] # minus is important :) prec_matrices[["lasso"]][g, TFs[!(TFs %in% g)]] <- prec_matrices[["lasso"]][TFs[!(TFs %in% g)], g] } prederror <- prederror/length(genes[!(genes%in%TFs)]) for (m in methods) { precm_errors[m] <- norm(prec_matrix-prec_matrices[[m]], "1")/normPM } onetwothree[[i]] <- list(TPR=tpr, FPR=fpr, Prec=precision, AUROC=AUROC, AUPR=AUPR, prederror=prederror, precm_errors=precm_errors, times=times) # , results_simnetwork=results_simnetwork # } save(onetwothree, file=paste0("/project/dogss/tests_thesis/02_networkreconstruction/net_onetwothree_", b, ".RData")) #### jetzt noch der different groups stuff fuer das zweite scenario load(file=paste0("/project/dogss/tests_thesis/02_networkreconstruction/sim_network_", b, "_", 2, ".RData")) for (dg in 2:3) { # calculate results for different groupings: clustered, random results_simnetwork <- list() if (dg==2) { G_new <- kmeans(t(geneexpr[, TFs]), length(unique(G)))$cluster cat("kmeans!", " b=", b, "\n") } else { # (dg==3) G_new <- sample(G) names(G_new) <- names(G) cat("random!", " b=", b, "\n") } for (g in genes) { # cat("\nGene ", g, "\n") Y <- as.vector(geneexpr_train[, g]) # as.vector(geneexpr_center_train[, g]) index <- as.integer(G_new)[!(names(G_new) %in% g)] names(index) <- names(G_new)[!(names(G_new) %in% g)] x <- geneexpr_train[, names(index)] # geneexpr_scale_train[, names(index)] groups <- unique(index) ngroups <- length(groups) nG <- ngroups nngroups <- sapply(groups, function(g) sum(index==g)) while (max(groups) != ngroups) { # this is for gglasso which needs consecutively numbered groups :-/ del <- max(groups) index[index==del] <- which.min(1:nG %in% groups) # this is superweird :D groups <- unique(index) ngroups <- length(groups) nngroups <- sapply(groups, function(g) sum(index==g)) } if (is.vector(x)) x <- as.matrix(x, ncol=1) res_dogss <- cv_dogss(x, Y, G=index, tol=1e-03) res_ssep <- cv_dogss(x, Y, G=NULL, tol=1e-03) res_sgl <- tryCatch(my_cvSGL(data=list(x=x, y=Y), index=index, standardize=FALSE, thresh=1e-03, maxit=1000), error=function(e) NULL) res_gglasso <- cv.gglasso(x=x, y=Y, group=index, nfolds=10, intercept=FALSE, eps=1e-03, maxit=1000) res_lasso <- tryCatch(cv.glmnet(x, Y, standardize=FALSE, intercept=FALSE, thresh=1e-03, maxit=1000), error=function(e) NULL) res_sgl$lambda <- res_sgl$fit$lambdas if (is.null(res_sgl)) res_sgl <- res_gglasso # in this case (only one feature) the three methods are equivalent, but only gglasso gives results if (is.null(res_lasso)) res_lasso <- res_gglasso res_sgl$mylambda <- apply(res_sgl$fit$beta, 1, function(b) res_sgl$lambda[which(b!=0)[1]]) res_gglasso$mylambda <- apply(res_gglasso$gglasso.fit$beta, 1, function(b) res_gglasso$lambda[which(b!=0)][1]) res_lasso$mylambda <- apply(res_lasso$glmnet.fit$beta, 1, function(b) res_lasso$lambda[which(b!=0)[1]]) scores <- cbind(dogss=res_dogss$p_final, ssep=res_ssep$p_final, sgl=res_sgl$mylambda, gglasso=res_gglasso$mylambda, lasso=res_lasso$mylambda) # ...$areascore # ...$p_final # , row.names(scores) <- names(index) results_simnetwork[[g]] <- list(scores=scores, results=list(dogss=res_dogss, ssep=res_ssep, sgl=res_sgl, gglasso=res_gglasso, lasso=res_lasso)) } save(results_simnetwork, file=paste0("/project/dogss/tests_thesis/02_networkreconstruction/results_network_grclrd_", dg, "_", b, ".RData")) scorelist <- data.frame(to=character(), from=character(), dogss=numeric(), ssep=numeric(), sgl=numeric(), gglasso=numeric(), lasso=numeric(), stringsAsFactors=FALSE) # , for (g in genes) { scorelist <- rbind(scorelist, data.frame(to=as.character(rep(g, dim(results_simnetwork[[g]]$scores)[1])), from=as.character(rownames(results_simnetwork[[g]]$scores)), dogss=results_simnetwork[[g]]$scores[, "dogss"], ssep=results_simnetwork[[g]]$scores[, "ssep"], sgl=results_simnetwork[[g]]$scores[, "sgl"], gglasso=results_simnetwork[[g]]$scores[, "gglasso"], lasso=results_simnetwork[[g]]$scores[, "lasso"],stringsAsFactors=FALSE)) # } ranks <- apply(scorelist[, 3:7], 2, function(r) rank(1-r, ties.method="min")) pairs <- scorelist[, c(1,2)] truth_pairs <- rep(0, dim(pairs)[1]) recov_matrices <- list() prec_matrices <- list() methods <- c("dogss", "ssep", "sgl", "gglasso", "lasso") # for (m in methods) recov_matrices[[m]] <- matrix(0, nrow=ntfs, ncol=dim(truth_matrix)[2], dimnames=list(from=TFs, to=colnames(truth_matrix))) for (m in methods) prec_matrices[[m]] <- matrix(0, nrow=dim(truth_matrix)[1], ncol=dim(truth_matrix)[2], dimnames=list(from=rownames(truth_matrix), to=colnames(truth_matrix))) for (j in 1:length(truth_pairs)) { truth_pairs[j] <- truth_matrix[pairs[j,2], pairs[j,1]] for (m in methods) { recov_matrices[[m]][pairs[j,2], pairs[j,1]] <- ranks[j, m] } } # rocstuff K <- length(truth_pairs) P <- sum(truth_matrix[TFs, ]) N <- ntfs*ngenes-ntfs-P M <- length(methods) tpr <- matrix(0, K, M) fpr <- matrix(0, K, M) precision <- matrix(0, K, M) for (m in 1:M) { truth_ordered <- truth_matrix[TFs, ][order(recov_matrices[[m]])][-c(1:ntfs)] tpr[, m] <- cumsum(truth_ordered)/P fpr[, m] <- cumsum(!truth_ordered)/N precision[, m] <- tpr[, m]*P/(tpr[, m]*P + fpr[, m]*N) } AUROC <- sapply(1:length(methods), function(m) AUC(c(0,fpr[, m],1), c(0,tpr[, m],1))) AUPR <- sapply(1:length(methods), function(m) AUC(c(0,tpr[, m],1), c(1,precision[, m],0))) # prediction stuff X_test <- geneexpr_test[, TFs] # geneexpr_scale_test[, tfs] prederror <- rep(0, 5) names(prederror) <- c("dogss", "ssep", "sgl", "gglasso", "lasso") precm_errors <- prederror normPM <- norm(prec_matrix, "1") BETA_dogss <- truth_matrix[TFs, ] BETA_dogss[, ] <- 0 BETA_ssep <- BETA_dogss BETA_sgl <- BETA_dogss BETA_gglasso <- BETA_dogss BETA_lasso <- BETA_dogss X <- X_test[, TFs] for (g in genes) { Y <- geneexpr_test[, g] # geneexpr_center_test[, g] sumY2 <- sum(Y^2) res_dogss <- results_simnetwork[[g]]$results$dogss res_ssep <- results_simnetwork[[g]]$results$ssep res_sgl <- results_simnetwork[[g]]$results$sgl res_lasso <- results_simnetwork[[g]]$results$lasso res_gglasso <- results_simnetwork[[g]]$results$gglasso names(res_dogss$m_cv) <- TFs[TFs!=g] names(res_ssep$m_cv) <- TFs[TFs!=g] names(res_sgl$beta) <- TFs[TFs!=g] BETA_dogss[TFs[!(TFs %in% g)], g] <- res_dogss$m_cv[TFs[!(TFs %in% g)]] BETA_ssep[TFs[!(TFs %in% g)], g] <- res_ssep$m_cv[TFs[!(TFs %in% g)]] BETA_sgl[TFs[!(TFs %in% g)], g] <- res_sgl$beta[TFs[!(TFs %in% g)]] BETA_gglasso[TFs[!(TFs %in% g)], g] <- res_gglasso$gglasso.fit$beta[, which(res_gglasso$lambda==res_gglasso$lambda.1se)][TFs[!(TFs %in% g)]] BETA_lasso[TFs[!(TFs %in% g)], g] <- res_lasso$glmnet.fit$beta[, which(res_lasso$lambda==res_lasso$lambda.1se)][TFs[!(TFs %in% g)]] if (!(g%in%TFs)) { prederror["dogss"] <- prederror["dogss"] + sum((Y-X%*%BETA_dogss[, g])^2)/sumY2 prederror["ssep"] <- prederror["ssep"] + sum((Y-X%*%BETA_ssep[, g])^2)/sumY2 prederror["sgl"] <- prederror["sgl"] + sum((Y-X%*%BETA_sgl[, g])^2)/sumY2 prederror["gglasso"] <- prederror["gglasso"] + sum((Y-X%*%BETA_gglasso[, g])^2)/sumY2 prederror["lasso"] <- prederror["lasso"] + sum((Y-X%*%BETA_lasso[, g])^2)/sumY2 } prec_matrices[["dogss"]][g, g] <- nobs/sum((geneexpr_train[, g] - geneexpr_train[, TFs[!(TFs %in% g)]] %*% BETA_dogss[, g][TFs[!(TFs %in% g)]])^2) prec_matrices[["dogss"]][TFs[!(TFs %in% g)], g] <- -prec_matrices[["dogss"]][g, g] * BETA_dogss[, g][TFs[!(TFs %in% g)]] # minus is important :) prec_matrices[["dogss"]][g, TFs[!(TFs %in% g)]] <- prec_matrices[["dogss"]][TFs[!(TFs %in% g)], g] prec_matrices[["ssep"]][g, g] <- nobs/sum((geneexpr_train[, g] - geneexpr_train[, TFs[!(TFs %in% g)]] %*% BETA_ssep[, g][TFs[!(TFs %in% g)]])^2) prec_matrices[["ssep"]][TFs[!(TFs %in% g)], g] <- -prec_matrices[["ssep"]][g, g] * BETA_ssep[, g][TFs[!(TFs %in% g)]] # minus is important :) prec_matrices[["ssep"]][g, TFs[!(TFs %in% g)]] <- prec_matrices[["ssep"]][TFs[!(TFs %in% g)], g] prec_matrices[["sgl"]][g, g] <- nobs/sum((geneexpr_train[, g] - geneexpr_train[, TFs[!(TFs %in% g)]] %*% BETA_sgl[, g][TFs[!(TFs %in% g)]])^2) prec_matrices[["sgl"]][TFs[!(TFs %in% g)], g] <- -prec_matrices[["sgl"]][g, g] * BETA_sgl[, g][TFs[!(TFs %in% g)]] # minus is important :) prec_matrices[["sgl"]][g, TFs[!(TFs %in% g)]] <- prec_matrices[["sgl"]][TFs[!(TFs %in% g)], g] prec_matrices[["gglasso"]][g, g] <- nobs/sum((geneexpr_train[, g] - geneexpr_train[, TFs[!(TFs %in% g)]] %*% BETA_gglasso[, g][TFs[!(TFs %in% g)]])^2) prec_matrices[["gglasso"]][TFs[!(TFs %in% g)], g] <- -prec_matrices[["gglasso"]][g, g] * BETA_gglasso[, g][TFs[!(TFs %in% g)]] # minus is important :) prec_matrices[["gglasso"]][g, TFs[!(TFs %in% g)]] <- prec_matrices[["gglasso"]][TFs[!(TFs %in% g)], g] prec_matrices[["lasso"]][g, g] <- nobs/sum((geneexpr_train[, g] - geneexpr_train[, TFs[!(TFs %in% g)]] %*% BETA_lasso[, g][TFs[!(TFs %in% g)]])^2) prec_matrices[["lasso"]][TFs[!(TFs %in% g)], g] <- -prec_matrices[["lasso"]][g, g] * BETA_lasso[, g][TFs[!(TFs %in% g)]] # minus is important :) prec_matrices[["lasso"]][g, TFs[!(TFs %in% g)]] <- prec_matrices[["lasso"]][TFs[!(TFs %in% g)], g] } prederror <- prederror/length(genes[!(genes%in%TFs)]) for (m in methods) { precm_errors[m] <- norm(prec_matrix-prec_matrices[[m]], "1")/normPM } gr_cl_rd[[dg]] <- list(TPR=tpr, FPR=fpr, Prec=precision, AUROC=AUROC, AUPR=AUPR, prederror=prederror, precm_errors=precm_errors, times=times) } ## dg=1 bzw whole groups (not only TFs...) cat("whole groups... b=", b, "\n") results_simnetwork <- list() for (g in genes) { # cat("\nGene ", g, "\n") Y <- as.vector(geneexpr_train[, g]) # as.vector(geneexpr_center_train[, g]) index <- as.integer(partitions)[!(names(partitions) %in% g)] names(index) <- names(partitions)[!(names(partitions) %in% g)] x <- geneexpr_train[, names(index)] # geneexpr_scale_train[, names(index)] groups <- unique(index) ngroups <- length(groups) nG <- ngroups nngroups <- sapply(groups, function(g) sum(index==g)) while (max(groups) != ngroups) { # this is for gglasso which needs consecutively numbered groups :-/ del <- max(groups) index[index==del] <- which.min(1:nG %in% groups) # this is superweird :D groups <- unique(index) ngroups <- length(groups) nngroups <- sapply(groups, function(g) sum(index==g)) } if (is.vector(x)) x <- as.matrix(x, ncol=1) res_dogss <- cv_dogss(x, Y, G=index, tol=1e-03) res_ssep <- cv_dogss(x, Y, G=NULL, tol=1e-03) res_sgl <- tryCatch(my_cvSGL(data=list(x=x, y=Y), index=index, standardize=FALSE, thresh=1e-03, maxit=1000), error=function(e) NULL) res_gglasso <- cv.gglasso(x=x, y=Y, group=index, nfolds=10, intercept=FALSE, eps=1e-03, maxit=1000) res_lasso <- tryCatch(cv.glmnet(x, Y, standardize=FALSE, intercept=FALSE, thresh=1e-03, maxit=1000), error=function(e) NULL) res_sgl$lambda <- res_sgl$fit$lambdas if (is.null(res_sgl)) res_sgl <- res_gglasso # in this case (only one feature) the three methods are equivalent, but only gglasso gives results if (is.null(res_lasso)) res_lasso <- res_gglasso if (is.na(res_gglasso$lambda.1se)) { whichmin <- which.min(res_gglasso$cvm) res_gglasso$lambda.1se <- res_gglasso$lambda[which.max(res_gglasso$cvm<=res_gglasso$cvm[whichmin]+res_gglasso$cvsd[whichmin])] } res_sgl$mylambda <- apply(res_sgl$fit$beta, 1, function(b) res_sgl$lambda[which(b!=0)[1]]) res_gglasso$mylambda <- apply(res_gglasso$gglasso.fit$beta, 1, function(b) res_gglasso$lambda[which(b!=0)][1]) res_lasso$mylambda <- apply(res_lasso$glmnet.fit$beta, 1, function(b) res_lasso$lambda[which(b!=0)[1]]) scores <- cbind(dogss=res_dogss$p_final, ssep=res_ssep$p_final, sgl=res_sgl$mylambda, gglasso=res_gglasso$mylambda, lasso=res_lasso$mylambda) # ...$areascore # ...$p_final # , row.names(scores) <- names(index) results_simnetwork[[g]] <- list(scores=scores, results=list(dogss=res_dogss, ssep=res_ssep, sgl=res_sgl, gglasso=res_gglasso, lasso=res_lasso)) } save(results_simnetwork, file=paste0("/project/dogss/tests_thesis/02_networkreconstruction/results_network_grclrd_", 1, "_", b, ".RData")) scorelist <- data.frame(to=character(), from=character(), dogss=numeric(), ssep=numeric(), sgl=numeric(), gglasso=numeric(), lasso=numeric(), stringsAsFactors=FALSE) # , for (g in genes) { scorelist <- rbind(scorelist, data.frame(to=as.character(rep(g, dim(results_simnetwork[[g]]$scores)[1])), from=as.character(rownames(results_simnetwork[[g]]$scores)), dogss=results_simnetwork[[g]]$scores[, "dogss"], ssep=results_simnetwork[[g]]$scores[, "ssep"], sgl=results_simnetwork[[g]]$scores[, "sgl"], gglasso=results_simnetwork[[g]]$scores[, "gglasso"], lasso=results_simnetwork[[g]]$scores[, "lasso"],stringsAsFactors=FALSE)) # } ranks <- apply(scorelist[, 3:7], 2, function(r) rank(1-r, ties.method="min")) pairs <- scorelist[, c(1,2)] truth_pairs <- rep(0, dim(pairs)[1]) recov_matrices <- list() methods <- c("dogss", "ssep", "sgl", "gglasso", "lasso") # for (m in methods) recov_matrices[[m]] <- matrix(0, nrow=dim(truth_matrix)[1], ncol=dim(truth_matrix)[2], dimnames=list(from=rownames(truth_matrix), to=colnames(truth_matrix))) prec_matrices <- recov_matrices for (j in 1:length(truth_pairs)) { truth_pairs[j] <- truth_matrix[pairs[j,2], pairs[j,1]] for (m in methods) { if (recov_matrices[[m]][pairs[j,2], pairs[j,1]]==0 | recov_matrices[[m]][pairs[j,2], pairs[j,1]] > ranks[j, m]) { recov_matrices[[m]][pairs[j,2], pairs[j,1]] <- ranks[j, m] recov_matrices[[m]][pairs[j,1], pairs[j,2]] <- ranks[j, m] } } } # rocstuff K <- length(truth_pairs)/2 P <- sum(truth_matrix)/2 N <- (ngenes*ngenes-ngenes)/2-P M <- length(methods) tpr <- matrix(0, K, M) fpr <- matrix(0, K, M) precision <- matrix(0, K, M) for (m in 1:M) { truth_ordered <- truth_matrix[upper.tri(truth_matrix, diag=FALSE)][order(recov_matrices[[m]][upper.tri(recov_matrices[[m]], diag = FALSE)])] tpr[, m] <- cumsum(truth_ordered)/P fpr[, m] <- cumsum(!truth_ordered)/N precision[, m] <- tpr[, m]*P/(tpr[, m]*P + fpr[, m]*N) } AUROC <- sapply(1:length(methods), function(m) AUC(c(0,fpr[, m],1), c(0,tpr[, m],1))) AUPR <- sapply(1:length(methods), function(m) AUC(c(0,tpr[, m],1), c(1,precision[, m],0))) # prediction stuff X_test <- geneexpr_test# [, TFs] # geneexpr_scale_test[, tfs] X_test[, genes[!(genes%in%TFs)]] <- 0 prederror <- rep(0, 5) names(prederror) <- c("dogss", "ssep", "sgl", "gglasso", "lasso") precm_errors <- prederror normPM <- norm(prec_matrix, "1") BETA_dogss <- truth_matrix BETA_dogss[, ] <- 0 BETA_ssep <- BETA_dogss BETA_sgl <- BETA_dogss BETA_gglasso <- BETA_dogss BETA_lasso <- BETA_dogss for (g in genes) { Y <- geneexpr_test[, g] # geneexpr_center_test[, g] sumY2 <- sum(Y^2) # X <- geneexpr_test[, genes[genes!=g]] # X[, ] <- 0 X <- X_test[, genes[genes!=g]] res_dogss <- results_simnetwork[[g]]$results$dogss res_ssep <- results_simnetwork[[g]]$results$ssep res_sgl <- results_simnetwork[[g]]$results$sgl res_lasso <- results_simnetwork[[g]]$results$lasso res_gglasso <- results_simnetwork[[g]]$results$gglasso names(res_dogss$m_cv) <- genes[genes!=g] names(res_ssep$m_cv) <- genes[genes!=g] names(res_sgl$beta) <- genes[genes!=g] BETA_dogss[genes[!(genes %in% g)], g] <- res_dogss$m_cv BETA_ssep[genes[!(genes %in% g)], g] <- res_ssep$m_cv BETA_sgl[genes[!(genes %in% g)], g] <- res_sgl$beta BETA_gglasso[genes[!(genes %in% g)], g] <- res_gglasso$gglasso.fit$beta[, which(res_gglasso$lambda==res_gglasso$lambda.1se)] BETA_lasso[genes[!(genes %in% g)], g] <- res_lasso$glmnet.fit$beta[, which(res_lasso$lambda==res_lasso$lambda.1se)] if (!(g %in% TFs)) { prederror["dogss"] <- prederror["dogss"] + sum((Y-X%*%BETA_dogss[genes!=g, g])^2)/sumY2 prederror["ssep"] <- prederror["ssep"] + sum((Y-X%*%BETA_ssep[genes!=g, g])^2)/sumY2 prederror["sgl"] <- prederror["sgl"] + sum((Y-X%*%BETA_sgl[genes!=g, g])^2)/sumY2 prederror["gglasso"] <- prederror["gglasso"] + sum((Y-X%*%BETA_gglasso[genes!=g, g])^2)/sumY2 prederror["lasso"] <- prederror["lasso"] + sum((Y-X%*%BETA_lasso[genes!=g, g])^2)/sumY2 } prec_matrices[["dogss"]][g, g] <- nobs/sum((geneexpr_train[, g] - geneexpr_train[, genes[!(genes %in% g)]] %*% BETA_dogss[, g][genes[!(genes %in% g)]])^2) prec_matrices[["dogss"]][genes[!(genes %in% g)], g] <- -prec_matrices[["dogss"]][g, g] * BETA_dogss[, g][genes[!(genes %in% g)]] # minus is important :) prec_matrices[["ssep"]][g, g] <- nobs/sum((geneexpr_train[, g] - geneexpr_train[, genes[!(genes %in% g)]] %*% BETA_ssep[, g][genes[!(genes %in% g)]])^2) prec_matrices[["ssep"]][genes[!(genes %in% g)], g] <- -prec_matrices[["ssep"]][g, g] * BETA_ssep[, g][genes[!(genes %in% g)]] # minus is important :) prec_matrices[["sgl"]][g, g] <- nobs/sum((geneexpr_train[, g] - geneexpr_train[, genes[!(genes %in% g)]] %*% BETA_sgl[, g][genes[!(genes %in% g)]])^2) prec_matrices[["sgl"]][genes[!(genes %in% g)], g] <- -prec_matrices[["sgl"]][g, g] * BETA_sgl[, g][genes[!(genes %in% g)]] # minus is important :) prec_matrices[["gglasso"]][g, g] <- nobs/sum((geneexpr_train[, g] - geneexpr_train[, genes[!(genes %in% g)]] %*% BETA_gglasso[, g][genes[!(genes %in% g)]])^2) prec_matrices[["gglasso"]][genes[!(genes %in% g)], g] <- -prec_matrices[["gglasso"]][g, g] * BETA_gglasso[, g][genes[!(genes %in% g)]] # minus is important :) prec_matrices[["lasso"]][g, g] <- nobs/sum((geneexpr_train[, g] - geneexpr_train[, genes[!(genes %in% g)]] %*% BETA_lasso[, g][genes[!(genes %in% g)]])^2) prec_matrices[["lasso"]][genes[!(genes %in% g)], g] <- -prec_matrices[["lasso"]][g, g] * BETA_lasso[, g][genes[!(genes %in% g)]] # minus is important :) } prederror <- prederror/length(genes[!(genes%in%TFs)]) for (m in methods) { precm_errors[m] <- norm(prec_matrix-prec_matrices[[m]], "1")/normPM } gr_cl_rd[[1]] <- list(TPR=tpr, FPR=fpr, Prec=precision, AUROC=AUROC, AUPR=AUPR, prederror=prederror, precm_errors=precm_errors, times=times) save(gr_cl_rd, file=paste0("/project/dogss/tests_thesis/02_networkreconstruction/net_gr_cl_rd_", b, ".RData")) load(file=paste0("/project/dogss/tests_thesis/02_networkreconstruction/net_onetwothree_", b, ".RData")) list(onetwothree=onetwothree, gr_cl_rd=gr_cl_rd) } stopCluster(myCluster) save.image(file="/project/dogss/tests_thesis/02_networkreconstruction.RData")
# load("~/Programme/dogss/testing/sim2/finalresults_networksim.RData") B <- 100 for (i in 1:2) { auroc <- matrix(0, nrow=B, ncol=5, dimnames=list(run=1:100, method=c("dogss", "ssep", "sgl", "gglasso", "lasso"))) aupr <- auroc prederror <- auroc for (b in 1:B) { load(paste0("/project/dogss/tests_thesis/02_networkreconstruction_qpgraph/02_networkreconstruction/net_onetwothree_", b, ".RData")) auroc[b, ] <- onetwothree[[i]]$AUROC aupr[b, ] <- onetwothree[[i]]$AUPR prederror[b, ] <- onetwothree[[i]]$prederror } data <- data.frame(melt(auroc), measure="auroc") data <- rbind(data, data.frame(melt(aupr), measure="aupr")) data <- rbind(data, data.frame(melt(prederror), measure="pred. error")) data$method <- factor(data$method, ordered=TRUE, levels=c("dogss", "ssep", "sgl", "gglasso", "lasso")) auroc_data <- subset(data, measure=="auroc") P_auroc <- ggplot(auroc_data, aes(x=method, y=value)) + geom_boxplot() + theme_Ed() + scale_colour_Ed() + ylim(0,1) + labs(y="AUROC") + theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank()) Pd_auroc <- ggplot_build(P_auroc)$data[[1]] P_auroc <- P_auroc + geom_segment(data=Pd_auroc, aes(x=xmin, xend=xmax, y=middle, yend=middle, colour=unique(auroc_data$method)), size=1, show.legend=TRUE) + labs(colour="method") aupr_data <- subset(data, measure=="aupr") P_aupr <- ggplot(aupr_data, aes(x=method, y=value)) + geom_boxplot() + theme_Ed() + scale_colour_Ed() + ylim(0,1) + labs(y="AUPR") + theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank()) Pd_aupr <- ggplot_build(P_aupr)$data[[1]] P_aupr <- P_aupr + geom_segment(data=Pd_aupr, aes(x=xmin, xend=xmax, y=middle, yend=middle, colour=unique(aupr_data$method)), size=1, show.legend=FALSE) prederror_data <- subset(data, measure=="pred. error") P_prederror <- ggplot(prederror_data, aes(x=method, y=value)) + geom_boxplot() + theme_Ed() + scale_colour_Ed() + ylim(0,1) + labs(y="pred. error") + theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank()) Pd_prederror <- ggplot_build(P_prederror)$data[[1]] P_prederror <- P_prederror + geom_segment(data=Pd_prederror, aes(x=xmin, xend=xmax, y=middle, yend=middle, colour=unique(prederror_data$method)), size=1, show.legend=FALSE) grid_arrange_shared_legend(P_auroc, P_aupr, P_prederror, ncol = 3, nrow = 1, which.legend = 1) }
# load("~/Programme/dogss/testing/sim2_grouping_tfs/extra_results_network_grouping.RData") # load("/project/dogss/testing/testing/sim2_new/finalresults_sim2_new_area.RData") # mydata$method <- factor(mydata$method, ordered=TRUE, levels=c("dogss", "ssep", "sgl", "gglasso", "lasso")) # mydata$grouping <- factor(mydata$grouping, ordered=TRUE, levels=c("original", "kmeans", "random")) # mydata <- subset(mydata, !(method %in% c("ssep", "lasso") & grouping %in% c("kmeans", "random"))) auroc <- data.frame(value=numeric(), method=character(), grouping=character()) aupr <- data.frame(value=numeric(), method=character(), grouping=character()) prederror <- data.frame(value=numeric(), method=character(), grouping=character()) method <- c("dogss", "ssep", "sgl", "gglasso", "lasso") B <- 100 for (b in 1:B) { load(paste0("/project/dogss/tests_thesis/02_networkreconstruction_qpgraph/02_networkreconstruction/small_net_whole_", b, ".RData")) for (m in 1:length(method)) { auroc <- rbind(auroc, data.frame(value=whole[[1]]$AUROC[m], method=method[m], grouping="original")) aupr <- rbind(aupr, data.frame(value=whole[[1]]$AUPR[m], method=method[m], grouping="original")) prederror <- rbind(prederror, data.frame(value=whole[[1]]$prederror[m], method=method[m], grouping="original")) if (m%in%c(1,3,4)) { auroc <- rbind(auroc, data.frame(value=whole[[2]]$AUROC[m], method=method[m], grouping="random")) aupr <- rbind(aupr, data.frame(value=whole[[2]]$AUPR[m], method=method[m], grouping="random")) prederror <- rbind(prederror, data.frame(value=whole[[2]]$prederror[m], method=method[m], grouping="random")) } } } plot_auroc <- ggplot(aes(y = value, x = method, fill = grouping), data = auroc) + geom_boxplot(alpha=0.6) + theme_Ed() + ylim(0, 1) + scale_fill_Ed() + theme(axis.title.x=element_blank()) + ylab("AUROC") plot_aupr <- ggplot(aes(y = value, x = method, fill = grouping), data = aupr) + geom_boxplot(alpha=0.6) + theme_Ed() + ylim(0, 1) + scale_fill_Ed() + theme(axis.title.x=element_blank()) + ylab("AUPR") plot_prederror <- ggplot(aes(y = value, x = method, fill = grouping), data = prederror) + geom_boxplot(alpha=0.6) + theme_Ed() + ylim(0, 1) + scale_fill_Ed() + theme(axis.title.x=element_blank()) + ylab("pred. error") grid_arrange_shared_legend(plot_auroc, plot_aupr, plot_prederror, ncol = 3, nrow = 1, which.legend = 1) ### large networks auroc <- data.frame(value=numeric(), method=character(), grouping=character()) aupr <- data.frame(value=numeric(), method=character(), grouping=character()) prederror <- data.frame(value=numeric(), method=character(), grouping=character()) method <- c("dogss", "ssep", "sgl", "gglasso", "lasso") B <- 100 for (b in 1:B) { load(paste0("/project/dogss/tests_thesis/02_networkreconstruction_qpgraph/02_networkreconstruction/net_whole_", b, ".RData")) for (m in 1:length(method)) { auroc <- rbind(auroc, data.frame(value=whole[[1]]$AUROC[m], method=method[m], grouping="original")) aupr <- rbind(aupr, data.frame(value=whole[[1]]$AUPR[m], method=method[m], grouping="original")) prederror <- rbind(prederror, data.frame(value=whole[[1]]$prederror[m], method=method[m], grouping="original")) if (m%in%c(1,3,4)) { auroc <- rbind(auroc, data.frame(value=whole[[2]]$AUROC[m], method=method[m], grouping="random")) aupr <- rbind(aupr, data.frame(value=whole[[2]]$AUPR[m], method=method[m], grouping="random")) prederror <- rbind(prederror, data.frame(value=whole[[2]]$prederror[m], method=method[m], grouping="random")) } } } plot_auroc <- ggplot(aes(y = value, x = method, fill = grouping), data = auroc) + geom_boxplot(alpha=0.6) + theme_Ed() + ylim(0, 1) + scale_fill_Ed() + theme(axis.title.x=element_blank()) + ylab("AUROC") plot_aupr <- ggplot(aes(y = value, x = method, fill = grouping), data = aupr) + geom_boxplot(alpha=0.6) + theme_Ed() + ylim(0, 1) + scale_fill_Ed() + theme(axis.title.x=element_blank()) + ylab("AUPR") plot_prederror <- ggplot(aes(y = value, x = method, fill = grouping), data = prederror) + geom_boxplot(alpha=0.6) + theme_Ed() + ylim(0, 1) + scale_fill_Ed() + theme(axis.title.x=element_blank()) + ylab("pred. error") grid_arrange_shared_legend(plot_auroc, plot_aupr, plot_prederror, ncol = 3, nrow = 1, which.legend = 1)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.