notebooks/networks/networks.md

Network reconstruction with dogss: Simulations

Edgar Steiger 2018

work in progress

This document shows and explains how to use the dogss package and how to reproduce Figures 9 to 13 from our paper Sparse-Group Bayesian Feature Selection Using Expectation Propagation for Signal Recovery and Network Reconstruction.

First we need to load some packages that are required for comparisons and plotting (please install if not available on your machine):

library(dogss) # our method for sparse-group Bayesian feature selection with EP
library(glmnet) # standard lasso
library(gglasso) # group lasso
library(SGL) # sparse-group lasso
library(MBSGS) # Bayesian feature selection with Gibbs sampling

library(ggplot2) # for nice plots
library(ggthemes) # for even nicer plots
library(grid); library(gridExtra) # to arrange plots pleasantly

library(reshape2) # to melt data into "tidy" long-format

library(DescTools) # for area computations (AUROC, AUPR)

Furthermore we need to load three R files with additional code:

source("../auxiliary_rfunctions/my_cvSGL.R") # proper cross validation for SGL package

source("../auxiliary_rfunctions/my_theme.R") # functions to adjust ggplots

Finally, we provide all of the results on our simulated data to reconstruct the plots from the publication. If you wish to re-do all of the simulations/calculations, change the following parameter to TRUE (only do this if you have access to multiple cores):

selfcompute <- FALSE
B <- 100 # number of simulations
ncores <- 50 # number of cores used for parallelization

Simulations

example network

  # load("~/Programme/dogss/testing/sim2/simnet_onetwothree_96_2.RData")
  par(oma=c(0,0,0,0), mar=c(0,0,0,0))
  plot.igraph(graph_from_adjacency_matrix(t(truth_matrix)), vertex.size=5, vertex.label=NA, vertex.color="white", vertex.frame.color=Ed_palette[1], vertex.shape=ifelse(row.names(truth_matrix) %in% names(G), "square", "circle"), edge.width=1, edge.arrow.mode=0, edge.curved=FALSE, vertex.label.family="CM Roman", asp=3/5) #  layout=layout.big

Sim networks

library(mvtnorm)
library(qpgraph)
library(sparseMVN)
library(psych)

library(SGL)
library(gglasso)
library(glmnet)
library(dogss2)

library(DescTools)
library(methods)
library(Matrix)

library(foreach)
library(doParallel)

source("/project/dogss/my_cvSGL.R")

##### simulation of kind-of scale-free network graph
cK <- c(100, 1000, 5000) # 100 # number of genes total
cTF <- c(10, 100, 500) # number of transcription factors/hubs
cBG <- c(3, 20, 50) # 10 # number of "big groups"
cq <- c(0.01, 0.001, 0.0002) # 20/((S*BG)*(K-1))
# sigma_0 <- 0.1

B <- 100

ncores <- 50

# generate the network graphs and data
ncl <- detectCores()
ncl <- min(ncl, ncores)
myCluster1 <- makeCluster(ncl, outfile="/project/dogss/tests_thesis/log_02_networkreconstruction_generate.txt")
registerDoParallel(myCluster1)
networks <- foreach (b=1:B, .packages=c("sparseMVN", "psych", "Matrix", "mvtnorm", "qpgraph"), .errorhandling='pass') %dopar% {
  cat(b, "\n")
  for (i in 1:2) {

    K <- cK[i]
    BG <- cBG[i]
    q <- cq[i]
    ntfs <- cTF[i]

    genes <- paste0("G", 1:K)
    TFs <- sample(genes, ntfs)
    partitions <- vector("integer", K)
    names(partitions) <- genes
    partitions[TFs[1:BG]] <- 1:BG
    partitions[TFs[(BG+1):ntfs]] <- sample(1:BG, ntfs-BG, replace=TRUE)
    probsBG <- runif(BG) # probs for big groups to get different sizes
    partitions[!(genes%in%TFs)] <- sample(1:BG, K-ntfs, replace=TRUE, prob=probsBG) # every node gets assigned a big group
    connectlist <- c(0, 0)

    for (g in genes) {
      potentialTFs <- names(partitions[TFs][partitions[TFs]==partitions[g]])
      if (sum(potentialTFs!=g)!=0) {
        firstconnect <- sample(potentialTFs[potentialTFs!=g], 1)
        connectlist <- rbind(connectlist, c(firstconnect, g))
        for (tf in potentialTFs[!potentialTFs%in%c(firstconnect, g)]) {
          if (runif(1)<0.5) connectlist <- rbind(connectlist, c(tf, g))
        }
      }
      for (h in TFs) { # here we add random edges
        if (runif(1) <= q & h != g) connectlist <- rbind(connectlist, c(h, g)) # c(fromTF, toGene)
      }
    }
    connectlist <- connectlist[-1, ]

    dat.sort <- t(apply(connectlist, 1, sort))
    newconnectlist <- connectlist[!duplicated(dat.sort), ] # remove duplicated. don't ask me why this works, stackoverflow

    truth_matrix <- matrix(0, nrow=length(partitions), ncol=length(partitions), dimnames=list(from=names(partitions), to=names(partitions)))
    # BETA <- truth_matrix
    # prec_matrix <- truth_matrix
    for (j in 1:dim(newconnectlist)[1]) {
      truth_matrix[newconnectlist[j, 1], newconnectlist[j, 2]] <- 1
      truth_matrix[newconnectlist[j, 2], newconnectlist[j, 1]] <- 1
      # thisminusbeta <- rnorm(1) # runif(1, -5, 5)# sample(c(runif(1,-5,-1), runif(1, 1, 5)), 1)
      # prec_matrix[newconnectlist[j, 1], newconnectlist[j, 2]] <- thisminusbeta
      # prec_matrix[newconnectlist[j, 2], newconnectlist[j, 1]] <- thisminusbeta
    }
    # diag(prec_matrix) <- 1000 # rnorm(length(diag(prec_matrix))) # runif(length(diag(prec_matrix)), -5, 5)
    # H2 <- glb.algebraic(prec_matrix)$solution
    # diag(prec_matrix) <- H2 + 0.01

    # for (j in seq(dim(BETA)[2])) BETA[, j] <- -prec_matrix[, j]/prec_matrix[j, j]
    # diag(BETA) <- 0

    truth_matrix <- as.matrix(as(truth_matrix, "symmetricMatrix"))
    mygraph <- rUGgmm(truth_matrix)
    prec_matrix <- solve(mygraph$sigma)
    geneexpr <- rmvnorm(200, mygraph)
    dimnames(prec_matrix) <- list(from=colnames(geneexpr), to=colnames(geneexpr))
    geneexpr <- geneexpr[, genes]
    prec_matrix <- prec_matrix[, genes]; prec_matrix <- prec_matrix[genes, ]
    pm_nz <- truth_matrix
    diag(pm_nz) <- 1
    prec_matrix[pm_nz!=1] <- 0

    # prec_matrix <- Matrix(as(prec_matrix, "symmetricMatrix"), sparse=TRUE)

    # geneexpr <- rmvn.sparse(200, rep(0, dim(prec_matrix)[1]), Cholesky(prec_matrix)) # + rnorm(200*dim(prec_matrix)[1], 0, sigma_0)
    # colnames(geneexpr) <- genes

    train <- 1:100
    test <- 101:200

    geneexpr_train <- geneexpr[train, ]
    geneexpr_test <- geneexpr[test, ]

    G <- partitions[TFs]
    ngenes <- dim(geneexpr)[2]
    nobs <- 100 # dim(geneexpr)[1]
    # ntfs <- length(tfs)
    neffs <- sum(truth_matrix[TFs, ])

    save(mygraph, geneexpr, geneexpr_train, geneexpr_test, prec_matrix, genes, TFs, G, ngenes, nobs, ntfs, truth_matrix, neffs, partitions, file=paste0("/project/dogss/tests_thesis/02_networkreconstruction/sim_network_", b, "_", i, ".RData")) # save.image funktioniert mit foreach nicht... # Graph, exps, lambdapath
  }
  b
}
stopCluster(myCluster1)
save.image(file="/project/dogss/tests_thesis/02_networkreconstruction_generate.RData")

# reconstruct the networks!
ncl <- detectCores()
ncl <- min(ncl, ncores)
myCluster <- makeCluster(ncl, outfile="/project/dogss/tests_thesis/log_02_networkreconstruction.txt")
registerDoParallel(myCluster)

results <- foreach (b=1:B, .packages=c("dogss2", "gglasso", "SGL", "glmnet", "DescTools"), .errorhandling='pass') %dopar% {
  cat("b=", b, "\n")
  onetwothree <- vector("list", 3)
  gr_cl_rd <- vector("list", 3)

  for (i in 1:2) { # different scenarios for number of observations, groups etc
    cat("b=", b, ", i=", i, "\n")

    load(file=paste0("/project/dogss/tests_thesis/02_networkreconstruction/sim_network_", b, "_", i, ".RData"))

    results_simnetwork <- list()

    for (g in genes) {
      # cat("\nGene ", g, "\n")
      Y <- as.vector(geneexpr_train[, g]) # as.vector(geneexpr_center_train[, g])
      index <- as.integer(G)[!(names(G) %in% g)]
      names(index) <- names(G)[!(names(G) %in% g)]
      x <- geneexpr_train[, names(index)] # geneexpr_scale_train[, names(index)]

      groups <- unique(index)
      ngroups <- length(groups)
      nG <- ngroups
      nngroups <- sapply(groups, function(g) sum(index==g))

      while (max(groups) != ngroups) { # this is for gglasso which needs consecutively numbered groups :-/
        del <- max(groups)
        index[index==del] <- which.min(1:nG %in% groups) # this is superweird :D
        groups <- unique(index)
        ngroups <- length(groups)
        nngroups <- sapply(groups, function(g) sum(index==g))
      }

      if (is.vector(x)) x <- as.matrix(x, ncol=1)

      res_dogss <- cv_dogss(x, Y, G=index, tol=1e-03)
      res_ssep <- cv_dogss(x, Y, G=NULL, tol=1e-03)
      res_sgl <- tryCatch(my_cvSGL(data=list(x=x, y=Y), index=index, standardize=FALSE, thresh=1e-03, maxit=1000), error=function(e) NULL)
      res_gglasso <- cv.gglasso(x=x, y=Y, group=index, nfolds=10, intercept=FALSE, eps=1e-03, maxit=1000)
      res_lasso <- tryCatch(cv.glmnet(x, Y, standardize=FALSE, intercept=FALSE, thresh=1e-03, maxit=1000), error=function(e) NULL)

      res_sgl$lambda <- res_sgl$fit$lambdas

      if (is.null(res_sgl)) res_sgl <- res_gglasso # in this case (only one feature) the three methods are equivalent, but only gglasso gives results
      if (is.null(res_lasso)) res_lasso <- res_gglasso

      res_sgl$mylambda <- apply(res_sgl$fit$beta, 1, function(b) res_sgl$lambda[which(b!=0)[1]])
      res_gglasso$mylambda <- apply(res_gglasso$gglasso.fit$beta, 1, function(b) res_gglasso$lambda[which(b!=0)][1])
      res_lasso$mylambda <- apply(res_lasso$glmnet.fit$beta, 1, function(b) res_lasso$lambda[which(b!=0)[1]])

      scores <- cbind(dogss=res_dogss$p_final, ssep=res_ssep$p_final, sgl=res_sgl$mylambda, gglasso=res_gglasso$mylambda, lasso=res_lasso$mylambda) # ...$areascore # ...$p_final # ,
      row.names(scores) <- names(index)
      results_simnetwork[[g]] <- list(scores=scores, results=list(dogss=res_dogss, ssep=res_ssep, sgl=res_sgl, gglasso=res_gglasso, lasso=res_lasso))
    }

    save(results_simnetwork, file=paste0("/project/dogss/tests_thesis/02_networkreconstruction/results_network_onetwothree_", i, "_", b, ".RData"))

    scorelist <- data.frame(to=character(), from=character(), dogss=numeric(), ssep=numeric(), sgl=numeric(), gglasso=numeric(), lasso=numeric(), stringsAsFactors=FALSE) # ,
    for (g in genes) {
      scorelist <- rbind(scorelist, data.frame(to=as.character(rep(g, dim(results_simnetwork[[g]]$scores)[1])), from=as.character(rownames(results_simnetwork[[g]]$scores)), dogss=results_simnetwork[[g]]$scores[, "dogss"], ssep=results_simnetwork[[g]]$scores[, "ssep"], sgl=results_simnetwork[[g]]$scores[, "sgl"], gglasso=results_simnetwork[[g]]$scores[, "gglasso"], lasso=results_simnetwork[[g]]$scores[, "lasso"],stringsAsFactors=FALSE)) #
    }
    ranks <- apply(scorelist[, 3:7], 2, function(r) rank(1-r, ties.method="min"))
    pairs <- scorelist[, c(1,2)]
    truth_pairs <- rep(0, dim(pairs)[1])
    recov_matrices <- list()
    prec_matrices <- list()
    methods <- c("dogss", "ssep", "sgl", "gglasso", "lasso") #
    for (m in methods) recov_matrices[[m]] <- matrix(0, nrow=ntfs, ncol=dim(truth_matrix)[2], dimnames=list(from=TFs, to=colnames(truth_matrix)))
    for (m in methods) prec_matrices[[m]] <- matrix(0, nrow=dim(truth_matrix)[1], ncol=dim(truth_matrix)[2], dimnames=list(from=rownames(truth_matrix), to=colnames(truth_matrix)))
    for (j in 1:length(truth_pairs)) {
      truth_pairs[j] <- truth_matrix[pairs[j,2], pairs[j,1]]
      for (m in methods) {
        recov_matrices[[m]][pairs[j,2], pairs[j,1]] <- ranks[j, m]
      }
    } 

    # rocstuff
    K <- length(truth_pairs)
    P <- sum(truth_matrix[TFs, ])
    N <- ntfs*ngenes-ntfs-P

    M <- length(methods)

    tpr <- matrix(0, K, M)
    fpr <- matrix(0, K, M)
    precision <- matrix(0, K, M)

    for (m in 1:M) {
      truth_ordered <- truth_matrix[TFs, ][order(recov_matrices[[m]])][-c(1:ntfs)]
      tpr[, m] <- cumsum(truth_ordered)/P
      fpr[, m] <- cumsum(!truth_ordered)/N
      precision[, m] <- tpr[, m]*P/(tpr[, m]*P + fpr[, m]*N)
    }
    AUROC <- sapply(1:length(methods), function(m) AUC(c(0,fpr[, m],1), c(0,tpr[, m],1)))
    AUPR <- sapply(1:length(methods), function(m) AUC(c(0,tpr[, m],1), c(1,precision[, m],0)))

    # prediction stuff 
    X_test <- geneexpr_test[, TFs] # geneexpr_scale_test[, tfs]
    prederror <- rep(0, 5)
    names(prederror) <- c("dogss", "ssep", "sgl", "gglasso", "lasso")
    precm_errors <- prederror
    normPM <- norm(prec_matrix, "1")
    BETA_dogss <- truth_matrix[TFs, ]
    BETA_dogss[, ] <- 0
    BETA_ssep <- BETA_dogss
    BETA_sgl <- BETA_dogss
    BETA_gglasso <- BETA_dogss
    BETA_lasso <- BETA_dogss

    X <- X_test[, TFs]

    for (g in genes) {
      Y <- geneexpr_test[, g] # geneexpr_center_test[, g]
      sumY2 <- sum(Y^2)

      res_dogss <- results_simnetwork[[g]]$results$dogss
      res_ssep <- results_simnetwork[[g]]$results$ssep
      res_sgl <- results_simnetwork[[g]]$results$sgl
      res_lasso <- results_simnetwork[[g]]$results$lasso
      res_gglasso <- results_simnetwork[[g]]$results$gglasso
      names(res_dogss$m_cv) <- TFs[TFs!=g]
      names(res_ssep$m_cv) <- TFs[TFs!=g]
      names(res_sgl$beta) <- TFs[TFs!=g]
      BETA_dogss[TFs[!(TFs %in% g)], g] <- res_dogss$m_cv[TFs[!(TFs %in% g)]]
      BETA_ssep[TFs[!(TFs %in% g)], g] <- res_ssep$m_cv[TFs[!(TFs %in% g)]]
      BETA_sgl[TFs[!(TFs %in% g)], g] <- res_sgl$beta[TFs[!(TFs %in% g)]]
      BETA_gglasso[TFs[!(TFs %in% g)], g] <- res_gglasso$gglasso.fit$beta[, which(res_gglasso$lambda==res_gglasso$lambda.1se)][TFs[!(TFs %in% g)]]
      BETA_lasso[TFs[!(TFs %in% g)], g] <- res_lasso$glmnet.fit$beta[, which(res_lasso$lambda==res_lasso$lambda.1se)][TFs[!(TFs %in% g)]]

      if (!(g%in%TFs)) {
        prederror["dogss"] <- prederror["dogss"] + sum((Y-X%*%BETA_dogss[, g])^2)/sumY2
        prederror["ssep"] <- prederror["ssep"] + sum((Y-X%*%BETA_ssep[, g])^2)/sumY2
        prederror["sgl"] <- prederror["sgl"] + sum((Y-X%*%BETA_sgl[, g])^2)/sumY2
        prederror["gglasso"] <- prederror["gglasso"] + sum((Y-X%*%BETA_gglasso[, g])^2)/sumY2
        prederror["lasso"] <- prederror["lasso"] + sum((Y-X%*%BETA_lasso[, g])^2)/sumY2
      }

      prec_matrices[["dogss"]][g, g] <- nobs/sum((geneexpr_train[, g] - geneexpr_train[, TFs[!(TFs %in% g)]] %*% BETA_dogss[, g][TFs[!(TFs %in% g)]])^2)
      prec_matrices[["dogss"]][TFs[!(TFs %in% g)], g] <- -prec_matrices[["dogss"]][g, g] * BETA_dogss[, g][TFs[!(TFs %in% g)]]  # minus is important :)
      prec_matrices[["dogss"]][g, TFs[!(TFs %in% g)]] <- prec_matrices[["dogss"]][TFs[!(TFs %in% g)], g]

      prec_matrices[["ssep"]][g, g] <- nobs/sum((geneexpr_train[, g] - geneexpr_train[, TFs[!(TFs %in% g)]] %*% BETA_ssep[, g][TFs[!(TFs %in% g)]])^2)
      prec_matrices[["ssep"]][TFs[!(TFs %in% g)], g] <- -prec_matrices[["ssep"]][g, g] * BETA_ssep[, g][TFs[!(TFs %in% g)]]  # minus is important :)
      prec_matrices[["ssep"]][g, TFs[!(TFs %in% g)]] <- prec_matrices[["ssep"]][TFs[!(TFs %in% g)], g]

      prec_matrices[["sgl"]][g, g] <- nobs/sum((geneexpr_train[, g] - geneexpr_train[, TFs[!(TFs %in% g)]] %*% BETA_sgl[, g][TFs[!(TFs %in% g)]])^2)
      prec_matrices[["sgl"]][TFs[!(TFs %in% g)], g] <- -prec_matrices[["sgl"]][g, g] * BETA_sgl[, g][TFs[!(TFs %in% g)]]  # minus is important :)
      prec_matrices[["sgl"]][g, TFs[!(TFs %in% g)]] <- prec_matrices[["sgl"]][TFs[!(TFs %in% g)], g]

      prec_matrices[["gglasso"]][g, g] <- nobs/sum((geneexpr_train[, g] - geneexpr_train[, TFs[!(TFs %in% g)]] %*% BETA_gglasso[, g][TFs[!(TFs %in% g)]])^2)
      prec_matrices[["gglasso"]][TFs[!(TFs %in% g)], g] <- -prec_matrices[["gglasso"]][g, g] * BETA_gglasso[, g][TFs[!(TFs %in% g)]]  # minus is important :)
      prec_matrices[["gglasso"]][g, TFs[!(TFs %in% g)]] <- prec_matrices[["gglasso"]][TFs[!(TFs %in% g)], g]

      prec_matrices[["lasso"]][g, g] <- nobs/sum((geneexpr_train[, g] - geneexpr_train[, TFs[!(TFs %in% g)]] %*% BETA_lasso[, g][TFs[!(TFs %in% g)]])^2)
      prec_matrices[["lasso"]][TFs[!(TFs %in% g)], g] <- -prec_matrices[["lasso"]][g, g] * BETA_lasso[, g][TFs[!(TFs %in% g)]]  # minus is important :)
      prec_matrices[["lasso"]][g, TFs[!(TFs %in% g)]] <- prec_matrices[["lasso"]][TFs[!(TFs %in% g)], g]

    }
    prederror <- prederror/length(genes[!(genes%in%TFs)])
    for (m in methods) {
      precm_errors[m] <- norm(prec_matrix-prec_matrices[[m]], "1")/normPM
    } 

    onetwothree[[i]] <- list(TPR=tpr, FPR=fpr, Prec=precision, AUROC=AUROC, AUPR=AUPR, prederror=prederror, precm_errors=precm_errors, times=times) # , results_simnetwork=results_simnetwork # 
  }
  save(onetwothree, file=paste0("/project/dogss/tests_thesis/02_networkreconstruction/net_onetwothree_", b, ".RData"))

  #### jetzt noch der different groups stuff fuer das zweite scenario

  load(file=paste0("/project/dogss/tests_thesis/02_networkreconstruction/sim_network_", b, "_", 2, ".RData"))

  for (dg in 2:3) { # calculate results for different groupings: clustered, random

    results_simnetwork <- list()

    if (dg==2) {
      G_new <- kmeans(t(geneexpr[, TFs]), length(unique(G)))$cluster
      cat("kmeans!", " b=", b, "\n")
    } else { # (dg==3)
      G_new <- sample(G)
      names(G_new) <- names(G)
      cat("random!", " b=", b, "\n")
    }



    for (g in genes) {
      # cat("\nGene ", g, "\n")
      Y <- as.vector(geneexpr_train[, g]) # as.vector(geneexpr_center_train[, g])
      index <- as.integer(G_new)[!(names(G_new) %in% g)]
      names(index) <- names(G_new)[!(names(G_new) %in% g)]
      x <- geneexpr_train[, names(index)] # geneexpr_scale_train[, names(index)]

      groups <- unique(index)
      ngroups <- length(groups)
      nG <- ngroups
      nngroups <- sapply(groups, function(g) sum(index==g))

      while (max(groups) != ngroups) { # this is for gglasso which needs consecutively numbered groups :-/
        del <- max(groups)
        index[index==del] <- which.min(1:nG %in% groups) # this is superweird :D
        groups <- unique(index)
        ngroups <- length(groups)
        nngroups <- sapply(groups, function(g) sum(index==g))
      }

      if (is.vector(x)) x <- as.matrix(x, ncol=1)


    res_dogss <- cv_dogss(x, Y, G=index, tol=1e-03)
    res_ssep <- cv_dogss(x, Y, G=NULL, tol=1e-03)
    res_sgl <- tryCatch(my_cvSGL(data=list(x=x, y=Y), index=index, standardize=FALSE, thresh=1e-03, maxit=1000), error=function(e) NULL)
    res_gglasso <- cv.gglasso(x=x, y=Y, group=index, nfolds=10, intercept=FALSE, eps=1e-03, maxit=1000)
    res_lasso <- tryCatch(cv.glmnet(x, Y, standardize=FALSE, intercept=FALSE, thresh=1e-03, maxit=1000), error=function(e) NULL)

      res_sgl$lambda <- res_sgl$fit$lambdas

      if (is.null(res_sgl)) res_sgl <- res_gglasso # in this case (only one feature) the three methods are equivalent, but only gglasso gives results
      if (is.null(res_lasso)) res_lasso <- res_gglasso

    res_sgl$mylambda <- apply(res_sgl$fit$beta, 1, function(b) res_sgl$lambda[which(b!=0)[1]])
    res_gglasso$mylambda <- apply(res_gglasso$gglasso.fit$beta, 1, function(b) res_gglasso$lambda[which(b!=0)][1])
    res_lasso$mylambda <- apply(res_lasso$glmnet.fit$beta, 1, function(b) res_lasso$lambda[which(b!=0)[1]])

      scores <- cbind(dogss=res_dogss$p_final, ssep=res_ssep$p_final, sgl=res_sgl$mylambda, gglasso=res_gglasso$mylambda, lasso=res_lasso$mylambda) # ...$areascore # ...$p_final # ,
      row.names(scores) <- names(index)
      results_simnetwork[[g]] <- list(scores=scores, results=list(dogss=res_dogss, ssep=res_ssep, sgl=res_sgl, gglasso=res_gglasso, lasso=res_lasso))
    }

    save(results_simnetwork, file=paste0("/project/dogss/tests_thesis/02_networkreconstruction/results_network_grclrd_", dg, "_", b, ".RData"))

    scorelist <- data.frame(to=character(), from=character(), dogss=numeric(), ssep=numeric(), sgl=numeric(), gglasso=numeric(), lasso=numeric(), stringsAsFactors=FALSE) # ,
    for (g in genes) {
      scorelist <- rbind(scorelist, data.frame(to=as.character(rep(g, dim(results_simnetwork[[g]]$scores)[1])), from=as.character(rownames(results_simnetwork[[g]]$scores)), dogss=results_simnetwork[[g]]$scores[, "dogss"], ssep=results_simnetwork[[g]]$scores[, "ssep"], sgl=results_simnetwork[[g]]$scores[, "sgl"], gglasso=results_simnetwork[[g]]$scores[, "gglasso"], lasso=results_simnetwork[[g]]$scores[, "lasso"],stringsAsFactors=FALSE)) #
    }
    ranks <- apply(scorelist[, 3:7], 2, function(r) rank(1-r, ties.method="min"))
    pairs <- scorelist[, c(1,2)]
    truth_pairs <- rep(0, dim(pairs)[1])
    recov_matrices <- list()
    prec_matrices <- list()
    methods <- c("dogss", "ssep", "sgl", "gglasso", "lasso") #
    for (m in methods) recov_matrices[[m]] <- matrix(0, nrow=ntfs, ncol=dim(truth_matrix)[2], dimnames=list(from=TFs, to=colnames(truth_matrix)))
    for (m in methods) prec_matrices[[m]] <- matrix(0, nrow=dim(truth_matrix)[1], ncol=dim(truth_matrix)[2], dimnames=list(from=rownames(truth_matrix), to=colnames(truth_matrix)))
    for (j in 1:length(truth_pairs)) {
      truth_pairs[j] <- truth_matrix[pairs[j,2], pairs[j,1]]
      for (m in methods) {
        recov_matrices[[m]][pairs[j,2], pairs[j,1]] <- ranks[j, m]
      }
    } 

    # rocstuff
    K <- length(truth_pairs)
    P <- sum(truth_matrix[TFs, ])
    N <- ntfs*ngenes-ntfs-P

    M <- length(methods)

    tpr <- matrix(0, K, M)
    fpr <- matrix(0, K, M)
    precision <- matrix(0, K, M)

    for (m in 1:M) {
      truth_ordered <- truth_matrix[TFs, ][order(recov_matrices[[m]])][-c(1:ntfs)]
      tpr[, m] <- cumsum(truth_ordered)/P
      fpr[, m] <- cumsum(!truth_ordered)/N
      precision[, m] <- tpr[, m]*P/(tpr[, m]*P + fpr[, m]*N)
    }
    AUROC <- sapply(1:length(methods), function(m) AUC(c(0,fpr[, m],1), c(0,tpr[, m],1)))
    AUPR <- sapply(1:length(methods), function(m) AUC(c(0,tpr[, m],1), c(1,precision[, m],0)))

    # prediction stuff 
    X_test <- geneexpr_test[, TFs] # geneexpr_scale_test[, tfs]
    prederror <- rep(0, 5)
    names(prederror) <- c("dogss", "ssep", "sgl", "gglasso", "lasso")
    precm_errors <- prederror
    normPM <- norm(prec_matrix, "1")
    BETA_dogss <- truth_matrix[TFs, ]
    BETA_dogss[, ] <- 0
    BETA_ssep <- BETA_dogss
    BETA_sgl <- BETA_dogss
    BETA_gglasso <- BETA_dogss
    BETA_lasso <- BETA_dogss

    X <- X_test[, TFs]

    for (g in genes) {
      Y <- geneexpr_test[, g] # geneexpr_center_test[, g]
      sumY2 <- sum(Y^2)

      res_dogss <- results_simnetwork[[g]]$results$dogss
      res_ssep <- results_simnetwork[[g]]$results$ssep
      res_sgl <- results_simnetwork[[g]]$results$sgl
      res_lasso <- results_simnetwork[[g]]$results$lasso
      res_gglasso <- results_simnetwork[[g]]$results$gglasso
      names(res_dogss$m_cv) <- TFs[TFs!=g]
      names(res_ssep$m_cv) <- TFs[TFs!=g]
      names(res_sgl$beta) <- TFs[TFs!=g]
      BETA_dogss[TFs[!(TFs %in% g)], g] <- res_dogss$m_cv[TFs[!(TFs %in% g)]]
      BETA_ssep[TFs[!(TFs %in% g)], g] <- res_ssep$m_cv[TFs[!(TFs %in% g)]]
      BETA_sgl[TFs[!(TFs %in% g)], g] <- res_sgl$beta[TFs[!(TFs %in% g)]]
      BETA_gglasso[TFs[!(TFs %in% g)], g] <- res_gglasso$gglasso.fit$beta[, which(res_gglasso$lambda==res_gglasso$lambda.1se)][TFs[!(TFs %in% g)]]
      BETA_lasso[TFs[!(TFs %in% g)], g] <- res_lasso$glmnet.fit$beta[, which(res_lasso$lambda==res_lasso$lambda.1se)][TFs[!(TFs %in% g)]]

      if (!(g%in%TFs)) {
        prederror["dogss"] <- prederror["dogss"] + sum((Y-X%*%BETA_dogss[, g])^2)/sumY2
        prederror["ssep"] <- prederror["ssep"] + sum((Y-X%*%BETA_ssep[, g])^2)/sumY2
        prederror["sgl"] <- prederror["sgl"] + sum((Y-X%*%BETA_sgl[, g])^2)/sumY2
        prederror["gglasso"] <- prederror["gglasso"] + sum((Y-X%*%BETA_gglasso[, g])^2)/sumY2
        prederror["lasso"] <- prederror["lasso"] + sum((Y-X%*%BETA_lasso[, g])^2)/sumY2
      }

      prec_matrices[["dogss"]][g, g] <- nobs/sum((geneexpr_train[, g] - geneexpr_train[, TFs[!(TFs %in% g)]] %*% BETA_dogss[, g][TFs[!(TFs %in% g)]])^2)
      prec_matrices[["dogss"]][TFs[!(TFs %in% g)], g] <- -prec_matrices[["dogss"]][g, g] * BETA_dogss[, g][TFs[!(TFs %in% g)]]  # minus is important :)
      prec_matrices[["dogss"]][g, TFs[!(TFs %in% g)]] <- prec_matrices[["dogss"]][TFs[!(TFs %in% g)], g]

      prec_matrices[["ssep"]][g, g] <- nobs/sum((geneexpr_train[, g] - geneexpr_train[, TFs[!(TFs %in% g)]] %*% BETA_ssep[, g][TFs[!(TFs %in% g)]])^2)
      prec_matrices[["ssep"]][TFs[!(TFs %in% g)], g] <- -prec_matrices[["ssep"]][g, g] * BETA_ssep[, g][TFs[!(TFs %in% g)]]  # minus is important :)
      prec_matrices[["ssep"]][g, TFs[!(TFs %in% g)]] <- prec_matrices[["ssep"]][TFs[!(TFs %in% g)], g]

      prec_matrices[["sgl"]][g, g] <- nobs/sum((geneexpr_train[, g] - geneexpr_train[, TFs[!(TFs %in% g)]] %*% BETA_sgl[, g][TFs[!(TFs %in% g)]])^2)
      prec_matrices[["sgl"]][TFs[!(TFs %in% g)], g] <- -prec_matrices[["sgl"]][g, g] * BETA_sgl[, g][TFs[!(TFs %in% g)]]  # minus is important :)
      prec_matrices[["sgl"]][g, TFs[!(TFs %in% g)]] <- prec_matrices[["sgl"]][TFs[!(TFs %in% g)], g]

      prec_matrices[["gglasso"]][g, g] <- nobs/sum((geneexpr_train[, g] - geneexpr_train[, TFs[!(TFs %in% g)]] %*% BETA_gglasso[, g][TFs[!(TFs %in% g)]])^2)
      prec_matrices[["gglasso"]][TFs[!(TFs %in% g)], g] <- -prec_matrices[["gglasso"]][g, g] * BETA_gglasso[, g][TFs[!(TFs %in% g)]]  # minus is important :)
      prec_matrices[["gglasso"]][g, TFs[!(TFs %in% g)]] <- prec_matrices[["gglasso"]][TFs[!(TFs %in% g)], g]

      prec_matrices[["lasso"]][g, g] <- nobs/sum((geneexpr_train[, g] - geneexpr_train[, TFs[!(TFs %in% g)]] %*% BETA_lasso[, g][TFs[!(TFs %in% g)]])^2)
      prec_matrices[["lasso"]][TFs[!(TFs %in% g)], g] <- -prec_matrices[["lasso"]][g, g] * BETA_lasso[, g][TFs[!(TFs %in% g)]]  # minus is important :)
      prec_matrices[["lasso"]][g, TFs[!(TFs %in% g)]] <- prec_matrices[["lasso"]][TFs[!(TFs %in% g)], g]

    }
    prederror <- prederror/length(genes[!(genes%in%TFs)])
    for (m in methods) {
      precm_errors[m] <- norm(prec_matrix-prec_matrices[[m]], "1")/normPM
    } 

    gr_cl_rd[[dg]] <- list(TPR=tpr, FPR=fpr, Prec=precision, AUROC=AUROC, AUPR=AUPR, prederror=prederror, precm_errors=precm_errors, times=times)
  } 

  ## dg=1 bzw whole groups (not only TFs...)
  cat("whole groups... b=", b, "\n")
  results_simnetwork <- list()
  for (g in genes) {
    # cat("\nGene ", g, "\n")
    Y <- as.vector(geneexpr_train[, g]) # as.vector(geneexpr_center_train[, g])
    index <- as.integer(partitions)[!(names(partitions) %in% g)]
    names(index) <- names(partitions)[!(names(partitions) %in% g)]
    x <- geneexpr_train[, names(index)] # geneexpr_scale_train[, names(index)]

    groups <- unique(index)
    ngroups <- length(groups)
    nG <- ngroups
    nngroups <- sapply(groups, function(g) sum(index==g))

    while (max(groups) != ngroups) { # this is for gglasso which needs consecutively numbered groups :-/
      del <- max(groups)
      index[index==del] <- which.min(1:nG %in% groups) # this is superweird :D
      groups <- unique(index)
      ngroups <- length(groups)
      nngroups <- sapply(groups, function(g) sum(index==g))
    }

    if (is.vector(x)) x <- as.matrix(x, ncol=1)

    res_dogss <- cv_dogss(x, Y, G=index, tol=1e-03)
    res_ssep <- cv_dogss(x, Y, G=NULL, tol=1e-03)
    res_sgl <- tryCatch(my_cvSGL(data=list(x=x, y=Y), index=index, standardize=FALSE, thresh=1e-03, maxit=1000), error=function(e) NULL)
    res_gglasso <- cv.gglasso(x=x, y=Y, group=index, nfolds=10, intercept=FALSE, eps=1e-03, maxit=1000)
    res_lasso <- tryCatch(cv.glmnet(x, Y, standardize=FALSE, intercept=FALSE, thresh=1e-03, maxit=1000), error=function(e) NULL)

    res_sgl$lambda <- res_sgl$fit$lambdas

    if (is.null(res_sgl)) res_sgl <- res_gglasso # in this case (only one feature) the three methods are equivalent, but only gglasso gives results
    if (is.null(res_lasso)) res_lasso <- res_gglasso
    if (is.na(res_gglasso$lambda.1se)) {
      whichmin <- which.min(res_gglasso$cvm)
      res_gglasso$lambda.1se <- res_gglasso$lambda[which.max(res_gglasso$cvm<=res_gglasso$cvm[whichmin]+res_gglasso$cvsd[whichmin])]
    }

    res_sgl$mylambda <- apply(res_sgl$fit$beta, 1, function(b) res_sgl$lambda[which(b!=0)[1]])
    res_gglasso$mylambda <- apply(res_gglasso$gglasso.fit$beta, 1, function(b) res_gglasso$lambda[which(b!=0)][1])
    res_lasso$mylambda <- apply(res_lasso$glmnet.fit$beta, 1, function(b) res_lasso$lambda[which(b!=0)[1]])

    scores <- cbind(dogss=res_dogss$p_final, ssep=res_ssep$p_final, sgl=res_sgl$mylambda, gglasso=res_gglasso$mylambda, lasso=res_lasso$mylambda) # ...$areascore # ...$p_final # ,
    row.names(scores) <- names(index)
    results_simnetwork[[g]] <- list(scores=scores, results=list(dogss=res_dogss, ssep=res_ssep, sgl=res_sgl, gglasso=res_gglasso, lasso=res_lasso))
  }

  save(results_simnetwork, file=paste0("/project/dogss/tests_thesis/02_networkreconstruction/results_network_grclrd_", 1, "_", b, ".RData"))
  scorelist <- data.frame(to=character(), from=character(), dogss=numeric(), ssep=numeric(), sgl=numeric(), gglasso=numeric(), lasso=numeric(), stringsAsFactors=FALSE) # ,
  for (g in genes) {
    scorelist <- rbind(scorelist, data.frame(to=as.character(rep(g, dim(results_simnetwork[[g]]$scores)[1])), from=as.character(rownames(results_simnetwork[[g]]$scores)), dogss=results_simnetwork[[g]]$scores[, "dogss"], ssep=results_simnetwork[[g]]$scores[, "ssep"], sgl=results_simnetwork[[g]]$scores[, "sgl"], gglasso=results_simnetwork[[g]]$scores[, "gglasso"], lasso=results_simnetwork[[g]]$scores[, "lasso"],stringsAsFactors=FALSE)) #
  }
  ranks <- apply(scorelist[, 3:7], 2, function(r) rank(1-r, ties.method="min"))
  pairs <- scorelist[, c(1,2)]
  truth_pairs <- rep(0, dim(pairs)[1])
  recov_matrices <- list()
  methods <- c("dogss", "ssep", "sgl", "gglasso", "lasso") #
  for (m in methods) recov_matrices[[m]] <- matrix(0, nrow=dim(truth_matrix)[1], ncol=dim(truth_matrix)[2], dimnames=list(from=rownames(truth_matrix), to=colnames(truth_matrix)))
  prec_matrices <- recov_matrices
  for (j in 1:length(truth_pairs)) {
    truth_pairs[j] <- truth_matrix[pairs[j,2], pairs[j,1]]
    for (m in methods) {
      if (recov_matrices[[m]][pairs[j,2], pairs[j,1]]==0 | recov_matrices[[m]][pairs[j,2], pairs[j,1]] > ranks[j, m]) {
        recov_matrices[[m]][pairs[j,2], pairs[j,1]] <- ranks[j, m]
        recov_matrices[[m]][pairs[j,1], pairs[j,2]] <- ranks[j, m]
      }
    }
  } 

  # rocstuff
  K <- length(truth_pairs)/2
  P <- sum(truth_matrix)/2
  N <- (ngenes*ngenes-ngenes)/2-P

  M <- length(methods)

  tpr <- matrix(0, K, M)
  fpr <- matrix(0, K, M)
  precision <- matrix(0, K, M)

  for (m in 1:M) {
    truth_ordered <- truth_matrix[upper.tri(truth_matrix, diag=FALSE)][order(recov_matrices[[m]][upper.tri(recov_matrices[[m]], diag = FALSE)])]
    tpr[, m] <- cumsum(truth_ordered)/P
    fpr[, m] <- cumsum(!truth_ordered)/N
    precision[, m] <- tpr[, m]*P/(tpr[, m]*P + fpr[, m]*N)
  }
  AUROC <- sapply(1:length(methods), function(m) AUC(c(0,fpr[, m],1), c(0,tpr[, m],1)))
  AUPR <- sapply(1:length(methods), function(m) AUC(c(0,tpr[, m],1), c(1,precision[, m],0)))

  # prediction stuff 
  X_test <- geneexpr_test# [, TFs] # geneexpr_scale_test[, tfs]
  X_test[, genes[!(genes%in%TFs)]] <- 0
  prederror <- rep(0, 5)
  names(prederror) <- c("dogss", "ssep", "sgl", "gglasso", "lasso")
  precm_errors <- prederror
  normPM <- norm(prec_matrix, "1")
  BETA_dogss <- truth_matrix
  BETA_dogss[, ] <- 0
  BETA_ssep <- BETA_dogss
  BETA_sgl <- BETA_dogss
  BETA_gglasso <- BETA_dogss
  BETA_lasso <- BETA_dogss

  for (g in genes) {
    Y <- geneexpr_test[, g] # geneexpr_center_test[, g]
    sumY2 <- sum(Y^2)
    # X <- geneexpr_test[, genes[genes!=g]]
    # X[, ] <- 0
    X <- X_test[, genes[genes!=g]]

    res_dogss <- results_simnetwork[[g]]$results$dogss
    res_ssep <- results_simnetwork[[g]]$results$ssep
    res_sgl <- results_simnetwork[[g]]$results$sgl
    res_lasso <- results_simnetwork[[g]]$results$lasso
    res_gglasso <- results_simnetwork[[g]]$results$gglasso
    names(res_dogss$m_cv) <- genes[genes!=g]
    names(res_ssep$m_cv) <- genes[genes!=g]
    names(res_sgl$beta) <- genes[genes!=g]
    BETA_dogss[genes[!(genes %in% g)], g] <- res_dogss$m_cv
    BETA_ssep[genes[!(genes %in% g)], g] <- res_ssep$m_cv
    BETA_sgl[genes[!(genes %in% g)], g] <- res_sgl$beta
    BETA_gglasso[genes[!(genes %in% g)], g] <- res_gglasso$gglasso.fit$beta[, which(res_gglasso$lambda==res_gglasso$lambda.1se)]
    BETA_lasso[genes[!(genes %in% g)], g] <- res_lasso$glmnet.fit$beta[, which(res_lasso$lambda==res_lasso$lambda.1se)]

    if (!(g %in% TFs)) {
      prederror["dogss"] <- prederror["dogss"] + sum((Y-X%*%BETA_dogss[genes!=g, g])^2)/sumY2
      prederror["ssep"] <- prederror["ssep"] + sum((Y-X%*%BETA_ssep[genes!=g, g])^2)/sumY2
      prederror["sgl"] <- prederror["sgl"] + sum((Y-X%*%BETA_sgl[genes!=g, g])^2)/sumY2
      prederror["gglasso"] <- prederror["gglasso"] + sum((Y-X%*%BETA_gglasso[genes!=g, g])^2)/sumY2
      prederror["lasso"] <- prederror["lasso"] + sum((Y-X%*%BETA_lasso[genes!=g, g])^2)/sumY2
    }

    prec_matrices[["dogss"]][g, g] <- nobs/sum((geneexpr_train[, g] - geneexpr_train[, genes[!(genes %in% g)]] %*% BETA_dogss[, g][genes[!(genes %in% g)]])^2)
    prec_matrices[["dogss"]][genes[!(genes %in% g)], g] <- -prec_matrices[["dogss"]][g, g] * BETA_dogss[, g][genes[!(genes %in% g)]]  # minus is important :)

    prec_matrices[["ssep"]][g, g] <- nobs/sum((geneexpr_train[, g] - geneexpr_train[, genes[!(genes %in% g)]] %*% BETA_ssep[, g][genes[!(genes %in% g)]])^2)
    prec_matrices[["ssep"]][genes[!(genes %in% g)], g] <- -prec_matrices[["ssep"]][g, g] * BETA_ssep[, g][genes[!(genes %in% g)]]  # minus is important :)

    prec_matrices[["sgl"]][g, g] <- nobs/sum((geneexpr_train[, g] - geneexpr_train[, genes[!(genes %in% g)]] %*% BETA_sgl[, g][genes[!(genes %in% g)]])^2)
    prec_matrices[["sgl"]][genes[!(genes %in% g)], g] <- -prec_matrices[["sgl"]][g, g] * BETA_sgl[, g][genes[!(genes %in% g)]]  # minus is important :)

    prec_matrices[["gglasso"]][g, g] <- nobs/sum((geneexpr_train[, g] - geneexpr_train[, genes[!(genes %in% g)]] %*% BETA_gglasso[, g][genes[!(genes %in% g)]])^2)
    prec_matrices[["gglasso"]][genes[!(genes %in% g)], g] <- -prec_matrices[["gglasso"]][g, g] * BETA_gglasso[, g][genes[!(genes %in% g)]]  # minus is important :)

    prec_matrices[["lasso"]][g, g] <- nobs/sum((geneexpr_train[, g] - geneexpr_train[, genes[!(genes %in% g)]] %*% BETA_lasso[, g][genes[!(genes %in% g)]])^2)
    prec_matrices[["lasso"]][genes[!(genes %in% g)], g] <- -prec_matrices[["lasso"]][g, g] * BETA_lasso[, g][genes[!(genes %in% g)]]  # minus is important :)

  }
  prederror <- prederror/length(genes[!(genes%in%TFs)])
  for (m in methods) {
    precm_errors[m] <- norm(prec_matrix-prec_matrices[[m]], "1")/normPM
  } 

  gr_cl_rd[[1]] <- list(TPR=tpr, FPR=fpr, Prec=precision, AUROC=AUROC, AUPR=AUPR, prederror=prederror, precm_errors=precm_errors, times=times)

  save(gr_cl_rd, file=paste0("/project/dogss/tests_thesis/02_networkreconstruction/net_gr_cl_rd_", b, ".RData"))

  load(file=paste0("/project/dogss/tests_thesis/02_networkreconstruction/net_onetwothree_", b, ".RData"))

  list(onetwothree=onetwothree, gr_cl_rd=gr_cl_rd)
}
stopCluster(myCluster)
save.image(file="/project/dogss/tests_thesis/02_networkreconstruction.RData")
  # load("~/Programme/dogss/testing/sim2/finalresults_networksim.RData")
  B <- 100
  for (i in 1:2) {
    auroc <- matrix(0, nrow=B, ncol=5, dimnames=list(run=1:100, method=c("dogss", "ssep", "sgl", "gglasso", "lasso")))
    aupr <- auroc
    prederror <- auroc

    for (b in 1:B) {
      load(paste0("/project/dogss/tests_thesis/02_networkreconstruction_qpgraph/02_networkreconstruction/net_onetwothree_", b, ".RData"))
      auroc[b, ] <- onetwothree[[i]]$AUROC
      aupr[b, ] <- onetwothree[[i]]$AUPR
      prederror[b, ] <- onetwothree[[i]]$prederror
    }

    data <- data.frame(melt(auroc), measure="auroc")
    data <- rbind(data, data.frame(melt(aupr), measure="aupr"))
    data <- rbind(data, data.frame(melt(prederror), measure="pred. error"))
    data$method <- factor(data$method, ordered=TRUE, levels=c("dogss", "ssep", "sgl", "gglasso", "lasso"))

    auroc_data <- subset(data, measure=="auroc")
    P_auroc <- ggplot(auroc_data, aes(x=method, y=value)) +
      geom_boxplot() +
      theme_Ed() +
      scale_colour_Ed() +
      ylim(0,1) +
      labs(y="AUROC") +
      theme(axis.title.x=element_blank(),
            axis.text.x=element_blank(),
            axis.ticks.x=element_blank())
    Pd_auroc <- ggplot_build(P_auroc)$data[[1]]
    P_auroc <- P_auroc + geom_segment(data=Pd_auroc, aes(x=xmin, xend=xmax, y=middle, yend=middle, colour=unique(auroc_data$method)), size=1, show.legend=TRUE) + labs(colour="method")

    aupr_data <- subset(data, measure=="aupr")
    P_aupr <- ggplot(aupr_data, aes(x=method, y=value)) +
      geom_boxplot() +
      theme_Ed() +
      scale_colour_Ed() +
      ylim(0,1) +
      labs(y="AUPR") +
      theme(axis.title.x=element_blank(),
            axis.text.x=element_blank(),
            axis.ticks.x=element_blank())
    Pd_aupr <- ggplot_build(P_aupr)$data[[1]]
    P_aupr <- P_aupr + geom_segment(data=Pd_aupr, aes(x=xmin, xend=xmax, y=middle, yend=middle, colour=unique(aupr_data$method)), size=1, show.legend=FALSE)

    prederror_data <- subset(data, measure=="pred. error")
    P_prederror <- ggplot(prederror_data, aes(x=method, y=value)) +
      geom_boxplot() +
      theme_Ed() +
      scale_colour_Ed() +
      ylim(0,1) +
      labs(y="pred. error") +
      theme(axis.title.x=element_blank(),
            axis.text.x=element_blank(),
            axis.ticks.x=element_blank())
    Pd_prederror <- ggplot_build(P_prederror)$data[[1]]
    P_prederror <- P_prederror + geom_segment(data=Pd_prederror, aes(x=xmin, xend=xmax, y=middle, yend=middle, colour=unique(prederror_data$method)), size=1, show.legend=FALSE)

    grid_arrange_shared_legend(P_auroc, P_aupr, P_prederror, ncol = 3, nrow = 1, which.legend = 1)

  }

different groupings

  # load("~/Programme/dogss/testing/sim2_grouping_tfs/extra_results_network_grouping.RData")
  # load("/project/dogss/testing/testing/sim2_new/finalresults_sim2_new_area.RData")
  # mydata$method <- factor(mydata$method, ordered=TRUE, levels=c("dogss", "ssep", "sgl", "gglasso", "lasso"))
  # mydata$grouping <- factor(mydata$grouping, ordered=TRUE, levels=c("original", "kmeans", "random"))
  # mydata <- subset(mydata, !(method %in% c("ssep", "lasso") & grouping %in% c("kmeans", "random")))

  auroc <- data.frame(value=numeric(), method=character(), grouping=character())
  aupr <- data.frame(value=numeric(), method=character(), grouping=character())
  prederror <- data.frame(value=numeric(), method=character(), grouping=character())

  method <- c("dogss", "ssep", "sgl", "gglasso", "lasso")

  B <- 100

  for (b in 1:B) {
    load(paste0("/project/dogss/tests_thesis/02_networkreconstruction_qpgraph/02_networkreconstruction/small_net_whole_", b, ".RData"))

    for (m in 1:length(method)) {
      auroc <- rbind(auroc, data.frame(value=whole[[1]]$AUROC[m], method=method[m], grouping="original"))
      aupr <- rbind(aupr, data.frame(value=whole[[1]]$AUPR[m], method=method[m], grouping="original"))
      prederror <- rbind(prederror, data.frame(value=whole[[1]]$prederror[m], method=method[m], grouping="original"))
      if (m%in%c(1,3,4)) {
        auroc <- rbind(auroc, data.frame(value=whole[[2]]$AUROC[m], method=method[m], grouping="random"))
        aupr <- rbind(aupr, data.frame(value=whole[[2]]$AUPR[m], method=method[m], grouping="random"))
        prederror <- rbind(prederror, data.frame(value=whole[[2]]$prederror[m], method=method[m], grouping="random"))
      }

    }

  }

  plot_auroc <- ggplot(aes(y = value, x = method, fill = grouping), data = auroc) + geom_boxplot(alpha=0.6) + theme_Ed() + ylim(0, 1) + scale_fill_Ed() + theme(axis.title.x=element_blank()) + ylab("AUROC")
  plot_aupr <- ggplot(aes(y = value, x = method, fill = grouping), data = aupr) + geom_boxplot(alpha=0.6) + theme_Ed() + ylim(0, 1) + scale_fill_Ed() + theme(axis.title.x=element_blank()) + ylab("AUPR")
  plot_prederror <- ggplot(aes(y = value, x = method, fill = grouping), data = prederror) + geom_boxplot(alpha=0.6) + theme_Ed() + ylim(0, 1) + scale_fill_Ed() + theme(axis.title.x=element_blank()) + ylab("pred. error")

  grid_arrange_shared_legend(plot_auroc, plot_aupr, plot_prederror, ncol = 3, nrow = 1, which.legend = 1)

  ### large networks
  auroc <- data.frame(value=numeric(), method=character(), grouping=character())
  aupr <- data.frame(value=numeric(), method=character(), grouping=character())
  prederror <- data.frame(value=numeric(), method=character(), grouping=character())

  method <- c("dogss", "ssep", "sgl", "gglasso", "lasso")

  B <- 100

  for (b in 1:B) {
    load(paste0("/project/dogss/tests_thesis/02_networkreconstruction_qpgraph/02_networkreconstruction/net_whole_", b, ".RData"))

    for (m in 1:length(method)) {
      auroc <- rbind(auroc, data.frame(value=whole[[1]]$AUROC[m], method=method[m], grouping="original"))
      aupr <- rbind(aupr, data.frame(value=whole[[1]]$AUPR[m], method=method[m], grouping="original"))
      prederror <- rbind(prederror, data.frame(value=whole[[1]]$prederror[m], method=method[m], grouping="original"))
      if (m%in%c(1,3,4)) {
        auroc <- rbind(auroc, data.frame(value=whole[[2]]$AUROC[m], method=method[m], grouping="random"))
        aupr <- rbind(aupr, data.frame(value=whole[[2]]$AUPR[m], method=method[m], grouping="random"))
        prederror <- rbind(prederror, data.frame(value=whole[[2]]$prederror[m], method=method[m], grouping="random"))
      }

    }

  }

  plot_auroc <- ggplot(aes(y = value, x = method, fill = grouping), data = auroc) + geom_boxplot(alpha=0.6) + theme_Ed() + ylim(0, 1) + scale_fill_Ed() + theme(axis.title.x=element_blank()) + ylab("AUROC")
  plot_aupr <- ggplot(aes(y = value, x = method, fill = grouping), data = aupr) + geom_boxplot(alpha=0.6) + theme_Ed() + ylim(0, 1) + scale_fill_Ed() + theme(axis.title.x=element_blank()) + ylab("AUPR")
  plot_prederror <- ggplot(aes(y = value, x = method, fill = grouping), data = prederror) + geom_boxplot(alpha=0.6) + theme_Ed() + ylim(0, 1) + scale_fill_Ed() + theme(axis.title.x=element_blank()) + ylab("pred. error")

  grid_arrange_shared_legend(plot_auroc, plot_aupr, plot_prederror, ncol = 3, nrow = 1, which.legend = 1)


edgarst/dogss documentation built on May 27, 2019, 3:29 p.m.