library(GGally)
library(ggplot2)
library(igraph)
library(purrr)
library(data.table)
library(stringr)
solver <- "cplexAPI"
numberOfTrial <- 15
bootstrapIteraction <- 100
nsuboptimalSol <- 5
devtools::load_all()
# reformat node score for MStT (not MWCS) and add Terminal attributes
fixedTerminalLymphomaGraph <- lymphomaGraph
V(fixedTerminalLymphomaGraph)$isTerminal <- FALSE
V(fixedTerminalLymphomaGraph)[nodeScore > 0]$isTerminal <- TRUE
V(fixedTerminalLymphomaGraph)$nodeScore <- -1


#nodeCentricSteinerForestProblem$new(fixedTerminalLymphomaGraph,
#                                    solverChoice = solver)$sampleMultipleBootstrapSteinerSolutions(nBootstraps = bootstrapIteraction, maxItr = nsuboptimalSol)$getBootstrapSolutionPoolGraphs()

set.seed(2345)

fwrite(data.table(version = character(),
                  time = character(),
                  vcount = integer(),
                  modulesVcount = list(),
                  Niteration = integer(),
                 trial = integer()),
              paste0("./results/steinForestSuboptimalVersion_",packageVersion("stoneTrees") ,"_Bench_Nbootstrap_", bootstrapIteraction, ".tsv"))

for(i in 1:numberOfTrial){

  print(i)

    time <- system.time( SteinForSolverX <- nodeCentricSteinerForestProblem$new(fixedTerminalLymphomaGraph,
                                                                               solverChoice = solver,
                                                                               verbose = TRUE,
                                                                               solverTrace = 0)$sampleMultipleBootstrapSteinerSolutions(nBootstraps = bootstrapIteraction,
                                                                                                                                        maxItr = nsuboptimalSol) )

    SolutionPool <- SteinForSolverX$getBootstrapSolutionPoolGraphs(collapseSols = FALSE)


    save(SolutionPool, file = paste0("./results/steinForestSuboptimalVersion_",packageVersion("stoneTrees"),"_Trial", i,".RData"))



    Sizes <- map_int(SolutionPool, vcount)

  fwrite(data.table(version = paste(packageVersion("stoneTrees")),
                    time = time["elapsed"],
                    vcount = vcount(SteinForSolverX$getBootstrapSolutionPoolGraphs()),
                    modulesVcount = list(Sizes[-length(Sizes)]),
                    Niteration = length(Sizes[-length(Sizes)]),
                    trial = i),
                paste0("./results/steinForestSuboptimalVersion_",packageVersion("stoneTrees") ,"_Bench_Nbootstrap_", bootstrapIteraction, ".tsv"), sep = "\t",quote = FALSE, append = TRUE)


}
detach("package:stoneTrees", unload=TRUE)

#nodeCentricSteinerForestProblem$new(fixedTerminalLymphomaGraph,
#                                    solverChoice = solver)$sampleMultipleBootstrapSteinerSolutions(nBootstraps = bootstrapIteraction, maxItr = nsuboptimalSol)$getBootstrapSolutionPoolGraphs()

library(stoneTrees)

set.seed(2345)

fwrite(data.table(version = character(),
                  time = character(),
                  vcount = integer(),
                  modulesVcount = list(),
                  Niteration = integer(),
                 trial = integer()),
              paste0("./results/steinForestSuboptimalVersion_",packageVersion("stoneTrees") ,"_Bench_Nbootstrap_", bootstrapIteraction, ".tsv"))

for(i in 1:numberOfTrial){

  print(i)

    time <- system.time( SteinForSolverX <- nodeCentricSteinerForestProblem$new(fixedTerminalLymphomaGraph,
                                                                               solverChoice = solver,
                                                                               verbose = TRUE,
                                                                               solverTrace = 0)$sampleMultipleBootstrapSteinerSolutions(nBootstraps = bootstrapIteraction,
                                                                                                                                        maxItr = nsuboptimalSol) )

   SolutionPool <- SteinForSolverX$getBootstrapSolutionPoolGraphs(collapseSols = FALSE)


    save(SolutionPool, file = paste0("./results/steinForestSuboptimalVersion_",packageVersion("stoneTrees"),"_Trial", i,".RData"))



    Sizes <- map_int(SolutionPool, vcount)

  fwrite(data.table(version = paste(packageVersion("stoneTrees")),
                    time = time["elapsed"],
                    vcount = vcount(SteinForSolverX$getBootstrapSolutionPoolGraphs()),
                    modulesVcount = list(Sizes[-length(Sizes)]),
                    Niteration = length(Sizes[-length(Sizes)]),
                    trial = i),
                paste0("./results/steinForestSuboptimalVersion_",packageVersion("stoneTrees") ,"_Bench_Nbootstrap_", bootstrapIteraction, ".tsv"), sep = "\t",quote = FALSE, append = TRUE)

}
benchLatestVersion <- fread(paste0("./results/steinForestSuboptimalVersion_1.2_Bench_Nbootstrap_", bootstrapIteraction, ".tsv")) %>%
  .[, `:=`(modulesVcount = strsplit(modulesVcount, "\\|"))]



benchEtxVersion <- fread(paste0("./results/steinForestSuboptimalVersion_1.0.26000_Bench_Nbootstrap_", bootstrapIteraction, ".tsv")) %>%
  .[, `:=`(modulesVcount = strsplit(modulesVcount, "\\|"))]
benchVersion <- rbind(benchLatestVersion, benchEtxVersion)

benchVersion[, .(TotalTime = sum(time)/60/60), by = .(version)] 

benchVersion[, .(AvSise = mean(vcount)), by = .(version)] 
 p1 <- ggplot(benchVersion, aes(x = version, y = time/60, colour = version)) +
  geom_boxplot() +
  geom_jitter() +
  scale_y_continuous( breaks = seq(0,100,5)) +
  theme_bw(base_size = 20) +
  theme(legend.position = "none")+
  labs(y = "Time (min)",
       title = "Time taken to solve Steiner forest problem")
p2 <- ggplot(benchVersion, aes(x = version, y = vcount, colour = version))+
  geom_jitter() +
  geom_boxplot() + 
  theme_bw(base_size = 20) +
  theme(legend.position = "none")+
  labs( y = "Aggregated module size",
       title = "Size of Steiner forest solution")
p3 <- benchVersion[,.(modulesVcount = unlist(modulesVcount)), by = .(version, time, vcount, Niteration, trial)] %>%

ggplot(., aes(x = version, y = as.numeric(modulesVcount), colour = version)) +
  geom_jitter() +
  geom_boxplot() + 
  theme_bw(base_size = 20) +
  theme(legend.position = "none")+
  labs(y = "vCount of all modules in the pool",
       title = "Individual tree solutions to resampled terminals")
p4 <- ggplot(benchVersion, aes(x = version, y = Niteration, colour = version)) +
  geom_boxplot() +
  geom_jitter() +
  theme_bw(base_size = 20) +
  theme(legend.position = "none")+
  labs(y = "Number of solutions",
       title = "Size of solution pool")
egg::ggarrange(p1,p2,p3,p4, top =  paste(numberOfTrial , "runs with", bootstrapIteraction, "bootstraps, collecting up to",  nsuboptimalSol, "degenerate solutions on each run."))
allSolutions <- list.files("./results", pattern = ".RData", full.names = TRUE)

allSolutions <- allSolutions[grepl("Version", allSolutions)]

allSolutionDT <- data.table(files = allSolutions)

allSolutionDT[,`:=`(graphPool = map(files, ~{ load(.x)
                                               SolutionPool[-length(SolutionPool)] } )), by = files]

#allSolutionDT[,`:=`(graphPool = map(files, ~{ load(.x)
#                                               SolutionPool } )), by = files]

allSolutionDT[,`:=`(poolSize = map(graphPool, length) %>% unlist,
                    collapsedGraph = map(graphPool, ~{ induced.subgraph(fixedTerminalLymphomaGraph, V(fixedTerminalLymphomaGraph)[map(.x, ~{ V(.x)$name }) %>% unlist() %>% unique()]) }),
                    version =  str_match(files, "Version_(.*?)_")%>% .[1,2],
                    trial = str_match(files, "_Trial(.*?).RData")%>% .[1,2]), by = files]

allSolutionDT[,`:=`(eigenCentralityVector = map(collapsedGraph, ~{ eigen_centrality(.x)$vector  } )), by = files]


allSolutionDT[,`:=`(nodesVector = map(collapsedGraph, ~{ V(.x)$name  } )), by = files]
eigenCentralityDT <- allSolutionDT[,.(eigenCentrality = unlist(eigenCentralityVector),
                 nodeID = names(unlist(eigenCentralityVector))), by = .(trial, version)]


eigenCentralityDT <- dcast(eigenCentralityDT,  nodeID+trial~version, value.var = "eigenCentrality", fill = 0) 

# New facet label names for dose variable
trial.labs <- str_c("trial ", eigenCentralityDT[,trial] %>% unique)
names(trial.labs) <- eigenCentralityDT[,trial] %>% unique


eigenCentralityDT %>%
  ggplot(aes(x = `1.0.26000` , y = `1.2`)) +
  geom_point(aes(colour =trial), alpha = 0.5 ) +
  facet_wrap(~trial, ncol = 5, labeller = labeller(trial = trial.labs))+
  #scale_x_continuous(breaks = seq(0,20)) +
  #scale_y_continuous(breaks = seq(0,20)) +
  #xlim(0, 15) + ylim(0, 15) + 
 labs(y = "stoneTrees Version 1.2", x = "stoneTrees Version 1.0.26000", title = "eigen_centrality") +

  theme_bw()+
  theme_bw(base_size = 20) +
  theme(legend.position = "none")
nodesDT <- allSolutionDT[,.(nodeID = unlist(nodesVector),
                            value = 1), by = .(trial, version)]

nodesDT <- dcast(nodesDT,  version+trial~nodeID, fill = 0) %>% .[, type :=  str_c(version, "_", trial)]

nodesMatrix <- as.matrix(nodesDT[,3:(ncol(nodesDT)-1)])
row.names(nodesMatrix) <- nodesDT[,type]

nodesDistance <- philentropy::distance(nodesMatrix, method = "manhattan", use.row.names = TRUE)

nodesDistance <- melt(nodesDistance)

setkey(nodesDistance, )

ProbMatrix <- rbind(1:10/sum(1:10), 20:29/sum(20:29),30:39/sum(30:39))
rownames(ProbMatrix) <- paste0("Example", 1:3)
GGscatterPlot <- function(data, mapping, ..., 
                        method = "spearman") {

#Get correlation coefficient
    x <- GGally::eval_data_col(data, mapping$x)
    y <- GGally::eval_data_col(data, mapping$y)

    cor <- cor(x, y, method = method)
#Assemble data frame
    df <- data.frame(x = x, y = y)
# PCA
    nonNull <- x!=0 & y!=0
    dfpc <- prcomp(~x+y, df[nonNull,])
    df$cols <- predict(dfpc, df)[,1]
# Define the direction of color range based on PC1 orientation:
    dfsum <- x+y
    colDirection <- ifelse(dfsum[which.max(df$cols)] < 
                               dfsum[which.min(df$cols)],
                           1,
                           -1)
#Get 2D density for alpha
    dens2D <- MASS::kde2d(df$x, df$y)
    df$density <- fields::interp.surface(dens2D , 
                                         df[,c("x", "y")])

if (any(df$density==0)) {
    mini2D = min(df$density[df$density!=0]) #smallest non zero value
    df$density[df$density==0] <- mini2D
}
#Prepare plot
    pp <- ggplot(df, aes(x=x, y=y, color = cols, alpha = 1/density)) +
                ggplot2::geom_point(shape=16, show.legend = FALSE) +
                ggplot2::scale_color_viridis_c(direction = colDirection) +
#                scale_color_gradient(low = "#0091ff", high = "#f0650e") +
                ggplot2::scale_alpha(range = c(.05, .6)) +
                ggplot2::geom_abline(intercept = 0, slope = 1, col="darkred") +
                #ggplot2::geom_label(
                #        data = data.frame(
                #                        xlabel = min(x, na.rm = TRUE),
                #                        ylabel = max(y, na.rm = TRUE),
                #                        lab = round(cor, digits = 3)),
                #        mapping = ggplot2::aes(x = xlabel, 
                 #                              y = ylabel, 
                #                               label = lab),
                #        hjust = 0, vjust = 1,
                #        size = 3, fontface = "bold",
                #        inherit.aes = FALSE # do not inherit anything from the ...
                #        ) +
                theme_minimal()

return(pp)
}
eigenCentralityDT <- allSolutionDT[,.(eigenCentrality = unlist(eigenCentralityVector),
                 nodeID = names(unlist(eigenCentralityVector))), by = .(trial, version)]

eigenCentralityDT <- dcast(eigenCentralityDT,  nodeID~version+trial, value.var = "eigenCentrality", fill = 0) 


GGally::ggpairs(eigenCentralityDT, 
                columns = 2:ncol(eigenCentralityDT), 
                lower = list(continuous = GGally::wrap(GGscatterPlot, method="pearson")),
                upper = list(continuous = wrap("cor", method= "pearson")))


adamsardar/stoneTrees documentation built on May 20, 2022, 7:38 p.m.