simulations_study/multivariate_exploitation.R

#############################################
## Exploitation several K
#############################################

##############
## Parameters and functions
##############

library(doParallel)
library(foreach)
library(PhylogeneticEM)
# require(ape)
# #require(glmnet) # For Lasso initialization
# # require(quadrupen) # For Lasso initialization
# require(robustbase) # For robust fitting of alpha and variance
# library(TreeSim) # For simulation of the tree
# require(ggplot2) # Plot
# library(scales) # plot
library(reshape2) # Plot
# library(grid) # Plot
library(plyr)
# library(capushe)

saveresultdir <- "../Results/Simulations_Multivariate"
saveresultname <- "multivariate_estimations_SUN_rBM"
datestamp_day_cut <- "2017-06-2"
# datestamp_day <- "2016-11-28"
ak <- ""# "alpha_known" # ""

files_list <- list.files(path = saveresultdir,
                         pattern = paste0(saveresultname, ak, "-", datestamp_day_cut),
                         full.names = TRUE)

###############################################################

######################
## Forgotten subtree.list
######################
datestamp_data <- "2017-06-20" #
savedatafile = "../Results/Simulations_Multivariate/multivariate_simlist"
load(paste0(savedatafile, "_", datestamp_data, "_light.RData"))

subtree.list <- vector(mode = "list");
for (nta in c(128, 32, 64, 96, 160, 192, 256)){
  subtree.list[[paste0(nta)]] <- PhylogeneticEM:::enumerate_tips_under_edges(trees[[paste0(nta)]])
}

##############
## Add least squares
##############
# U_trees <- lapply(trees, incidence.matrix.full)
# names(U_trees) <- names(trees)
# times_shareds <- lapply(trees, compute_times_ca)
# names(times_shareds) <- names(trees)
# 
# add_lsq <- function(X){
#   # tree_one <- trees[[paste0(X$sim$ntaxa)]]
#   # U_tree_one <- U_trees[[paste0(X$sim$ntaxa)]]
#   # times_shared_one <- times_shareds[[paste0(X$sim$ntaxa)]]
#   # fun <- function(params){
#   #   tmpsim <- PhylogeneticEM:::simulate_internal(phylo = tree_one,
#   #                                                process = "scOU",
#   #                                                p = attr(params, "p_dim"),
#   #                                                root.state = params$root.state,
#   #                                                shifts = params$shifts,
#   #                                                variance = params$variance,
#   #                                                optimal.value = params$optimal.value,
#   #                                                selection.strength = params$selection.strength,
#   #                                                simulate_random = FALSE,
#   #                                                U_tree = U_tree_one,
#   #                                                times_shared = times_shared_one)
#   #   ## Mean at tips with estimated parameters
#   #   m_Y_estim <- PhylogeneticEM:::extract_simulate_internal(tmpsim, 
#   #                                                           where="tips",
#   #                                                           what="expectations") 
#   # }
#   
#   nums <- grep("alpha_", names(X$res))
#   for (i in nums){
#     # m_Y_estim <- lapply(X$res[[i]]$params_estim, fun)
#     # X$res[[i]]$m_Y_estim_new <- m_Y_estim
#     # X$res[[i]]$results_summary$least_squares_raw <- sapply(m_Y_estim,
#     #                                                        function(z) sum((X$sim$Y_data - z)^2, na.rm = TRUE))
#     # X$res[[i]]$results_summary$least_squares_bis <- sapply(X$res[[i]]$m_Y_estim,
#     # function(z) sum((X$sim$Y_data - z)^2, na.rm = TRUE))
#     X$res[[i]]$results_summary$least_squares <- sapply(X$res[[i]]$params_estim, function(z) sum(diag(z$variance)/(2*z$selection.strength)))
#   }
#   return(X)
# }
# 
# fct_to_keep <- c(fct_to_keep, "U_trees", "add_lsq")
##############
## Model Selection
##############
# model_selection_tmp <- function(simres){
#   X <- simres$sim
#   res <- PhyloEM(phylo = trees[[paste0(X$ntaxa)]],
#                  Y_data = X$Y_data,
#                  process = "scOU",
#                  K_max = max(K_try[[paste0(X$ntaxa)]]) + 5,
#                  random.root = TRUE,
#                  stationary.root = TRUE,
#                  estimates = simres$res,
#                  method.selection = c("BGHlsq", "BGHml"))#, "BGHlsqraw", "BGHmlraw"))
#   res$alpha_min$results_summary$total_time <- res$alpha_max$results_summary$total_time
#   simres$res <- res
#   return(simres)
# }
# 
# # fct_to_keep <- c(fct_to_keep, "model_selection_tmp")
# 
add_total_times_min <- function(simres){
  simres$res$alpha_min$results_summary$total_time <- simres$res$alpha_max$results_summary$total_time
  simres$res$alpha_min$results_summary$sum_times <- simres$res$alpha_max$results_summary$sum_times
  simres$res$alpha_min_raw$results_summary$total_time <- simres$res$alpha_max$results_summary$total_time
  simres$res$alpha_min_raw$results_summary$sum_times <- simres$res$alpha_max$results_summary$sum_times
  return(simres)
}

######################
## Extract inferences with K_true or K_select
######################

## Extract the results where K_try = K_e (K_true or something else)
extract_result <- function(simest, Ke){
  K_try <- 0:(length(simest$res$alpha_max$params_estim) - 1)
  if (Ke >= length(simest$res$alpha_max$params_estim)) return(NULL)
  fun <- function(l){
    if (Ke %in% names(l)) return(l[[paste(Ke)]])
    return(l)
  }
  res <- lapply(simest$res$alpha_max[c("params_estim",
                                       #"Yhat", "Zhat", "Yvar", "Zvar",
                                       "edge.quality",
                                       "params_raw", "params_init_estim", "m_Y_estim")], fun)
  res$results_summary  <-  subset(simest$res$alpha_max$results_summary, K_try == Ke)
  return(list(sim = simest$sim,
              res = res))
}

extract_result_min <- function(simest, Ke){
  K_try <- 0:(length(simest$res$alpha_min$params_estim) - 1)
  if (Ke >= length(simest$res$alpha_min$params_estim)) return(NULL)
  fun <- function(l){
    if (Ke %in% names(l)) return(l[[paste(Ke)]])
    return(l)
  }
  res <- lapply(simest$res$alpha_min[c("params_estim",
                                       #"Yhat", "Zhat", "Yvar", "Zvar",
                                       "edge.quality",
                                       "params_raw", "params_init_estim", "m_Y_estim")], fun)
  res$results_summary  <-  subset(simest$res$alpha_min$results_summary, K_try == Ke)
  return(list(sim = simest$sim,
              res = res))
}

## Extract K_true
extract_result_K_true <- function(simest){
  K_true <- simest$sim$K
  simest <- extract_result(simest, K_true)
  if (!is.null(simest)) simest$res$results_summary$K_select <- NA
  return(simest)
}

# simests_K_true <-  as.name(paste0("simestimations", ak, "_K_true"))
# assign(paste(simests_K_true), lapply(eval(simests), extract_result_K_true))

## Extract K_select
extract_result_K_select <- function(simest, method = "BGH"){
  if (!is.null(simest$res$alpha_max[[method]])){
    K_select <- length(simest$res$alpha_max[[method]]$params_select$shifts$edges)
    simest <- extract_result(simest, K_select)
  } else {
    K_select <- length(simest$res$alpha_min[[method]]$params_select$shifts$edges)
    simest <- extract_result_min(simest, K_select)
  }
  simest$res$results_summary$K_select <- K_select
  return(simest)
  #   res <- simest$res$alpha_max[[method]]
  #   res$params_estim <- res$params_select
  #   return(list(sim = simest$sim,
  #               res = res))
}

# nms <- names(eval(simests)[[1]]$res$alpha_max)
# method_sels <- nms[c(grep("DDSE", nms),
#                      grep("Djump", nms),
#                      grep("BGH", nms))]
# 
# simests_K_select <-  as.name(paste0("simestimations", ak, "_K_select"))
# for (meth in method_sels){
#   assign(paste0(simests_K_select, "_", meth),
#          lapply(eval(simests), extract_result_K_select, method = meth))
# }
# 
# rm(list = c(paste(simests), "simests"))
# gc()

######################
## Add simu parameters to summary
######################

add_simu_params <- function(simest){
  for (pp in colnames(simparams)){
    simest$res$results_summary[[pp]] <- simest$sim[[pp]]
  }
  #   simest$res$results_summary$alpha <- simest$sim$alpha
  #   simest$res$results_summary$gamma <- simest$sim$gamma
  #   simest$res$results_summary$K <- simest$sim$K
  #   simest$res$results_summary$s_noise <- simest$sim$s_noise
  #   simest$res$results_summary$n <- simest$sim$n
  #   simest$res$results_summary$ntaxa <- simest$sim$ntaxa
  #   simest$res$results_summary$grp <- simest$sim$grp
  simest$res$results_summary$correlated_base <- (simest$sim$rd == r_base[2] && simest$sim$grp %in% c("K_var", "base"))
  simest$res$results_summary$difficulty <- simest$sim$difficulty
  simest$res$results_summary$log_likelihood.true <- simest$sim$log_likelihood.true
  simest$res$results_summary$it <- simest$sim$it
  simest$res$results_summary$manova <- simest$sim$manova
  return(simest)
}

add_simu_params_l1ou <- function(simest){
  if (simest$sim$NA_per > 0) return(NULL) # Drop missing values tests.
  for (pp in colnames(simparams)){
    simest$res$results_summary[[pp]] <- simest$sim[[pp]]
  }
  simest$res$results_summary$correlated_base <- (simest$sim$rd == r_base[2] && simest$sim$grp %in% c("K_var", "base"))
  simest$res$results_summary$difficulty <- simest$sim$difficulty
  simest$res$results_summary$log_likelihood.true <- simest$sim$log_likelihood.true
  simest$res$results_summary$it <- simest$sim$it
  ## Format results
  simest$res$params_estims <- list(shifts = simest$res$params_estims$shifts,
                                   variance = diag(simest$res$sigma2),
                                   selection.strength = diag(simest$res$alpha),
                                   optimal.value = simest$res$intercept,
                                   root.state = list(random = TRUE,
                                                     stationary.root = TRUE,
                                                     value.root = NA,
                                                     exp.root = simest$res$intercept,
                                                     var.root = diag(simest$res$sigma2/(2*simest$res$alpha))))
  return(simest)
}

# assign(paste(simests_K_true), lapply(eval(simests_K_true), add_simu_params))
# for (meth in method_sels){
#   assign(paste0(simests_K_select, "_", meth),
#          lapply(eval(as.name(paste0(simests_K_select, "_", meth))), add_simu_params))
# }
# assign(paste(simests_K_select), lapply(eval(simests_K_select), add_simu_params))

###########################################
## Scores involving lists
## (Compute complex score and add them to result summary for each element of the list)
###########################################
TP <- function(tr.edges, est.edges) { # True Positives
  if (is.null(tr.edges) ) return(0) # If no true shift at all, any shift found is misplaced
  if (is.null(est.edges)) return(0) # If no estimated shift, TP = 0
  return(sum(est.edges %in% tr.edges))
}

FP <- function(tr.edges, est.edges){ # False Positives
  return(length(est.edges) - TP(tr.edges, est.edges))
}

FN <- function(tr.edges, est.edges){ # False Negatives
  return(length(tr.edges) - TP(tr.edges, est.edges))
}

TN <- function(tr.edges, est.edges, nedges){ # True Negatives
  edges <- 1:nedges
  if (is.null(est.edges)) est.edges <- nedges + 1
  if (is.null(tr.edges)) tr.edges <- nedges + 1
  return(sum(edges[-est.edges] %in% edges[-tr.edges]))
}

sensitivity <- function(tr.edges, est.edges) { # Sensibilité
  # if (is.null(tr.edges) ) return(NA) # If no true shift at all, any shift found is misplaced
  if (is.null(tr.edges) ) return(1) # If no true shift at all, we always find all of them
  if (is.null(est.edges)) return(0) # If no estimated shift, TP = 0
  return(sum(est.edges %in% tr.edges)/length(tr.edges))
}

specificity <- function(tr.edges, est.edges, nedges) { # spécificité : TN/N
  edges <- 1:nedges
  if (is.null(est.edges)) est.edges <- nedges + 1
  if (is.null(tr.edges)) tr.edges <- nedges + 1
  return(sum(edges[-est.edges] %in% edges[-tr.edges])/length(edges[-tr.edges]))
}

precision <- function(tr.edges, est.edges) { # Precision
  if (is.null(tr.edges) ) return(0) # If no true shift at all, any shift found is misplaced
  if (is.null(est.edges)) return(1) # If no estimated shift, we can't go wrong
  # if (is.null(est.edges)) return(NA) # If no estimated shift, TP = 0
  return(sum(est.edges %in% tr.edges)/length(est.edges))
}

match.partitions <- function(tr, est, nta) {
  tree <- trees[[paste0(nta)]]
  part.list <- subtree.list[[paste0(nta)]]
  ## computes ARI between partitions induced by edges in tr$edges and est$edges
  #   if (is.null(est$edges)){
  #     return(-1) # If lasso initialization failed
  #   } else {
  require(mclust)
  part1 <- clusters_from_shifts(tree, tr$edges, part.list)
  part2 <- clusters_from_shifts(tree, est$edges, part.list)
  return(adjustedRandIndex(part1, part2))        
  #   }
}

euclidian_distance <- function(m1, m2){
  return(sum((m2 - m1)^2))
}

RMSE <- function(y, true.value) {
  return(sqrt(mean((y - true.value)^2)))
}

extract_diag <- function(mat){
  if (is.vector(mat)){
    return(rep(mat, p_base))
  } else {
    return(diag(as.matrix(mat)))
  }
}

diag_RMSE <- function(mat_eval, mat_true){
  return(RMSE(extract_diag(mat_eval), extract_diag(mat_true)))
}

extract_off_diag <- function(mat){
  if (is.vector(mat)){
    return(rep(0, p_base*(p_base-1)/2))
  } else {
    mat <- as.matrix(mat)
    return(mat[upper.tri(mat)])
  }
}

offdiag_RMSE <- function(mat_eval, mat_true){
  return(RMSE(extract_off_diag(mat_eval), extract_off_diag(mat_true)))
}

compute_list_scores_element <- function(simest){
  if (is.null(simest)){ # || !simest$results_summary$CV_estim){
    #     return(data.frame(mean.edge.quality = NA, edge.recovery = NA,
    #                       edge.init.recovery = NA, adjrandind = NA, adjrandind.init = NA))
    return(NULL)
  } else {
    df <- data.frame(
      # mean.edge.quality = mean(simest$edge.quality),
      sensitivity = sensitivity(simest$sim$shifts$edges,
                                simest$res$params_estim$shifts$edges),
      sensitivity.init = sensitivity(simest$sim$shifts$edges,
                                     simest$res$params_init_estim$shifts$edges),
      specificity = specificity(simest$sim$shifts$edges,
                                simest$res$params_estim$shifts$edges,
                                nrow(trees[[paste0(simest$sim$ntaxa)]]$edge)),
      specificity.init = specificity(simest$sim$shifts$edges,
                                     simest$res$params_init_estim$shifts$edges,
                                     nrow(trees[[paste0(simest$sim$ntaxa)]]$edge)),
      precision = precision(simest$sim$shifts$edges,
                            simest$res$params_estim$shifts$edges),
      precision.init = precision(simest$sim$shifts$edges,
                                 simest$res$params_init_estim$shifts$edges),
      adjrandind = match.partitions(simest$sim$shifts, 
                                    simest$res$params_estim$shifts,
                                    simest$sim$ntaxa),
      adjrandind.init = match.partitions(simest$sim$shifts, 
                                         simest$res$params_init_estim$shifts,
                                         simest$sim$ntaxa),
      # distance_means = euclidian_distance(simest$sim$m_Y_true,
      #                                     simest$res$m_Y_estim),
      TP = TP(simest$sim$shifts$edges,
              simest$res$params_estim$shifts$edges),
      TP.init = TP(simest$sim$shifts$edges,
                   simest$res$params_init_estim$shifts$edges),
      FP = FP(simest$sim$shifts$edges,
              simest$res$params_estim$shifts$edges),
      FP.init = FP(simest$sim$shifts$edges,
                   simest$res$params_init_estim$shifts$edges),
      FN = FN(simest$sim$shifts$edges,
              simest$res$params_estim$shifts$edges),
      FN.init = FN(simest$sim$shifts$edges,
                   simest$res$params_init_estim$shifts$edges),
      TN = TN(simest$sim$shifts$edges,
              simest$res$params_estim$shifts$edges,
              nrow(trees[[paste0(simest$sim$ntaxa)]]$edge)),
      TN.init = TN(simest$sim$shifts$edges,
                   simest$res$params_init_estim$shifts$edges,
                   nrow(trees[[paste0(simest$sim$ntaxa)]]$edge)),
      gamma_diag_RMSE = diag_RMSE(simest$res$params_estim$root.state$var.root,
                                  simest$sim$params_simu$root.state$var.root),
      gamma_offdiag_RMSE = offdiag_RMSE(simest$res$params_estim$root.state$var.root,
                                        simest$sim$params_simu$root.state$var.root),
      alpha_diag_RMSE = diag_RMSE(simest$res$params_estim$selection.strength,
                                  simest$sim$params_simu$selection.strength),
      alpha_offdiag_RMSE = offdiag_RMSE(simest$res$params_estim$selection.strength,
                                        simest$sim$params_simu$selection.strength),
      sigma_diag_RMSE = diag_RMSE(simest$res$params_estim$variance,
                                  simest$sim$params_simu$variance),
      sigma_offdiag_RMSE = offdiag_RMSE(simest$res$params_estim$variance,
                                        simest$sim$params_simu$variance)
      # gamma_estim = as.vector(simest$res$params_estim$variance) / (2 * as.vector(simest$res$params_estim$selection.strength))
    )
    simest$res$results_summary <- cbind(simest$res$results_summary, df)
    return(simest)
  }
}

compute_list_scores_element_l1ou <- function(simest){
  if (is.null(simest)){ # || !simest$results_summary$CV_estim){
    #     return(data.frame(mean.edge.quality = NA, edge.recovery = NA,
    #                       edge.init.recovery = NA, adjrandind = NA, adjrandind.init = NA))
    return(NULL)
  } else {
    df <- data.frame(
      # mean.edge.quality = mean(simest$edge.quality),
      sensitivity = sensitivity(simest$sim$shifts$edges,
                                simest$res$params_estim$shifts$edges),
      sensitivity.init = NA,
      specificity = specificity(simest$sim$shifts$edges,
                                simest$res$params_estim$shifts$edges,
                                nrow(trees[[paste0(simest$sim$ntaxa)]]$edge)),
      specificity.init = NA,
      precision = precision(simest$sim$shifts$edges,
                            simest$res$params_estim$shifts$edges),
      precision.init = NA,
      adjrandind = match.partitions(simest$sim$shifts, 
                                    simest$res$params_estim$shifts,
                                    simest$sim$ntaxa),
      adjrandind.init = NA,
      #       distance_means = euclidian_distance(simest$sim$m_Y_true,
      #                                           simest$res$m_Y_estim),
      TP = TP(simest$sim$shifts$edges,
              simest$res$params_estim$shifts$edges),
      TP.init = NA,
      FP = FP(simest$sim$shifts$edges,
              simest$res$params_estim$shifts$edges),
      FP.init = NA,
      FN = FN(simest$sim$shifts$edges,
              simest$res$params_estim$shifts$edges),
      FN.init = NA,
      TN = TN(simest$sim$shifts$edges,
              simest$res$params_estim$shifts$edges,
              nrow(trees[[paste0(simest$sim$ntaxa)]]$edge)),
      TN.init = NA,
      gamma_diag_RMSE = diag_RMSE(simest$res$params_estim$root.state$var.root,
                                  simest$sim$params_simu$root.state$var.root),
      gamma_offdiag_RMSE = offdiag_RMSE(simest$res$params_estim$root.state$var.root,
                                        simest$sim$params_simu$root.state$var.root),
      alpha_diag_RMSE = diag_RMSE(simest$res$params_estim$selection.strength,
                                  simest$sim$params_simu$selection.strength),
      alpha_offdiag_RMSE = offdiag_RMSE(simest$res$params_estim$selection.strength,
                                        simest$sim$params_simu$selection.strength),
      sigma_diag_RMSE = diag_RMSE(simest$res$params_estim$variance,
                                  simest$sim$params_simu$variance),
      sigma_offdiag_RMSE = offdiag_RMSE(simest$res$params_estim$variance,
                                        simest$sim$params_simu$variance)
      # gamma_estim = as.vector(simest$res$params_estim$variance) / (2 * as.vector(simest$res$params_estim$selection.strength))
    )
    simest$res$results_summary <- cbind(simest$res$results_summary, df)
    return(simest)
  }
}

# assign(paste(simests_K_true), 
#        lapply(eval(simests_K_true), compute_list_scores_element))
# for (meth in method_sels){
#   assign(paste0(simests_K_select, "_", meth),
#          lapply(eval(as.name(paste0(simests_K_select, "_", meth))), compute_list_scores_element))
# }
# assign(paste(simests_K_select), 
#        lapply(eval(simests_K_select), compute_list_scores_element))

######################
## Add Matrix Estims
######################
add_name <- function(simest, name, nbr, tmp_true, tmp_eval){
  names(tmp_eval) <- paste0(name, "_estim_", 1:nbr)
  names(tmp_true) <- paste0(name, "_true_", 1:nbr)
  simest$res$results_summary <- cbind(simest$res$results_summary, 
                                      t(tmp_true), t(tmp_eval))
  return(simest)
}

add_diag <- function(simest){
  simest <- add_name(simest, "sigma_diag", p_base,
                     extract_diag(simest$sim$params_simu$variance),
                     extract_diag(simest$res$params_estim$variance))
  simest <- add_name(simest, "alpha_diag", p_base,
                     extract_diag(simest$sim$params_simu$selection.strength),
                     extract_diag(simest$res$params_estim$selection.strength))
  simest <- add_name(simest, "gamma_diag", p_base,
                     extract_diag(simest$sim$params_simu$root.state$var.root),
                     extract_diag(simest$res$params_estim$root.state$var.root))
  return(simest)
}

add_offdiag <- function(simest){
  simest <- add_name(simest, "sigma_offdiag", p_base*(p_base-1)/2,
                     extract_off_diag(simest$sim$params_simu$variance),
                     extract_off_diag(simest$res$params_estim$variance))
  simest <- add_name(simest, "alpha_offdiag", p_base*(p_base-1)/2,
                     extract_off_diag(simest$sim$params_simu$selection.strength),
                     extract_off_diag(simest$res$params_estim$selection.strength))
  simest <- add_name(simest, "gamma_offdiag", p_base*(p_base-1)/2,
                     extract_off_diag(simest$sim$params_simu$root.state$var.root),
                     extract_off_diag(simest$res$params_estim$root.state$var.root))
  return(simest)
}


# assign(paste(simests_K_true), 
#        lapply(eval(simests_K_true), add_diag))
# for (meth in method_sels){
#   assign(paste0(simests_K_select, "_", meth),
#          lapply(eval(as.name(paste0(simests_K_select, "_", meth))), add_diag))
# }
# 
# assign(paste(simests_K_true), 
#        lapply(eval(simests_K_true), add_offdiag))
# for (meth in method_sels){
#   assign(paste0(simests_K_select, "_", meth),
#          lapply(eval(as.name(paste0(simests_K_select, "_", meth))), add_offdiag))
# }

######################
## Format the data
######################

extract_data_frame <- function(simests){
  results_summary <- lapply(simests, function(z) return(z$res$results_summary))
  results_summary <- as.data.frame(do.call(rbind, results_summary))
  #   dd <- do.call(rbind, simests)
  #   dd <- do.call(rbind, dd[, "res"])
  #   results_summary <- as.data.frame(do.call(rbind, dd[,"results_summary"]))
  return(results_summary)
}

# res_sum <-  as.name(paste0("results_summary", ak))
# res_sum_K_true <-  as.name(paste0("results_summary", ak, "_K_true"))
# res_sum_K_select <-  as.name(paste0("results_summary", ak, "_K_select"))
# res_sum_K_select_true <-  as.name(paste0("results_summary", ak, "_K_select_true"))
# 
# # assign(paste(res_sum), extract_data_frame(eval(simests)))
# assign(paste(res_sum_K_true), extract_data_frame(eval(simests_K_true)))
# for (meth in method_sels){
#   assign(paste0(res_sum_K_select, "_", meth),
#          extract_data_frame(eval(as.name(paste0(simests_K_select, "_", meth)))))
# }
# # assign(paste(res_sum_K_select), extract_data_frame(eval(simests_K_select)))
# 
# rm(list = c(paste(simests_K_true), "simests_K_true",
#             paste0(simests_K_select, "_", method_sels)))
# gc()
# 
# assign(paste0(res_sum_K_true),
#        transform(eval(res_sum_K_true), K_type = "K_true"))
# for (meth in method_sels){
#   assign(paste0(res_sum_K_select, "_", meth),
#          transform(eval(as.name(paste0(res_sum_K_select, "_", meth))),
#                    K_type = paste0("K_select_", meth)))
# }
# # assign(paste0(res_sum_K_select),
# #        transform(eval(res_sum_K_select), K_type = "K_select"))
# assign(paste(res_sum_K_select_true), eval(res_sum_K_true))
# for (meth in method_sels){
#   assign(paste(res_sum_K_select_true), rbind(eval(res_sum_K_select_true),
#                                              eval(as.name(paste0(res_sum_K_select, "_", meth)))))
# }

# times_temp <- eval(as.name(paste0(res_sum_K_select, "_", method_sels[1])))$total_time
# 
# comp_time <-  as.name(paste0("computation_time", ak))
# assign(paste(comp_time), 
#        sum(times_temp))
# 
# m_time <-  as.name(paste0("mean_time", ak))
# assign(paste(m_time), 
#        mean(times_temp))
# 
# rm(times_temp)


######################################################################
## Sumarizing Function
######################################################################
## Simple Scores
RMSE <- function(y, true.value) {
  return(sqrt(mean((y - true.value)^2)))
}

compute_scores <- function(res_sum){
  # Combinations of "true" parameters (no n)
  # Aggregation of parameter estimates conditonal upon convergence of EM procedure
  return(ddply(subset(res_sum, CV_estim == 1),
               # .(alpha, gamma, K, s_noise, ntaxa, grp),
               as.quoted(colnames(simparams)[!(colnames(simparams) %in% c("gamma", "nrep"))]),
               summarize,
               ## Simple Scores
               #                alpha.mean = mean(alpha_estim),
               #                alpha.sd = sd(alpha_estim),
               #                alpha.RMSE = RMSE(alpha_estim, unique(alpha)),
               # alpha.init.RMSE = RMSE(alpha_estim_init, unique(alpha)),
               #                gamma.mean = mean(gamma_estim),
               #                gamma.sd = sd(gamma_estim),
               #                gamma.RMSE = RMSE(gamma_estim, unique(gamma)),
               # gamma.init.RMSE = RMSE(gamma_estim_init, unique(gamma)),
               #                beta_0.mean = mean(beta_0_estim),
               #                beta_0.sd = sd(beta_0_estim),
               #                beta_0.RMSE = RMSE(beta_0_estim, beta_0),
               # beta_0.init.RMSE = RMSE(beta_0_estim_init, beta_0),
               # lambda.mean = mean(lambda_estim),
               # lambda.sd = sd(lambda_estim),
               # lambda.RMSE = RMSE(lambda_estim, lambda),
               log_likelihood.mean = mean(log_likelihood),
               log_likelihood.mean.init = mean(log_likelihood_init),
               mean_number_new_shifts.mean = mean(mean_number_new_shifts),
               number_equivalent_solutions.mean = mean(number_equivalent_solutions),
               difficulty.mean = mean(difficulty),
               manova.mean = mean(manova),
               EM_steps.mean = mean(EM_steps, na.rm = TRUE),
               K.match = mean(K == K_try),
               K.match.leq = mean(K >= K_try),
               ## List Scores
               # mean.edge.quality.mean = mean(mean.edge.quality),
               sensitivity.mean = mean(sensitivity),
               sensitivity.init.mean = mean(sensitivity.init),
               specificity.mean = mean(specificity),
               specificity.init.mean = mean(specificity.init),
               precision.mean = mean(precision),
               precision.init.mean = mean(precision.init),
               mean.adjrandind = mean(adjrandind),
               mean.adjrandind.init = mean(adjrandind.init),
               # mean.distance_means = mean(distance_means),
               total_time.mean = mean(total_time)
  ))
}

compute_scores_l1ou <- function(res_sum){
  # Combinations of "true" parameters (no n)
  # Aggregation of parameter estimates conditonal upon convergence of EM procedure
  return(ddply(res_sum,
               # .(alpha, gamma, K, s_noise, ntaxa, grp),
               as.quoted(colnames(simparams)[!(colnames(simparams) %in% c("gamma", "nrep"))]),
               summarize,
               ## Simple Scores
               #                alpha.mean = mean(alpha_estim),
               #                alpha.sd = sd(alpha_estim),
               #                alpha.RMSE = RMSE(alpha_estim, unique(alpha)),
               # alpha.init.RMSE = RMSE(alpha_estim_init, unique(alpha)),
               #                gamma.mean = mean(gamma_estim),
               #                gamma.sd = sd(gamma_estim),
               #                gamma.RMSE = RMSE(gamma_estim, unique(gamma)),
               # gamma.init.RMSE = RMSE(gamma_estim_init, unique(gamma)),
               #                beta_0.mean = mean(beta_0_estim),
               #                beta_0.sd = sd(beta_0_estim),
               #                beta_0.RMSE = RMSE(beta_0_estim, beta_0),
               # beta_0.init.RMSE = RMSE(beta_0_estim_init, beta_0),
               # lambda.mean = mean(lambda_estim),
               # lambda.sd = sd(lambda_estim),
               # lambda.RMSE = RMSE(lambda_estim, lambda),
               log_likelihood.mean = mean(log_likelihood),
               # log_likelihood.mean.init = mean(log_likelihood_init),
               # mean_number_new_shifts.mean = mean(mean_number_new_shifts),
               number_equivalent_solutions.mean = mean(number_equivalent_solutions),
               # difficulty.mean = mean(difficulty),
               # EM_steps.mean = mean(EM_steps, na.rm = TRUE),
               # K.match = mean(K == K_try),
               # K.match.leq = mean(K >= K_try),
               ## List Scores
               # mean.edge.quality.mean = mean(mean.edge.quality),
               sensitivity.mean = mean(sensitivity),
               # sensitivity.init.mean = mean(sensitivity.init),
               specificity.mean = mean(specificity),
               # specificity.init.mean = mean(specificity.init),
               precision.mean = mean(precision),
               # precision.init.mean = mean(precision.init),
               mean.adjrandind = mean(adjrandind),
               # mean.adjrandind.init = mean(adjrandind.init),
               # mean.distance_means = mean(distance_means),
               total_time.mean = mean(total_time)
  ))
}

# sum_scores_K_true <-  as.name(paste0("summary_scores", ak, "_K_true"))
# sum_scores_K_select <-  as.name(paste0("summary_scores", ak, "_K_select"))
# sum_scores_K_select_true <-  as.name(paste0("summary_scores", ak, "_K_select_true"))
# 
# assign(paste(sum_scores_K_true), compute_scores(eval(res_sum_K_true)))
# for (meth in method_sels){
#   assign(paste0(sum_scores_K_select, "_", meth),
#          compute_scores(eval(as.name(paste0(res_sum_K_select, "_", meth)))))
# }
# # assign(paste(sum_scores_K_select), compute_scores(eval(res_sum_K_select)))
# 
# assign(paste0(sum_scores_K_true),
#        transform(eval(sum_scores_K_true), K_type = "K_true"))
# for (meth in method_sels){
#   assign(paste0(sum_scores_K_select, "_", meth),
#          transform(eval(as.name(paste0(sum_scores_K_select, "_", meth))),
#                    K_type = paste0("K_select_", meth)))
# }
# assign(paste(sum_scores_K_select_true), eval(sum_scores_K_true))
# for (meth in method_sels){
#   assign(paste(sum_scores_K_select_true), rbind(eval(sum_scores_K_select_true),
#                                                 eval(as.name(paste0(sum_scores_K_select, "_", meth)))))
# }
# 
# # save.image(paste0(saveresultfile, "_aranged", ak, "-", datestamp_day, "_heavy.RData"))
# 
# rm(list = c("comp_time", "m_time",
#             # "simparams_alpha", "simparams_base",
#             # "simparams_gamma", "simparams_K",
#             # paste(simests), "simests",
#             # paste(simests_K_true), "simests_K_true",
#             # paste(simests_K_select), "simests_K_select",
#             paste(res_sum_K_true), "res_sum_K_true",
#             paste0(res_sum_K_select, "_", method_sels)))
# 
# save.image(paste0(saveresultfile, "_aranged", ak, "-", datestamp_day, ".RData"))

######################
## Loop on inference files - PhyloEM
######################
res_sum_K_select_true_all <-  as.name(paste0("results_summary", ak, "_K_select_true_all"))
assign(paste(res_sum_K_select_true_all), NULL)
sum_scores_K_select_true_all <-  as.name(paste0("summary_scores", ak, "_K_select_true_all"))
assign(paste(sum_scores_K_select_true_all), NULL)

for (file in files_list){
  load(file)
  gc()
  ## simestimations
  ak <- ""#"alpha_known_"
  simests <- as.name(paste0("simestimations_", ak, inference.index))
  
  ## Add total times min
  assign(paste(simests), lapply(eval(simests), add_total_times_min))
  
  ## Add least squares
  # assign(paste(simests), lapply(eval(simests), add_lsq))
  
  ## Model Selection
  # assign(paste(simests), lapply(eval(simests), model_selection_tmp))

  ## Extract inferences with K_true or K_select
  simests_K_true <-  as.name(paste0("simestimations", ak, "_K_true"))
  assign(paste(simests_K_true), lapply(eval(simests), extract_result_K_true))
  nms <- c(names(eval(simests)[[1]]$res$alpha_max), names(eval(simests)[[1]]$res$alpha_min), names(eval(simests)[[1]]$res$alpha_min_raw))
  method_sels <- nms[c(grep("DDSE", nms),
                       grep("Djump", nms),
                       # grep("BGH", nms),
                       grep("BGHlsq", nms),
                       grep("BGHml", nms))]
                       # grep("BGHlsqraw", nms),
                       # grep("BGHmlraw", nms))]
  simests_K_select <-  as.name(paste0("simestimations", ak, "_K_select"))
  for (meth in method_sels){
    assign(paste0(simests_K_select, "_", meth),
           lapply(eval(simests), extract_result_K_select, method = meth))
  }
  rm(list = c(paste(simests), "simests"))
  gc()

  ## Add simu parameters to summary
  assign(paste(simests_K_true), lapply(eval(simests_K_true), add_simu_params))
  for (meth in method_sels){
    assign(paste0(simests_K_select, "_", meth),
           lapply(eval(as.name(paste0(simests_K_select, "_", meth))), add_simu_params))
  }

  ## Scores involving lists
  assign(paste(simests_K_true),
         lapply(eval(simests_K_true), compute_list_scores_element))
  for (meth in method_sels){
    assign(paste0(simests_K_select, "_", meth),
           lapply(eval(as.name(paste0(simests_K_select, "_", meth))), compute_list_scores_element))
  }

  ## Add Matrix Estims
  assign(paste(simests_K_true),
         lapply(eval(simests_K_true), add_diag))
  for (meth in method_sels){
    assign(paste0(simests_K_select, "_", meth),
           lapply(eval(as.name(paste0(simests_K_select, "_", meth))), add_diag))
  }
  assign(paste(simests_K_true),
         lapply(eval(simests_K_true), add_offdiag))
  for (meth in method_sels){
    assign(paste0(simests_K_select, "_", meth),
           lapply(eval(as.name(paste0(simests_K_select, "_", meth))), add_offdiag))
  }

  ## Format the data
  res_sum <-  as.name(paste0("results_summary", ak))
  res_sum_K_true <-  as.name(paste0("results_summary", ak, "_K_true"))
  res_sum_K_select <-  as.name(paste0("results_summary", ak, "_K_select"))
  res_sum_K_select_true <-  as.name(paste0("results_summary", ak, "_K_select_true"))
  assign(paste(res_sum_K_true), extract_data_frame(eval(simests_K_true)))
  for (meth in method_sels){
    assign(paste0(res_sum_K_select, "_", meth),
           extract_data_frame(eval(as.name(paste0(simests_K_select, "_", meth)))))
  }
  rm(list = c(paste(simests_K_true), "simests_K_true",
              paste0(simests_K_select, "_", method_sels)))
  gc()
  assign(paste0(res_sum_K_true),
         transform(eval(res_sum_K_true), K_type = "K_true"))
  for (meth in method_sels){
    assign(paste0(res_sum_K_select, "_", meth),
           transform(eval(as.name(paste0(res_sum_K_select, "_", meth))),
                     K_type = paste0("K_select_", meth)))
  }
  assign(paste(res_sum_K_select_true), eval(res_sum_K_true))
  for (meth in method_sels){
    assign(paste(res_sum_K_select_true), merge(eval(res_sum_K_select_true),
                                               eval(as.name(paste0(res_sum_K_select, "_", meth))),
                                               all = TRUE))
  }

  ## Sumarizing Function
  sum_scores_K_true <-  as.name(paste0("summary_scores", ak, "_K_true"))
  sum_scores_K_select <-  as.name(paste0("summary_scores", ak, "_K_select"))
  sum_scores_K_select_true <-  as.name(paste0("summary_scores", ak, "_K_select_true"))
  assign(paste(sum_scores_K_true), compute_scores(eval(res_sum_K_true)))
  for (meth in method_sels){
    assign(paste0(sum_scores_K_select, "_", meth),
           compute_scores(eval(as.name(paste0(res_sum_K_select, "_", meth)))))
  }
  assign(paste0(sum_scores_K_true),
         transform(eval(sum_scores_K_true), K_type = "K_true"))
  for (meth in method_sels){
    assign(paste0(sum_scores_K_select, "_", meth),
           transform(eval(as.name(paste0(sum_scores_K_select, "_", meth))),
                     K_type = paste0("K_select_", meth)))
  }
  assign(paste(sum_scores_K_select_true), eval(sum_scores_K_true))
  for (meth in method_sels){
    assign(paste(sum_scores_K_select_true), rbind(eval(sum_scores_K_select_true),
                                                  eval(as.name(paste0(sum_scores_K_select, "_", meth)))))
  }

  ## Merge all results together
  assign(paste(res_sum_K_select_true_all), rbind(eval(res_sum_K_select_true_all),
                                                 eval(res_sum_K_select_true)))
  assign(paste(sum_scores_K_select_true_all), rbind(eval(sum_scores_K_select_true_all),
                                                    eval(sum_scores_K_select_true)))

  ## Clean up
  rm(list = c(
    #"comp_time", "m_time",
    # "simparams_alpha", "simparams_base",
    # "simparams_gamma", "simparams_K",
    # paste(simests), "simests",
    # paste(simests_K_true), "simests_K_true",
    # paste(simests_K_select),
    "simests_K_select",
    paste(res_sum_K_true), "res_sum_K_true",
    paste0(res_sum_K_select, "_", method_sels),
    paste(sum_scores_K_true), "sum_scores_K_true",
    paste0(sum_scores_K_select, "_", method_sels),
    paste(res_sum_K_select_true), "res_sum_K_select_true",
    paste(sum_scores_K_select_true), "sum_scores_K_select_true"))
}

## Re-name correctly
res_sum_K_select_true <-  as.name(paste0("results_summary", ak, "_K_select_true"))
sum_scores_K_select_true <-  as.name(paste0("summary_scores", ak, "_K_select_true"))
assign(paste(res_sum_K_select_true), eval(res_sum_K_select_true_all))
assign(paste(sum_scores_K_select_true), eval(sum_scores_K_select_true_all))
rm(list = c(
  paste(res_sum_K_select_true_all), "res_sum_K_select_true_all",
  paste(sum_scores_K_select_true_all), "sum_scores_K_select_true_all"))

datestamp_day <- format(Sys.time(), "%Y-%m-%d")
save.image(paste0(saveresultfile, "_aranged", ak, "-", datestamp_day, ".RData"))
    

# ######################
# ## Loop on inference files - l1ou
# ######################
# res_sum_all <-  as.name(paste0("results_summary_all"))
# assign(paste(res_sum_all), NULL)
# sum_scores_all <-  as.name(paste0("summary_scores_all"))
# assign(paste(sum_scores_all), NULL)
# 
# for (file in files_list){
#   load(file)
#   gc()
#   ## simestimations
#   simests <- as.name(paste0("simestimations_l1ou_", inference.index))
#   
#   ## Add simu parameters to summary
#   assign(paste(simests), lapply(eval(simests), add_simu_params_l1ou))
#   
#   ## Scores involving lists
#   assign(paste(simests), lapply(eval(simests), compute_list_scores_element_l1ou))
#   
#   # ## Add Matrix Estims
#   # assign(paste(simests), lapply(eval(simests), add_diag))
#   # assign(paste(simests), lapply(eval(simests), add_offdiag))
#   
#   ## Format the data
#   res_sum <-  as.name(paste0("results_summary"))
#   assign(paste(res_sum), extract_data_frame(eval(simests)))
#   rm(list = c(paste(simests), "simests"))
#   gc()
#   assign(paste0(res_sum), transform(eval(res_sum), K_type = "l1ou"))
#   
#   ## Sumarizing Function
#   sum_scores <-  as.name(paste0("summary_scores"))
#   assign(paste(sum_scores), compute_scores_l1ou(eval(res_sum)))
#   assign(paste0(sum_scores), transform(eval(sum_scores), K_type = "l1ou"))
#   
#   ## Merge all results together
#   assign(paste(res_sum_all), rbind(eval(res_sum_all), eval(res_sum)))
#   assign(paste(sum_scores_all), rbind(eval(sum_scores_all), eval(sum_scores)))
#   
#   ## Clean up
#   rm(list = c(
#     #"comp_time", "m_time",
#     # "simparams_alpha", "simparams_base",
#     # "simparams_gamma", "simparams_K",
#     # paste(simests), "simests",
#     # paste(simests_K_true), "simests_K_true",
#     # paste(simests_K_select),
#     paste(res_sum), "res_sum",
#     paste(sum_scores), "sum_scores"))
# }
# 
# ## Re-name correctly
# summary_scores_l1ou <- summary_scores_all
# results_summary_l1ou <- results_summary_all
# rm(list = c(
#   paste(res_sum_all), "res_sum_all",
#   paste(sum_scores_all), "sum_scores_all"))
# 
# save.image(paste0(saveresultdir, "/", saveresultname, "_aranged", ak, "-", datestamp_day, ".RData"))

# ######################
# ## New likelihood for missing
# ######################
# load("../Results/Simulations_Multivariate/multivariate_simlist_only_2016-07-26.RData")
# 
# # simulations2keep <- sapply(simlist, function(x) { x$nrep %in% 1:2 }, simplify = TRUE)
# # simlist <- simlist[simulations2keep]
# 
# change_log_lik <- function(simest, sim){
#   simest$sim$log_likelihood.true <- sim$log_likelihood.true
#   return(simest)
# }
# 
# assign(paste(simests), 
#        mapply(change_log_lik, eval(simests), simlist, SIMPLIFY = FALSE))

# #########################################################
# ## Model Selection
# #########################################################
# 
# model_selection_several_K <- function(ret){
#   if (length(names(simestimations_alpha_known[[1]]$res)) == 5){ # alpha known
#     alpha_grid <- ret$sim$alpha
#   } else {
#     alpha_grid <- find_grid_alpha(trees[[paste0(ret$sim$ntaxa)]],
#                                   nbr_alpha = 10,
#                                   factor_up_alpha = 2,
#                                   factor_down_alpha = 3,
#                                   quantile_low_distance = 0.001,
#                                   log_transform = TRUE)
#   }
#   res <- PhyloEM(phylo = trees[[paste0(ret$sim$ntaxa)]],
#                  Y_data = ret$sim$Y_data,
#                  process = "scOU",
#                  K_max = max(K_try[[paste0(ret$sim$ntaxa)]]),
#                  estimates = ret$res,
#                  random.root = FALSE,
#                  alpha = alpha_grid,
#                  save_step = FALSE,
#                  Nbr_It_Max = 2000,
#                  tol = list(variance = 10^(-2), 
#                             value.root = 10^(-2),
#                             log_likelihood = 10^(-2)),
#                  method.init = "lasso",
#                  use_previous = FALSE,
#                  method.selection = "BGH")
#   res <- add_total_time(res)
#   res <- enlight_res(res)
#   ret <- list(sim = ret$sim,
#               res = res)
#   return(ret)
# }
# 
# enlight_res <- function(res){
#   lres <- vector("list", 4)
#   lres[1:3] <- res[1:3]
#   lmax <- res$alpha_max$BGH[c("params_select", "params_raw", "params_init_estim",
#                               "results_summary",
#                               # "Yhat", "Zhat", "Yvar", "Zvar",
#                               "m_Y_estim", "edge.quality")]
#   lres$alpha_max <- res$alpha_max
#   lres$alpha_max$BGH <- lmax
#   return(lres)
# }
# 
# add_total_time <- function(res){
#   tot_time <- sum(sapply(res[-c(1, 2, 3, length(res))],
#                          function(z) z$results_summary$time))
#   res$alpha_max$results_summary$total_time <- tot_time
#   res$alpha_max$BGH$results_summary$total_time <- tot_time
#   res$alpha_max$BGH$results_summary <- as.data.frame(res$alpha_max$BGH$results_summary)
#   return(res)
# }
# 
# simests <- as.name(paste0("simestimations", ak))

# reqpckg <- c("ape", "glmnet", "robustbase")
# 
# tmp <- foreach(i = eval(simests), .packages = reqpckg) %do%
# {
#   model_selection_several_K(i)
# }
# 
# assign(paste(simests), tmp)
# rm(tmp)
# gc()
# 
# 
# save.image(paste0(saveresultfile, ak, "_", datestamp_day, "_selected.RData"))
# 
# # assign(paste(simests), lapply(eval(simests), add_K_select_to_list))
# 
# load(paste0(saveresultfile, ak, "_", datestamp_day, "_selected.RData"))
    
#########################################################
## Time computation
#########################################################
# add_total_time <- function(simest){
#   tot_time <- sum(sapply(simest$res[-c(1, 2, 3, length(simest$res))],
#                          function(z) z$results_summary$time))
#   simest$res$alpha_max$results_summary$total_time <- tot_time
#   simest$res$alpha_max$BGH$results_summary$total_time <- tot_time
#   simest$res$alpha_max$BGH$results_summary <- as.data.frame(simest$res$alpha_max$BGH$results_summary)
#   return(simest)
# }
# 
# assign(paste(simests), lapply(eval(simests), add_total_time))


# #########################################################
# ## Computation of means at tips
# #########################################################
# compute_mean_estim <- function(simest){
#   if (is.null(simest)){
#     return(NULL)
#   } else {
#   XX <- simulate_internal(phylo = trees[[paste(simest$ntaxa)]],
#                  process = "OU",
#                  root.state = simest$params_estim$root.state, 
#                  variance = simest$params_estim$variance,
#                  shifts = simest$params_estim$shifts, 
#                  selection.strength = simest$params_estim$selection.strength, 
#                  optimal.value = simest$params_estim$optimal.value)
#   m_Y_estim <- extract_simulate_internal(XX, where="tips", what="expectations")
#   simest$m_Y_estim <- m_Y_estim
#   return(simest)
#   }
# }
# 
# simestimations_alpha_known_K_true <- lapply(simestimations_alpha_known_K_true,
#                                             compute_mean_estim)
# simestimations_alpha_known_K_select<-lapply(simestimations_alpha_known_K_select,
#                                               compute_mean_estim)

# ######################
# ## Add lambda parameter
# ######################
# 
# add_lambda <- function(simest){
#   if (is.null(simest)) return(NULL)
#   simest$res$results_summary$lambda_estim <- simest$res$params_estim$lambda
#   simest$res$results_summary$lambda <- beta_0
#   return(simest)
# }
# 
# assign(paste(simests_K_true), lapply(eval(simests_K_true), add_lambda))
# for (meth in method_sels){
#   assign(paste0(simests_K_select, "_", meth),
#          lapply(eval(as.name(paste0(simests_K_select, "_", meth))), add_lambda))
# }
# # assign(paste(simests_K_select), lapply(eval(simests_K_select), add_lambda))


##################################################################
## Merge alpha known and unknown
##################################################################
rm(list = ls())

saveresultfile = "../Results/Simulations_Multivariate/multivariate_estimations_SUN_rBM"

datestamp_day <- "2016-11-28" # 2015-03-24
ak <- ""
load(paste0(saveresultfile, "_aranged", ak, "-", datestamp_day, ".RData"))

datestamp_day <- "2016-11-15" # 2015-03-17
ak <- "_alpha_known"
load(paste0(saveresultfile, "_aranged", ak, "-", datestamp_day, ".RData"))

results_summary_K_select_true$alpha_known <- FALSE
results_summary_alpha_known_K_select_true$alpha_known <- TRUE
library(plyr)
results_summary_K_select_true <- rbind.fill(results_summary_K_select_true,
                                            results_summary_alpha_known_K_select_true)
rm(results_summary_alpha_known_K_select_true)

summary_scores_K_select_true$alpha_known <- FALSE
summary_scores_alpha_known_K_select_true$alpha_known <- TRUE
summary_scores_K_select_true <- rbind.fill(summary_scores_K_select_true,
                                      summary_scores_alpha_known_K_select_true)
rm(summary_scores_alpha_known_K_select_true)

datestamp_day <- format(Sys.time(), "%Y-%m-%d")
save.image(paste0(saveresultfile, "_aranged_both_alpha", "-", datestamp_day, ".RData"))

# ##################################################################
# ## Merge with lag init
# ##################################################################
# rm(list = ls())
# 
# saveresultfile = "../Results/Simulations_Multivariate/multivariate_estimations_SUN_rBM"
# 
# datestamp_day <- "2016-07-13" # 2015-03-24
# ak <- "_alpha_known"
# load(paste0(saveresultfile, "_aranged", ak, "-", datestamp_day, ".RData"))
# results_summary_alpha_known_K_select_true_1 <- results_summary_alpha_known_K_select_true
# results_summary_alpha_known_K_select_true_1$K_lag_init <- FALSE
# summary_scores_alpha_known_K_select_true_1 <- summary_scores_alpha_known_K_select_true
# summary_scores_alpha_known_K_select_true_1$K_lag_init <- FALSE
# 
# datestamp_day <- "2016-07-23" # 2015-03-17
# ak <- "_alpha_known"
# load(paste0(saveresultfile, "_aranged", ak, "-", datestamp_day, ".RData"))
# results_summary_alpha_known_K_select_true_2 <- results_summary_alpha_known_K_select_true
# results_summary_alpha_known_K_select_true_2$K_lag_init <- TRUE
# summary_scores_alpha_known_K_select_true_2 <- summary_scores_alpha_known_K_select_true
# summary_scores_alpha_known_K_select_true_2$K_lag_init <- TRUE
# 
# results_summary_alpha_known_K_select_true <- rbind(results_summary_alpha_known_K_select_true_1,
#                                        results_summary_alpha_known_K_select_true_2)
# rm(results_summary_alpha_known_K_select_true_1,
#    results_summary_alpha_known_K_select_true_2)
# 
# summary_scores_alpha_known_K_select_true <- rbind(summary_scores_alpha_known_K_select_true_1,
#                                       summary_scores_alpha_known_K_select_true_2)
# rm(summary_scores_alpha_known_K_select_true_1,
#    summary_scores_alpha_known_K_select_true_2)
# 
# datestamp_day <- format(Sys.time(), "%Y-%m-%d")
# save.image(paste0(saveresultfile, "aranged_alpha_known_lag", "-", datestamp_day, ".RData"))


##################################################################
## Merge EM l1ou
##################################################################
rm(list = ls())
gc()

## EM
saveresultfile = "../Results/Simulations_Multivariate/multivariate_estimations_SUN_rBM"
datestamp_day <- "2016-11-28"
ak <- "_both_alpha"
# ak <- ""
load(paste0(saveresultfile, "_aranged", ak, "-", datestamp_day, ".RData"))
results_summary_K_select_true$pPCA <- FALSE
summary_scores_K_select_true$pPCA <- FALSE

## l1OU
saveresultfile = "../Results/Simulations_Multivariate/multivariate_estimations_l1ou"
datestamp_day <- "2016-10-31"
load(paste0(saveresultfile, "_aranged", "-", datestamp_day, ".RData"))
results_summary_l1ou$pPCA <- FALSE
summary_scores_l1ou$pPCA <- FALSE

## Merging
library(plyr)
results_summary_l1ou$alpha_known <- FALSE
results_summary_K_select_true_all <- rbind.fill(results_summary_K_select_true,
                                                results_summary_l1ou)
rm(results_summary_l1ou, results_summary_K_select_true)

summary_scores_l1ou$alpha_known <- FALSE
summary_scores_K_select_true_all <- rbind.fill(summary_scores_K_select_true,
                                               summary_scores_l1ou)
rm(summary_scores_l1ou, summary_scores_K_select_true)

## l1OU - PCA
saveresultfile = "../Results/Simulations_Multivariate/multivariate_estimations_l1ou"
datestamp_day <- "2017-01-24"
load(paste0(saveresultfile, "_aranged", "-", datestamp_day, ".RData"))
# levels(results_summary_l1ou$K_type) <- "l1ou_PCA"
# levels(summary_scores_l1ou$K_type) <- "l1ou_PCA"
results_summary_l1ou$pPCA <- TRUE
summary_scores_l1ou$pPCA <- TRUE

results_summary_l1ou$alpha_known <- FALSE
results_summary_K_select_true_all <- rbind.fill(results_summary_K_select_true_all,
                                                results_summary_l1ou)
rm(results_summary_l1ou)

summary_scores_l1ou$alpha_known <- FALSE
summary_scores_K_select_true_all <- rbind.fill(summary_scores_K_select_true_all,
                                               summary_scores_l1ou)
rm(summary_scores_l1ou)

## EM - PCA
saveresultfile = "../Results/Simulations_Multivariate/multivariate_estimations_SUN_rBM_pPCA"
datestamp_day <- "2017-04-20"
load(paste0(saveresultfile, "_aranged", "-", datestamp_day, ".RData"))
results_summary_K_select_true$pPCA <- TRUE
summary_scores_K_select_true$pPCA <- TRUE
results_summary_K_select_true$alpha_known <- FALSE
summary_scores_K_select_true$alpha_known <- FALSE

results_summary_K_select_true_all <- rbind.fill(results_summary_K_select_true_all,
                                                results_summary_K_select_true)
rm(results_summary_K_select_true)

summary_scores_K_select_true_all <- rbind.fill(summary_scores_K_select_true_all,
                                               summary_scores_K_select_true)
rm(summary_scores_K_select_true)

## Correct names
results_summary_K_select_true <- results_summary_K_select_true_all
summary_scores_K_select_true <- summary_scores_K_select_true_all
rm(results_summary_K_select_true_all, summary_scores_K_select_true_all)

saveresultfile = "../Results/Simulations_Multivariate/multivariate_estimations_SUN_rBM_l1ou_PCA"
datestamp_day <- format(Sys.time(), "%Y-%m-%d")
save.image(paste0(saveresultfile, "_aranged", "-", datestamp_day, ".RData"))

######################################################################
## Equivalent Solutions
######################################################################
saveresultfile = "../Results/Simulations_Several_K/several_K_estimations_FUN_"
datestamp_day <- "2015-11-20" # 03-17 03-24
ak <- "" # "_alpha_known"

load(paste0(saveresultfile, "aranged", ak, "-", datestamp_day, "_heavy.RData"))

indexes <- which((results_summary_K_select$number_equivalent_solutions > 1))
length(indexes)

simestimations_K_select_eqs <- simestimations_K_select[indexes]
results_summary_K_select_eqs <- results_summary_K_select[indexes,]

save(simestimations_K_select_eqs, results_summary_K_select_eqs, file = paste0(saveresultfile, "equivalent_solutions", ak, "-", datestamp_day, ".RData"))

###############################################################################
## Tests
###############################################################################
simest <- simestimations_alpha_known[[40]] # Case K_lag really help (3 shifts).
simest <- simestimations_alpha_known[[143]] # Case with missing values
simest <- simestimations_alpha_known[[30]] # Case 11 shifts used to be very bad
simest <- simestimations_alpha_known[[56]] # alpha non scalar
simest <- simestimations_alpha_known[[63]] # NA = 0.2%
simest <- simestimations_alpha_known[[92]] # ntaxa = 215

params_test <- simest$res$alpha_max$params_estim$`3`
params_test_true <- list(shifts = simest$sim$shifts,
                         optimal.value = c(0,0,0,0))

simest$sim$alpha
simest$sim$gamma
simest$sim$shifts
simest$sim$rd

plot_data.process.actual(Y.state = simest$sim$Y_data[1,],
                         phylo = trees[[paste0(simest$sim$ntaxa)]], 
                         params = params_test_true,
                         adj.root = 1.3,
                         automatic_colors = TRUE,
                         margin_plot = NULL,
                         cex = 1.3,
                         bg_shifts = "azure2",
                         bg_beta_0 = "azure2",
                         no.margin = TRUE)

plot_data.process.actual(Y.state = simest$sim$Y_data[1,],
                         phylo = trees[[paste0(simest$sim$ntaxa)]], 
                         params = params_test,
                         adj.root = 1.3,
                         automatic_colors = TRUE,
                         margin_plot = NULL,
                         cex = 1.3,
                         bg_shifts = "azure2",
                         bg_beta_0 = "azure2",
                         no.margin = TRUE)

plot_data.process.actual(Y.state = simest$sim$Y_data[1,],
                         phylo = trees[[paste0(simest$sim$ntaxa)]], 
                         params = simest$res$alpha_max$params_estim$`3`,
                         adj.root = 1.3,
                         automatic_colors = TRUE,
                         margin_plot = NULL,
                         cex = 1.3,
                         bg_shifts = "azure2",
                         bg_beta_0 = "azure2",
                         no.margin = TRUE)

plot(simest$res$alpha_max$capushe_outputBM1, newwindow=FALSE, ask=FALSE)

lp <- lineprof(results_estim_EM <- estimateEM(phylo = trees[[paste0(simest$sim$ntaxa)]],
                               Y_data = simest$sim$Y_data, 
                               process = "scOU", 
                               nbr_of_shifts = 3,
                               random.root = TRUE,
                               stationary.root = TRUE,
                               alpha_known = TRUE,
                               method.OUsun = "rescale",
                               known.selection.strength = simest$sim$alpha,
                               tol = list(variance = 10^(-2), 
                                          value.root = 10^(-2),
                                          log_likelihood = 10^(-2)),
                               Nbr_It_Max = 1000,
                               method.init = "lasso",
                               K_lag_init = 5,
               interval = 0.01)
#                                  edges.init = params_test_true$shifts$edges,
#                                  values.init = params_test_true$shifts$values,
#                                  exp.root.init = rep(0,4),

attr(results_estim_EM, "Divergence")
sapply(results_estim_EM$params_history, function(z) attr(z, "log_likelihood"))
sapply(results_estim_EM$params_history, function(z) z$shifts$edges)
simest$sim$log_likelihood.true
simest$res$alpha_max$results_summary$log_likelihood

plot_data.process.actual(Y.state = simest$sim$Y_data[1,],
                         phylo = trees[[paste0(simest$sim$ntaxa)]], 
                         params = results_estim_EM_3$params,
                         adj.root = 1.3,
                         automatic_colors = TRUE,
                         margin_plot = NULL,
                         cex = 1.3,
                         bg_shifts = "azure2",
                         bg_beta_0 = "azure2",
                         no.margin = TRUE)

X <- simest$sim
lp <- lineprof(
  res1 <- PhyloEM(phylo = trees[[paste0(X$ntaxa)]],
                Y_data = X$Y_data,
                process = "scOU",
                K_max = max(K_try[[paste0(X$ntaxa)]]),
                random.root = TRUE,
                stationary.root = TRUE,
                alpha = X$alpha,
                save_step = FALSE,
                Nbr_It_Max = 5,
                tol = list(variance = 10^(-2), 
                           value.root = 10^(-2),
                           log_likelihood = 10^(-2)),
                min_params = list(variance = 0, 
                                  value.root = -10^(5), 
                                  exp.root = -10^(5), 
                                  var.root = 0,
                                  selection.strength = 0),
                method.init = "lasso",
                use_previous = FALSE,
                method.selection = c("BirgeMassart1", "BirgeMassart2"),
                interval = 0.01)

lp <- lineprof(
res2 <- PhyloEM(phylo = trees[[paste0(X$ntaxa)]],
                Y_data = X$Y_data,
                process = "OU",
                K_max = 10,
                random.root = TRUE,
                stationary.root = TRUE,
                independent = TRUE,
                alpha_grid = FALSE,
                # alpha = c(2.9, 3, 3.1),
                save_step = FALSE,
                Nbr_It_Max = 50,
                tol = list(variance = 10^(-2), 
                           value.root = 10^(-2),
                           log_likelihood = 10^(-2),
                           normalized_half_life = 10^(-2)),
                min_params = list(variance = 0, 
                                  value.root = -10^(5), 
                                  exp.root = -10^(5), 
                                  var.root = 0,
                                  selection.strength = 0),
                method.init = "lasso",
                use_previous = FALSE,
                method.selection = c("BirgeMassart1", "BirgeMassart2"),
                K_lag_init = 5),
interval = 0.01)

res2$alpha_max$results_summary$log_likelihood
simest$sim$log_likelihood.true
plot(res2$alpha_max$capushe_outputBM1@DDSE)
res2$alpha_max$params_estim$`3`
sum(res2$alpha_max$results_summary$time)
simest$res$alpha_max$results_summary$total_time
pbastide/PhylogeneticEM documentation built on Feb. 12, 2024, 1:27 a.m.