simulations_study/multivariate_exploitation_l1ou.R

#############################################
## Exploitation several K
#############################################

##############
## Parameters and functions
##############

require(doParallel)
require(foreach)
require(ape)
#require(glmnet) # For Lasso initialization
# require(quadrupen) # For Lasso initialization
# require(robustbase) # For robust fitting of alpha
# library(TreeSim) # For simulation of the tree
# require(ggplot2) # Plot
# library(scales) # plot
# library(reshape2) # Plot
# library(grid) # Plot
library(plyr)
# library(capushe)

saveresultfile = "../Results/Simulations_Multivariate/multivariate_estimations_l1ou"
datestamp_day <- "2016-08-17" 

###############################################################

load(paste0(saveresultfile, "_", datestamp_day, "_all.RData"))

simestimations_l1ou <- simestimations
rm(simestimations)
# colnames(simparams)[10] <- "nrep"

rm(list = c("simparams_alpha", "simparams_base",
            "simparams_K",
            "simparams_r_drift", "simparams_r_selection",
            "simparams_s", "simparams_NA",
            "simulations2keep",
            "simlist"))
gc()

######################
## Add simu parameters to summary
######################

add_simu_params <- function(simest){
  if (simest$sim$NA_per > 0) return(NULL) # Drop missing values tests.
  for (pp in colnames(simparams)){
    simest$res$results_summary[[pp]] <- simest$sim[[pp]]
  }
#   simest$res$results_summary$alpha <- simest$sim$alpha
#   simest$res$results_summary$gamma <- simest$sim$gamma
#   simest$res$results_summary$K <- simest$sim$K
#   simest$res$results_summary$s_noise <- simest$sim$s_noise
#   simest$res$results_summary$n <- simest$sim$n
#   simest$res$results_summary$ntaxa <- simest$sim$ntaxa
#   simest$res$results_summary$grp <- simest$sim$grp
  simest$res$results_summary$difficulty <- simest$sim$difficulty
  simest$res$results_summary$log_likelihood.true <- simest$sim$log_likelihood.true
  simest$res$results_summary$it <- simest$sim$it
  ## Format results
  simest$res$params_estims <- list(shifts = simest$res$params_estims$shifts,
                                   variance = diag(simest$res$sigma2),
                                   selection.strength = diag(simest$res$alpha),
                                   optimal.value = simest$res$intercept,
                                   root.state = list(random = TRUE,
                                                     stationary.root = TRUE,
                                                     value.root = NA,
                                                     exp.root = simest$res$intercept,
                                                     var.root = diag(simest$res$sigma2/(2*simest$res$alpha))))
  return(simest)
}

simestimations_l1ou <- lapply(simestimations_l1ou, add_simu_params)

###########################################
## Scores involving lists
## (Compute complex score and add them to result summary for each element of the list)
###########################################
TP <- function(tr.edges, est.edges) { # True Positives
  if (is.null(tr.edges) ) return(0) # If no true shift at all, any shift found is misplaced
  if (is.null(est.edges)) return(0) # If no estimated shift, TP = 0
  return(sum(est.edges %in% tr.edges))
}

FP <- function(tr.edges, est.edges){ # False Positives
  return(length(est.edges) - TP(tr.edges, est.edges))
}

FN <- function(tr.edges, est.edges){ # False Negatives
  return(length(tr.edges) - TP(tr.edges, est.edges))
}

TN <- function(tr.edges, est.edges, nedges){ # True Negatives
  edges <- 1:nedges
  if (is.null(est.edges)) est.edges <- nedges + 1
  if (is.null(tr.edges)) tr.edges <- nedges + 1
  return(sum(edges[-est.edges] %in% edges[-tr.edges]))
}

sensitivity <- function(tr.edges, est.edges) { # Sensibilité
  if (is.null(tr.edges) ) return(NA) # If no true shift at all, any shift found is misplaced
  if (is.null(est.edges)) return(0) # If no estimated shift, TP = 0
  return(sum(est.edges %in% tr.edges)/length(tr.edges))
}

specificity <- function(tr.edges, est.edges, nedges) { # spécificité : TN/N
  edges <- 1:nedges
  if (is.null(est.edges)) est.edges <- nedges + 1
  if (is.null(tr.edges)) tr.edges <- nedges + 1
  return(sum(edges[-est.edges] %in% edges[-tr.edges])/length(edges[-tr.edges]))
}

precision <- function(tr.edges, est.edges) { # Precision
  if (is.null(tr.edges) ) return(0) # If no true shift at all, any shift found is misplaced
  if (is.null(est.edges)) return(NA) # If no estimated shift, TP = 0
  return(sum(est.edges %in% tr.edges)/length(est.edges))
}

match.partitions <- function(tr, est, nta) {
  tree <- trees[[paste0(nta)]]
  part.list <- subtree.list[[paste0(nta)]]
  ## computes ARI between partitions induced by edges in tr$edges and est$edges
  #   if (is.null(est$edges)){
  #     return(-1) # If lasso initialization failed
  #   } else {
  require(mclust)
  part1 <- clusters_from_shifts(tree, tr$edges, part.list)
  part2 <- clusters_from_shifts(tree, est$edges, part.list)
  return(adjustedRandIndex(part1, part2))        
  #   }
}

euclidian_distance <- function(m1, m2){
  return(sum((m2 - m1)^2))
}

RMSE <- function(y, true.value) {
  return(sqrt(mean((y - true.value)^2)))
}

extract_diag <- function(mat){
  if (is.vector(mat)){
    return(rep(mat, p_base))
  } else {
    return(diag(mat))
  }
}

diag_RMSE <- function(mat_eval, mat_true){
  return(RMSE(extract_diag(mat_eval), extract_diag(mat_true)))
}

extract_off_diag <- function(mat){
  if (is.vector(mat)){
    return(rep(0, p_base*(p_base-1)/2))
  } else {
    return(mat[upper.tri(mat)])
  }
}

offdiag_RMSE <- function(mat_eval, mat_true){
  return(RMSE(extract_off_diag(mat_eval), extract_off_diag(mat_true)))
}

compute_list_scores_element <- function(simest){
  if (is.null(simest)){ # || !simest$results_summary$CV_estim){
    #     return(data.frame(mean.edge.quality = NA, edge.recovery = NA,
    #                       edge.init.recovery = NA, adjrandind = NA, adjrandind.init = NA))
    return(NULL)
  } else {
    df <- data.frame(
      # mean.edge.quality = mean(simest$edge.quality),
      sensitivity = sensitivity(simest$sim$shifts$edges,
                                simest$res$params_estim$shifts$edges),
      sensitivity.init = NA,
      specificity = specificity(simest$sim$shifts$edges,
                                simest$res$params_estim$shifts$edges,
                                nrow(trees[[paste0(simest$sim$ntaxa)]]$edge)),
      specificity.init = NA,
      precision = precision(simest$sim$shifts$edges,
                            simest$res$params_estim$shifts$edges),
      precision.init = NA,
      adjrandind = match.partitions(simest$sim$shifts, 
                                    simest$res$params_estim$shifts,
                                    simest$sim$ntaxa),
      adjrandind.init = NA,
#       distance_means = euclidian_distance(simest$sim$m_Y_true,
#                                           simest$res$m_Y_estim),
      TP = TP(simest$sim$shifts$edges,
              simest$res$params_estim$shifts$edges),
      TP.init = NA,
      FP = FP(simest$sim$shifts$edges,
              simest$res$params_estim$shifts$edges),
      FP.init = NA,
      FN = FN(simest$sim$shifts$edges,
              simest$res$params_estim$shifts$edges),
      FN.init = NA,
      TN = TN(simest$sim$shifts$edges,
              simest$res$params_estim$shifts$edges,
              nrow(trees[[paste0(simest$sim$ntaxa)]]$edge)),
      TN.init = NA,
      gamma_diag_RMSE = diag_RMSE(simest$res$params_estim$root.state$var.root,
                                  simest$sim$params_simu$root.state$var.root),
      gamma_offdiag_RMSE = offdiag_RMSE(simest$res$params_estim$root.state$var.root,
                                        simest$sim$params_simu$root.state$var.root),
      alpha_diag_RMSE = diag_RMSE(simest$res$params_estim$selection.strength,
                                  simest$sim$params_simu$selection.strength),
      alpha_offdiag_RMSE = offdiag_RMSE(simest$res$params_estim$selection.strength,
                                        simest$sim$params_simu$selection.strength),
      sigma_diag_RMSE = diag_RMSE(simest$res$params_estim$variance,
                                  simest$sim$params_simu$variance),
      sigma_offdiag_RMSE = offdiag_RMSE(simest$res$params_estim$variance,
                                        simest$sim$params_simu$variance)
      # gamma_estim = as.vector(simest$res$params_estim$variance) / (2 * as.vector(simest$res$params_estim$selection.strength))
    )
    simest$res$results_summary <- cbind(simest$res$results_summary, df)
    return(simest)
  }
}


simestimations_l1ou <- lapply(simestimations_l1ou, compute_list_scores_element)

######################
## Format the data
######################

extract_data_frame <- function(simests){
  results_summary <- lapply(simests, function(z) return(z$res$results_summary))
  results_summary <- as.data.frame(do.call(rbind, results_summary))
#   dd <- do.call(rbind, simests)
#   dd <- do.call(rbind, dd[, "res"])
#   results_summary <- as.data.frame(do.call(rbind, dd[,"results_summary"]))
  return(results_summary)
}

results_summary_l1ou <- extract_data_frame(simestimations_l1ou)

rm(simestimations_l1ou)
gc()

results_summary_l1ou <- transform(results_summary_l1ou, K_type = "l1ou")

computation_time_l1ou <- sum(results_summary_l1ou$total_time)
mean_time_l1ou <- mean(results_summary_l1ou$total_time)

######################################################################
## Sumarizing Function
######################################################################
## Simple Scores
RMSE <- function(y, true.value) {
  return(sqrt(mean((y - true.value)^2)))
}

compute_scores <- function(res_sum){
  # Combinations of "true" parameters (no n)
  # Aggregation of parameter estimates conditonal upon convergence of EM procedure
  return(ddply(res_sum,
               # .(alpha, gamma, K, s_noise, ntaxa, grp),
               as.quoted(colnames(simparams)[!(colnames(simparams) %in% c("gamma", "nrep"))]),
               summarize,
               ## Simple Scores
#                alpha.mean = mean(alpha_estim),
#                alpha.sd = sd(alpha_estim),
#                alpha.RMSE = RMSE(alpha_estim, unique(alpha)),
               # alpha.init.RMSE = RMSE(alpha_estim_init, unique(alpha)),
#                gamma.mean = mean(gamma_estim),
#                gamma.sd = sd(gamma_estim),
#                gamma.RMSE = RMSE(gamma_estim, unique(gamma)),
               # gamma.init.RMSE = RMSE(gamma_estim_init, unique(gamma)),
#                beta_0.mean = mean(beta_0_estim),
#                beta_0.sd = sd(beta_0_estim),
#                beta_0.RMSE = RMSE(beta_0_estim, beta_0),
               # beta_0.init.RMSE = RMSE(beta_0_estim_init, beta_0),
               # lambda.mean = mean(lambda_estim),
               # lambda.sd = sd(lambda_estim),
               # lambda.RMSE = RMSE(lambda_estim, lambda),
               log_likelihood.mean = mean(log_likelihood),
               # log_likelihood.mean.init = mean(log_likelihood_init),
               # mean_number_new_shifts.mean = mean(mean_number_new_shifts),
               number_equivalent_solutions.mean = mean(number_equivalent_solutions),
               # difficulty.mean = mean(difficulty),
               # EM_steps.mean = mean(EM_steps, na.rm = TRUE),
               # K.match = mean(K == K_try),
               # K.match.leq = mean(K >= K_try),
               ## List Scores
               # mean.edge.quality.mean = mean(mean.edge.quality),
               sensitivity.mean = mean(sensitivity),
               # sensitivity.init.mean = mean(sensitivity.init),
               specificity.mean = mean(specificity),
               # specificity.init.mean = mean(specificity.init),
               precision.mean = mean(precision),
               # precision.init.mean = mean(precision.init),
               mean.adjrandind = mean(adjrandind),
               # mean.adjrandind.init = mean(adjrandind.init),
               # mean.distance_means = mean(distance_means),
               total_time.mean = mean(total_time)
  ))
}

summary_scores_l1ou <- compute_scores(results_summary_l1ou)
summary_scores_l1ou <- transform(summary_scores_l1ou, K_type = "K_true")

save.image(paste0(saveresultfile, "_aranged", "-", datestamp_day, ".RData"))
pbastide/PhylogeneticEM documentation built on Feb. 12, 2024, 1:27 a.m.