#############################################
## Exploitation several K
#############################################
##############
## Parameters and functions
##############
library(doParallel)
library(foreach)
library(PhylogeneticEM)
# require(ape)
# #require(glmnet) # For Lasso initialization
# # require(quadrupen) # For Lasso initialization
# require(robustbase) # For robust fitting of alpha and variance
# library(TreeSim) # For simulation of the tree
# require(ggplot2) # Plot
# library(scales) # plot
library(reshape2) # Plot
# library(grid) # Plot
library(plyr)
# library(capushe)
saveresultdir <- "../Results/Simulations_Multivariate"
saveresultname <- "multivariate_estimations_SUN_rBM"
datestamp_day_cut <- "2017-06-2"
# datestamp_day <- "2016-11-28"
ak <- ""# "alpha_known" # ""
files_list <- list.files(path = saveresultdir,
pattern = paste0(saveresultname, ak, "-", datestamp_day_cut),
full.names = TRUE)
###############################################################
######################
## Forgotten subtree.list
######################
datestamp_data <- "2017-06-20" #
savedatafile = "../Results/Simulations_Multivariate/multivariate_simlist"
load(paste0(savedatafile, "_", datestamp_data, "_light.RData"))
subtree.list <- vector(mode = "list");
for (nta in c(128, 32, 64, 96, 160, 192, 256)){
subtree.list[[paste0(nta)]] <- PhylogeneticEM:::enumerate_tips_under_edges(trees[[paste0(nta)]])
}
##############
## Add least squares
##############
# U_trees <- lapply(trees, incidence.matrix.full)
# names(U_trees) <- names(trees)
# times_shareds <- lapply(trees, compute_times_ca)
# names(times_shareds) <- names(trees)
#
# add_lsq <- function(X){
# # tree_one <- trees[[paste0(X$sim$ntaxa)]]
# # U_tree_one <- U_trees[[paste0(X$sim$ntaxa)]]
# # times_shared_one <- times_shareds[[paste0(X$sim$ntaxa)]]
# # fun <- function(params){
# # tmpsim <- PhylogeneticEM:::simulate_internal(phylo = tree_one,
# # process = "scOU",
# # p = attr(params, "p_dim"),
# # root.state = params$root.state,
# # shifts = params$shifts,
# # variance = params$variance,
# # optimal.value = params$optimal.value,
# # selection.strength = params$selection.strength,
# # simulate_random = FALSE,
# # U_tree = U_tree_one,
# # times_shared = times_shared_one)
# # ## Mean at tips with estimated parameters
# # m_Y_estim <- PhylogeneticEM:::extract_simulate_internal(tmpsim,
# # where="tips",
# # what="expectations")
# # }
#
# nums <- grep("alpha_", names(X$res))
# for (i in nums){
# # m_Y_estim <- lapply(X$res[[i]]$params_estim, fun)
# # X$res[[i]]$m_Y_estim_new <- m_Y_estim
# # X$res[[i]]$results_summary$least_squares_raw <- sapply(m_Y_estim,
# # function(z) sum((X$sim$Y_data - z)^2, na.rm = TRUE))
# # X$res[[i]]$results_summary$least_squares_bis <- sapply(X$res[[i]]$m_Y_estim,
# # function(z) sum((X$sim$Y_data - z)^2, na.rm = TRUE))
# X$res[[i]]$results_summary$least_squares <- sapply(X$res[[i]]$params_estim, function(z) sum(diag(z$variance)/(2*z$selection.strength)))
# }
# return(X)
# }
#
# fct_to_keep <- c(fct_to_keep, "U_trees", "add_lsq")
##############
## Model Selection
##############
# model_selection_tmp <- function(simres){
# X <- simres$sim
# res <- PhyloEM(phylo = trees[[paste0(X$ntaxa)]],
# Y_data = X$Y_data,
# process = "scOU",
# K_max = max(K_try[[paste0(X$ntaxa)]]) + 5,
# random.root = TRUE,
# stationary.root = TRUE,
# estimates = simres$res,
# method.selection = c("BGHlsq", "BGHml"))#, "BGHlsqraw", "BGHmlraw"))
# res$alpha_min$results_summary$total_time <- res$alpha_max$results_summary$total_time
# simres$res <- res
# return(simres)
# }
#
# # fct_to_keep <- c(fct_to_keep, "model_selection_tmp")
#
add_total_times_min <- function(simres){
simres$res$alpha_min$results_summary$total_time <- simres$res$alpha_max$results_summary$total_time
simres$res$alpha_min$results_summary$sum_times <- simres$res$alpha_max$results_summary$sum_times
simres$res$alpha_min_raw$results_summary$total_time <- simres$res$alpha_max$results_summary$total_time
simres$res$alpha_min_raw$results_summary$sum_times <- simres$res$alpha_max$results_summary$sum_times
return(simres)
}
######################
## Extract inferences with K_true or K_select
######################
## Extract the results where K_try = K_e (K_true or something else)
extract_result <- function(simest, Ke){
K_try <- 0:(length(simest$res$alpha_max$params_estim) - 1)
if (Ke >= length(simest$res$alpha_max$params_estim)) return(NULL)
fun <- function(l){
if (Ke %in% names(l)) return(l[[paste(Ke)]])
return(l)
}
res <- lapply(simest$res$alpha_max[c("params_estim",
#"Yhat", "Zhat", "Yvar", "Zvar",
"edge.quality",
"params_raw", "params_init_estim", "m_Y_estim")], fun)
res$results_summary <- subset(simest$res$alpha_max$results_summary, K_try == Ke)
return(list(sim = simest$sim,
res = res))
}
extract_result_min <- function(simest, Ke){
K_try <- 0:(length(simest$res$alpha_min$params_estim) - 1)
if (Ke >= length(simest$res$alpha_min$params_estim)) return(NULL)
fun <- function(l){
if (Ke %in% names(l)) return(l[[paste(Ke)]])
return(l)
}
res <- lapply(simest$res$alpha_min[c("params_estim",
#"Yhat", "Zhat", "Yvar", "Zvar",
"edge.quality",
"params_raw", "params_init_estim", "m_Y_estim")], fun)
res$results_summary <- subset(simest$res$alpha_min$results_summary, K_try == Ke)
return(list(sim = simest$sim,
res = res))
}
## Extract K_true
extract_result_K_true <- function(simest){
K_true <- simest$sim$K
simest <- extract_result(simest, K_true)
if (!is.null(simest)) simest$res$results_summary$K_select <- NA
return(simest)
}
# simests_K_true <- as.name(paste0("simestimations", ak, "_K_true"))
# assign(paste(simests_K_true), lapply(eval(simests), extract_result_K_true))
## Extract K_select
extract_result_K_select <- function(simest, method = "BGH"){
if (!is.null(simest$res$alpha_max[[method]])){
K_select <- length(simest$res$alpha_max[[method]]$params_select$shifts$edges)
simest <- extract_result(simest, K_select)
} else {
K_select <- length(simest$res$alpha_min[[method]]$params_select$shifts$edges)
simest <- extract_result_min(simest, K_select)
}
simest$res$results_summary$K_select <- K_select
return(simest)
# res <- simest$res$alpha_max[[method]]
# res$params_estim <- res$params_select
# return(list(sim = simest$sim,
# res = res))
}
# nms <- names(eval(simests)[[1]]$res$alpha_max)
# method_sels <- nms[c(grep("DDSE", nms),
# grep("Djump", nms),
# grep("BGH", nms))]
#
# simests_K_select <- as.name(paste0("simestimations", ak, "_K_select"))
# for (meth in method_sels){
# assign(paste0(simests_K_select, "_", meth),
# lapply(eval(simests), extract_result_K_select, method = meth))
# }
#
# rm(list = c(paste(simests), "simests"))
# gc()
######################
## Add simu parameters to summary
######################
add_simu_params <- function(simest){
for (pp in colnames(simparams)){
simest$res$results_summary[[pp]] <- simest$sim[[pp]]
}
# simest$res$results_summary$alpha <- simest$sim$alpha
# simest$res$results_summary$gamma <- simest$sim$gamma
# simest$res$results_summary$K <- simest$sim$K
# simest$res$results_summary$s_noise <- simest$sim$s_noise
# simest$res$results_summary$n <- simest$sim$n
# simest$res$results_summary$ntaxa <- simest$sim$ntaxa
# simest$res$results_summary$grp <- simest$sim$grp
simest$res$results_summary$correlated_base <- (simest$sim$rd == r_base[2] && simest$sim$grp %in% c("K_var", "base"))
simest$res$results_summary$difficulty <- simest$sim$difficulty
simest$res$results_summary$log_likelihood.true <- simest$sim$log_likelihood.true
simest$res$results_summary$it <- simest$sim$it
simest$res$results_summary$manova <- simest$sim$manova
return(simest)
}
add_simu_params_l1ou <- function(simest){
if (simest$sim$NA_per > 0) return(NULL) # Drop missing values tests.
for (pp in colnames(simparams)){
simest$res$results_summary[[pp]] <- simest$sim[[pp]]
}
simest$res$results_summary$correlated_base <- (simest$sim$rd == r_base[2] && simest$sim$grp %in% c("K_var", "base"))
simest$res$results_summary$difficulty <- simest$sim$difficulty
simest$res$results_summary$log_likelihood.true <- simest$sim$log_likelihood.true
simest$res$results_summary$it <- simest$sim$it
## Format results
simest$res$params_estims <- list(shifts = simest$res$params_estims$shifts,
variance = diag(simest$res$sigma2),
selection.strength = diag(simest$res$alpha),
optimal.value = simest$res$intercept,
root.state = list(random = TRUE,
stationary.root = TRUE,
value.root = NA,
exp.root = simest$res$intercept,
var.root = diag(simest$res$sigma2/(2*simest$res$alpha))))
return(simest)
}
# assign(paste(simests_K_true), lapply(eval(simests_K_true), add_simu_params))
# for (meth in method_sels){
# assign(paste0(simests_K_select, "_", meth),
# lapply(eval(as.name(paste0(simests_K_select, "_", meth))), add_simu_params))
# }
# assign(paste(simests_K_select), lapply(eval(simests_K_select), add_simu_params))
###########################################
## Scores involving lists
## (Compute complex score and add them to result summary for each element of the list)
###########################################
TP <- function(tr.edges, est.edges) { # True Positives
if (is.null(tr.edges) ) return(0) # If no true shift at all, any shift found is misplaced
if (is.null(est.edges)) return(0) # If no estimated shift, TP = 0
return(sum(est.edges %in% tr.edges))
}
FP <- function(tr.edges, est.edges){ # False Positives
return(length(est.edges) - TP(tr.edges, est.edges))
}
FN <- function(tr.edges, est.edges){ # False Negatives
return(length(tr.edges) - TP(tr.edges, est.edges))
}
TN <- function(tr.edges, est.edges, nedges){ # True Negatives
edges <- 1:nedges
if (is.null(est.edges)) est.edges <- nedges + 1
if (is.null(tr.edges)) tr.edges <- nedges + 1
return(sum(edges[-est.edges] %in% edges[-tr.edges]))
}
sensitivity <- function(tr.edges, est.edges) { # Sensibilité
# if (is.null(tr.edges) ) return(NA) # If no true shift at all, any shift found is misplaced
if (is.null(tr.edges) ) return(1) # If no true shift at all, we always find all of them
if (is.null(est.edges)) return(0) # If no estimated shift, TP = 0
return(sum(est.edges %in% tr.edges)/length(tr.edges))
}
specificity <- function(tr.edges, est.edges, nedges) { # spécificité : TN/N
edges <- 1:nedges
if (is.null(est.edges)) est.edges <- nedges + 1
if (is.null(tr.edges)) tr.edges <- nedges + 1
return(sum(edges[-est.edges] %in% edges[-tr.edges])/length(edges[-tr.edges]))
}
precision <- function(tr.edges, est.edges) { # Precision
if (is.null(tr.edges) ) return(0) # If no true shift at all, any shift found is misplaced
if (is.null(est.edges)) return(1) # If no estimated shift, we can't go wrong
# if (is.null(est.edges)) return(NA) # If no estimated shift, TP = 0
return(sum(est.edges %in% tr.edges)/length(est.edges))
}
match.partitions <- function(tr, est, nta) {
tree <- trees[[paste0(nta)]]
part.list <- subtree.list[[paste0(nta)]]
## computes ARI between partitions induced by edges in tr$edges and est$edges
# if (is.null(est$edges)){
# return(-1) # If lasso initialization failed
# } else {
require(mclust)
part1 <- clusters_from_shifts(tree, tr$edges, part.list)
part2 <- clusters_from_shifts(tree, est$edges, part.list)
return(adjustedRandIndex(part1, part2))
# }
}
euclidian_distance <- function(m1, m2){
return(sum((m2 - m1)^2))
}
RMSE <- function(y, true.value) {
return(sqrt(mean((y - true.value)^2)))
}
extract_diag <- function(mat){
if (is.vector(mat)){
return(rep(mat, p_base))
} else {
return(diag(as.matrix(mat)))
}
}
diag_RMSE <- function(mat_eval, mat_true){
return(RMSE(extract_diag(mat_eval), extract_diag(mat_true)))
}
extract_off_diag <- function(mat){
if (is.vector(mat)){
return(rep(0, p_base*(p_base-1)/2))
} else {
mat <- as.matrix(mat)
return(mat[upper.tri(mat)])
}
}
offdiag_RMSE <- function(mat_eval, mat_true){
return(RMSE(extract_off_diag(mat_eval), extract_off_diag(mat_true)))
}
compute_list_scores_element <- function(simest){
if (is.null(simest)){ # || !simest$results_summary$CV_estim){
# return(data.frame(mean.edge.quality = NA, edge.recovery = NA,
# edge.init.recovery = NA, adjrandind = NA, adjrandind.init = NA))
return(NULL)
} else {
df <- data.frame(
# mean.edge.quality = mean(simest$edge.quality),
sensitivity = sensitivity(simest$sim$shifts$edges,
simest$res$params_estim$shifts$edges),
sensitivity.init = sensitivity(simest$sim$shifts$edges,
simest$res$params_init_estim$shifts$edges),
specificity = specificity(simest$sim$shifts$edges,
simest$res$params_estim$shifts$edges,
nrow(trees[[paste0(simest$sim$ntaxa)]]$edge)),
specificity.init = specificity(simest$sim$shifts$edges,
simest$res$params_init_estim$shifts$edges,
nrow(trees[[paste0(simest$sim$ntaxa)]]$edge)),
precision = precision(simest$sim$shifts$edges,
simest$res$params_estim$shifts$edges),
precision.init = precision(simest$sim$shifts$edges,
simest$res$params_init_estim$shifts$edges),
adjrandind = match.partitions(simest$sim$shifts,
simest$res$params_estim$shifts,
simest$sim$ntaxa),
adjrandind.init = match.partitions(simest$sim$shifts,
simest$res$params_init_estim$shifts,
simest$sim$ntaxa),
# distance_means = euclidian_distance(simest$sim$m_Y_true,
# simest$res$m_Y_estim),
TP = TP(simest$sim$shifts$edges,
simest$res$params_estim$shifts$edges),
TP.init = TP(simest$sim$shifts$edges,
simest$res$params_init_estim$shifts$edges),
FP = FP(simest$sim$shifts$edges,
simest$res$params_estim$shifts$edges),
FP.init = FP(simest$sim$shifts$edges,
simest$res$params_init_estim$shifts$edges),
FN = FN(simest$sim$shifts$edges,
simest$res$params_estim$shifts$edges),
FN.init = FN(simest$sim$shifts$edges,
simest$res$params_init_estim$shifts$edges),
TN = TN(simest$sim$shifts$edges,
simest$res$params_estim$shifts$edges,
nrow(trees[[paste0(simest$sim$ntaxa)]]$edge)),
TN.init = TN(simest$sim$shifts$edges,
simest$res$params_init_estim$shifts$edges,
nrow(trees[[paste0(simest$sim$ntaxa)]]$edge)),
gamma_diag_RMSE = diag_RMSE(simest$res$params_estim$root.state$var.root,
simest$sim$params_simu$root.state$var.root),
gamma_offdiag_RMSE = offdiag_RMSE(simest$res$params_estim$root.state$var.root,
simest$sim$params_simu$root.state$var.root),
alpha_diag_RMSE = diag_RMSE(simest$res$params_estim$selection.strength,
simest$sim$params_simu$selection.strength),
alpha_offdiag_RMSE = offdiag_RMSE(simest$res$params_estim$selection.strength,
simest$sim$params_simu$selection.strength),
sigma_diag_RMSE = diag_RMSE(simest$res$params_estim$variance,
simest$sim$params_simu$variance),
sigma_offdiag_RMSE = offdiag_RMSE(simest$res$params_estim$variance,
simest$sim$params_simu$variance)
# gamma_estim = as.vector(simest$res$params_estim$variance) / (2 * as.vector(simest$res$params_estim$selection.strength))
)
simest$res$results_summary <- cbind(simest$res$results_summary, df)
return(simest)
}
}
compute_list_scores_element_l1ou <- function(simest){
if (is.null(simest)){ # || !simest$results_summary$CV_estim){
# return(data.frame(mean.edge.quality = NA, edge.recovery = NA,
# edge.init.recovery = NA, adjrandind = NA, adjrandind.init = NA))
return(NULL)
} else {
df <- data.frame(
# mean.edge.quality = mean(simest$edge.quality),
sensitivity = sensitivity(simest$sim$shifts$edges,
simest$res$params_estim$shifts$edges),
sensitivity.init = NA,
specificity = specificity(simest$sim$shifts$edges,
simest$res$params_estim$shifts$edges,
nrow(trees[[paste0(simest$sim$ntaxa)]]$edge)),
specificity.init = NA,
precision = precision(simest$sim$shifts$edges,
simest$res$params_estim$shifts$edges),
precision.init = NA,
adjrandind = match.partitions(simest$sim$shifts,
simest$res$params_estim$shifts,
simest$sim$ntaxa),
adjrandind.init = NA,
# distance_means = euclidian_distance(simest$sim$m_Y_true,
# simest$res$m_Y_estim),
TP = TP(simest$sim$shifts$edges,
simest$res$params_estim$shifts$edges),
TP.init = NA,
FP = FP(simest$sim$shifts$edges,
simest$res$params_estim$shifts$edges),
FP.init = NA,
FN = FN(simest$sim$shifts$edges,
simest$res$params_estim$shifts$edges),
FN.init = NA,
TN = TN(simest$sim$shifts$edges,
simest$res$params_estim$shifts$edges,
nrow(trees[[paste0(simest$sim$ntaxa)]]$edge)),
TN.init = NA,
gamma_diag_RMSE = diag_RMSE(simest$res$params_estim$root.state$var.root,
simest$sim$params_simu$root.state$var.root),
gamma_offdiag_RMSE = offdiag_RMSE(simest$res$params_estim$root.state$var.root,
simest$sim$params_simu$root.state$var.root),
alpha_diag_RMSE = diag_RMSE(simest$res$params_estim$selection.strength,
simest$sim$params_simu$selection.strength),
alpha_offdiag_RMSE = offdiag_RMSE(simest$res$params_estim$selection.strength,
simest$sim$params_simu$selection.strength),
sigma_diag_RMSE = diag_RMSE(simest$res$params_estim$variance,
simest$sim$params_simu$variance),
sigma_offdiag_RMSE = offdiag_RMSE(simest$res$params_estim$variance,
simest$sim$params_simu$variance)
# gamma_estim = as.vector(simest$res$params_estim$variance) / (2 * as.vector(simest$res$params_estim$selection.strength))
)
simest$res$results_summary <- cbind(simest$res$results_summary, df)
return(simest)
}
}
# assign(paste(simests_K_true),
# lapply(eval(simests_K_true), compute_list_scores_element))
# for (meth in method_sels){
# assign(paste0(simests_K_select, "_", meth),
# lapply(eval(as.name(paste0(simests_K_select, "_", meth))), compute_list_scores_element))
# }
# assign(paste(simests_K_select),
# lapply(eval(simests_K_select), compute_list_scores_element))
######################
## Add Matrix Estims
######################
add_name <- function(simest, name, nbr, tmp_true, tmp_eval){
names(tmp_eval) <- paste0(name, "_estim_", 1:nbr)
names(tmp_true) <- paste0(name, "_true_", 1:nbr)
simest$res$results_summary <- cbind(simest$res$results_summary,
t(tmp_true), t(tmp_eval))
return(simest)
}
add_diag <- function(simest){
simest <- add_name(simest, "sigma_diag", p_base,
extract_diag(simest$sim$params_simu$variance),
extract_diag(simest$res$params_estim$variance))
simest <- add_name(simest, "alpha_diag", p_base,
extract_diag(simest$sim$params_simu$selection.strength),
extract_diag(simest$res$params_estim$selection.strength))
simest <- add_name(simest, "gamma_diag", p_base,
extract_diag(simest$sim$params_simu$root.state$var.root),
extract_diag(simest$res$params_estim$root.state$var.root))
return(simest)
}
add_offdiag <- function(simest){
simest <- add_name(simest, "sigma_offdiag", p_base*(p_base-1)/2,
extract_off_diag(simest$sim$params_simu$variance),
extract_off_diag(simest$res$params_estim$variance))
simest <- add_name(simest, "alpha_offdiag", p_base*(p_base-1)/2,
extract_off_diag(simest$sim$params_simu$selection.strength),
extract_off_diag(simest$res$params_estim$selection.strength))
simest <- add_name(simest, "gamma_offdiag", p_base*(p_base-1)/2,
extract_off_diag(simest$sim$params_simu$root.state$var.root),
extract_off_diag(simest$res$params_estim$root.state$var.root))
return(simest)
}
# assign(paste(simests_K_true),
# lapply(eval(simests_K_true), add_diag))
# for (meth in method_sels){
# assign(paste0(simests_K_select, "_", meth),
# lapply(eval(as.name(paste0(simests_K_select, "_", meth))), add_diag))
# }
#
# assign(paste(simests_K_true),
# lapply(eval(simests_K_true), add_offdiag))
# for (meth in method_sels){
# assign(paste0(simests_K_select, "_", meth),
# lapply(eval(as.name(paste0(simests_K_select, "_", meth))), add_offdiag))
# }
######################
## Format the data
######################
extract_data_frame <- function(simests){
results_summary <- lapply(simests, function(z) return(z$res$results_summary))
results_summary <- as.data.frame(do.call(rbind, results_summary))
# dd <- do.call(rbind, simests)
# dd <- do.call(rbind, dd[, "res"])
# results_summary <- as.data.frame(do.call(rbind, dd[,"results_summary"]))
return(results_summary)
}
# res_sum <- as.name(paste0("results_summary", ak))
# res_sum_K_true <- as.name(paste0("results_summary", ak, "_K_true"))
# res_sum_K_select <- as.name(paste0("results_summary", ak, "_K_select"))
# res_sum_K_select_true <- as.name(paste0("results_summary", ak, "_K_select_true"))
#
# # assign(paste(res_sum), extract_data_frame(eval(simests)))
# assign(paste(res_sum_K_true), extract_data_frame(eval(simests_K_true)))
# for (meth in method_sels){
# assign(paste0(res_sum_K_select, "_", meth),
# extract_data_frame(eval(as.name(paste0(simests_K_select, "_", meth)))))
# }
# # assign(paste(res_sum_K_select), extract_data_frame(eval(simests_K_select)))
#
# rm(list = c(paste(simests_K_true), "simests_K_true",
# paste0(simests_K_select, "_", method_sels)))
# gc()
#
# assign(paste0(res_sum_K_true),
# transform(eval(res_sum_K_true), K_type = "K_true"))
# for (meth in method_sels){
# assign(paste0(res_sum_K_select, "_", meth),
# transform(eval(as.name(paste0(res_sum_K_select, "_", meth))),
# K_type = paste0("K_select_", meth)))
# }
# # assign(paste0(res_sum_K_select),
# # transform(eval(res_sum_K_select), K_type = "K_select"))
# assign(paste(res_sum_K_select_true), eval(res_sum_K_true))
# for (meth in method_sels){
# assign(paste(res_sum_K_select_true), rbind(eval(res_sum_K_select_true),
# eval(as.name(paste0(res_sum_K_select, "_", meth)))))
# }
# times_temp <- eval(as.name(paste0(res_sum_K_select, "_", method_sels[1])))$total_time
#
# comp_time <- as.name(paste0("computation_time", ak))
# assign(paste(comp_time),
# sum(times_temp))
#
# m_time <- as.name(paste0("mean_time", ak))
# assign(paste(m_time),
# mean(times_temp))
#
# rm(times_temp)
######################################################################
## Sumarizing Function
######################################################################
## Simple Scores
RMSE <- function(y, true.value) {
return(sqrt(mean((y - true.value)^2)))
}
compute_scores <- function(res_sum){
# Combinations of "true" parameters (no n)
# Aggregation of parameter estimates conditonal upon convergence of EM procedure
return(ddply(subset(res_sum, CV_estim == 1),
# .(alpha, gamma, K, s_noise, ntaxa, grp),
as.quoted(colnames(simparams)[!(colnames(simparams) %in% c("gamma", "nrep"))]),
summarize,
## Simple Scores
# alpha.mean = mean(alpha_estim),
# alpha.sd = sd(alpha_estim),
# alpha.RMSE = RMSE(alpha_estim, unique(alpha)),
# alpha.init.RMSE = RMSE(alpha_estim_init, unique(alpha)),
# gamma.mean = mean(gamma_estim),
# gamma.sd = sd(gamma_estim),
# gamma.RMSE = RMSE(gamma_estim, unique(gamma)),
# gamma.init.RMSE = RMSE(gamma_estim_init, unique(gamma)),
# beta_0.mean = mean(beta_0_estim),
# beta_0.sd = sd(beta_0_estim),
# beta_0.RMSE = RMSE(beta_0_estim, beta_0),
# beta_0.init.RMSE = RMSE(beta_0_estim_init, beta_0),
# lambda.mean = mean(lambda_estim),
# lambda.sd = sd(lambda_estim),
# lambda.RMSE = RMSE(lambda_estim, lambda),
log_likelihood.mean = mean(log_likelihood),
log_likelihood.mean.init = mean(log_likelihood_init),
mean_number_new_shifts.mean = mean(mean_number_new_shifts),
number_equivalent_solutions.mean = mean(number_equivalent_solutions),
difficulty.mean = mean(difficulty),
manova.mean = mean(manova),
EM_steps.mean = mean(EM_steps, na.rm = TRUE),
K.match = mean(K == K_try),
K.match.leq = mean(K >= K_try),
## List Scores
# mean.edge.quality.mean = mean(mean.edge.quality),
sensitivity.mean = mean(sensitivity),
sensitivity.init.mean = mean(sensitivity.init),
specificity.mean = mean(specificity),
specificity.init.mean = mean(specificity.init),
precision.mean = mean(precision),
precision.init.mean = mean(precision.init),
mean.adjrandind = mean(adjrandind),
mean.adjrandind.init = mean(adjrandind.init),
# mean.distance_means = mean(distance_means),
total_time.mean = mean(total_time)
))
}
compute_scores_l1ou <- function(res_sum){
# Combinations of "true" parameters (no n)
# Aggregation of parameter estimates conditonal upon convergence of EM procedure
return(ddply(res_sum,
# .(alpha, gamma, K, s_noise, ntaxa, grp),
as.quoted(colnames(simparams)[!(colnames(simparams) %in% c("gamma", "nrep"))]),
summarize,
## Simple Scores
# alpha.mean = mean(alpha_estim),
# alpha.sd = sd(alpha_estim),
# alpha.RMSE = RMSE(alpha_estim, unique(alpha)),
# alpha.init.RMSE = RMSE(alpha_estim_init, unique(alpha)),
# gamma.mean = mean(gamma_estim),
# gamma.sd = sd(gamma_estim),
# gamma.RMSE = RMSE(gamma_estim, unique(gamma)),
# gamma.init.RMSE = RMSE(gamma_estim_init, unique(gamma)),
# beta_0.mean = mean(beta_0_estim),
# beta_0.sd = sd(beta_0_estim),
# beta_0.RMSE = RMSE(beta_0_estim, beta_0),
# beta_0.init.RMSE = RMSE(beta_0_estim_init, beta_0),
# lambda.mean = mean(lambda_estim),
# lambda.sd = sd(lambda_estim),
# lambda.RMSE = RMSE(lambda_estim, lambda),
log_likelihood.mean = mean(log_likelihood),
# log_likelihood.mean.init = mean(log_likelihood_init),
# mean_number_new_shifts.mean = mean(mean_number_new_shifts),
number_equivalent_solutions.mean = mean(number_equivalent_solutions),
# difficulty.mean = mean(difficulty),
# EM_steps.mean = mean(EM_steps, na.rm = TRUE),
# K.match = mean(K == K_try),
# K.match.leq = mean(K >= K_try),
## List Scores
# mean.edge.quality.mean = mean(mean.edge.quality),
sensitivity.mean = mean(sensitivity),
# sensitivity.init.mean = mean(sensitivity.init),
specificity.mean = mean(specificity),
# specificity.init.mean = mean(specificity.init),
precision.mean = mean(precision),
# precision.init.mean = mean(precision.init),
mean.adjrandind = mean(adjrandind),
# mean.adjrandind.init = mean(adjrandind.init),
# mean.distance_means = mean(distance_means),
total_time.mean = mean(total_time)
))
}
# sum_scores_K_true <- as.name(paste0("summary_scores", ak, "_K_true"))
# sum_scores_K_select <- as.name(paste0("summary_scores", ak, "_K_select"))
# sum_scores_K_select_true <- as.name(paste0("summary_scores", ak, "_K_select_true"))
#
# assign(paste(sum_scores_K_true), compute_scores(eval(res_sum_K_true)))
# for (meth in method_sels){
# assign(paste0(sum_scores_K_select, "_", meth),
# compute_scores(eval(as.name(paste0(res_sum_K_select, "_", meth)))))
# }
# # assign(paste(sum_scores_K_select), compute_scores(eval(res_sum_K_select)))
#
# assign(paste0(sum_scores_K_true),
# transform(eval(sum_scores_K_true), K_type = "K_true"))
# for (meth in method_sels){
# assign(paste0(sum_scores_K_select, "_", meth),
# transform(eval(as.name(paste0(sum_scores_K_select, "_", meth))),
# K_type = paste0("K_select_", meth)))
# }
# assign(paste(sum_scores_K_select_true), eval(sum_scores_K_true))
# for (meth in method_sels){
# assign(paste(sum_scores_K_select_true), rbind(eval(sum_scores_K_select_true),
# eval(as.name(paste0(sum_scores_K_select, "_", meth)))))
# }
#
# # save.image(paste0(saveresultfile, "_aranged", ak, "-", datestamp_day, "_heavy.RData"))
#
# rm(list = c("comp_time", "m_time",
# # "simparams_alpha", "simparams_base",
# # "simparams_gamma", "simparams_K",
# # paste(simests), "simests",
# # paste(simests_K_true), "simests_K_true",
# # paste(simests_K_select), "simests_K_select",
# paste(res_sum_K_true), "res_sum_K_true",
# paste0(res_sum_K_select, "_", method_sels)))
#
# save.image(paste0(saveresultfile, "_aranged", ak, "-", datestamp_day, ".RData"))
######################
## Loop on inference files - PhyloEM
######################
res_sum_K_select_true_all <- as.name(paste0("results_summary", ak, "_K_select_true_all"))
assign(paste(res_sum_K_select_true_all), NULL)
sum_scores_K_select_true_all <- as.name(paste0("summary_scores", ak, "_K_select_true_all"))
assign(paste(sum_scores_K_select_true_all), NULL)
for (file in files_list){
load(file)
gc()
## simestimations
ak <- ""#"alpha_known_"
simests <- as.name(paste0("simestimations_", ak, inference.index))
## Add total times min
assign(paste(simests), lapply(eval(simests), add_total_times_min))
## Add least squares
# assign(paste(simests), lapply(eval(simests), add_lsq))
## Model Selection
# assign(paste(simests), lapply(eval(simests), model_selection_tmp))
## Extract inferences with K_true or K_select
simests_K_true <- as.name(paste0("simestimations", ak, "_K_true"))
assign(paste(simests_K_true), lapply(eval(simests), extract_result_K_true))
nms <- c(names(eval(simests)[[1]]$res$alpha_max), names(eval(simests)[[1]]$res$alpha_min), names(eval(simests)[[1]]$res$alpha_min_raw))
method_sels <- nms[c(grep("DDSE", nms),
grep("Djump", nms),
# grep("BGH", nms),
grep("BGHlsq", nms),
grep("BGHml", nms))]
# grep("BGHlsqraw", nms),
# grep("BGHmlraw", nms))]
simests_K_select <- as.name(paste0("simestimations", ak, "_K_select"))
for (meth in method_sels){
assign(paste0(simests_K_select, "_", meth),
lapply(eval(simests), extract_result_K_select, method = meth))
}
rm(list = c(paste(simests), "simests"))
gc()
## Add simu parameters to summary
assign(paste(simests_K_true), lapply(eval(simests_K_true), add_simu_params))
for (meth in method_sels){
assign(paste0(simests_K_select, "_", meth),
lapply(eval(as.name(paste0(simests_K_select, "_", meth))), add_simu_params))
}
## Scores involving lists
assign(paste(simests_K_true),
lapply(eval(simests_K_true), compute_list_scores_element))
for (meth in method_sels){
assign(paste0(simests_K_select, "_", meth),
lapply(eval(as.name(paste0(simests_K_select, "_", meth))), compute_list_scores_element))
}
## Add Matrix Estims
assign(paste(simests_K_true),
lapply(eval(simests_K_true), add_diag))
for (meth in method_sels){
assign(paste0(simests_K_select, "_", meth),
lapply(eval(as.name(paste0(simests_K_select, "_", meth))), add_diag))
}
assign(paste(simests_K_true),
lapply(eval(simests_K_true), add_offdiag))
for (meth in method_sels){
assign(paste0(simests_K_select, "_", meth),
lapply(eval(as.name(paste0(simests_K_select, "_", meth))), add_offdiag))
}
## Format the data
res_sum <- as.name(paste0("results_summary", ak))
res_sum_K_true <- as.name(paste0("results_summary", ak, "_K_true"))
res_sum_K_select <- as.name(paste0("results_summary", ak, "_K_select"))
res_sum_K_select_true <- as.name(paste0("results_summary", ak, "_K_select_true"))
assign(paste(res_sum_K_true), extract_data_frame(eval(simests_K_true)))
for (meth in method_sels){
assign(paste0(res_sum_K_select, "_", meth),
extract_data_frame(eval(as.name(paste0(simests_K_select, "_", meth)))))
}
rm(list = c(paste(simests_K_true), "simests_K_true",
paste0(simests_K_select, "_", method_sels)))
gc()
assign(paste0(res_sum_K_true),
transform(eval(res_sum_K_true), K_type = "K_true"))
for (meth in method_sels){
assign(paste0(res_sum_K_select, "_", meth),
transform(eval(as.name(paste0(res_sum_K_select, "_", meth))),
K_type = paste0("K_select_", meth)))
}
assign(paste(res_sum_K_select_true), eval(res_sum_K_true))
for (meth in method_sels){
assign(paste(res_sum_K_select_true), merge(eval(res_sum_K_select_true),
eval(as.name(paste0(res_sum_K_select, "_", meth))),
all = TRUE))
}
## Sumarizing Function
sum_scores_K_true <- as.name(paste0("summary_scores", ak, "_K_true"))
sum_scores_K_select <- as.name(paste0("summary_scores", ak, "_K_select"))
sum_scores_K_select_true <- as.name(paste0("summary_scores", ak, "_K_select_true"))
assign(paste(sum_scores_K_true), compute_scores(eval(res_sum_K_true)))
for (meth in method_sels){
assign(paste0(sum_scores_K_select, "_", meth),
compute_scores(eval(as.name(paste0(res_sum_K_select, "_", meth)))))
}
assign(paste0(sum_scores_K_true),
transform(eval(sum_scores_K_true), K_type = "K_true"))
for (meth in method_sels){
assign(paste0(sum_scores_K_select, "_", meth),
transform(eval(as.name(paste0(sum_scores_K_select, "_", meth))),
K_type = paste0("K_select_", meth)))
}
assign(paste(sum_scores_K_select_true), eval(sum_scores_K_true))
for (meth in method_sels){
assign(paste(sum_scores_K_select_true), rbind(eval(sum_scores_K_select_true),
eval(as.name(paste0(sum_scores_K_select, "_", meth)))))
}
## Merge all results together
assign(paste(res_sum_K_select_true_all), rbind(eval(res_sum_K_select_true_all),
eval(res_sum_K_select_true)))
assign(paste(sum_scores_K_select_true_all), rbind(eval(sum_scores_K_select_true_all),
eval(sum_scores_K_select_true)))
## Clean up
rm(list = c(
#"comp_time", "m_time",
# "simparams_alpha", "simparams_base",
# "simparams_gamma", "simparams_K",
# paste(simests), "simests",
# paste(simests_K_true), "simests_K_true",
# paste(simests_K_select),
"simests_K_select",
paste(res_sum_K_true), "res_sum_K_true",
paste0(res_sum_K_select, "_", method_sels),
paste(sum_scores_K_true), "sum_scores_K_true",
paste0(sum_scores_K_select, "_", method_sels),
paste(res_sum_K_select_true), "res_sum_K_select_true",
paste(sum_scores_K_select_true), "sum_scores_K_select_true"))
}
## Re-name correctly
res_sum_K_select_true <- as.name(paste0("results_summary", ak, "_K_select_true"))
sum_scores_K_select_true <- as.name(paste0("summary_scores", ak, "_K_select_true"))
assign(paste(res_sum_K_select_true), eval(res_sum_K_select_true_all))
assign(paste(sum_scores_K_select_true), eval(sum_scores_K_select_true_all))
rm(list = c(
paste(res_sum_K_select_true_all), "res_sum_K_select_true_all",
paste(sum_scores_K_select_true_all), "sum_scores_K_select_true_all"))
datestamp_day <- format(Sys.time(), "%Y-%m-%d")
save.image(paste0(saveresultfile, "_aranged", ak, "-", datestamp_day, ".RData"))
# ######################
# ## Loop on inference files - l1ou
# ######################
# res_sum_all <- as.name(paste0("results_summary_all"))
# assign(paste(res_sum_all), NULL)
# sum_scores_all <- as.name(paste0("summary_scores_all"))
# assign(paste(sum_scores_all), NULL)
#
# for (file in files_list){
# load(file)
# gc()
# ## simestimations
# simests <- as.name(paste0("simestimations_l1ou_", inference.index))
#
# ## Add simu parameters to summary
# assign(paste(simests), lapply(eval(simests), add_simu_params_l1ou))
#
# ## Scores involving lists
# assign(paste(simests), lapply(eval(simests), compute_list_scores_element_l1ou))
#
# # ## Add Matrix Estims
# # assign(paste(simests), lapply(eval(simests), add_diag))
# # assign(paste(simests), lapply(eval(simests), add_offdiag))
#
# ## Format the data
# res_sum <- as.name(paste0("results_summary"))
# assign(paste(res_sum), extract_data_frame(eval(simests)))
# rm(list = c(paste(simests), "simests"))
# gc()
# assign(paste0(res_sum), transform(eval(res_sum), K_type = "l1ou"))
#
# ## Sumarizing Function
# sum_scores <- as.name(paste0("summary_scores"))
# assign(paste(sum_scores), compute_scores_l1ou(eval(res_sum)))
# assign(paste0(sum_scores), transform(eval(sum_scores), K_type = "l1ou"))
#
# ## Merge all results together
# assign(paste(res_sum_all), rbind(eval(res_sum_all), eval(res_sum)))
# assign(paste(sum_scores_all), rbind(eval(sum_scores_all), eval(sum_scores)))
#
# ## Clean up
# rm(list = c(
# #"comp_time", "m_time",
# # "simparams_alpha", "simparams_base",
# # "simparams_gamma", "simparams_K",
# # paste(simests), "simests",
# # paste(simests_K_true), "simests_K_true",
# # paste(simests_K_select),
# paste(res_sum), "res_sum",
# paste(sum_scores), "sum_scores"))
# }
#
# ## Re-name correctly
# summary_scores_l1ou <- summary_scores_all
# results_summary_l1ou <- results_summary_all
# rm(list = c(
# paste(res_sum_all), "res_sum_all",
# paste(sum_scores_all), "sum_scores_all"))
#
# save.image(paste0(saveresultdir, "/", saveresultname, "_aranged", ak, "-", datestamp_day, ".RData"))
# ######################
# ## New likelihood for missing
# ######################
# load("../Results/Simulations_Multivariate/multivariate_simlist_only_2016-07-26.RData")
#
# # simulations2keep <- sapply(simlist, function(x) { x$nrep %in% 1:2 }, simplify = TRUE)
# # simlist <- simlist[simulations2keep]
#
# change_log_lik <- function(simest, sim){
# simest$sim$log_likelihood.true <- sim$log_likelihood.true
# return(simest)
# }
#
# assign(paste(simests),
# mapply(change_log_lik, eval(simests), simlist, SIMPLIFY = FALSE))
# #########################################################
# ## Model Selection
# #########################################################
#
# model_selection_several_K <- function(ret){
# if (length(names(simestimations_alpha_known[[1]]$res)) == 5){ # alpha known
# alpha_grid <- ret$sim$alpha
# } else {
# alpha_grid <- find_grid_alpha(trees[[paste0(ret$sim$ntaxa)]],
# nbr_alpha = 10,
# factor_up_alpha = 2,
# factor_down_alpha = 3,
# quantile_low_distance = 0.001,
# log_transform = TRUE)
# }
# res <- PhyloEM(phylo = trees[[paste0(ret$sim$ntaxa)]],
# Y_data = ret$sim$Y_data,
# process = "scOU",
# K_max = max(K_try[[paste0(ret$sim$ntaxa)]]),
# estimates = ret$res,
# random.root = FALSE,
# alpha = alpha_grid,
# save_step = FALSE,
# Nbr_It_Max = 2000,
# tol = list(variance = 10^(-2),
# value.root = 10^(-2),
# log_likelihood = 10^(-2)),
# method.init = "lasso",
# use_previous = FALSE,
# method.selection = "BGH")
# res <- add_total_time(res)
# res <- enlight_res(res)
# ret <- list(sim = ret$sim,
# res = res)
# return(ret)
# }
#
# enlight_res <- function(res){
# lres <- vector("list", 4)
# lres[1:3] <- res[1:3]
# lmax <- res$alpha_max$BGH[c("params_select", "params_raw", "params_init_estim",
# "results_summary",
# # "Yhat", "Zhat", "Yvar", "Zvar",
# "m_Y_estim", "edge.quality")]
# lres$alpha_max <- res$alpha_max
# lres$alpha_max$BGH <- lmax
# return(lres)
# }
#
# add_total_time <- function(res){
# tot_time <- sum(sapply(res[-c(1, 2, 3, length(res))],
# function(z) z$results_summary$time))
# res$alpha_max$results_summary$total_time <- tot_time
# res$alpha_max$BGH$results_summary$total_time <- tot_time
# res$alpha_max$BGH$results_summary <- as.data.frame(res$alpha_max$BGH$results_summary)
# return(res)
# }
#
# simests <- as.name(paste0("simestimations", ak))
# reqpckg <- c("ape", "glmnet", "robustbase")
#
# tmp <- foreach(i = eval(simests), .packages = reqpckg) %do%
# {
# model_selection_several_K(i)
# }
#
# assign(paste(simests), tmp)
# rm(tmp)
# gc()
#
#
# save.image(paste0(saveresultfile, ak, "_", datestamp_day, "_selected.RData"))
#
# # assign(paste(simests), lapply(eval(simests), add_K_select_to_list))
#
# load(paste0(saveresultfile, ak, "_", datestamp_day, "_selected.RData"))
#########################################################
## Time computation
#########################################################
# add_total_time <- function(simest){
# tot_time <- sum(sapply(simest$res[-c(1, 2, 3, length(simest$res))],
# function(z) z$results_summary$time))
# simest$res$alpha_max$results_summary$total_time <- tot_time
# simest$res$alpha_max$BGH$results_summary$total_time <- tot_time
# simest$res$alpha_max$BGH$results_summary <- as.data.frame(simest$res$alpha_max$BGH$results_summary)
# return(simest)
# }
#
# assign(paste(simests), lapply(eval(simests), add_total_time))
# #########################################################
# ## Computation of means at tips
# #########################################################
# compute_mean_estim <- function(simest){
# if (is.null(simest)){
# return(NULL)
# } else {
# XX <- simulate_internal(phylo = trees[[paste(simest$ntaxa)]],
# process = "OU",
# root.state = simest$params_estim$root.state,
# variance = simest$params_estim$variance,
# shifts = simest$params_estim$shifts,
# selection.strength = simest$params_estim$selection.strength,
# optimal.value = simest$params_estim$optimal.value)
# m_Y_estim <- extract_simulate_internal(XX, where="tips", what="expectations")
# simest$m_Y_estim <- m_Y_estim
# return(simest)
# }
# }
#
# simestimations_alpha_known_K_true <- lapply(simestimations_alpha_known_K_true,
# compute_mean_estim)
# simestimations_alpha_known_K_select<-lapply(simestimations_alpha_known_K_select,
# compute_mean_estim)
# ######################
# ## Add lambda parameter
# ######################
#
# add_lambda <- function(simest){
# if (is.null(simest)) return(NULL)
# simest$res$results_summary$lambda_estim <- simest$res$params_estim$lambda
# simest$res$results_summary$lambda <- beta_0
# return(simest)
# }
#
# assign(paste(simests_K_true), lapply(eval(simests_K_true), add_lambda))
# for (meth in method_sels){
# assign(paste0(simests_K_select, "_", meth),
# lapply(eval(as.name(paste0(simests_K_select, "_", meth))), add_lambda))
# }
# # assign(paste(simests_K_select), lapply(eval(simests_K_select), add_lambda))
##################################################################
## Merge alpha known and unknown
##################################################################
rm(list = ls())
saveresultfile = "../Results/Simulations_Multivariate/multivariate_estimations_SUN_rBM"
datestamp_day <- "2016-11-28" # 2015-03-24
ak <- ""
load(paste0(saveresultfile, "_aranged", ak, "-", datestamp_day, ".RData"))
datestamp_day <- "2016-11-15" # 2015-03-17
ak <- "_alpha_known"
load(paste0(saveresultfile, "_aranged", ak, "-", datestamp_day, ".RData"))
results_summary_K_select_true$alpha_known <- FALSE
results_summary_alpha_known_K_select_true$alpha_known <- TRUE
library(plyr)
results_summary_K_select_true <- rbind.fill(results_summary_K_select_true,
results_summary_alpha_known_K_select_true)
rm(results_summary_alpha_known_K_select_true)
summary_scores_K_select_true$alpha_known <- FALSE
summary_scores_alpha_known_K_select_true$alpha_known <- TRUE
summary_scores_K_select_true <- rbind.fill(summary_scores_K_select_true,
summary_scores_alpha_known_K_select_true)
rm(summary_scores_alpha_known_K_select_true)
datestamp_day <- format(Sys.time(), "%Y-%m-%d")
save.image(paste0(saveresultfile, "_aranged_both_alpha", "-", datestamp_day, ".RData"))
# ##################################################################
# ## Merge with lag init
# ##################################################################
# rm(list = ls())
#
# saveresultfile = "../Results/Simulations_Multivariate/multivariate_estimations_SUN_rBM"
#
# datestamp_day <- "2016-07-13" # 2015-03-24
# ak <- "_alpha_known"
# load(paste0(saveresultfile, "_aranged", ak, "-", datestamp_day, ".RData"))
# results_summary_alpha_known_K_select_true_1 <- results_summary_alpha_known_K_select_true
# results_summary_alpha_known_K_select_true_1$K_lag_init <- FALSE
# summary_scores_alpha_known_K_select_true_1 <- summary_scores_alpha_known_K_select_true
# summary_scores_alpha_known_K_select_true_1$K_lag_init <- FALSE
#
# datestamp_day <- "2016-07-23" # 2015-03-17
# ak <- "_alpha_known"
# load(paste0(saveresultfile, "_aranged", ak, "-", datestamp_day, ".RData"))
# results_summary_alpha_known_K_select_true_2 <- results_summary_alpha_known_K_select_true
# results_summary_alpha_known_K_select_true_2$K_lag_init <- TRUE
# summary_scores_alpha_known_K_select_true_2 <- summary_scores_alpha_known_K_select_true
# summary_scores_alpha_known_K_select_true_2$K_lag_init <- TRUE
#
# results_summary_alpha_known_K_select_true <- rbind(results_summary_alpha_known_K_select_true_1,
# results_summary_alpha_known_K_select_true_2)
# rm(results_summary_alpha_known_K_select_true_1,
# results_summary_alpha_known_K_select_true_2)
#
# summary_scores_alpha_known_K_select_true <- rbind(summary_scores_alpha_known_K_select_true_1,
# summary_scores_alpha_known_K_select_true_2)
# rm(summary_scores_alpha_known_K_select_true_1,
# summary_scores_alpha_known_K_select_true_2)
#
# datestamp_day <- format(Sys.time(), "%Y-%m-%d")
# save.image(paste0(saveresultfile, "aranged_alpha_known_lag", "-", datestamp_day, ".RData"))
##################################################################
## Merge EM l1ou
##################################################################
rm(list = ls())
gc()
## EM
saveresultfile = "../Results/Simulations_Multivariate/multivariate_estimations_SUN_rBM"
datestamp_day <- "2016-11-28"
ak <- "_both_alpha"
# ak <- ""
load(paste0(saveresultfile, "_aranged", ak, "-", datestamp_day, ".RData"))
results_summary_K_select_true$pPCA <- FALSE
summary_scores_K_select_true$pPCA <- FALSE
## l1OU
saveresultfile = "../Results/Simulations_Multivariate/multivariate_estimations_l1ou"
datestamp_day <- "2016-10-31"
load(paste0(saveresultfile, "_aranged", "-", datestamp_day, ".RData"))
results_summary_l1ou$pPCA <- FALSE
summary_scores_l1ou$pPCA <- FALSE
## Merging
library(plyr)
results_summary_l1ou$alpha_known <- FALSE
results_summary_K_select_true_all <- rbind.fill(results_summary_K_select_true,
results_summary_l1ou)
rm(results_summary_l1ou, results_summary_K_select_true)
summary_scores_l1ou$alpha_known <- FALSE
summary_scores_K_select_true_all <- rbind.fill(summary_scores_K_select_true,
summary_scores_l1ou)
rm(summary_scores_l1ou, summary_scores_K_select_true)
## l1OU - PCA
saveresultfile = "../Results/Simulations_Multivariate/multivariate_estimations_l1ou"
datestamp_day <- "2017-01-24"
load(paste0(saveresultfile, "_aranged", "-", datestamp_day, ".RData"))
# levels(results_summary_l1ou$K_type) <- "l1ou_PCA"
# levels(summary_scores_l1ou$K_type) <- "l1ou_PCA"
results_summary_l1ou$pPCA <- TRUE
summary_scores_l1ou$pPCA <- TRUE
results_summary_l1ou$alpha_known <- FALSE
results_summary_K_select_true_all <- rbind.fill(results_summary_K_select_true_all,
results_summary_l1ou)
rm(results_summary_l1ou)
summary_scores_l1ou$alpha_known <- FALSE
summary_scores_K_select_true_all <- rbind.fill(summary_scores_K_select_true_all,
summary_scores_l1ou)
rm(summary_scores_l1ou)
## EM - PCA
saveresultfile = "../Results/Simulations_Multivariate/multivariate_estimations_SUN_rBM_pPCA"
datestamp_day <- "2017-04-20"
load(paste0(saveresultfile, "_aranged", "-", datestamp_day, ".RData"))
results_summary_K_select_true$pPCA <- TRUE
summary_scores_K_select_true$pPCA <- TRUE
results_summary_K_select_true$alpha_known <- FALSE
summary_scores_K_select_true$alpha_known <- FALSE
results_summary_K_select_true_all <- rbind.fill(results_summary_K_select_true_all,
results_summary_K_select_true)
rm(results_summary_K_select_true)
summary_scores_K_select_true_all <- rbind.fill(summary_scores_K_select_true_all,
summary_scores_K_select_true)
rm(summary_scores_K_select_true)
## Correct names
results_summary_K_select_true <- results_summary_K_select_true_all
summary_scores_K_select_true <- summary_scores_K_select_true_all
rm(results_summary_K_select_true_all, summary_scores_K_select_true_all)
saveresultfile = "../Results/Simulations_Multivariate/multivariate_estimations_SUN_rBM_l1ou_PCA"
datestamp_day <- format(Sys.time(), "%Y-%m-%d")
save.image(paste0(saveresultfile, "_aranged", "-", datestamp_day, ".RData"))
######################################################################
## Equivalent Solutions
######################################################################
saveresultfile = "../Results/Simulations_Several_K/several_K_estimations_FUN_"
datestamp_day <- "2015-11-20" # 03-17 03-24
ak <- "" # "_alpha_known"
load(paste0(saveresultfile, "aranged", ak, "-", datestamp_day, "_heavy.RData"))
indexes <- which((results_summary_K_select$number_equivalent_solutions > 1))
length(indexes)
simestimations_K_select_eqs <- simestimations_K_select[indexes]
results_summary_K_select_eqs <- results_summary_K_select[indexes,]
save(simestimations_K_select_eqs, results_summary_K_select_eqs, file = paste0(saveresultfile, "equivalent_solutions", ak, "-", datestamp_day, ".RData"))
###############################################################################
## Tests
###############################################################################
simest <- simestimations_alpha_known[[40]] # Case K_lag really help (3 shifts).
simest <- simestimations_alpha_known[[143]] # Case with missing values
simest <- simestimations_alpha_known[[30]] # Case 11 shifts used to be very bad
simest <- simestimations_alpha_known[[56]] # alpha non scalar
simest <- simestimations_alpha_known[[63]] # NA = 0.2%
simest <- simestimations_alpha_known[[92]] # ntaxa = 215
params_test <- simest$res$alpha_max$params_estim$`3`
params_test_true <- list(shifts = simest$sim$shifts,
optimal.value = c(0,0,0,0))
simest$sim$alpha
simest$sim$gamma
simest$sim$shifts
simest$sim$rd
plot_data.process.actual(Y.state = simest$sim$Y_data[1,],
phylo = trees[[paste0(simest$sim$ntaxa)]],
params = params_test_true,
adj.root = 1.3,
automatic_colors = TRUE,
margin_plot = NULL,
cex = 1.3,
bg_shifts = "azure2",
bg_beta_0 = "azure2",
no.margin = TRUE)
plot_data.process.actual(Y.state = simest$sim$Y_data[1,],
phylo = trees[[paste0(simest$sim$ntaxa)]],
params = params_test,
adj.root = 1.3,
automatic_colors = TRUE,
margin_plot = NULL,
cex = 1.3,
bg_shifts = "azure2",
bg_beta_0 = "azure2",
no.margin = TRUE)
plot_data.process.actual(Y.state = simest$sim$Y_data[1,],
phylo = trees[[paste0(simest$sim$ntaxa)]],
params = simest$res$alpha_max$params_estim$`3`,
adj.root = 1.3,
automatic_colors = TRUE,
margin_plot = NULL,
cex = 1.3,
bg_shifts = "azure2",
bg_beta_0 = "azure2",
no.margin = TRUE)
plot(simest$res$alpha_max$capushe_outputBM1, newwindow=FALSE, ask=FALSE)
lp <- lineprof(results_estim_EM <- estimateEM(phylo = trees[[paste0(simest$sim$ntaxa)]],
Y_data = simest$sim$Y_data,
process = "scOU",
nbr_of_shifts = 3,
random.root = TRUE,
stationary.root = TRUE,
alpha_known = TRUE,
method.OUsun = "rescale",
known.selection.strength = simest$sim$alpha,
tol = list(variance = 10^(-2),
value.root = 10^(-2),
log_likelihood = 10^(-2)),
Nbr_It_Max = 1000,
method.init = "lasso",
K_lag_init = 5,
interval = 0.01)
# edges.init = params_test_true$shifts$edges,
# values.init = params_test_true$shifts$values,
# exp.root.init = rep(0,4),
attr(results_estim_EM, "Divergence")
sapply(results_estim_EM$params_history, function(z) attr(z, "log_likelihood"))
sapply(results_estim_EM$params_history, function(z) z$shifts$edges)
simest$sim$log_likelihood.true
simest$res$alpha_max$results_summary$log_likelihood
plot_data.process.actual(Y.state = simest$sim$Y_data[1,],
phylo = trees[[paste0(simest$sim$ntaxa)]],
params = results_estim_EM_3$params,
adj.root = 1.3,
automatic_colors = TRUE,
margin_plot = NULL,
cex = 1.3,
bg_shifts = "azure2",
bg_beta_0 = "azure2",
no.margin = TRUE)
X <- simest$sim
lp <- lineprof(
res1 <- PhyloEM(phylo = trees[[paste0(X$ntaxa)]],
Y_data = X$Y_data,
process = "scOU",
K_max = max(K_try[[paste0(X$ntaxa)]]),
random.root = TRUE,
stationary.root = TRUE,
alpha = X$alpha,
save_step = FALSE,
Nbr_It_Max = 5,
tol = list(variance = 10^(-2),
value.root = 10^(-2),
log_likelihood = 10^(-2)),
min_params = list(variance = 0,
value.root = -10^(5),
exp.root = -10^(5),
var.root = 0,
selection.strength = 0),
method.init = "lasso",
use_previous = FALSE,
method.selection = c("BirgeMassart1", "BirgeMassart2"),
interval = 0.01)
lp <- lineprof(
res2 <- PhyloEM(phylo = trees[[paste0(X$ntaxa)]],
Y_data = X$Y_data,
process = "OU",
K_max = 10,
random.root = TRUE,
stationary.root = TRUE,
independent = TRUE,
alpha_grid = FALSE,
# alpha = c(2.9, 3, 3.1),
save_step = FALSE,
Nbr_It_Max = 50,
tol = list(variance = 10^(-2),
value.root = 10^(-2),
log_likelihood = 10^(-2),
normalized_half_life = 10^(-2)),
min_params = list(variance = 0,
value.root = -10^(5),
exp.root = -10^(5),
var.root = 0,
selection.strength = 0),
method.init = "lasso",
use_previous = FALSE,
method.selection = c("BirgeMassart1", "BirgeMassart2"),
K_lag_init = 5),
interval = 0.01)
res2$alpha_max$results_summary$log_likelihood
simest$sim$log_likelihood.true
plot(res2$alpha_max$capushe_outputBM1@DDSE)
res2$alpha_max$params_estim$`3`
sum(res2$alpha_max$results_summary$time)
simest$res$alpha_max$results_summary$total_time
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.