#############################################
## Exploitation several K
#############################################
##############
## Parameters and functions
##############
require(doParallel)
require(foreach)
require(ape)
#require(glmnet) # For Lasso initialization
# require(quadrupen) # For Lasso initialization
# require(robustbase) # For robust fitting of alpha
library(TreeSim) # For simulation of the tree
require(ggplot2) # Plot
library(scales) # plot
library(reshape2) # Plot
library(grid) # Plot
library(plyr)
library(capushe)
saveresultfile = "../Results/Simulations_Several_K/several_K_estimations_SUN_rBM_uncertainties"
datestamp_day <- "2016-03-14" # 12-20 12-04 2016-02-17
ak <- "_alpha_known" # ""
###############################################################
load(paste0(saveresultfile, ak, "_", datestamp_day, "_all.RData"))
ak <- "_alpha_known" # ""
simests <- as.name(paste0("simestimations", ak))
rm(list = c("simparams_alpha", "simparams_base",
"simparams_gamma", "simparams_K",
"simparams_keep", "simulations2keep",
"simlist"))
gc()
# simestimations_alpha_known <- simestimations_alpha_known_1
# rm(simestimations_alpha_known_1)
# gc()
## Source functions
source("R/simulate.R")
source("R/estimateEM.R")
source("R/init_EM.R")
source("R/E_step.R")
source("R/M_step.R")
source("R/shutoff.R")
source("R/generic_functions.R")
source("R/shifts_manipulations.R")
source("R/plot_functions.R")
source("R/parsimonyNumber.R")
source("R/partitionsNumber.R")
source("R/model_selection.R")
# saveresultfile = "../Results/Simulations_Several_K/several_K"
# datestamp_day <- "2015-03-31_0"
#
# ak <- "_alpha_0" # "_alpha_known"
#
# load(paste0(saveresultfile, ak, "-", datestamp_day, ".RData"))
#########################################################
## Model Selection
#########################################################
model_selection_several_K <- function(ret){
if (length(names(simestimations_alpha_known[[1]]$res)) == 5){ # alpha known
alpha_grid <- ret$sim$alpha
} else {
alpha_grid <- find_grid_alpha(trees[[paste0(ret$sim$ntaxa)]],
nbr_alpha = 10,
factor_up_alpha = 2,
factor_down_alpha = 3,
quantile_low_distance = 0.001,
log_transform = TRUE)
}
res <- PhyloEM(phylo = trees[[paste0(ret$sim$ntaxa)]],
Y_data = ret$sim$Y_data,
process = "scOU",
K_max = max(K_try[[paste0(ret$sim$ntaxa)]]),
estimates = ret$res,
random.root = FALSE,
alpha = alpha_grid,
save_step = FALSE,
Nbr_It_Max = 2000,
tol = list(variance = 10^(-2),
value.root = 10^(-2),
log_likelihood = 10^(-2)),
method.init = "lasso",
use_previous = FALSE,
method.selection = "BGH")
res <- add_total_time(res)
res <- enlight_res(res)
ret <- list(sim = ret$sim,
res = res)
return(ret)
}
enlight_res <- function(res){
lres <- vector("list", 4)
lres[1:3] <- res[1:3]
lmax <- res$alpha_max$BGH[c("params_select", "params_raw", "params_init_estim",
"results_summary",
# "Yhat", "Zhat", "Yvar", "Zvar",
"m_Y_estim", "edge.quality")]
lres$alpha_max <- res$alpha_max
lres$alpha_max$BGH <- lmax
return(lres)
}
add_total_time <- function(res){
tot_time <- sum(sapply(res[-c(1, 2, 3, length(res))],
function(z) z$results_summary$time))
res$alpha_max$results_summary$total_time <- tot_time
res$alpha_max$BGH$results_summary$total_time <- tot_time
res$alpha_max$BGH$results_summary <- as.data.frame(res$alpha_max$BGH$results_summary)
return(res)
}
simests <- as.name(paste0("simestimations", ak))
reqpckg <- c("ape", "glmnet", "robustbase")
tmp <- foreach(i = eval(simests), .packages = reqpckg) %do%
{
model_selection_several_K(i)
}
assign(paste(simests), tmp)
rm(tmp)
gc()
save.image(paste0(saveresultfile, ak, "_", datestamp_day, "_selected.RData"))
# assign(paste(simests), lapply(eval(simests), add_K_select_to_list))
load(paste0(saveresultfile, ak, "_", datestamp_day, "_selected.RData"))
#########################################################
## Time computation
#########################################################
# add_total_time <- function(simest){
# tot_time <- sum(sapply(simest$res[-c(1, 2, 3, length(simest$res))],
# function(z) z$results_summary$time))
# simest$res$alpha_max$results_summary$total_time <- tot_time
# simest$res$alpha_max$BGH$results_summary$total_time <- tot_time
# simest$res$alpha_max$BGH$results_summary <- as.data.frame(simest$res$alpha_max$BGH$results_summary)
# return(simest)
# }
#
# assign(paste(simests), lapply(eval(simests), add_total_time))
######################
## Extract inferences with K_true or K_select
######################
## Extract the results where K_try = K_e (K_true or something else)
extract_result <- function(simest, Ke){
K_try <- 0:(length(simest$res$alpha_max$params_estim) - 1)
if (Ke >= length(simest$res$alpha_max$params_estim)) return(NULL)
fun <- function(l){
if (Ke %in% names(l)) return(l[[paste(Ke)]])
return(l)
}
res <- lapply(simest$res$alpha_max[c("params_estim",
#"Yhat", "Zhat", "Yvar", "Zvar",
"edge.quality",
"params_raw", "params_init_estim", "m_Y_estim")], fun)
res$results_summary <- subset(simest$res$alpha_max$results_summary, K_try == Ke)
return(list(sim = simest$sim,
res = res))
}
## Extract K_true
extract_result_K_true <- function(simest){
K_true <- simest$sim$K
return(extract_result(simest, K_true))
}
simests_K_true <- as.name(paste0("simestimations", ak, "_K_true"))
assign(paste(simests_K_true), lapply(eval(simests), extract_result_K_true))
## Extract K_select
extract_result_K_select <- function(simest, method = "BGH"){
res <- simest$res$alpha_max[[method]]
res$params_estim <- res$params_select
return(list(sim = simest$sim,
res = res))
}
simests_K_select <- as.name(paste0("simestimations", ak, "_K_select"))
assign(paste(simests_K_select), lapply(eval(simests), extract_result_K_select))
rm(list = c(paste(simests), "simests"))
gc()
# #########################################################
# ## Computation of means at tips
# #########################################################
# compute_mean_estim <- function(simest){
# if (is.null(simest)){
# return(NULL)
# } else {
# XX <- simulate_internal(phylo = trees[[paste(simest$ntaxa)]],
# process = "OU",
# root.state = simest$params_estim$root.state,
# variance = simest$params_estim$variance,
# shifts = simest$params_estim$shifts,
# selection.strength = simest$params_estim$selection.strength,
# optimal.value = simest$params_estim$optimal.value)
# m_Y_estim <- extract_simulate_internal(XX, where="tips", what="expectations")
# simest$m_Y_estim <- m_Y_estim
# return(simest)
# }
# }
#
# simestimations_alpha_known_K_true <- lapply(simestimations_alpha_known_K_true,
# compute_mean_estim)
# simestimations_alpha_known_K_select<-lapply(simestimations_alpha_known_K_select,
# compute_mean_estim)
######################
## Add simu parameters to summary
######################
add_simu_params <- function(simest){
simest$res$results_summary$alpha <- simest$sim$alpha
simest$res$results_summary$gamma <- simest$sim$gamma
simest$res$results_summary$K <- simest$sim$K
simest$res$results_summary$s_noise <- simest$sim$s_noise
simest$res$results_summary$n <- simest$sim$n
simest$res$results_summary$ntaxa <- simest$sim$ntaxa
simest$res$results_summary$grp <- simest$sim$grp
simest$res$results_summary$difficulty <- simest$sim$difficulty
simest$res$results_summary$log_likelihood.true <- simest$sim$log_likelihood.true
simest$res$results_summary$it <- simest$sim$it
return(simest)
}
assign(paste(simests_K_true), lapply(eval(simests_K_true), add_simu_params))
assign(paste(simests_K_select), lapply(eval(simests_K_select), add_simu_params))
######################
## Add lambda parameter
######################
add_lambda <- function(simest){
if (is.null(simest)) return(NULL)
simest$res$results_summary$lambda_estim <- simest$res$params_estim$lambda
simest$res$results_summary$lambda <- beta_0
return(simest)
}
assign(paste(simests_K_true), lapply(eval(simests_K_true), add_lambda))
assign(paste(simests_K_select), lapply(eval(simests_K_select), add_lambda))
###########################################
## Scores involving lists
## (Compute complex score and add them to result summary for each element of the list)
###########################################
sensitivity <- function(tr.edges, est.edges) { # Sensibilité
if (is.null(tr.edges) ) return(NA) # If no true shift at all, any shift found is misplaced
if (is.null(est.edges)) return(0) # If no estimated shift, TP = 0
return(sum(est.edges %in% tr.edges)/length(tr.edges))
}
specificity <- function(tr.edges, est.edges, nedges) { # spécificité
edges <- 1:nedges
if (is.null(est.edges)) est.edges <- nedges + 1
if (is.null(tr.edges)) tr.edges <- nedges + 1
return(sum(edges[-est.edges] %in% edges[-tr.edges])/length(edges[-tr.edges]))
}
precision <- function(tr.edges, est.edges) { # Precision
if (is.null(tr.edges) ) return(NA) # If no true shift at all, any shift found is misplaced
if (is.null(est.edges)) return(0) # If no estimated shift, TP = 0
return(sum(est.edges %in% tr.edges)/length(est.edges))
}
match.partitions <- function(tr, est, nta) {
tree <- trees[[paste0(nta)]]
part.list <- subtree.list[[paste0(nta)]]
## computes ARI between partitions induced by edges in tr$edges and est$edges
# if (is.null(est$edges)){
# return(-1) # If lasso initialization failed
# } else {
require(mclust)
part1 <- clusters_from_shifts(tree, tr$edges, part.list)
part2 <- clusters_from_shifts(tree, est$edges, part.list)
return(adjustedRandIndex(part1, part2))
# }
}
euclidian_distance <- function(m1, m2){
return(sum((m2 - m1)^2))
}
compute_list_scores_element <- function(simest){
if (is.null(simest)){ # || !simest$results_summary$CV_estim){
# return(data.frame(mean.edge.quality = NA, edge.recovery = NA,
# edge.init.recovery = NA, adjrandind = NA, adjrandind.init = NA))
return(NULL)
} else {
df <- data.frame(
# mean.edge.quality = mean(simest$edge.quality),
sensitivity = sensitivity(simest$sim$shifts$edges,
simest$res$params_estim$shifts$edges),
sensitivity.init = sensitivity(simest$sim$shifts$edges,
simest$res$params_init_estim$shifts$edges),
specificity = specificity(simest$sim$shifts$edges,
simest$res$params_estim$shifts$edges,
length(simest$sim$Z_data) + length(simest$sim$Y_data) - 1),
specificity.init = specificity(simest$sim$shifts$edges,
simest$res$params_init_estim$shifts$edges,
length(simest$sim$Z_data) + length(simest$sim$Y_data) - 1),
precision = precision(simest$sim$shifts$edges,
simest$res$params_estim$shifts$edges),
precision.init = precision(simest$sim$shifts$edges,
simest$res$params_init_estim$shifts$edges),
adjrandind = match.partitions(simest$sim$shifts,
simest$res$params_estim$shifts,
simest$sim$ntaxa),
adjrandind.init = match.partitions(simest$sim$shifts,
simest$res$params_init_estim$shifts,
simest$sim$ntaxa),
distance_means = euclidian_distance(simest$sim$m_Y_data,
simest$res$m_Y_estim),
gamma_estim = as.vector(simest$res$params_estim$variance) / (2 * as.vector(simest$res$params_estim$selection.strength))
)
simest$res$results_summary <- cbind(simest$res$results_summary, df)
return(simest)
}
}
assign(paste(simests_K_true),
lapply(eval(simests_K_true), compute_list_scores_element))
assign(paste(simests_K_select),
lapply(eval(simests_K_select), compute_list_scores_element))
######################
## Format the data
######################
extract_data_frame <- function(simests){
dd <- do.call(rbind, simests)
dd <- do.call(rbind, dd[, "res"])
results_summary <- as.data.frame(do.call(rbind, dd[,"results_summary"]))
return(results_summary)
}
res_sum <- as.name(paste0("results_summary", ak))
res_sum_K_true <- as.name(paste0("results_summary", ak, "_K_true"))
res_sum_K_select <- as.name(paste0("results_summary", ak, "_K_select"))
res_sum_K_select_true <- as.name(paste0("results_summary", ak, "_K_select_true"))
# assign(paste(res_sum), extract_data_frame(eval(simests)))
assign(paste(res_sum_K_true), extract_data_frame(eval(simests_K_true)))
assign(paste(res_sum_K_select), extract_data_frame(eval(simests_K_select)))
rm(list = c(paste(simests_K_true), "simests_K_true",
paste(simests_K_select), "simests_K_select"))
gc()
assign(paste0(res_sum_K_true),
transform(eval(res_sum_K_true), K_type = "K_true"))
assign(paste0(res_sum_K_select),
transform(eval(res_sum_K_select), K_type = "K_select"))
assign(paste(res_sum_K_select_true), rbind(eval(res_sum_K_true),
eval(res_sum_K_select)))
comp_time <- as.name(paste0("computation_time", ak))
assign(paste(comp_time),
sum(eval(res_sum_K_true)$total_time))
m_time <- as.name(paste0("mean_time", ak))
assign(paste(m_time),
mean(eval(res_sum_K_true)$total_time))
######################################################################
## Sumarizing Function
######################################################################
## Simple Scores
RMSE <- function(y, true.value) {
return(sqrt(mean((y - true.value)^2)))
}
compute_scores <- function(res_sum){
# Combinations of "true" parameters (no n)
# Aggregation of parameter estimates conditonal upon convergence of EM procedure
return(ddply(subset(res_sum, CV_estim == 1),
.(alpha, gamma, K, s_noise, ntaxa, grp),
summarize,
## Simple Scores
alpha.mean = mean(alpha_estim),
alpha.sd = sd(alpha_estim),
alpha.RMSE = RMSE(alpha_estim, unique(alpha)),
# alpha.init.RMSE = RMSE(alpha_estim_init, unique(alpha)),
gamma.mean = mean(gamma_estim),
gamma.sd = sd(gamma_estim),
gamma.RMSE = RMSE(gamma_estim, unique(gamma)),
# gamma.init.RMSE = RMSE(gamma_estim_init, unique(gamma)),
beta_0.mean = mean(beta_0_estim),
beta_0.sd = sd(beta_0_estim),
beta_0.RMSE = RMSE(beta_0_estim, beta_0),
# beta_0.init.RMSE = RMSE(beta_0_estim_init, beta_0),
lambda.mean = mean(lambda_estim),
lambda.sd = sd(lambda_estim),
lambda.RMSE = RMSE(lambda_estim, lambda),
log_likelihood.mean = mean(log_likelihood),
log_likelihood.mean.init = mean(log_likelihood_init),
mean_number_new_shifts.mean = mean(mean_number_new_shifts),
number_equivalent_solutions.mean = mean(number_equivalent_solutions),
difficulty.mean = mean(difficulty),
EM_steps.mean = mean(EM_steps, na.rm = TRUE),
K.match = mean(K == K_try),
K.match.leq = mean(K >= K_try),
## List Scores
# mean.edge.quality.mean = mean(mean.edge.quality),
sensitivity.mean = mean(sensitivity),
sensitivity.init.mean = mean(sensitivity.init),
specificity.mean = mean(specificity),
specificity.init.mean = mean(specificity.init),
precision.mean = mean(precision),
precision.init.mean = mean(precision.init),
mean.adjrandind = mean(adjrandind),
mean.adjrandind.init = mean(adjrandind.init),
mean.distance_means = mean(distance_means),
total_time.mean = mean(total_time)
))
}
sum_scores_K_true <- as.name(paste0("summary_scores", ak, "_K_true"))
sum_scores_K_select <- as.name(paste0("summary_scores", ak, "_K_select"))
sum_scores_K_select_true <- as.name(paste0("summary_scores", ak, "_K_select_true"))
assign(paste(sum_scores_K_true), compute_scores(eval(res_sum_K_true)))
assign(paste(sum_scores_K_select), compute_scores(eval(res_sum_K_select)))
assign(paste0(sum_scores_K_true),
transform(eval(sum_scores_K_true), K_type = "K_true"))
assign(paste0(sum_scores_K_select),
transform(eval(sum_scores_K_select), K_type = "K_select"))
assign(paste(sum_scores_K_select_true),
rbind(eval(sum_scores_K_true),
eval(sum_scores_K_select)))
rm(list = c(paste(sum_scores_K_true), paste(sum_scores_K_select)))
# save.image(paste0(saveresultfile, "_aranged", ak, "-", datestamp_day, "_heavy.RData"))
rm(list = c("comp_time", "m_time",
# "simparams_alpha", "simparams_base",
# "simparams_gamma", "simparams_K",
# paste(simests), "simests",
# paste(simests_K_true), "simests_K_true",
# paste(simests_K_select), "simests_K_select",
paste(res_sum_K_true), "res_sum_K_true",
paste(res_sum_K_select), "res_sum_K_select"))
save.image(paste0(saveresultfile, "_aranged", ak, "-", datestamp_day, ".RData"))
##################################################################
## Merge alpha known and unknown
##################################################################
rm(list = ls())
saveresultfile = "../Results/Simulations_Several_K/several_K_estimations_SUN_rBM"
datestamp_day <- "2016-02-17" # 2015-03-24
ak <- ""
load(paste0(saveresultfile, "_aranged", ak, "-", datestamp_day, ".RData"))
datestamp_day <- "2015-12-02" # 2015-03-17
ak <- "_alpha_known"
load(paste0(saveresultfile, "_aranged", ak, "-", datestamp_day, ".RData"))
results_summary_K_select_true$alpha_known <- FALSE
results_summary_alpha_known_K_select_true$alpha_known <- TRUE
results_summary_K_select_true <- rbind(results_summary_K_select_true,
results_summary_alpha_known_K_select_true)
rm(results_summary_alpha_known_K_select_true)
summary_scores_K_select_true$alpha_known <- FALSE
summary_scores_alpha_known_K_select_true$alpha_known <- TRUE
summary_scores_K_select_true <- rbind(summary_scores_K_select_true,
summary_scores_alpha_known_K_select_true)
rm(summary_scores_alpha_known_K_select_true)
datestamp_day <- format(Sys.time(), "%Y-%m-%d")
save.image(paste0(saveresultfile, "aranged_both_alpha", "-", datestamp_day, ".RData"))
##################################################################
## Merge fixed / random roots, alpha known
##################################################################
rm(list = ls())
## SUN OU
saveresultfile = "../Results/Simulations_Several_K/several_K_estimations"
datestamp_day <- "2015-03-17"
ak <- "_alpha_known"
load(paste0(saveresultfile, "_aranged", ak, "-", datestamp_day, ".RData"))
results_summary_alpha_known_K_select_true$method_inference <- "ak_SUN_OU"
results_summary_alpha_known_K_select_true$lambda <- beta_0
results_summary_alpha_known_K_select_true$lambda_estim <- results_summary_alpha_known_K_select_true$beta_0_estim
results_summary_K_select_true_SUN_OU <- results_summary_alpha_known_K_select_true
summary_scores_alpha_known_K_select_true$method_inference <- "ak_SUN_OU"
summary_scores_K_select_true_SUN_OU <- summary_scores_alpha_known_K_select_true
## SUN rBM
saveresultfile = "../Results/Simulations_Several_K/several_K_estimations"
datestamp_day <- "2016-03-17" #"2015-12-02"
ak <- "_alpha_known"
load(paste0(saveresultfile, "_SUN_rBM_aranged", ak, "-", datestamp_day, ".RData"))
results_summary_alpha_known_K_select_true$method_inference <- "ak_SUN_rBM"
results_summary_K_select_true_SUN_rBM <- results_summary_alpha_known_K_select_true
results_summary_K_select_true_SUN_rBM$K_select <- results_summary_K_select_true_SUN_rBM$K_select_BGH
summary_scores_alpha_known_K_select_true$method_inference <- "ak_SUN_rBM"
summary_scores_K_select_true_SUN_rBM <- summary_scores_alpha_known_K_select_true
## FUN rBM
saveresultfile = "../Results/Simulations_Several_K/several_K_estimations"
datestamp_day <- "2015-12-04"
ak <- "_alpha_known"
load(paste0(saveresultfile, "_FUN_aranged", ak, "-", datestamp_day, ".RData"))
results_summary_alpha_known_K_select_true$method_inference <- "ak_FUN_rBM"
results_summary_K_select_true_FUN <- results_summary_alpha_known_K_select_true
results_summary_K_select_true_FUN$K_select <- results_summary_K_select_true_FUN$K_select_BGH
names_inter <- intersect(colnames(results_summary_K_select_true_SUN_OU),
intersect(colnames(results_summary_K_select_true_SUN_rBM),
colnames(results_summary_K_select_true_FUN)))
results_summary_K_select_true <- rbind(results_summary_K_select_true_SUN_OU[, names_inter],
results_summary_K_select_true_SUN_rBM[, names_inter],
results_summary_K_select_true_FUN[, names_inter])
rm(results_summary_alpha_known_K_select_true,
results_summary_K_select_true_SUN_OU,
results_summary_K_select_true_SUN_rBM,
results_summary_K_select_true_FUN)
summary_scores_alpha_known_K_select_true$method_inference <- "ak_FUN_rBM"
summary_scores_K_select_true_FUN <- summary_scores_alpha_known_K_select_true
names_inter <- intersect(colnames(summary_scores_K_select_true_SUN_OU),
intersect(colnames(summary_scores_K_select_true_SUN_rBM),
colnames(summary_scores_K_select_true_FUN)))
summary_scores_K_select_true <- rbind(summary_scores_K_select_true_SUN_OU[, names_inter],
summary_scores_K_select_true_SUN_rBM[, names_inter],
summary_scores_K_select_true_FUN[, names_inter])
rm(summary_scores_alpha_known_K_select_true,
summary_scores_K_select_true_SUN_OU,
summary_scores_K_select_true_SUN_rBM,
summary_scores_K_select_true_FUN)
saveresultfile = "../Results/Simulations_Several_K/several_K_estimations"
datestamp_day <- format(Sys.time(), "%Y-%m-%d")
save.image(paste0(saveresultfile, "_aranged_alpha_known_SUN_FUN", "-", datestamp_day, ".RData"))
##################################################################
## Merge fixed / random roots, both alpha
##################################################################
rm(list = ls())
## SUN OU
saveresultfile = "../Results/Simulations_Several_K/several_K_estimations"
datestamp_day <- "2016-02-19"
ak <- "_both_alpha"
load(paste0(saveresultfile, "_aranged", ak, "-", datestamp_day, ".RData"))
results_summary_K_select_true$method_inference <- "ak_SUN_OU"
results_summary_K_select_true$lambda <- beta_0
results_summary_K_select_true$lambda_estim <- results_summary_K_select_true$beta_0_estim
results_summary_K_select_true_SUN_OU <- results_summary_K_select_true
summary_scores_K_select_true$method_inference <- "ak_SUN_OU"
summary_scores_K_select_true_SUN_OU <- summary_scores_K_select_true
## SUN rBM
saveresultfile = "../Results/Simulations_Several_K/several_K_estimations"
datestamp_day <- "2016-02-26"
ak <- "_both_alpha"
load(paste0(saveresultfile, "_SUN_rBM_aranged", ak, "-", datestamp_day, ".RData"))
results_summary_K_select_true$method_inference <- "ak_SUN_rBM"
results_summary_K_select_true_SUN_rBM <- results_summary_K_select_true
results_summary_K_select_true_SUN_rBM$K_select <- results_summary_K_select_true_SUN_rBM$K_select_BGH
summary_scores_K_select_true$method_inference <- "ak_SUN_rBM"
summary_scores_K_select_true_SUN_rBM <- summary_scores_K_select_true
# ## FUN rBM
# saveresultfile = "../Results/Simulations_Several_K/several_K_estimations"
# datestamp_day <- "2015-12-04"
# ak <- "_both_alpha"
# load(paste0(saveresultfile, "_FUN_aranged", ak, "-", datestamp_day, ".RData"))
#
# results_summary_K_select_true$method_inference <- "ak_FUN_rBM"
# results_summary_K_select_true_FUN <- results_summary_K_select_true
# results_summary_K_select_true_FUN$K_select <- results_summary_K_select_true_FUN$K_select_BGH
## Merging
names_inter <- intersect(colnames(results_summary_K_select_true_SUN_OU),
colnames(results_summary_K_select_true_SUN_rBM))
# intersect(colnames(results_summary_K_select_true_SUN_rBM),
# colnames(results_summary_K_select_true_FUN)))
results_summary_K_select_true <- rbind(results_summary_K_select_true_SUN_OU[, names_inter],
results_summary_K_select_true_SUN_rBM[, names_inter]
# results_summary_K_select_true_FUN[, names_inter]
)
rm(results_summary_K_select_true_SUN_OU,
results_summary_K_select_true_SUN_rBM
# results_summary_K_select_true_FUN
)
summary_scores_K_select_true$method_inference <- "ak_FUN_rBM"
summary_scores_K_select_true_FUN <- summary_scores_K_select_true
names_inter <- intersect(colnames(summary_scores_K_select_true_SUN_OU),
colnames(summary_scores_K_select_true_SUN_rBM))
# intersect(colnames(summary_scores_K_select_true_SUN_rBM),
# colnames(summary_scores_K_select_true_FUN)))
summary_scores_K_select_true <- rbind(summary_scores_K_select_true_SUN_OU[, names_inter],
summary_scores_K_select_true_SUN_rBM[, names_inter]
# summary_scores_K_select_true_FUN[, names_inter]
)
rm(summary_scores_K_select_true_SUN_OU,
summary_scores_K_select_true_SUN_rBM
# summary_scores_K_select_true_FUN
)
saveresultfile = "../Results/Simulations_Several_K/several_K_estimations"
datestamp_day <- format(Sys.time(), "%Y-%m-%d")
save.image(paste0(saveresultfile, "_aranged_SUN_both", "-", datestamp_day, ".RData"))
######################################################################
## Equivalent Solutions
######################################################################
saveresultfile = "../Results/Simulations_Several_K/several_K_estimations_FUN_"
datestamp_day <- "2015-11-20" # 03-17 03-24
ak <- "" # "_alpha_known"
load(paste0(saveresultfile, "aranged", ak, "-", datestamp_day, "_heavy.RData"))
indexes <- which((results_summary_K_select$number_equivalent_solutions > 1))
length(indexes)
simestimations_K_select_eqs <- simestimations_K_select[indexes]
results_summary_K_select_eqs <- results_summary_K_select[indexes,]
save(simestimations_K_select_eqs, results_summary_K_select_eqs, file = paste0(saveresultfile, "equivalent_solutions", ak, "-", datestamp_day, ".RData"))
#################################################################
## Plots
#################################################################
load(paste0(saveresultfile, "_aranged", "_alpha_known-", datestamp_day, "_", inference.index, "_heavy.RData"))
## Process
conf <- 17
pars <- simestimations_alpha_known_K_select[[conf]]
## Clustering of tips and edges
plot_data.process.actual(Y.state = pars$m_Y_estim,
phylo = trees[[paste(pars$ntaxa)]],
params = list(shifts = pars$params_estim$shifts,
optimal.value = pars$params_estim$optimal.value),
adj = 2,
automatic_colors = TRUE)
pars <- simlist[[conf]]
plot_data.process.actual(Y.state = pars$m_Y_data,
phylo = trees[[paste(pars$ntaxa)]],
params = list(shifts = pars$shifts,
optimal.value = beta_0),
adj = 2,
automatic_colors = TRUE)
## Utility functions
gtable_filter <- function (x, pattern, fixed = FALSE, trim = TRUE, complementary = FALSE)
{
matches <- grepl(pattern, x$layout$name, fixed = fixed)
if (complementary) matches <- !matches
x$layout <- x$layout[matches, , drop = FALSE]
x$grobs <- x$grobs[matches]
if (trim)
x <- gtable_trim(x)
x
}
g_table_replace_xlab_strips <- function(g){
xlab <- gtable_filter(g, "xlab", trim = FALSE, complementary = FALSE)
g <- gtable_filter(g, "xlab", trim = FALSE, complementary = TRUE)
matches <- grepl("strip-top", g$layout$name)
g$layout[matches, "t"] <- xlab$layout[1,"t"]
g$layout[matches, "b"] <- xlab$layout[1,"b"]
return(g)
}
g_table_put_right_strips <- function(g, grid){
# Find x label and delete it
xlab <- gtable_filter(g, "xlab", trim = FALSE, complementary = FALSE)
g <- gtable_filter(g, "xlab", trim = FALSE, complementary = TRUE)
# delete unused strips
g <- gtable_filter(g, "strip_t-[4-9]", trim = FALSE, complementary = TRUE)
# deplace used strips
matches <- grepl("strip_t", g$layout$name)
g$layout[matches, "t"] <- xlab$layout[1,"t"]
g$layout[matches, "b"] <- xlab$layout[1,"b"]
# rename used strips
# fun <- function(strip){
# index <- grep("strip.text.x.text", names(strip$children))
# text <- strip$children[[index]]$label
# strip$children[[index]]$label <- sub(".*, ", "", text)
# strip$children[[index]]$label <- lapply(strip$children[[index]]$label,
# function(x) parse(text = x))
# return(strip)
# }
# g$grobs[matches] <- lapply(g$grobs[matches], fun)
# g$grob[matches] <- lapply(g$grob[matches], fun)
return(g)
}
facet_wrap_labeller <- function(g, labels = NULL) {
gg <- g$grobs
strips <- grep("strip_t", names(gg))
for(ii in seq_along(labels)) {
modgrob <- getGrob(gg[[strips[ii]]], "strip.text",
grep=TRUE, global=TRUE)
gg[[strips[ii]]]$children[[modgrob$name]] <- editGrob(modgrob,label=labels[ii])
}
g$grobs <- gg
return(g)
}
my.labeller <- function(variable, value) {
value <- paste(variable, "==", as.character(value))
value <- lapply(value, function(x) parse(text = x))
return(value)
}
## Format data for plots
format_plot <- function(df){
# Save true values
df$alpha.true <- df$alpha
df$gamma.true <- df$gamma
df$K.true <- df$K
# Transformations and names
df[["ln(2)/alpha"]] <- log(2)/df$alpha
df[["sigma^2 == 2*alpha*gamma^2"]] <- 2*df$alpha*df$gamma
# Melt
df_plot <- melt(df,
measure.vars = c("ln(2)/alpha", "sigma^2 == 2*alpha*gamma^2", "K"),
value.name = "parameter.value")
# Supress lines where group != variable
grp_var <- c(alpha_var = "ln(2)/alpha",
gamma_var = "sigma^2 == 2*alpha*gamma^2",
K_var = "K")
fun <- function(z){
if (z["grp"] == "base") return(TRUE)
return(unname(grp_var[z["grp"]] == z["variable"]))
}
masque <- apply(df_plot, 1, fun)
df_plot <- df_plot[masque,]
df_plot$ntaxa <- as.factor(df_plot$ntaxa)
df_plot$parameter.value <- round(df_plot$parameter.value, 2)
df_plot$parameter.value <- as.factor(df_plot$parameter.value)
return(df_plot)
}
summary_scores_K_select_true_plot <- format_plot(summary_scores_K_select_true)
results_summary_K_select_true_plot <- format_plot(results_summary_K_select_true)
## Plot functions
plot_results_score <- function(score, score_name){
p <- ggplot(summary_scores_K_select_true_plot,
aes_string(x = "parameter.value",
y = score,
linetype = "K_type",
color = "alpha_known"))
p <- p + aes(group = interaction(alpha_known, K_type))
p <- p + facet_wrap(ntaxa ~ variable, scales = "free")
p <- p + scale_linetype(name = "K",
labels = c("Known", "Estimated"))
p <- p + geom_line()
p <- p + scale_colour_discrete(name = expression(alpha),
breaks = c(TRUE, FALSE),
labels = c("Known", "Estimated"))
p <- p + labs(x = "",
y = score_name)
#p <- p + scale_x_continuous(trans = log_trans(10))
p <- p + theme_bw()
p <- p + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 1))
p <- p + theme(axis.text = element_text(size = 12),
strip.text = element_text(size = 12),
strip.background = element_blank()
##legend.position = c(0, 1),
##legend.justification = c(0, 1)
)
g <- ggplotGrob(p)
g <- g_table_put_right_strips(g)
labels <- unname(sapply(levels(summary_scores_K_select_true_plot$variable),
function(z) parse(text = z)))
g <- facet_wrap_labeller(g, labels = labels)
grid.newpage()
grid.draw(g)
}
plot_results_score_smooth <- function(score, score_name, plot_points = TRUE){
replace_name <- function(i){
i <- as.integer(as.character(i))
i <- formatC(i, width = 3, format = "d", flag = "0")
i <- paste0("ntaxa == ", i)
return(i)
}
data <- results_summary_K_select_true_plot
data$ntaxa <- sapply(data$ntaxa, replace_name)
data$ntaxa <- as.factor(data$ntaxa)
p <- ggplot(data,
aes_string(x = "parameter.value",
y = score,
color = "K_type"),
shrink = TRUE)
# p <- p + aes(group = interaction(ntaxa, K_type))
p <- p + facet_grid(ntaxa ~ variable,
labeller = label_parsed,
scales = "free",
shrink = TRUE)
if (plot_points){
p <- p + geom_point(alpha = 0.2,
position = position_jitter(width = 0.4, height = 0))
} else {
sts <- quantile(data[[score]], probs = c(0.05, 0.95)) # Compute lower and upper whisker limits
p = p + coord_cartesian(ylim = c(sts[1], sts[2]))
}
p <- p + geom_boxplot(outlier.size = 0, alpha = 0.5)
p <- p + geom_smooth(aes(group = K_type), method = "loess", se = FALSE, size = 1.2)
p <- p + scale_colour_discrete(name = "Estimation")
p <- p + labs(x = "",
y = score_name)
#p <- p + scale_x_continuous(trans = log_trans(10))
p <- p + theme_bw()
p <- p + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 1))
p <- p + theme(axis.text = element_text(size = 12),
strip.text = element_text(size = 12),
strip.background = element_blank()
##legend.position = c(0, 1),
##legend.justification = c(0, 1)
)
g <- ggplotGrob(p)
g <- g_table_replace_xlab_strips(g)
grid.newpage()
grid.draw(g)
}
plot_results_K_select_true <- function(name){
replace_name <- function(i){
i <- as.integer(as.character(i))
i <- formatC(i, width = 3, format = "d", flag = "0")
i <- paste0("ntaxa == ", i)
return(i)
}
data <- results_summary_K_select_true_plot
data$ntaxa <- sapply(data$ntaxa, replace_name)
data$ntaxa <- as.factor(data$ntaxa)
data$parameter.value <- data$parameter.value
p <- ggplot(data,
aes_string(x = "parameter.value",
y = "K_select",
color = "alpha_known"))
# p <- p + aes(group = interaction(ntaxa, K_type))
p <- p + facet_wrap(ntaxa ~ variable,
#labeller = label_parsed,
scales = "free",
shrink = TRUE)
# p <- p + geom_point(alpha = 0.2,
# position = position_jitter(width = 0.4, height = 0))
p <- p + geom_boxplot(outlier.size = 0.5, alpha = 0.5)
p <- p + geom_point(aes_string(x = "parameter.value", y = "K.true",
group = interaction("ntaxa", "variable")),
colour = "black", size = 2)
p <- p + scale_colour_discrete(name = expression(alpha),
breaks = c(TRUE, FALSE),
labels = c("Known", "Estimated"))
p <- p + labs(x = "",
y = name)
#p <- p + scale_x_continuous(trans = log_trans(10))
p <- p + theme_bw()
p <- p + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 1))
p <- p + theme(axis.text = element_text(size = 12),
strip.text = element_text(size = 12),
strip.background = element_blank()
##legend.position = c(0, 1),
##legend.justification = c(0, 1)
)
g <- ggplotGrob(p)
g <- g_table_put_right_strips(g)
labels <- unname(sapply(levels(results_summary_K_select_true_plot$variable),
function(z) parse(text = z)))
g <- facet_wrap_labeller(g, labels = labels)
grid.newpage()
grid.draw(g)
}
plot_results_estim_vs_true <- function(estim, true, name, sub, val){
replace_name <- function(i){
i <- as.integer(as.character(i))
i <- formatC(i, width = 3, format = "d", flag = "0")
i <- paste0("ntaxa == ", i)
return(i)
}
data <- subset(results_summary_K_select_true_plot,
results_summary_K_select_true_plot[, sub] == val)
data$ntaxa <- sapply(data$ntaxa, replace_name)
data$ntaxa <- as.factor(data$ntaxa)
data$parameter.value <- data$parameter.value
if (sub == "K_type"){
p <- ggplot(data,
aes_string(x = "parameter.value",
y = estim,
color = "alpha_known"))
p <- p + scale_colour_discrete(name = expression(alpha),
breaks = c(TRUE, FALSE),
labels = c("Known", "Estimated"))
} else {
p <- ggplot(data,
aes_string(x = "parameter.value",
y = estim,
color = "K_type"))
p <- p + scale_colour_discrete(name = "K",
labels = c("Known", "Estimated"))
}
# p <- p + aes(group = interaction(ntaxa, K_type))
p <- p + facet_wrap(ntaxa ~ variable,
#labeller = label_parsed,
scales = "free",
shrink = TRUE)
# p <- p + geom_point(alpha = 0.2,
# position = position_jitter(width = 0.4, height = 0))
p <- p + geom_boxplot(outlier.size = 0.5, alpha = 0.5)
if (estim == "log_likelihood"){
# p <- p + geom_boxplot(aes_string(x = "parameter.value", y = true),
# colour = "red", outlier.size = 0.5, alpha = 0.1)
p <- p + stat_summary(fun.y = median, geom = "point",
aes_string(x = "parameter.value", y = true,
group = interaction("ntaxa", "variable")),
colour = "black", size = 2)
} else {
p <- p + geom_point(aes_string(x = "parameter.value", y = true,
group = interaction("ntaxa", "variable")),
colour = "black", size = 2)
}
p <- p + labs(x = "",
y = name)
#p <- p + scale_x_continuous(trans = log_trans(10))
p <- p + theme_bw()
p <- p + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 1))
p <- p + theme(axis.text = element_text(size = 12),
strip.text = element_text(size = 12),
strip.background = element_blank()
##legend.position = c(0, 1),
##legend.justification = c(0, 1)
)
g <- ggplotGrob(p)
g <- g_table_put_right_strips(g)
labels <- unname(sapply(levels(results_summary_K_select_true_plot$variable),
function(z) parse(text = z)))
g <- facet_wrap_labeller(g, labels = labels)
grid.newpage()
grid.draw(g)
}
## Gamma RMSE
plot_results_score("gamma.RMSE/gamma.true",
expression(paste("RMSE(", gamma^2, ")/", gamma^2)))
plot_results_score_smooth("gamma_estim",
expression(paste("Estimated ", gamma^2)))
plot_results_estim_vs_true("gamma_estim", "gamma.true",
expression(gamma^2),
"alpha_known", TRUE)
plot_results_estim_vs_true("gamma_estim", "gamma.true",
expression(gamma^2),
"alpha_known", FALSE)
plot_results_score_alpha("gamma.RMSE/gamma.true",
expression(paste("RMSE(", gamma^2, ")/", gamma^2)))
## beta_0 RMSE
plot_results_score("beta_0.RMSE",
expression(paste("RMSE(", beta[0], ")")))
plot_results_score_smooth("beta_0_estim",
expression(paste("Estimated ", beta[0])))
plot_results_score_alpha("beta_0.RMSE",
expression(paste("RMSE(", beta[0], ")")))
## alpha RMSE
plot_results_score("alpha.RMSE/alpha.true",
expression(paste("RMSE(", alpha, ")/", alpha)))
## log_likelihood
plot_results_score("log_likelihood.mean", "Log Likelihood")
plot_results_score_smooth("log_likelihood", "Log Likelihood")
## ARI
plot_results_score("mean.adjrandind", "ARI")
plot_results_score_smooth("adjrandind", "ARI")
## Euclidian distances
plot_results_score("mean.distance_means", "Euclidian distance")
plot_results_score_smooth("distance_means", "Euclidian distance")#, plot_points = FALSE)
## K match
plot_results_score("K.match", "Percent Exact Match")
## K_s
plot_results_K_select_true("K select")
## Sensibilité, spécificité, précision
plot_results_score_smooth("sensitivity", "Sensitivity")
plot_results_score_smooth("1 - specificity", "False Positive Rate")
plot_results_score_smooth("1 - precision", "False Discovery Rate")
## K_true vs K_select
nbr_inferences_K <- function(df){
len <- unique(floor(sqrt(nta)))
res <- vector(length = len + 1)
names(res) <- 0:len
for (K_e in 0:len){
res[K_e + 1] <- sum(df$K_select == K_e) / length(df$K_select)
}
names(res) <- 0:len
return(res)
}
incidences_K_select <- ddply(results_summary,
.(K, ntaxa),
nbr_inferences_K)
incidences_K_select <- melt(incidences_K_select,
id.vars = c("K", "ntaxa"),
variable.name = "K_select",
value.name = "frequence_observed")
incidences_K_select$K_select <- as.numeric(levels(incidences_K_select$K_select))[incidences_K_select$K_select]
incidences_K_select <- merge(results_summary, incidences_K_select)
p <- ggplot(incidences_K_select, aes(x = K, y = K_select, size = frequence_observed))
p <- p + facet_grid(. ~ ntaxa, labeller = my.labeller)
p <- p + geom_abline(intercept = 0, slope=1)
p <- p + geom_hline(aes(yintercept = floor(sqrt(ntaxa))))
p <- p + geom_smooth(method = "lm", se = FALSE, show_guide=FALSE)
p <- p + geom_point()
p <- p + scale_size(name = "Frequence Estimated")
# breaks = c("FALSE", "TRUE"),
# labels = c("Estimated", "Known"))
p <- p + theme_bw()
p <- p + theme(axis.text = element_text(size = 12),
strip.text = element_text(size = 12)
##legend.position = c(0, 1),
##legend.justification = c(0, 1)
)
p
## Faire un box plot
p <- ggplot(results_summary, aes(x = as.factor(K), y = K_select))
p <- p + facet_grid(. ~ ntaxa, labeller = my.labeller)
p <- p + geom_boxplot()
p <- p + geom_abline(intercept = 0, slope=1)
p <- p + geom_hline(aes(yintercept = floor(sqrt(ntaxa))))
p <- p + geom_smooth(aes(group = 1), method = "lm", se = FALSE, show_guide=FALSE)
p <- p + scale_size(name = "Frequence Estimated")
# breaks = c("FALSE", "TRUE"),
# labels = c("Estimated", "Known"))
p <- p + labs(x = "True K",
y = "Selected K")
p <- p + coord_cartesian(ylim = c(-0.5, 16.5))
p <- p + theme_bw()
p <- p + theme(axis.text = element_text(size = 12),
strip.text = element_text(size = 12)
##legend.position = c(0, 1),
##legend.justification = c(0, 1)
)
p
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.