#############################################
## Exploitation several K
#############################################
##############
## Parameters and functions
##############
require(doParallel)
require(foreach)
require(ape)
#require(glmnet) # For Lasso initialization
require(quadrupen) # For Lasso initialization
require(robustbase) # For robust fitting of alpha
library(TreeSim) # For simulation of the tree
require(ggplot2) # Plot
library(scales) # plot
library(reshape2) # Plot
library(grid) # Plot
library(plyr)
## Alpha known - simulation model
# saveresultfile = "../Results/Simulations_Several_K/several_K_estimations"
# datestamp_day <- "2015-03-17"
# ak <- "_alpha_known"
# student <- ""
# load(paste0(saveresultfile, ak, "-", datestamp_day, "_all.RData"))
## Alpha unknown - model simulation
# saveresultfile = "../Results/Simulations_Several_K/several_K_estimations"
# datestamp_day <- "2015-03-24"
# ak <- ""
# student <- ""
# load(paste0(saveresultfile, ak, "-", datestamp_day, "_all.RData"))
# ## Alpha unknown
# saveresultfile = "../Results/Simulations_Student/student_estimations"
# datestamp_day <- "2015-09-09"
# ak <- ""
# student <- "_df5"
# favorable <- "favorables"
# load(paste0(saveresultfile, favorable, student, ak, "-", datestamp_day, "_all.RData"))
saveresultfile = "../Results/Simulations_Several_K/several_K_estimations"
datestamp_day <- "2015-03-17" # 17 24
ak <- "_alpha_known" # ""
load(paste0(saveresultfile, ak, "-", datestamp_day, "_all.RData"))
## Source functions
source("R/simulate.R")
source("R/estimateEM.R")
source("R/init_EM.R")
source("R/E_step.R")
source("R/M_step.R")
source("R/shutoff.R")
source("R/generic_functions.R")
source("R/shifts_manipulations.R")
source("R/plot_functions.R")
source("R/parsimonyNumber.R")
source("R/partitionsNumber.R")
source("R/model_selection.R")
# saveresultfile = "../Results/Simulations_Several_K/several_K"
# datestamp_day <- "2015-03-31_0"
#
# ak <- "_alpha_0" # "_alpha_known"
#
# load(paste0(saveresultfile, ak, "-", datestamp_day, ".RData"))
#########################################################
## Time computation
#########################################################
add_total_time <- function(simest){
simest$results_summary$total_time <- sum(simest$results_summary$time)
return(simest)
}
simests <- as.name(paste0("simestimations", favorable, student, ak))
assign(paste(simests), lapply(eval(simests), add_total_time))
#########################################################
## Model Selection
#########################################################
## Compute penalty and criteria for each set of parameters
penalty <- function(K_try, complexity, ntaxa){
return(1/2 * penalty_BaraudGiraudHuet_likelihood(K_try,
complexity,
ntaxa,
C = 1.1))
}
criteria <- function(ll, pen){
return(-ll + pen)
}
selected_K <- function(crit_ll, K_try){
K_try[which.min(crit_ll)]
}
add_crit_and_pen <- function(z){
z$pen_ll <- penalty(z$K_try,
z$complexity,
unique(z$ntaxa))
z$crit_ll <- criteria(z$log_likelihood,
z$pen_ll)
z$K_select <- z$K_try[which.min(z$crit_ll)]
return(z)
}
## Find K_select and add it to the list of results
add_K_select_to_list <- function(simest){
simest$results_summary <- ddply(simest$results_summary,
.(alpha, gamma, K, n, ntaxa, grp),
add_crit_and_pen)
simest$K_select <- unique(simest$results_summary$K_select)
return(simest)
}
assign(paste(simests), lapply(eval(simests), add_K_select_to_list))
#########################################################
## Model Selection Birge Massart Capushe
#########################################################
library(capushe)
## Compute penalty and criteria for each set of parameters
pen_shape_BM1 <- function(K_try, complexity){
return(penalty_BirgeMassart_shape1(K_try,
p = 1,
complexity,
B = 0.1))
}
pen_shape_BM2 <- function(K_try, complexity){
return(penalty_BirgeMassart_shape2(K_try,
p = 1,
complexity,
C = 2.5))
}
model_selection_capushe <- function(K_try, complexity, log_likelihood, pen_shape){
## If not enaugh data (ntaxa = 64), return NAs
if (length(K_try) < 11){
return(list(pen_DDSE = NA,
crit_DDSE = NA,
pen_Djump = NA,
crit_Djump = NA))
}
## Format data for capushe
data_capushe <- data.frame(names = K_try,
pen_shape = pen_shape,
complexity = complexity,
contrast = -log_likelihood)
## Capushe
cap_res <- capushe(data_capushe)
## Assign results
# DDSE
pen_DDSE <- 2 * cap_res@DDSE@interval$interval["max"] * pen_shape
crit_DDSE <- data_capushe$contrast + pen_DDSE
# Djump
pen_Djump <- cap_res@Djump@ModelHat$Kopt * pen_shape
crit_Djump <- data_capushe$contrast + pen_Djump
return(list(pen_DDSE = pen_DDSE,
crit_DDSE = crit_DDSE,
pen_Djump = pen_Djump,
crit_Djump = crit_Djump))
}
selected_K <- function(crit_ll, K_try){
if (anyNA(crit_ll)) return(NA)
return(K_try[which.min(crit_ll)])
}
add_crit_and_pen_BirgeMassart1 <- function(z){
pen_shape <- pen_shape_BM1(z$K_try, z$complexity)
tmp <- model_selection_capushe(z$K_try, z$complexity, z$log_likelihood,
pen_shape)
z$pen_BM1_DDSE <- tmp$pen_DDSE
z$crit_BM1_DDSE <- tmp$crit_DDSE
z$pen_BM1_Djump <- tmp$pen_Djump
z$crit_BM1_Djump <- tmp$crit_Djump
z$K_select_BM1_DDSE <- selected_K(z$crit_BM1_DDSE, z$K_try)
z$K_select_BM1_Djump <- selected_K(z$crit_BM1_Djump, z$K_try)
return(z)
}
add_crit_and_pen_BirgeMassart2 <- function(z){
pen_shape <- pen_shape_BM2(z$K_try, z$complexity)
tmp <- model_selection_capushe(z$K_try, z$complexity, z$log_likelihood,
pen_shape)
z$pen_BM2_DDSE <- tmp$pen_DDSE
z$crit_BM2_DDSE <- tmp$crit_DDSE
z$pen_BM2_Djump <- tmp$pen_Djump
z$crit_BM2_Djump <- tmp$crit_Djump
z$K_select_BM2_DDSE <- selected_K(z$crit_BM2_DDSE, z$K_try)
z$K_select_BM2_Djump <- selected_K(z$crit_BM2_Djump, z$K_try)
return(z)
}
## Find K_select and add it to the list of results
add_K_select_to_list_BirgeMassart <- function(simest){
simest$results_summary <- ddply(simest$results_summary,
.(alpha, gamma, K, n, ntaxa, grp),
add_crit_and_pen_BirgeMassart1)
simest$results_summary <- ddply(simest$results_summary,
.(alpha, gamma, K, n, ntaxa, grp),
add_crit_and_pen_BirgeMassart2)
simest$K_select_BM1_DDSE <- unique(simest$results_summary$K_select_BM1_DDSE)
simest$K_select_BM1_Djump <- unique(simest$results_summary$K_select_BM1_Djump)
simest$K_select_BM2_DDSE <- unique(simest$results_summary$K_select_BM2_DDSE)
simest$K_select_BM2_Djump <- unique(simest$results_summary$K_select_BM2_Djump)
return(simest)
}
assign(paste(simests), lapply(eval(simests), add_K_select_to_list_BirgeMassart))
######################
## Extract inferences with K_true or K_select
######################
## Extract the results where K_try = K_e (K_true or something else)
extract_result <- function(simest, Ke){
K_try <- 0:(length(simest$params_estim) - 1)
if (Ke >= length(simest$params_estim)) return(NULL)
fun <- function(l){
if (Ke %in% names(l)) return(l[[paste(Ke)]])
return(l)
}
res <- lapply(simest, fun)
res$results_summary <- subset(simest$results_summary, K_try == Ke)
return(res)
}
## Extract K_true
extract_result_K_true <- function(simest){
K_true <- simest$K
return(extract_result(simest, K_true))
}
simests_K_true <- as.name(paste0("simestimations", favorable, student, ak, "_K_true"))
assign(paste(simests_K_true), lapply(eval(simests), extract_result_K_true))
## Extract K_select
extract_result_K_select <- function(simest){
K_select <- simest$K_select
return(extract_result(simest, K_select))
}
simests_K_select <- as.name(paste0("simestimations", favorable, student, ak, "_K_select"))
assign(paste(simests_K_select), lapply(eval(simests), extract_result_K_select))
# #########################################################
# ## Computation of means at tips
# #########################################################
# compute_mean_estim <- function(simest){
# if (is.null(simest)){
# return(NULL)
# } else {
# XX <- simulate_internal(phylo = trees[[paste(simest$ntaxa)]],
# process = "OU",
# root.state = simest$params_estim$root.state,
# variance = simest$params_estim$variance,
# shifts = simest$params_estim$shifts,
# selection.strength = simest$params_estim$selection.strength,
# optimal.value = simest$params_estim$optimal.value)
# m_Y_estim <- extract_simulate_internal(XX, where="tips", what="expectations")
# simest$m_Y_estim <- m_Y_estim
# return(simest)
# }
# }
#
# simestimations_alpha_known_K_true <- lapply(simestimations_alpha_known_K_true,
# compute_mean_estim)
# simestimations_alpha_known_K_select<-lapply(simestimations_alpha_known_K_select,
# compute_mean_estim)
###########################################
## Scores involving lists
## (Compute complex score and add them to result summary for each element of the list)
###########################################
sensitivity <- function(tr.edges, est.edges) { # Sensibilité
if (is.null(tr.edges) ) return(NA) # If no true shift at all, any shift found is misplaced
if (is.null(est.edges)) return(0) # If no estimated shift, TP = 0
return(sum(est.edges %in% tr.edges)/length(tr.edges))
}
specificity <- function(tr.edges, est.edges, nedges) { # spécificité
edges <- 1:nedges
if (is.null(est.edges)) est.edges <- nedges + 1
if (is.null(tr.edges)) tr.edges <- nedges + 1
return(sum(edges[-est.edges] %in% edges[-tr.edges])/length(edges[-tr.edges]))
}
precision <- function(tr.edges, est.edges) { # Precision
if (is.null(tr.edges) ) return(NA) # If no true shift at all, any shift found is misplaced
if (is.null(est.edges)) return(0) # If no estimated shift, TP = 0
return(sum(est.edges %in% tr.edges)/length(est.edges))
}
match.partitions <- function(tr, est, nta) {
tree <- trees[[paste0(nta)]]
part.list <- subtree.list[[paste0(nta)]]
## computes ARI between partitions induced by edges in tr$edges and est$edges
# if (is.null(est$edges)){
# return(-1) # If lasso initialization failed
# } else {
require(mclust)
part1 <- clusters_from_shifts(tree, tr$edges, part.list)
part2 <- clusters_from_shifts(tree, est$edges, part.list)
return(adjustedRandIndex(part1, part2))
# }
}
euclidian_distance <- function(m1, m2){
return(sum((m2 - m1)^2))
}
compute_list_scores_element <- function(simest){
if (is.null(simest)){ # || !simest$results_summary$CV_estim){
# return(data.frame(mean.edge.quality = NA, edge.recovery = NA,
# edge.init.recovery = NA, adjrandind = NA, adjrandind.init = NA))
return(NULL)
} else {
df <- data.frame(
mean.edge.quality = mean(simest$edge.quality),
sensitivity = sensitivity(simest$shifts$edges,
simest$params_estim$shifts$edges),
sensitivity.init = sensitivity(simest$shifts$edges,
simest$params_init_estim$shifts$edges),
specificity = specificity(simest$shifts$edges,
simest$params_estim$shifts$edges,
length(simest$Z_data) + length(simest$Y_data) - 1),
specificity.init = specificity(simest$shifts$edges,
simest$params_init_estim$shifts$edges,
length(simest$Z_data) + length(simest$Y_data) - 1),
precision = precision(simest$shifts$edges,
simest$params_estim$shifts$edges),
precision.init = precision(simest$shifts$edges,
simest$params_init_estim$shifts$edges),
adjrandind = match.partitions(simest$shifts,
simest$params_estim$shifts,
simest$ntaxa),
adjrandind.init = match.partitions(simest$shifts,
simest$params_init_estim$shifts,
simest$ntaxa),
distance_means = euclidian_distance(simest$m_Y_data,
simest$m_Y_estim)
)
simest$results_summary <- cbind(simest$results_summary, df)
return(simest)
}
}
assign(paste(simests_K_true),
lapply(eval(simests_K_true), compute_list_scores_element))
assign(paste(simests_K_select),
lapply(eval(simests_K_select), compute_list_scores_element))
######################
## Format the data
######################
extract_data_frame <- function(simests){
dd <- do.call(rbind, simests)
results_summary <- as.data.frame(do.call(rbind, dd[,"results_summary"]))
return(results_summary)
}
res_sum <- as.name(paste0("results_summary", favorable, student, ak))
res_sum_K_true <- as.name(paste0("results_summary", favorable, student, ak, "_K_true"))
res_sum_K_select <- as.name(paste0("results_summary", favorable, student, ak, "_K_select"))
res_sum_K_select_true <- as.name(paste0("results_summary", favorable, student, ak, "_K_select_true"))
# assign(paste(res_sum), extract_data_frame(eval(simests)))
assign(paste(res_sum_K_true), extract_data_frame(eval(simests_K_true)))
assign(paste(res_sum_K_select), extract_data_frame(eval(simests_K_select)))
assign(paste0(res_sum_K_true),
transform(eval(res_sum_K_true), K_type = "K_true"))
assign(paste0(res_sum_K_select),
transform(eval(res_sum_K_select), K_type = "K_select"))
assign(paste(res_sum_K_select_true), rbind(eval(res_sum_K_true),
eval(res_sum_K_select)))
comp_time <- as.name(paste0("computation_time", favorable, student, ak))
assign(paste(comp_time),
sum(eval(res_sum_K_true)$total_time))
m_time <- as.name(paste0("mean_time", favorable, student, ak))
assign(paste(m_time),
mean(eval(res_sum_K_true)$total_time))
######################################################################
## Sumarizing Function
######################################################################
## Simple Scores
RMSE <- function(y, true.value) {
return(sqrt(mean((y - true.value)^2)))
}
compute_scores <- function(res_sum){
# Combinations of "true" parameters (no n)
# Aggregation of parameter estimates conditonal upon convergence of EM procedure
return(ddply(subset(res_sum, CV_estim == 1),
.(alpha, gamma, K, ntaxa, grp),
summarize,
## Simple Scores
alpha.mean = mean(alpha_estim),
alpha.sd = sd(alpha_estim),
alpha.RMSE = RMSE(alpha_estim, unique(alpha)),
alpha.init.RMSE = RMSE(alpha_estim_init, unique(alpha)),
gamma.mean = mean(gamma_estim),
gamma.sd = sd(gamma_estim),
gamma.RMSE = RMSE(gamma_estim, unique(gamma)),
gamma.init.RMSE = RMSE(gamma_estim_init, unique(gamma)),
beta_0.mean = mean(beta_0_estim),
beta_0.sd = sd(beta_0_estim),
beta_0.RMSE = RMSE(beta_0_estim, beta_0),
beta_0.init.RMSE = RMSE(beta_0_estim_init, beta_0),
log_likelihood.mean = mean(log_likelihood),
log_likelihood.mean.init = mean(log_likelihood_init),
mean_number_new_shifts.mean = mean(mean_number_new_shifts),
number_equivalent_solutions.mean = mean(number_equivalent_solutions),
difficulty.mean = mean(difficulty),
EM_steps.mean = mean(EM_steps, na.rm = TRUE),
K.match = mean(K == K_try),
K.match.leq = mean(K >= K_try),
## List Scores
mean.edge.quality.mean = mean(mean.edge.quality),
sensitivity.mean = mean(sensitivity),
sensitivity.init.mean = mean(sensitivity.init),
specificity.mean = mean(specificity),
specificity.init.mean = mean(specificity.init),
precision.mean = mean(precision),
precision.init.mean = mean(precision.init),
mean.adjrandind = mean(adjrandind),
mean.adjrandind.init = mean(adjrandind.init),
mean.distance_means = mean(distance_means),
total_time.mean = mean(total_time)
))
}
sum_scores_K_true <- as.name(paste0("summary_scores", favorable, student, ak, "_K_true"))
sum_scores_K_select <- as.name(paste0("summary_scores", favorable, student, ak, "_K_select"))
sum_scores_K_select_true <- as.name(paste0("summary_scores", favorable, student, ak, "_K_select_true"))
assign(paste(sum_scores_K_true), compute_scores(eval(res_sum_K_true)))
assign(paste(sum_scores_K_select), compute_scores(eval(res_sum_K_select)))
assign(paste0(sum_scores_K_true),
transform(eval(sum_scores_K_true), K_type = "K_true"))
assign(paste0(sum_scores_K_select),
transform(eval(sum_scores_K_select), K_type = "K_select"))
assign(paste(sum_scores_K_select_true),
rbind(eval(sum_scores_K_true),
eval(sum_scores_K_select)))
rm(list = c(paste(sum_scores_K_true), paste(sum_scores_K_select)))
save.image(paste0(saveresultfile, "_aranged", favorable, student, ak, "-", datestamp_day, "_heavy.RData"))
rm(list = c("simparams_alpha", "simparams_base",
"simparams_gamma", "simparams_K",
"comp_time", "m_time",
paste(simests), "simests",
paste(simests_K_true), "simests_K_true",
paste(simests_K_select), "simests_K_select",
paste(res_sum_K_true), "res_sum_K_true",
paste(res_sum_K_select), "res_sum_K_select"))
save.image(paste0(saveresultfile, "_aranged", favorable, student, ak, "-", datestamp_day, ".RData"))
##################################################################
## Merge alpha known and unknown
##################################################################
rm(list = ls())
## Alpha unknown - Model simulation
saveresultfile = "../Results/Simulations_Several_K/several_K_estimations"
datestamp_day <- "2015-03-24"
ak <- ""
load(paste0(saveresultfile, "_aranged", ak, "-", datestamp_day, ".RData"))
results_summary_K_select_true$alpha_known <- FALSE
results_summary_K_select_true$student <- Inf
summary_scores_K_select_true$alpha_known <- FALSE
summary_scores_K_select_true$student <- Inf
## Alpha known - Model simulation
saveresultfile = "../Results/Simulations_Several_K/several_K_estimations"
datestamp_day <- "2015-03-17"
ak <- "_alpha_known"
load(paste0(saveresultfile, "_aranged", ak, "-", datestamp_day, ".RData"))
results_summary_alpha_known_K_select_true$alpha_known <- TRUE
results_summary_alpha_known_K_select_true$student <- Inf
summary_scores_alpha_known_K_select_true$alpha_known <- TRUE
summary_scores_alpha_known_K_select_true$student <- Inf
## Alpha unknown - Student simulation
saveresultfile = "../Results/Simulations_Student/student_estimations"
datestamp_day <- "2015-09-09"
ak <- ""
favorable <- "favorables"
student <- "_df5"
load(paste0(saveresultfile, "_aranged", favorable, student, ak, "-", datestamp_day, ".RData"))
results_summaryfavorables_df5_K_select_true$alpha_known <- FALSE
results_summaryfavorables_df5_K_select_true$student <- 5
summary_scoresfavorables_df5_K_select_true$alpha_known <- FALSE
summary_scoresfavorables_df5_K_select_true$student <- 5
names_int <- intersect(names(results_summaryfavorables_df5_K_select_true),
c(names(results_summary_K_select_true),
names(results_summary_alpha_known_K_select_true)))
results_summary_K_select_true <- rbind(results_summary_K_select_true[, names_int],
results_summary_alpha_known_K_select_true[, names_int],
results_summaryfavorables_df5_K_select_true[, names_int])
rm(results_summary_alpha_known_K_select_true, results_summaryfavorables_df5_K_select_true)
names_int <- intersect(names(summary_scoresfavorables_df5_K_select_true),
c(names(summary_scores_K_select_true),
names(summary_scores_alpha_known_K_select_true)))
summary_scores_K_select_true <- rbind(summary_scores_K_select_true[, names_int],
summary_scores_alpha_known_K_select_true[, names_int],
summary_scoresfavorables_df5_K_select_true[, names_int])
rm(summary_scores_alpha_known_K_select_true, summary_scoresfavorables_df5_K_select_true)
datestamp_day <- format(Sys.time(), "%Y-%m-%d")
save.image(paste0(saveresultfile, "_aranged_both_alpha", "-", datestamp_day, ".RData"))
######################################################################
## Equivalent Solutions
######################################################################
saveresultfile = "../Results/Simulations_Several_K/several_K_estimations"
datestamp_day <- "2015-03-24" # 17
ak <- "" # "_alpha_known"
load(paste0(saveresultfile, "_aranged", ak, "-", datestamp_day, "_heavy.RData"))
indexes <- which((results_summary_K_select$number_equivalent_solutions > 1))
length(indexes)
simestimations_K_select_eqs <- simestimations_K_select[indexes]
results_summary_K_select_eqs <- results_summary_K_select[indexes,]
save(simestimations_K_select_eqs, results_summary_K_select_eqs, file = paste0(saveresultfile, "_equivalent_solutions", ak, "-", datestamp_day, ".RData"))
#################################################################
## Plots
#################################################################
load(paste0(saveresultfile, "_aranged", "_alpha_known-", datestamp_day, "_", inference.index, "_heavy.RData"))
## Process
conf <- 17
pars <- simestimations_alpha_known_K_select[[conf]]
## Clustering of tips and edges
plot_data.process.actual(Y.state = pars$m_Y_estim,
phylo = trees[[paste(pars$ntaxa)]],
params = list(shifts = pars$params_estim$shifts,
optimal.value = pars$params_estim$optimal.value),
adj = 2,
automatic_colors = TRUE)
pars <- simlist[[conf]]
plot_data.process.actual(Y.state = pars$m_Y_data,
phylo = trees[[paste(pars$ntaxa)]],
params = list(shifts = pars$shifts,
optimal.value = beta_0),
adj = 2,
automatic_colors = TRUE)
## Utility functions
gtable_filter <- function (x, pattern, fixed = FALSE, trim = TRUE, complementary = FALSE)
{
matches <- grepl(pattern, x$layout$name, fixed = fixed)
if (complementary) matches <- !matches
x$layout <- x$layout[matches, , drop = FALSE]
x$grobs <- x$grobs[matches]
if (trim)
x <- gtable_trim(x)
x
}
g_table_replace_xlab_strips <- function(g){
xlab <- gtable_filter(g, "xlab", trim = FALSE, complementary = FALSE)
g <- gtable_filter(g, "xlab", trim = FALSE, complementary = TRUE)
matches <- grepl("strip-top", g$layout$name)
g$layout[matches, "t"] <- xlab$layout[1,"t"]
g$layout[matches, "b"] <- xlab$layout[1,"b"]
return(g)
}
g_table_put_right_strips <- function(g, grid){
# Find x label and delete it
xlab <- gtable_filter(g, "xlab", trim = FALSE, complementary = FALSE)
g <- gtable_filter(g, "xlab", trim = FALSE, complementary = TRUE)
# delete unused strips
g <- gtable_filter(g, "strip_t-[4-9]", trim = FALSE, complementary = TRUE)
# deplace used strips
matches <- grepl("strip_t", g$layout$name)
g$layout[matches, "t"] <- xlab$layout[1,"t"]
g$layout[matches, "b"] <- xlab$layout[1,"b"]
# rename used strips
# fun <- function(strip){
# index <- grep("strip.text.x.text", names(strip$children))
# text <- strip$children[[index]]$label
# strip$children[[index]]$label <- sub(".*, ", "", text)
# strip$children[[index]]$label <- lapply(strip$children[[index]]$label,
# function(x) parse(text = x))
# return(strip)
# }
# g$grobs[matches] <- lapply(g$grobs[matches], fun)
# g$grob[matches] <- lapply(g$grob[matches], fun)
return(g)
}
facet_wrap_labeller <- function(g, labels = NULL) {
gg <- g$grobs
strips <- grep("strip_t", names(gg))
for(ii in seq_along(labels)) {
modgrob <- getGrob(gg[[strips[ii]]], "strip.text",
grep=TRUE, global=TRUE)
gg[[strips[ii]]]$children[[modgrob$name]] <- editGrob(modgrob,label=labels[ii])
}
g$grobs <- gg
return(g)
}
my.labeller <- function(variable, value) {
value <- paste(variable, "==", as.character(value))
value <- lapply(value, function(x) parse(text = x))
return(value)
}
## Format data for plots
format_plot <- function(df){
# Save true values
df$alpha.true <- df$alpha
df$gamma.true <- df$gamma
df$K.true <- df$K
# Transformations and names
df[["ln(2)/alpha"]] <- log(2)/df$alpha
df[["sigma^2 == 2*alpha*gamma^2"]] <- 2*df$alpha*df$gamma
# Melt
df_plot <- melt(df,
measure.vars = c("ln(2)/alpha", "sigma^2 == 2*alpha*gamma^2", "K"),
value.name = "parameter.value")
# Supress lines where group != variable
grp_var <- c(alpha_var = "ln(2)/alpha",
gamma_var = "sigma^2 == 2*alpha*gamma^2",
K_var = "K")
fun <- function(z){
if (z["grp"] == "base") return(TRUE)
return(unname(grp_var[z["grp"]] == z["variable"]))
}
masque <- apply(df_plot, 1, fun)
df_plot <- df_plot[masque,]
df_plot$ntaxa <- as.factor(df_plot$ntaxa)
df_plot$parameter.value <- round(df_plot$parameter.value, 2)
df_plot$parameter.value <- as.factor(df_plot$parameter.value)
return(df_plot)
}
summary_scores_K_select_true_plot <- format_plot(summary_scores_K_select_true)
results_summary_K_select_true_plot <- format_plot(results_summary_K_select_true)
## Plot functions
plot_results_score <- function(score, score_name){
p <- ggplot(summary_scores_K_select_true_plot,
aes_string(x = "parameter.value",
y = score,
linetype = "K_type",
color = "alpha_known"))
p <- p + aes(group = interaction(alpha_known, K_type))
p <- p + facet_wrap(ntaxa ~ variable, scales = "free")
p <- p + scale_linetype(name = "K",
labels = c("Known", "Estimated"))
p <- p + geom_line()
p <- p + scale_colour_discrete(name = expression(alpha),
breaks = c(TRUE, FALSE),
labels = c("Known", "Estimated"))
p <- p + labs(x = "",
y = score_name)
#p <- p + scale_x_continuous(trans = log_trans(10))
p <- p + theme_bw()
p <- p + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 1))
p <- p + theme(axis.text = element_text(size = 12),
strip.text = element_text(size = 12),
strip.background = element_blank()
##legend.position = c(0, 1),
##legend.justification = c(0, 1)
)
g <- ggplotGrob(p)
g <- g_table_put_right_strips(g)
labels <- unname(sapply(levels(summary_scores_K_select_true_plot$variable),
function(z) parse(text = z)))
g <- facet_wrap_labeller(g, labels = labels)
grid.newpage()
grid.draw(g)
}
plot_results_score_smooth <- function(score, score_name, plot_points = TRUE){
replace_name <- function(i){
i <- as.integer(as.character(i))
i <- formatC(i, width = 3, format = "d", flag = "0")
i <- paste0("ntaxa == ", i)
return(i)
}
data <- results_summary_K_select_true_plot
data$ntaxa <- sapply(data$ntaxa, replace_name)
data$ntaxa <- as.factor(data$ntaxa)
p <- ggplot(data,
aes_string(x = "parameter.value",
y = score,
color = "K_type"),
shrink = TRUE)
# p <- p + aes(group = interaction(ntaxa, K_type))
p <- p + facet_grid(ntaxa ~ variable,
labeller = label_parsed,
scales = "free",
shrink = TRUE)
if (plot_points){
p <- p + geom_point(alpha = 0.2,
position = position_jitter(width = 0.4, height = 0))
} else {
sts <- quantile(data[[score]], probs = c(0.05, 0.95)) # Compute lower and upper whisker limits
p = p + coord_cartesian(ylim = c(sts[1], sts[2]))
}
p <- p + geom_boxplot(outlier.size = 0, alpha = 0.5)
p <- p + geom_smooth(aes(group = K_type), method = "loess", se = FALSE, size = 1.2)
p <- p + scale_colour_discrete(name = "Estimation")
p <- p + labs(x = "",
y = score_name)
#p <- p + scale_x_continuous(trans = log_trans(10))
p <- p + theme_bw()
p <- p + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 1))
p <- p + theme(axis.text = element_text(size = 12),
strip.text = element_text(size = 12),
strip.background = element_blank()
##legend.position = c(0, 1),
##legend.justification = c(0, 1)
)
g <- ggplotGrob(p)
g <- g_table_replace_xlab_strips(g)
grid.newpage()
grid.draw(g)
}
plot_results_K_select_true <- function(name){
replace_name <- function(i){
i <- as.integer(as.character(i))
i <- formatC(i, width = 3, format = "d", flag = "0")
i <- paste0("ntaxa == ", i)
return(i)
}
data <- results_summary_K_select_true_plot
data$ntaxa <- sapply(data$ntaxa, replace_name)
data$ntaxa <- as.factor(data$ntaxa)
data$parameter.value <- data$parameter.value
p <- ggplot(data,
aes_string(x = "parameter.value",
y = "K_select",
color = "alpha_known"))
# p <- p + aes(group = interaction(ntaxa, K_type))
p <- p + facet_wrap(ntaxa ~ variable,
#labeller = label_parsed,
scales = "free",
shrink = TRUE)
# p <- p + geom_point(alpha = 0.2,
# position = position_jitter(width = 0.4, height = 0))
p <- p + geom_boxplot(outlier.size = 0.5, alpha = 0.5)
p <- p + geom_point(aes_string(x = "parameter.value", y = "K.true",
group = interaction("ntaxa", "variable")),
colour = "black", size = 2)
p <- p + scale_colour_discrete(name = expression(alpha),
breaks = c(TRUE, FALSE),
labels = c("Known", "Estimated"))
p <- p + labs(x = "",
y = name)
#p <- p + scale_x_continuous(trans = log_trans(10))
p <- p + theme_bw()
p <- p + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 1))
p <- p + theme(axis.text = element_text(size = 12),
strip.text = element_text(size = 12),
strip.background = element_blank()
##legend.position = c(0, 1),
##legend.justification = c(0, 1)
)
g <- ggplotGrob(p)
g <- g_table_put_right_strips(g)
labels <- unname(sapply(levels(results_summary_K_select_true_plot$variable),
function(z) parse(text = z)))
g <- facet_wrap_labeller(g, labels = labels)
grid.newpage()
grid.draw(g)
}
plot_results_estim_vs_true <- function(estim, true, name, sub, val){
replace_name <- function(i){
i <- as.integer(as.character(i))
i <- formatC(i, width = 3, format = "d", flag = "0")
i <- paste0("ntaxa == ", i)
return(i)
}
data <- subset(results_summary_K_select_true_plot,
results_summary_K_select_true_plot[, sub] == val)
data$ntaxa <- sapply(data$ntaxa, replace_name)
data$ntaxa <- as.factor(data$ntaxa)
data$parameter.value <- data$parameter.value
if (sub == "K_type"){
p <- ggplot(data,
aes_string(x = "parameter.value",
y = estim,
color = "alpha_known"))
p <- p + scale_colour_discrete(name = expression(alpha),
breaks = c(TRUE, FALSE),
labels = c("Known", "Estimated"))
} else {
p <- ggplot(data,
aes_string(x = "parameter.value",
y = estim,
color = "K_type"))
p <- p + scale_colour_discrete(name = "K",
labels = c("Known", "Estimated"))
}
# p <- p + aes(group = interaction(ntaxa, K_type))
p <- p + facet_wrap(ntaxa ~ variable,
#labeller = label_parsed,
scales = "free",
shrink = TRUE)
# p <- p + geom_point(alpha = 0.2,
# position = position_jitter(width = 0.4, height = 0))
p <- p + geom_boxplot(outlier.size = 0.5, alpha = 0.5)
if (estim == "log_likelihood"){
# p <- p + geom_boxplot(aes_string(x = "parameter.value", y = true),
# colour = "red", outlier.size = 0.5, alpha = 0.1)
p <- p + stat_summary(fun.y = median, geom = "point",
aes_string(x = "parameter.value", y = true,
group = interaction("ntaxa", "variable")),
colour = "black", size = 2)
} else {
p <- p + geom_point(aes_string(x = "parameter.value", y = true,
group = interaction("ntaxa", "variable")),
colour = "black", size = 2)
}
p <- p + labs(x = "",
y = name)
#p <- p + scale_x_continuous(trans = log_trans(10))
p <- p + theme_bw()
p <- p + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 1))
p <- p + theme(axis.text = element_text(size = 12),
strip.text = element_text(size = 12),
strip.background = element_blank()
##legend.position = c(0, 1),
##legend.justification = c(0, 1)
)
g <- ggplotGrob(p)
g <- g_table_put_right_strips(g)
labels <- unname(sapply(levels(results_summary_K_select_true_plot$variable),
function(z) parse(text = z)))
g <- facet_wrap_labeller(g, labels = labels)
grid.newpage()
grid.draw(g)
}
## Gamma RMSE
plot_results_score("gamma.RMSE/gamma.true",
expression(paste("RMSE(", gamma^2, ")/", gamma^2)))
plot_results_score_smooth("gamma_estim",
expression(paste("Estimated ", gamma^2)))
plot_results_estim_vs_true("gamma_estim", "gamma.true",
expression(gamma^2),
"alpha_known", TRUE)
plot_results_estim_vs_true("gamma_estim", "gamma.true",
expression(gamma^2),
"alpha_known", FALSE)
plot_results_score_alpha("gamma.RMSE/gamma.true",
expression(paste("RMSE(", gamma^2, ")/", gamma^2)))
## beta_0 RMSE
plot_results_score("beta_0.RMSE",
expression(paste("RMSE(", beta[0], ")")))
plot_results_score_smooth("beta_0_estim",
expression(paste("Estimated ", beta[0])))
plot_results_score_alpha("beta_0.RMSE",
expression(paste("RMSE(", beta[0], ")")))
## alpha RMSE
plot_results_score("alpha.RMSE/alpha.true",
expression(paste("RMSE(", alpha, ")/", alpha)))
## log_likelihood
plot_results_score("log_likelihood.mean", "Log Likelihood")
plot_results_score_smooth("log_likelihood", "Log Likelihood")
## ARI
plot_results_score("mean.adjrandind", "ARI")
plot_results_score_smooth("adjrandind", "ARI")
## Euclidian distances
plot_results_score("mean.distance_means", "Euclidian distance")
plot_results_score_smooth("distance_means", "Euclidian distance")#, plot_points = FALSE)
## K match
plot_results_score("K.match", "Percent Exact Match")
## K_s
plot_results_K_select_true("K select")
## Sensibilité, spécificité, précision
plot_results_score_smooth("sensitivity", "Sensitivity")
plot_results_score_smooth("1 - specificity", "False Positive Rate")
plot_results_score_smooth("1 - precision", "False Discovery Rate")
## K_true vs K_select
nbr_inferences_K <- function(df){
len <- unique(floor(sqrt(nta)))
res <- vector(length = len + 1)
names(res) <- 0:len
for (K_e in 0:len){
res[K_e + 1] <- sum(df$K_select == K_e) / length(df$K_select)
}
names(res) <- 0:len
return(res)
}
incidences_K_select <- ddply(results_summary,
.(K, ntaxa),
nbr_inferences_K)
incidences_K_select <- melt(incidences_K_select,
id.vars = c("K", "ntaxa"),
variable.name = "K_select",
value.name = "frequence_observed")
incidences_K_select$K_select <- as.numeric(levels(incidences_K_select$K_select))[incidences_K_select$K_select]
incidences_K_select <- merge(results_summary, incidences_K_select)
p <- ggplot(incidences_K_select, aes(x = K, y = K_select, size = frequence_observed))
p <- p + facet_grid(. ~ ntaxa, labeller = my.labeller)
p <- p + geom_abline(intercept = 0, slope=1)
p <- p + geom_hline(aes(yintercept = floor(sqrt(ntaxa))))
p <- p + geom_smooth(method = "lm", se = FALSE, show_guide=FALSE)
p <- p + geom_point()
p <- p + scale_size(name = "Frequence Estimated")
# breaks = c("FALSE", "TRUE"),
# labels = c("Estimated", "Known"))
p <- p + theme_bw()
p <- p + theme(axis.text = element_text(size = 12),
strip.text = element_text(size = 12)
##legend.position = c(0, 1),
##legend.justification = c(0, 1)
)
p
## Faire un box plot
p <- ggplot(results_summary, aes(x = as.factor(K), y = K_select))
p <- p + facet_grid(. ~ ntaxa, labeller = my.labeller)
p <- p + geom_boxplot()
p <- p + geom_abline(intercept = 0, slope=1)
p <- p + geom_hline(aes(yintercept = floor(sqrt(ntaxa))))
p <- p + geom_smooth(aes(group = 1), method = "lm", se = FALSE, show_guide=FALSE)
p <- p + scale_size(name = "Frequence Estimated")
# breaks = c("FALSE", "TRUE"),
# labels = c("Estimated", "Known"))
p <- p + labs(x = "True K",
y = "Selected K")
p <- p + coord_cartesian(ylim = c(-0.5, 16.5))
p <- p + theme_bw()
p <- p + theme(axis.text = element_text(size = 12),
strip.text = element_text(size = 12)
##legend.position = c(0, 1),
##legend.justification = c(0, 1)
)
p
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.