#' Select a subset of models
#'
#' @param instance instance generated by \code{sample_mle}
#' @param methods character, regular expression of allowed methods, e.g. "glmnet|xgbTree";
#' if NA, all methods are allowed (i.e. NA is a shortcut for
#' "glmnet|xgbTree|rpartCost|svmLinearWeights2")
#' @param M integer, number of models to select
#' @param M.start integer, index of the first model to select within each method block;
#' if M.start = NA, models are chosen randomly according to M.probs
#' @param M.probs character, sampling weights for the random draw, one of "uniform",
#' "learn.theta", "learn.theta.squared"
#' @param M.seed integer, random seed for the random draw of models
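#' @return the input \code{instance}, reduced to the selected models (predictions,
#' hyperparameters and performance estimates are subset accordingly)
#' @examples
#' \dontrun{
#' ## Illustrative sketch only; assumes `instance` was generated by sample_mle and
#' ## contains sufficiently many models per method.
#' ## Random draw of 100 glmnet/xgbTree models, weighted by learning-set performance:
#' sub1 <- select_setM(instance, methods = "glmnet|xgbTree", M = 100,
#'                     M.probs = "learn.theta", M.seed = 42)
#' ## Deterministic choice: 25 consecutive models per method, starting at index 1:
#' sub2 <- select_setM(instance, M = 100, M.start = 1)
#' }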
select_setM <- function(instance,
methods = NA,
M=200,
M.start = NA,
M.probs = c("uniform", "learn.theta", "learn.theta.squared"),
M.seed = 1){
# return all models if required:
if(M >= length(instance$models)){
return(instance)
}
# derive number of model types
if(is.na(methods)){
methods <- "glmnet|xgbTree|rpartCost|svmLinearWeights2"
}
methods.num <- length(strsplit(methods, "\\|")[[1]])
# setM0 = setM after reduction to relevant methods:
setM0 <- instance$models[grepl(methods, instance$models)]
M0 <- length(setM0); stopifnot(M <= M0)
# if no start index is given: sample randomly according to the probabilities defined by M.probs
if(is.na(M.start)){
set.seed(M.seed)
M.probs <- match.arg(M.probs)
pr <- switch(M.probs,
uniform = rep(1,M0),
learn.theta = instance$learn.models$theta[setM0, "theta"],
learn.theta.squared = instance$learn.models$theta[setM0, "theta"]^2)
subs <- sample(M0, M, replace=FALSE, prob=pr)
}else{
# deterministic choice: take b = M/methods.num consecutive models per method, starting at
# offset M.start within each method block (assuming each method contributes M0/methods.num
# models to setM0)
b <- M/methods.num; stopifnot(b %% 1 == 0)
subs <- as.integer(sapply((0:(methods.num-1))*(M0/methods.num),
function(x){x + M.start:(M.start+b-1)}))
}
sel <- instance$models %in% setM0[subs]
# rebuild the instance from the selected models
out <- instance[1:2]
out$models <- instance$models[sel]
out$hyperparams <- lapply(instance$hyperparams, function(h){h[rownames(h) %in% out$models, ,drop=FALSE]})
out$train.models <- subset_models(instance$train.models, sel)
out$learn.models <- subset_models(instance$learn.models, sel)
return(out)
}
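# subset_models restricts a train.models/learn.models container to the selected models:
# within the first two list components (e.g. $val), the prediction matrix (pred) and the
# estimates (thetahat) are subset by the logical vector sel, as is the overall theta table.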
subset_models <- function(models, sel){
out <- lapply(models[1:2], function(x){
if(is.list(x)){
x$pred <- x$pred[,sel]; x$thetahat <- x$thetahat[sel, ]; return(x)
}})
out$theta <- models$theta[sel, ]
return(out)
}
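# subset_obs selects the indices of the evaluation observations: either a random draw of
# n.eval indices from 1:n.max (rdm.eval = TRUE) or a consecutive block of n.eval indices
# starting at first.eval; if n.eval exceeds n.max, all n.max observations are returned.
# Usage sketch:
# subset_obs(rdm.eval = FALSE, first.eval = 11, n.eval = 5, n.max = 100)  # 11 12 13 14 15
# subset_obs(rdm.eval = TRUE,  first.eval = 1,  n.eval = 5, n.max = 100)  # 5 random indices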
subset_obs <- function(rdm.eval, first.eval, n.eval, n.max=NULL){
# return all observations if more are requested than available
# (the is.null() guard avoids an error when no upper limit n.max is supplied):
if(!is.null(n.max) && n.eval > n.max){
return(1:n.max)
}
if(rdm.eval){
obs.eval <- sample(1:n.max, n.eval, replace=FALSE)
}else{
obs.eval <- first.eval:(first.eval+n.eval-1)
}
return(obs.eval)
}
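# unique_max returns the index of the maximum of x; ties are broken at random.
# Example: unique_max(c(0.8, 0.9, 0.7)) returns 2; unique_max(c(0.9, 0.9, 0.7)) returns 1 or 2.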
unique_max <- function(x){
out <- which(x == max(x))
if(length(out)==1){
return(out)
}else{
return(sample(out, 1))
}
}
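# get_hp looks up the hyperparameter row of a single fitted model by its name (fmn) across
# the per-method hyperparameter tables in instance$hyperparams.
# Example (hypothetical model name): get_hp("glmnet_001", instance)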
get_hp <- function(fmn, instance){
i <- which(sapply(instance$hyperparams, function(x)fmn %in% rownames(x)))
return(instance$hyperparams[[i]][fmn, , drop=FALSE])
}
# select_models -------------------------------------------------------------------------------
select_models <- function(instance,
comp,
n.eval,
analysis,
select.method = c("oracle", "optimal", "close", "best", "simplest.en"),
select.args = "",
select.limit = c("none", "sqrt", "one")){
select.method <- match.arg(select.method)
M <- length(instance$models)
## maximum number of models to select:
select.max <- switch(match.arg(select.limit),
none = Inf,
one = 1,
sqrt = round(sqrt(n.eval)))
# select.method = "oracle" --------------------------------------------------------------------
if(select.method == "oracle"){
a <- string2list(select.args)
a <- set_default(a, "type", paste0("learn.", analysis))
stopifnot(a$type %in% c("train.acc", "learn.acc", "train.cpe", "learn.cpe"))
tt <- switch(a$type,
train.acc = instance[["train.models"]]$theta$theta,
learn.acc = instance[["learn.models"]]$theta$theta,
train.cpe = pmin(instance[["train.models"]]$theta$theta1,
instance[["train.models"]]$theta$theta0),
learn.cpe = pmin(instance[["learn.models"]]$theta$theta1,
instance[["learn.models"]]$theta$theta0)
)
setS <- SEPM::select(comp,
method = "user",
sel=which(tt==max(tt)))
}
# select.method = "optimal" -------------------------------------------------------------------
if(select.method == "optimal"){
a <- string2list(select.args) %>%
set_default("fixed_prev", FALSE) %>%
set_default("rdm_eval", TRUE) %>%
set_default("method_select", "few") %>%
set_default("method_order", "param") %>%
set_default("target_order", "tstat") %>%
set_default("combine_order", "min") %>%
set_default("max_iter", 50) %>%
set_default("target_tol", 1e-4) %>%
set_default("method_ext", "basic") %>%
set_default("method_pred", "mbeta_approx")
prev_eval <- ifelse(as.logical(a$fixed_prev), instance$info['pop', 'prev'], NA)
setS <- SEPM::select_optimal(comparison = comp,
method_select = a$method_select,
method_order = a$method_order,
target_order = a$target_order,
combine_order = a$combine_order,
max_models = select.max,
n_eval = n.eval,
prev_eval = prev_eval,
n_val = instance$info['val', 'n'],
prev_val = instance$info['val', 'prev'],
rdm = as.logical(a$rdm_eval),
threshold = comp$hypothesis$threshold,
batch_size = 10,
max_iter = as.numeric(a$max_iter),
target_tol = as.numeric(a$target_tol),
method_ext = a$method_ext,
method_pred = a$method_pred,
return_simvals = FALSE,
steady_plot = FALSE,
save_plot = FALSE,
info_plot = FALSE,
ylim_plot = NULL)
}
# select.method = "close" ---------------------------------------------------------------------
if(select.method == "close"){
a <- string2list(select.args)
a <- set_default(a, "k", 1)
a <- set_default(a, "mode", "weighted")
setS <- SEPM::select_close(comp,
k= as.numeric(a$k),
mode = a$mode,
threshold = comp$hypothesis$threshold,
max_models = select.max,
regu = 1,
break_ties = TRUE)
}
# select.method = "best" ----------------------------------------------------------------------
if(select.method == "best"){
a <- string2list(select.args)
a <- set_default(a, "mode", "weighted")
setS <- SEPM::select_best(comp,
mode = a$mode,
threshold = comp$hypothesis$threshold,
max_models = select.max,
regu = 1,
break_ties = TRUE)
}
# select.method = "simplest.en" ---------------------------------------------------------------
if(select.method == "simplest.en"){
# among the elastic net (glmnet) models whose estimated validation performance (theta) lies
# within one binomial standard error of the best one, choose the model with the largest
# penalty product alpha * lambda (interpreted as the simplest model), similar in spirit to
# the one-standard-error rule:
hp.en <- instance$hyperparams$glmnet
stopifnot(nrow(hp.en)>0)
mm <- instance$models %in% rownames(hp.en)
tt <- instance$train.models$val$thetahat$theta[mm]; theta.max <- max(tt)
incl <- tt > theta.max - 1*sqrt(theta.max*(1-theta.max)/instance$info["val", "n"])
pp <- hp.en$alpha * hp.en$lambda; pp[!incl] <- -Inf
setS <- SEPM::select(comp,
method = "user",
sel=which(instance$models %in% rownames(hp.en)[pp==max(pp)]))
}
return(setS)
}
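# Usage sketch (illustrative only; `instance` and `comp` are assumed to be objects created
# elsewhere, e.g. an instance from sample_mle and a SEPM comparison object; the pipe `%>%`
# used above is assumed to be provided via magrittr):
# setS <- select_models(instance, comp, n.eval = 100, analysis = "acc",
#                       select.method = "close", select.limit = "sqrt")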
set_default <- function(x, field, value){
stopifnot(is.list(x))
stopifnot(is.character(field))
if(is.null(x[[field]])){x[[field]] <- value}
return(x)
}
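# Example: set_default() only fills in missing entries and leaves existing ones untouched:
# set_default(list(k = 2), "k", 1)              # k stays 2
# set_default(list(k = 2), "mode", "weighted")  # adds mode = "weighted"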
# derive_result_mle --------------------------------------------------------------------------
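# derive_result_mle_acc assembles a one-row result data.frame for the accuracy analysis:
# the optimal learning-set accuracy, the number of selected models S, the inference settings,
# and the estimates and confidence bounds of the finally selected model (the model with the
# largest lower confidence bound, ties broken at random via unique_max).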
derive_result_mle_acc <- function(instance,
pred,
inf,
delta = 0,
final.choice="ci"){
result <- data.frame(load.id = instance$job$id,
opt.theta = max(instance$learn.models$theta$theta),
S = ncol(pred),
as.data.frame(inf$inference$info[3:5]))
mstar <- unique_max(inf$inference$result$lower)
final.mod.name <- as.character(inf$inference$result[mstar, "model.name"])
final.mod.index <- which(final.mod.name== instance$models)
final.mod.result <- derive_stats(final.mod.index, "final", instance)
final.mod.result$final.mod.thetabar <- inf$estimation$result$theta.bar[final.mod.name]
final.mod.result <- cbind(final.mod.result,
inf$inference$result[mstar, c("estimate", "corrected", "lower", "upper")])
colnames(final.mod.result)[(ncol(final.mod.result)-3):ncol(final.mod.result)] <-
paste0("final.acc.", c("thetahat", "thetatilde", "lower", "upper"))
return(cbind(result, final.mod.result))
}
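# derive_result_mle_cpe is the sensitivity/specificity counterpart of derive_result_mle_acc:
# the final model maximizes the minimum of the lower confidence bounds for specificity and
# (delta-shifted) sensitivity, and estimates for both endpoints are reported.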
derive_result_mle_cpe <- function(instance,
pred,
inf,
delta = 0){
result <- data.frame(load.id = instance$job$id,
opt.acc = max(instance$learn.models$theta$theta),
opt.se = max(instance$learn.models$theta$theta1),
opt.sp = max(instance$learn.models$theta$theta0),
S=ncol(pred), as.data.frame(inf$inference$info[3:5]))
mopt <- unique_max(pmin(instance$learn.models$theta$theta0,
instance$learn.models$theta$theta1 -delta))
result$opt.mod.name <- as.character(instance$models[mopt])
result$opt.sp.cpe <- instance$learn.models$theta$theta0[mopt]
result$opt.se.cpe <- instance$learn.models$theta$theta1[mopt]
mstar <- unique_max(pmin(inf$inference$result.0$lower,
inf$inference$result.1$lower - delta))
final.mod.name <- as.character(inf$inference$result.1[mstar, "model.name"])
final.mod.index <- which(final.mod.name== instance$models)
final.mod.result <- derive_stats(final.mod.index, "final", instance, details=TRUE)
final.mod.result$final.se.bar <- inf$estimation$result.1$theta.bar[final.mod.name]
final.mod.result$final.sp.bar <- inf$estimation$result.0$theta.bar[final.mod.name]
final.mod.result <- cbind(final.mod.result,
inf$inference$result.1[mstar, c("estimate", "corrected", "lower", "upper")])
colnames(final.mod.result)[(ncol(final.mod.result)-3):ncol(final.mod.result)] <-
paste0("final.se.", c("hat", "tilde", "lower", "upper"))
final.mod.result <- cbind(final.mod.result,
inf$inference$result.0[mstar, c("estimate", "corrected", "lower", "upper")])
colnames(final.mod.result)[(ncol(final.mod.result)-3):ncol(final.mod.result)] <-
paste0("final.sp.", c("hat", "tilde", "lower", "upper"))
return(cbind(result, final.mod.result))
}
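# derive_stats collects training-, learning- and validation-set performance estimates for a
# single model (given by index) and prefixes the resulting column names with `name`; with
# details = TRUE, sensitivity (theta1) and specificity (theta0) are added to the accuracy.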
derive_stats <- function(index,
name = "final.mod",
instance,
details = FALSE){
stopifnot(length(index)==1)
out <- data.frame(name = instance$models[index],
train.theta = instance$train.models$theta$theta[index],
learn.theta = instance$learn.models$theta$theta[index],
thetabar.val = instance$train.models$val$thetahat$theta[index])
if(details){
out$train.se = instance$train.models$theta$theta1[index]
out$learn.se = instance$learn.models$theta$theta1[index]
out$se.bar.val = instance$train.models$val$thetahat$theta1[index]
out$train.sp = instance$train.models$theta$theta0[index]
out$learn.sp = instance$learn.models$theta$theta0[index]
out$sp.bar.val = instance$train.models$val$thetahat$theta0[index]
}
names(out) <- paste(name, names(out), sep=".")
return(out)
}