#' Summary Methods for \code{beset} Objects
#'
#' These functions are all methods for class \code{beset} objects.
#'
#' @param object An object for which a summary is desired.
#'
#' @param n_pred (Optional) \code{integer} number of predictors that the best
#' model should contain. If specified, all other arguments are ignored.
#'
#' @param metric \code{Character} string giving prediction metric on which to
#' base model selection. Can be one of \code{"auc"} for area under the (ROC)
#' curve (only available for binomial family), \code{"mae"} for mean absolute
#' error (not available for binomial family), \code{"mae"} for mean absolute
#' error, \code{"mce"} for mean cross entropy, or \code{"mse"} for mean squared
#' error. Default is \code{"auto"} which plots MSE for Gaussian-family models
#' and MCE for all other families.
#'
#' @param oneSE \code{Logical} indicating whether or not to use the "one
#' standard error" rule. If \code{TRUE} (default) the simplest model within one
#' standard error of the optimal model is returned. If \code{FALSE} the model
#' with the optimal cross-validation performance is returned.
#'
#' @param robust \code{Logical} indicating whether or not to produce summary
#' output for only robust predictors, defined here as having non-zero beta
#' coefficients for every repetition of the cross-validation procedure.
#'
#' @param lambda Value of the elastic-net penalty parameter lambda at which
#' predictions are required. If omitted, cross-validated optimal or
#' 1SE-from-optimal lambda will be used, depending on \code{oneSE} argument.
#'
#' @param alpha Value of the elastic-net mixing parameter alpha at which
#' predictions are required. Must be a value of alpha that was tried during
#' training. If omitted, cross-validated optimal or 1SE-from-optimal alpha will
#' be used, depending on \code{oneSE} argument.
#'
#' @param ... Additional arguments passed to model summary methods.
#'
#' @import purrr
#' @export
summary.beset <- function(object, n_pred = NULL, alpha = NULL, lambda = NULL,
metric = "auto", oneSE = TRUE, ...){
if(inherits(object, "rf")){
return(summary.beset_rf(object, ...))
}
metric <- tryCatch(
match.arg(metric, c("auto", "aic", "auc", "mae", "mce", "mse", "rsq")),
error = function(c){
c$message <- gsub("arg", "metric", c$message)
c$call <- NULL
stop(c)
}
)
tryCatch(
if(
(metric == "auc" && object$family != "binomial") ||
(metric == "mae" && object$family == "binomial") ||
(metric == "aic" && inherits(object, "elnet"))
) error = function(c){
c$message <- paste(metric, "not available")
c$call <- NULL
stop(c)
}
)
if(metric == "auto"){
metric <- if(object$family == "gaussian") "mse" else "mce"
}
if(inherits(object, "elnet")){
best_model <- get_best(
object, alpha = alpha, lambda = lambda, metric = metric[1], oneSE = oneSE
)
best_idx <- with(
object$stats$fit,
which(alpha == best_model$alpha & lambda == best_model$best_lambda)
)
r2 <- object$stats$fit$rsq[best_idx]
r2_test <- object$stats$test$rsq[best_idx]
r2_cv <- purrr::flatten(object$stats$cv$rsq[best_idx])
structure(list(best = best_model, r2 = r2, r2_cv = r2_cv, r2_test = r2_test),
class = "summary_beset_elnet")
} else {
best_model <- get_best(
object, n_pred = n_pred, metric = metric, oneSE = oneSE
)
loglik <- stats::logLik(best_model)
best <- summary(best_model, ...)
best$loglik <- loglik
best$loglik_df <- attr(loglik, "df")
best_form <- best_model$formula
r2 <- object$stats$fit$rsq[object$stats$fit$form == best_form]
r2_test <- if(!is.null(object$stats$test))
object$stats$test$rsq[object$stats$test$form == best_form] else NULL
r2_cv <- purrr::flatten(
object$stats$cv$rsq[object$stats$cv$form == best_form])
n_pred <- object$stats$fit$n_pred[object$stats$fit$form == best_form]
near_equals <- object$stats$fit[object$stats$fit$n_pred == n_pred,]
best_rsq <- max(near_equals$rsq, na.rm = T)
near_best <- near_equals$form[near_equals$rsq > best_rsq - .01]
near_best <- near_best[near_best != best_form]
structure(list(best = best, best_form = best_form, near_best = near_best,
r2 = r2, r2_cv = r2_cv, r2_test = r2_test),
class = "summary_beset_glm")
}
}
#' @export
summary.nested <- function(object, metric = "auto", oneSE = TRUE, ...){
metric <- tryCatch(
match.arg(metric, c("auto", "aic", "auc", "mae", "mce", "mse", "rsq")),
error = function(c){
c$message <- gsub("arg", "metric", c$message)
c$call <- NULL
stop(c)
}
)
tryCatch(
if(
(metric == "auc" && object$family != "binomial") ||
(metric == "mae" && object$family == "binomial") ||
(metric == "aic" && inherits(object, "elnet"))
) error = function(c){
c$message <- paste(metric, "not available")
c$call <- NULL
stop(c)
}
)
if(metric == "auto"){
metric <- if(object$family == "gaussian") "mse" else "mce"
}
if(inherits(object, "elnet"))
summary.nested_elnet(object, metric = metric, oneSE = oneSE, ...)
else
summary.nested_beset(object, metric = metric, oneSE = oneSE, ...)
}
summary.nested_elnet <- function(object, metric, oneSE, robust = FALSE,...){
family <- object$family
n_folds <- object$n_folds
n_reps <- object$n_reps
repeats <- paste("Rep", 1:n_reps, "$", sep = "")
rep_idx <- map(repeats, ~ grepl(.x, names(object$beset)))
best_models <- map(
object$beset,
~ get_best(.x, metric = metric, oneSE = oneSE)
)
alphas <- map_dbl(best_models, "alpha")
lambdas <- map_dbl(best_models, "best_lambda")
betas <- map2(best_models, lambdas, ~ as.vector(coef(.x, s = .y)))
stnd <- map(best_models, ~ .x$x_sd / .x$y_sd)
stnd_betas <- map2(betas, stnd, ~ ifelse(.x[-1] == 0, 0, .x[-1] * .y))
betas <- betas %>% transpose %>% simplify_all
stnd_betas <- stnd_betas %>% transpose %>% simplify_all
names(betas) <- best_models[[1]] %>% coef %>% row.names
names(stnd_betas) <- names(betas)[-1]
betas <- list(
mean = map(betas, mean),
btwn_fold_se = map(betas, ~ sd(.x) / sqrt(n_folds)),
btwn_rep_range = map(betas, function(x)
map(rep_idx, ~ mean(x[.x])) %>% range)
) %>% transpose
if(robust && n_reps > 1){
not_robust <- map_lgl(betas, ~ 0 %in% .x$btwn_rep_range)
betas <- betas[!not_robust]
}
stnd_betas <- list(
mean = map(stnd_betas, mean),
btwn_fold_se = map(stnd_betas, ~ sd(.x) / sqrt(n_folds)),
btwn_rep_range = map(stnd_betas, function(x)
map(rep_idx, ~ mean(x[.x])) %>% range)
) %>% transpose
if(robust && n_reps > 1){
not_robust <- map_lgl(stnd_betas, ~ 0 %in% .x$btwn_rep_range)
stnd_betas <- stnd_betas[!not_robust]
}
best_idx <- pmap_int(
list(s = map(object$beset, ~.x$stats$test), a = alphas, l = lambdas),
function(s, a, l) with(s, which(alpha == a & lambda == l)))
fit_stats <- map2_df(object$beset, best_idx, ~.x$stats$fit[.y,])
fit_stats <- list(
mean = map(fit_stats[3:6], ~mean(., na.rm = TRUE)),
btwn_fold_se = map(fit_stats[3:6], ~ sd(.x, na.rm = TRUE) / sqrt(n_folds)),
btwn_rep_range = map(fit_stats[, 3:6], function(x)
map(rep_idx, ~ mean(x[.x])) %>% range)
) %>% transpose
cv_stats <- map2_df(object$beset, best_idx, ~.x$stats$cv[.y,])
cv_stats <- list(
mean = map(cv_stats[3:6], ~ mean(map_dbl(., "mean"), na.rm = TRUE)),
btwn_fold_se = map(cv_stats[3:6], ~ mean(map_dbl(., "btwn_fold_se"),
na.rm = TRUE)),
btwn_rep_range = map(
cv_stats[, 3:6], function(x)
map(rep_idx, ~ mean(map_dbl(x, "mean")[.x])) %>% range)
) %>% transpose
validation_metrics <- validate(object, metric = metric, oneSE = oneSE)
test_stats <- validation_metrics$stats
best_lambda <- list(
mean = map_dbl(rep_idx, ~ mean(lambdas[.x])) %>% mean,
btwn_fold_se = sd(lambdas) / sqrt(n_folds),
btwn_rep_range = map_dbl(rep_idx, ~ mean(lambdas[.x])) %>% range
)
best_alpha <- list(
mean = map_dbl(rep_idx, ~ mean(alphas[.x])) %>% mean,
btwn_fold_se = sd(alphas) / sqrt(n_folds),
btwn_rep_range = map_dbl(rep_idx, ~ mean(alphas[.x])) %>% range
)
structure(list(
stats = list(fit = fit_stats, cv = cv_stats, test = test_stats),
parameters = c(list(alpha = best_alpha, lambda = best_lambda),
validation_metrics$parameters),
coefs = list(unstandardized = betas, standardized = stnd_betas)
), class = "summary_nested_elnet"
)
}
summary.nested_beset <- function(object, metric = "auto", oneSE = TRUE, ...){
family <- object$family
n_folds <- object$n_folds
n_reps <- object$n_reps
repeats <- paste("Rep", 1:n_reps, "$", sep = "")
rep_idx <- purrr::map(repeats, ~ grepl(.x, names(object$fold_assignments)))
best_models <- purrr::map(
object$beset, ~ get_best(.x, metric = metric, oneSE = oneSE)
)
if(family != "gaussian"){
# Menard's standardization of y for logistic models
var_logit_yhat <- map_dbl(best_models, ~ var(predict(.x)))
rsq <- map_dbl(best_models, ~ r2d(.x)$R2fit)
y_stnd <- var_logit_yhat / rsq
} else {
y_stnd <- map_dbl(object$beset, ~ sd(.x$parameters$y))
}
betas <- matrix(0, ncol = ncol(object$beset[[1]]$parameters$x),
nrow = length(object$beset))
colnames(betas) <- colnames(object$beset[[1]]$parameters$x)
for(i in 1:nrow(betas)){
j <- coef(best_models[[i]])
betas[i, names(j)] <- j
}
betas <- as_tibble(betas)
stnd <- purrr::map2(
object$beset, y_stnd, ~ apply(.x$parameters$x, 2, sd) / .y
) %>% transpose %>% simplify_all
stnd_betas <- map2(betas, stnd, ~ .x * .y) %>% as_tibble
betas <- list(
mean = map(betas, mean),
btwn_fold_se = map(betas, ~ sd(.x) / sqrt(n_folds)),
btwn_rep_range = map(betas, function(x)
map(rep_idx, ~ mean(x[.x])) %>% range)
) %>% transpose
stnd_betas <- list(
mean = map(stnd_betas, mean),
btwn_fold_se = map(stnd_betas, ~ sd(.x) / sqrt(n_folds)),
btwn_rep_range = map(stnd_betas, function(x)
map(rep_idx, ~ mean(x[.x])) %>% range)
) %>% transpose
best_form <- map_chr(best_models, "formula")
best_idx <- match(best_form, object$beset[[1]]$stats$fit$form)
fit_stats <- map2_df(object$beset, best_form,
~ filter(.x$stats$fit, form == .y))
stat_names <- intersect(names(fit_stats),
c("auc", "mae", "mce", "mse", "rsq"))
fit_stats <- list(
mean = map(fit_stats[stat_names], ~mean(., na.rm = TRUE)),
btwn_fold_se = map(fit_stats[stat_names],
~ sd(.x, na.rm = TRUE) / sqrt(n_folds)),
btwn_rep_range = map(fit_stats[stat_names], function(x)
map(rep_idx, ~ mean(x[.x])) %>% range)
) %>% transpose
cv_stats <- map2_df(object$beset, best_form,
~ filter(.x$stats$cv, form == .y))
cv_stats <- list(
mean = map(cv_stats[stat_names], ~ mean(map_dbl(., "mean"), na.rm = TRUE)),
btwn_fold_se = map(cv_stats[stat_names], ~ mean(map_dbl(., "btwn_fold_se"),
na.rm = TRUE)),
btwn_rep_range = map(
cv_stats[, stat_names], function(x)
map(rep_idx, ~ mean(map_dbl(x, "mean")[.x])) %>% range)
) %>% transpose
validation_metrics <- validate(object, metric = metric, oneSE = oneSE)
test_stats <- validation_metrics$stats
form <- table(best_form)[order(table(best_form), decreasing = TRUE)]
n_pred <- map2_int(object$beset, best_idx, ~.x$stats$fit$n_pred[.y])
n_pred <- list(
median = median(n_pred),
btwn_fold_iqr = IQR(n_pred),
btwn_rep_range = map(rep_idx, ~ median(n_pred[.x])) %>% range
)
structure(
list(
stats = list(fit = fit_stats, cv = cv_stats, test = test_stats),
parameters = c(list(n_pred = n_pred, form = form),
validation_metrics$parameters),
coefs = list(unstandardized = betas, standardized = stnd_betas)
), class = "summary_nested_beset"
)
}
#' @export
summary.beset_rf <- function(object, robust = FALSE, ...){
type <- object$forests[[1]]$type
ntree <- object$forests[[1]]$ntree
mtry <- object$forests[[1]]$mtry
n_folds <- object$stats$parameters$n_folds
n_reps <- object$stats$parameters$n_reps
if(!is.null(n_reps)){
repeats <- paste("Rep", 1:n_reps, "$", sep = "")
rep_idx <- purrr::map(repeats, ~ grepl(.x, names(object$forests)))
}
stats_oob <- if (type == "classification") {
map_dbl(object$forests, ~.x$err.rate[ntree, "OOB"])
} else {
map_dbl(object$forests, ~ .x$rsq[ntree])
}
stats_test <- if (type == "classification") {
map_dbl(object$forests, ~.x$test$err.rate[ntree, "Test"])
} else {
map_dbl(object$forests, ~ .x$test$rsq[ntree])
}
oob_stats <- stats_oob
cv_stats <- stats_test
if(!is.null(n_reps)){
oob_stats <- list(
mean = mean(stats_oob),
btwn_fold_se = sd(stats_oob) / sqrt(n_folds),
btwn_rep_range = map(rep_idx, ~ mean(stats_oob[.x])) %>% range
)
cv_stats <- list(
mean = mean(stats_test),
btwn_fold_se = sd(stats_test) / sqrt(n_folds),
btwn_rep_range = map(rep_idx, ~ mean(stats_test[.x])) %>% range)
}
test_stats <- object$stats
if(!inherits(test_stats, "prediction_metrics")){
test_stats <- test_stats$stats
}
varimp <- importance(object)
if(robust && n_reps > 1){
varimp <- filter(varimp, min_import > 0)
}
structure(
list(
stats = list(oob = oob_stats, cv = cv_stats, test = test_stats),
parameters = list(
type = type, ntree = ntree, mtry = mtry,
n_folds = n_folds, n_reps = n_reps,
family = if(type == "classification") "binomial" else "gaussian"
),
vars = varimp
), class = "summary_beset_rf"
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.