#' Select models based on validation performance and conduct an evaluation study
#'
#' Reads a simulation instance produced via \code{sample_mle} and emulates the
#' process of conducting an evaluation study for one or multiple selected
#' prediction models.
#'
#' @param instance simulation instance generated by \code{sample_mle}
#' @param methods character, optionally subset the available prediction models
#' by method (= learning algorithm), e.g. recover elastic net models by
#' specifying methods = "glmnet" (caret train.method); no effect if
#' methods = NA (default)
#' @param M integer, number of models to subsample from the available models
#' (restricted via the methods argument); needs to be less than or equal to the
#' number of available models (200 by default)
#' @param M.start integer, starting index for subsetting
#' @param M.probs character, "uniform" for a random subset or "learn" for
#' performance-dependent sampling probabilities
#' @param M.seed integer, seed for random subsetting (i.e. if M.probs != "uniform")
#' @param n.eval integer, test (evaluation) sample size
#' @param first.eval integer, index of first evaluation observation (from all available)
#' @param rdm.eval logical, choose test samples randomly? (default: FALSE)
#'
#' @param analysis character, either "acc" (accuracy) or "cpe" (co-primary
#' endpoints, i.e. sensitivity and specificity)
#'
#' @param estimate.method character, estimation method in the SEPM package
#' (default: "beta.approx")
#' @param estimate.args character, additional estimation arguments given as a
#' character of the form "arg1=value1_arg2=value2_..."
#'
#' @param select.method character, selection rule based on the validation
#' ranking; one of "close", "best", "optimal", "oracle", "simplest.en"
#' @param select.args character, further arguments defining the selection rule
#' @param select.limit character, limits the number of models to evaluate;
#' one of "none", "sqrt", "one"
#'
#' @param infer.method character, defines the statistical test, e.g. "maxT",
#' "Bonferroni" or "naive"
#' @param alternative character, either "greater" (default), "lower" or "two.sided"
#' @param alpha numeric, significance level (default: 0.025)
#' @param transform character, specifies transformation of test statistics,
#' passed to \code{SEPM::infer}
#' @param data ignored (required for batchtools compatibility)
#' @param job ignored (required for batchtools compatibility)
#' @param delta numeric, subtracted from theta1 (sensitivity) when deriving the
#' performance benchmark in the "cpe" analysis (default: 0)
#' @param shift numeric, subtracted from the best achievable performance to
#' obtain the hypothesis threshold (default: 0.05)
#'
#' @return Returns a list which contains all relevant characteristics of the
#' evaluation study.
#'
#' @export
study_mle <- function(instance,
                      methods = NA,
                      M = 200,
                      M.start = NA,
                      M.probs = c("uniform", "learn"),
                      M.seed = 1,
                      n.eval = 200,
                      first.eval = 1,
                      rdm.eval = FALSE,
                      analysis = c("acc", "cpe"),
                      delta = 0,
                      shift = 0.05,
                      select.method = c("close", "best", "optimal", "oracle", "simplest.en"),
                      select.limit = c("none", "sqrt", "one"),
                      select.args = "",
                      estimate.method = "beta.approx",
                      estimate.args = "",
                      infer.method = "maxT",
                      alternative = "greater",
                      alpha = 0.025,
                      transform = "none",
                      data = NULL,
                      job = NULL) {
  # Preparation ---------------------------------------------------------------
  analysis      <- match.arg(analysis)
  select.method <- match.arg(select.method)
  select.limit  <- match.arg(select.limit)
  M.probs       <- match.arg(M.probs)

  ## Performance target and hypothesis threshold: we aim to demonstrate a
  ## performance of (best achievable) theta minus shift.
  if (analysis == "acc") {
    target    <- "accuracy"
    benchmark <- max(instance$learn.models$theta$theta)
  } else {
    target    <- "sensspec"
    ## Co-primary endpoints: best joint performance over sensitivity
    ## (penalized by delta) and specificity, replicated for both endpoints.
    benchmark <- rep(max(pmin(instance$learn.models$theta$theta1 - delta,
                              instance$learn.models$theta$theta0)), 2)
  }
  threshold <- benchmark - shift

  ## Restrict the instance to the set of 'all' (trained) candidate models:
  instance <- select_setM(instance = instance, methods = methods,
                          M = M, M.start = M.start,
                          M.probs = M.probs, M.seed = M.seed)

  ## Indices of the evaluation (test) observations:
  eval_idx <- subset_obs(rdm.eval, first.eval, n.eval,
                         n.max = instance$info["eval", "n"])

  ## Hypothesis definition:
  hypothesis <- SEPM::define_hypothesis(target = target, threshold = threshold,
                                        alternative = alternative, alpha = alpha)

  # Learning phase -------------------------------------------------------------
  ## Compare predictions and labels on the validation data:
  val_comparison <- SEPM::compare(
    hypothesis  = hypothesis,
    predictions = as.matrix(instance$train.models$val$pred),
    labels      = instance$train.models$val$labels
  )

  ## Choose the models that proceed to the evaluation study:
  selected <- select_models(instance,
                            comp = val_comparison,
                            n.eval = n.eval,
                            analysis = analysis,
                            select.method = select.method,
                            select.args = select.args,
                            select.limit = select.limit)

  # Evaluation phase -----------------------------------------------------------
  eval_pred <- as.matrix(
    instance$learn.models$eval$pred[eval_idx, selected, drop = FALSE]
  )
  eval_comparison <- SEPM::compare(
    hypothesis  = hypothesis,
    predictions = eval_pred,
    labels      = instance$learn.models$eval$label[eval_idx]
  )

  ## Statistical inference: estimation followed by multiple testing.
  estimates <- SEPM::estimate(eval_comparison, method = estimate.method,
                              args = string2list(estimate.args))
  inference <- SEPM::infer(estimates, method = infer.method,
                           transform = transform)

  # Compute output ------------------------------------------------------------
  ## Dispatch to the analysis-specific result derivation
  ## (derive_result_mle_acc or derive_result_mle_cpe).
  ## NOTE(review): a literal 0 (not the delta argument) is passed here, exactly
  ## as in the original implementation — confirm this is intentional.
  study_result <- do.call(paste0("derive_result_mle_", analysis),
                          args = list(instance = instance,
                                      pred = eval_pred,
                                      inf = inference,
                                      delta = 0))

  ## Placeholder job metadata when not running under batchtools:
  if (is.null(job)) {
    job <- list(id = NA, seed = NA, pars = NULL)
  }

  return(list(job       = job,
              info      = instance$info,
              selection = inference$inference[[1]]$model.name,
              final.hp  = get_hp(fmn = as.character(study_result$final.name),
                                 instance),
              result    = study_result))
}
# NOTE: stray website-embed boilerplate (scraped from an online documentation
# page) removed here — it was not R code and would break parsing.