#' sboost Validation Function
#'
#' A k-fold cross validation algorithm for sboost. The rows of \code{features}
#' are split, in their existing order, into \code{k_fold} contiguous folds of
#' (nearly) equal size. Each fold is held out once for testing while a
#' classifier is trained on all remaining rows. Rows are not shuffled, so
#' shuffle the data beforehand if its order is meaningful.
#'
#' @param features feature set data.frame.
#' @param outcomes outcomes corresponding to the features.
#' @param iterations number of boosts.
#' @param k_fold number of cross-validation subsets; a single whole number
#'   between 2 and the number of rows in \code{features}.
#' @param positive is the positive outcome to test for; if NULL, the first in
#'   alphabetical order will be chosen.
#' @param verbose If true, progress messages will be displayed in console.
#' @return An \emph{sboost_validation} S3 object containing:
#' \describe{
#'   \item{\emph{performance}}{Final performance statistics for all stumps.}
#'   \item{\emph{training_summary_statistics}}{Mean and standard deviations for test statistics
#'   generated by \code{\link{assess}} cumulative statistics for each of the training sets.}
#'   \item{\emph{testing_summary_statistics}}{Mean and standard deviations for test statistics
#'   generated by \code{\link{assess}} cumulative statistics for each of the testing sets.}
#'   \item{\emph{training_statistics}}{sboost \emph{sboost_assessment} cumulative statistics objects
#'   used to generate training_summary_statistics.}
#'   \item{\emph{testing_statistics}}{sboost \emph{sboost_assessment} cumulative statistics objects
#'   used to generate testing_summary_statistics.}
#'   \item{\emph{classifier_list}}{sboost \emph{sboost_classifier} objects
#'   created from training sets.}
#'   \item{\emph{outcomes}}{Shows which outcome was considered as positive and which negative.}
#'   \item{\emph{k_fold}}{number of testing and training sets used in the validation.}
#'   \item{\emph{call}}{Shows the parameters that were used for validation.}
#' }
#' @seealso \code{\link{sboost}} documentation.
#' @examples
#' # malware
#' validate(malware[-1], malware[1], iterations = 5, k_fold = 3, positive = 1)
#'
#' # mushrooms
#' validate(mushrooms[-1], mushrooms[1], iterations = 5, k_fold = 3, positive = "p")
#' @export
validate <- function(features, outcomes, iterations = 1, k_fold = 6, positive = NULL, verbose = FALSE) {
  # Capture the user's call once, up front, instead of re-evaluating
  # match.call() lazily inside helper arguments on every fold.
  validation_call <- match.call()

  # PREPARE INPUT
  # --------------------------------------------------------------------------------
  # A one-column data.frame of outcomes is reduced to a plain vector.
  if (is.data.frame(outcomes)) outcomes <- as.vector(outcomes[[1]])
  processed_features <- process_feature_input(features)
  categorical <- find_categorical(features)
  otcm_def <- check_positive_value(outcomes, positive)
  processed_outcomes <- process_outcome_input(outcomes, features, otcm_def)

  rows <- nrow(features)

  # Every fold needs at least one testing row and at least one training row.
  if (!is.numeric(k_fold) || length(k_fold) != 1L || is.na(k_fold) ||
      k_fold != floor(k_fold) || k_fold < 2 || k_fold > rows) {
    stop("k_fold must be a single whole number between 2 and the number of rows in features.",
         call. = FALSE)
  }

  # FOLD BOUNDARIES
  # --------------------------------------------------------------------------------
  # Whole-number boundaries: fold i covers rows fold_starts[i]..fold_ends[i].
  # Flooring keeps the folds contiguous and non-overlapping, and the last fold
  # always ends at `rows`, so every row is tested exactly once even when
  # `rows` is not a multiple of `k_fold`.
  fold_ends <- floor(seq_len(k_fold) * rows / k_fold)
  fold_starts <- c(0, fold_ends[-k_fold]) + 1

  # Preallocate the per-fold result containers.
  raw_classifier_list <- vector("list", k_fold)
  classifier_list <- vector("list", k_fold)
  training_statistics <- vector("list", k_fold)
  testing_statistics <- vector("list", k_fold)

  # MAIN VALIDATION LOOP
  # --------------------------------------------------------------------------------
  for (i in seq_len(k_fold)) {
    if (verbose) print(paste0("Training classifier ", i, " of ", k_fold, "..."))

    testing <- fold_starts[i]:fold_ends[i]
    # Negative indices select every row except the held-out fold; the training
    # set is exactly the complement of the testing set.
    training <- -testing

    # create classifier
    raw_classifier_list[[i]] <- make_classifier(
      processed_features[training, ], processed_outcomes[training],
      categorical, iterations, verbose
    )
    classifier_list[[i]] <- process_classifier_output(
      raw_classifier_list[[i]], features, outcomes, otcm_def,
      validation_call, training
    )

    # test classifier on the rows it was trained on and on the held-out rows
    training_statistics[[i]] <- get_cumulative_statistics(
      classifier_list[[i]], raw_classifier_list[[i]],
      processed_features[training, ], processed_outcomes[training]
    )
    testing_statistics[[i]] <- get_cumulative_statistics(
      classifier_list[[i]], raw_classifier_list[[i]],
      processed_features[testing, ], processed_outcomes[testing]
    )
  }

  process_validation_output(
    training_statistics, testing_statistics, classifier_list,
    k_fold, validation_call
  )
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.