Nothing
#' sboost Assessment Function
#'
#' Assesses how well an sboost classifier classifies the data.
#'
#' @param object \emph{sboost_classifier} S3 object output from sboost.
#' @param features feature set data.frame.
#' @param outcomes outcomes corresponding to the features.
#' @param include_scores if true feature_scores are included in output.
#' @return An \emph{sboost_assessment} S3 object containing:
#' \describe{
#' \item{\emph{performance}}{Last row of cumulative statistics (i.e. when all stumps are included in assessment).}
#' \item{\emph{cumulative_statistics}}{\emph{stump} - the index of the last decision stump added to the assessment.\cr
#' \emph{true_positive} - number of true positive predictions.\cr
#' \emph{false_negative} - number of false negative predictions.\cr
#' \emph{true_negative} - number of true negative predictions.\cr
#' \emph{false_positive} - number of false positive predictions.\cr
#' \emph{prevalence} - true positive / total.\cr
#' \emph{accuracy} - correct predictions / total.\cr
#' \emph{sensitivity} - correct predicted positive / true positive.\cr
#' \emph{specificity} - correct predicted negative / true negative.\cr
#' \emph{ppv} - correct predicted positive / predicted positive.\cr
#' \emph{npv} - correct predicted negative / predicted negative.\cr
#' \emph{f1} - harmonic mean of sensitivity and ppv.}
#' \item{\emph{feature_scores}}{If include_scores is TRUE, for each feature in the classifier lists scores for each row in the feature set.}
#' \item{\emph{classifier}}{sboost \emph{sboost_classifier} object used for assessment.}
#' \item{\emph{outcomes}}{Shows which outcome was considered as positive and which negative.}
#' \item{\emph{call}}{Shows the parameters that were used for assessment.}
#' }
#' @seealso \code{\link{sboost}} documentation.
#' @examples
#' # malware
#' malware_classifier <- sboost(malware[-1], malware[1], iterations = 5, positive = 1)
#' assess(malware_classifier, malware[-1], malware[1])
#'
#' # mushrooms
#' mushroom_classifier <- sboost(mushrooms[-1], mushrooms[1], iterations = 5, positive = "p")
#' assess(mushroom_classifier, mushrooms[-1], mushrooms[1])
#' @export
assess <- function(object, features, outcomes, include_scores = FALSE) {
# PREPARE INPUT
# --------------------------------------------------------------------------------
if (is.data.frame(outcomes)) outcomes <- as.vector(outcomes[[1]])
processed_features <- process_feature_input(features)
processed_outcomes <- process_outcome_input(outcomes, features, object$outcomes)
processed_classifier <- process_classifier_input(object, features)
# ASSESS CLASSIFIER
# --------------------------------------------------------------------------------
cumulative_statistics = get_cumulative_statistics(object, processed_classifier, processed_features, processed_outcomes)
if (include_scores) {
feature_scores <- score_classifier_features(object, processed_classifier, processed_features)
} else {
feature_scores <- NULL
}
classifier_assessment <- process_assessment_output(cumulative_statistics, feature_scores, object, match.call())
return(classifier_assessment)
}
# calls cpp-code for contingency table
# classifier, features, and outcomes must already be processed
get_cumulative_statistics <- function(object, classifier, features, outcomes) {
# CALL C++ CODE TO GET CONTINGENCY TABLE
# --------------------------------------------------------------------------------
statistics <- get_contingency_cpp(features, outcomes, classifier)
colnames(statistics) <- c("true_positive", "false_negative", "true_negative", "false_positive")
# CALCULATE STATISTICS
# --------------------------------------------------------------------------------
statistics <- data.frame(statistics)
statistics <- dplyr::mutate(statistics, next_stump = object$classifier$stump)
statistics <- dplyr::select(statistics, .data$next_stump, .data$true_positive, .data$false_negative, .data$true_negative, .data$false_positive)
statistics <- dplyr::mutate(statistics,
prevalence = (.data$true_positive + .data$false_negative) / (.data$true_positive + .data$true_negative + .data$false_positive + .data$false_negative),
accuracy = (.data$true_positive + .data$true_negative) / (.data$true_positive + .data$true_negative + .data$false_positive + .data$false_negative),
sensitivity = .data$true_positive / (.data$true_positive + .data$false_negative),
specificity = .data$true_negative / (.data$true_negative + .data$false_positive),
ppv = .data$true_positive / (.data$true_positive + .data$false_positive),
npv = .data$true_negative / (.data$true_negative + .data$false_negative),
f1 = (2 * .data$ppv * .data$sensitivity) / (.data$ppv + .data$sensitivity))
return(statistics)
}
# calls cpp-code for classifier feature scores
# object must be sboost_classifier, classifier and features must already be processed
score_classifier_features <- function(object, classifier, features) {
# CALL C++ CODE TO SCORE STUMPS
# --------------------------------------------------------------------------------
cpp_scores <- score_classifier_features_cpp(classifier, features)
# ADD TOGETHER SCORES OF STUMPS ON THE SAME FEATURES
# --------------------------------------------------------------------------------
scores <- data.frame(matrix(rep(as.numeric(NA), nrow(features) * length(unique(object$classifier$feature))), nrow = nrow(features)))
feature_names <- c()
for (i in 1:ncol(cpp_scores)) {
if (object$classifier$feature[[i]] %in% feature_names) {
# If a stump with this feature has already been added, add this stump score to the score for that feature
scores[which(feature_names == object$classifier$feature[[i]])] <- scores[which(feature_names == object$classifier$feature[[i]])] + cpp_scores[,i]
} else {
# If a stump with this feature has not already been added, add stump scores to next empty column
feature_names <- c(feature_names, object$classifier$feature[[i]])
scores[length(feature_names)] <- cpp_scores[,i]
}
}
names(scores) <- feature_names
return(scores)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.