#' @title imbDis - Imbalanced Discrimination
#'
#' @description
#' This function creates an object of type \code{imbDis}, an S3 Class.
#' imbDis objects can be passed into the associated methods as arguments.
#' Examples of associated methods are \code{auc}, \code{brier} and \code{logLoss}.
#'
#' @param labels Vector of ground-truth labels.
#' @param pred Vector of predicted probabilities.
#' @param case A value present in the \code{labels} argument, that denotes the case class label.
#' @param bins Vector of class imbalance frequencies. Values must be larger than 0 and smaller than 1. Default is seq(0.05, 0.5, 0.05).
#'
#' @return An \code{imbDis} object - S3 class that contains the input parameters, the control label,
#' standardized labels, sample size, and the number of cases per bin.
#' @export
#' @examples
#' # using the mtcars dataset
#' LR = glm(vs ~ mpg + hp + wt, data = mtcars, family = "binomial")
#' preds = predict(LR, mtcars, type = "response") # equiv. to LR$fitted.values
#'
#' # define the imbDis object
#' imbD = imbDis(mtcars$vs, preds, 1, seq(0.1, 0.9, 0.1))
#'
#' # metric results
#' auc(imbD)
#' brier(imbD)
#' logLoss(imbD)
#'
imbDis <- function(labels, pred, case, bins = seq(0.05, 0.5, 0.05)) {
  # validate the arguments; stops with an error if any of them is invalid
  .param_check(labels, pred, case, bins)

  # derived quantities: 0/1 recoding of the labels (1 = case) and the
  # sample size computed from those labels and the bins
  std_labels <- .standardizeLabels(labels, case)
  n_sample <- .calc_samplesize(std_labels, bins)

  # build the list in its final field order and tag it with the S3 class
  structure(
    list(
      labels = labels,
      pred = pred,
      case = case,
      bins = bins,
      # the label value(s) that differ from the case label
      control = unique(labels[labels != case]),
      labs_01 = std_labels,
      sample.size = n_sample,
      # number of cases per bin for the computed sample size
      bin.caseSizes = round(bins * n_sample)
    ),
    class = "imbDis"
  )
}
# @title Parameter Check
#
# @description
# Helper function to ensure parameters are valid. Stops with an informative
# error at the first violated condition; returns NULL invisibly otherwise.
#
# @param labels Vector of ground-truth labels. Must have exactly 2 distinct values.
# @param pred Numeric vector of predicted probabilities in [0, 1], without
#   missing values and of the same length as labels.
# @param case A single integer or string that is the case class. Must be a value in labels.
# @param bins Non-empty numeric vector of class imbalance frequencies, each
#   strictly between 0 and 1, without missing values.
# @return NULL (invisibly). Called for its side effect of raising an error.
.param_check <- function(labels, pred, case, bins) {
  # --- labels ---
  if (length(unique(labels)) != 2) {
    stop("'labels' may only have 2 distinct values.", call. = FALSE)
  }

  # --- pred ---
  if (!is.numeric(pred)) {
    stop("'pred' contains non-numeric values.", call. = FALSE)
  }
  # an NA would make the range test below evaluate to NA and abort with an
  # unhelpful "missing value where TRUE/FALSE needed" error
  if (anyNA(pred)) {
    stop("'pred' contains missing values.", call. = FALSE)
  }
  if (any(pred < 0 | pred > 1)) {
    stop("At least one element of 'pred' is less than 0 or greater than 1.",
         call. = FALSE)
  }
  if (length(labels) != length(pred)) {
    stop("Length of 'labels' does not match length of 'pred'.", call. = FALSE)
  }

  # --- case ---
  # `if` needs a scalar condition, so a vector-valued case must be rejected
  # before the membership test
  if (length(case) != 1) {
    stop("'case' must be a single value.", call. = FALSE)
  }
  if (!case %in% labels) {
    stop("Value of 'case' is not present in 'labels'.", call. = FALSE)
  }

  # --- bins ---
  if (!is.numeric(bins)) {
    stop("'bins' is non-numeric.", call. = FALSE)
  }
  if (length(bins) == 0) {
    stop("'bins' must contain at least one value.", call. = FALSE)
  }
  if (anyNA(bins)) {
    stop("'bins' contains missing values.", call. = FALSE)
  }
  if (any(bins < 0 | bins > 1)) {
    stop("At least one element of 'bins' is less than 0 or greater than 1.",
         call. = FALSE)
  }
  if (any(bins == 0 | bins == 1)) {
    stop("Elements of 'bins' cannot be exactly 0 or 1.", call. = FALSE)
  }

  invisible(NULL)
}
# @title Standardized Labels
#
# @description
# Helper function to convert labels vector to 0-1 labels. 1 corresponds to the case label.
#
# @param labels Vector of ground-truth labels.
# @param case An integer or string that is the case class. Must be a value in labels.
# @return Standardized labels vector.
.standardizeLabels <- function(labels, case) {
  # begin with an all-zero (control) vector as long as `labels`, then set to 1
  # every position whose label equals `case`; which() discards comparisons
  # that evaluate to NA, so those positions remain 0
  replace(numeric(length(labels)), which(labels == case), 1)
}
# @title Standardised Labels
#
# @description
# Helper function to convert labels vector to 0-1 labels. 1 corresponds to the case label.
#
# @param labels Vector of ground-truth labels.
# @param case An integer or string that is the case class. Must be a value in labels.
# @return Standardised labels vector.
# British-spelling alias bound to the same function object as .standardizeLabels
.standardiseLabels <- .standardizeLabels
# @title Calculates the sample size
#
# @description
# Helper function to compute the sample size from the standardized 0-1 labels
# and the class imbalance bins.
#
# @param labels_01 Vector of standardized 0-1 labels. 1 corresponds to the case label.
# @param bins Numeric vector of class imbalance frequencies.
# @return Sample size rounded to a whole number; NA when the comparison of the
#   case proportion with max(bins) is undefined (e.g. empty labels).
.calc_samplesize <- function(labels_01, bins) {
  ## total cases and controls in labels
  tot_cases <- sum(labels_01)
  tot_controls <- sum(labels_01 == 0)

  ## does the proportion of cases stay within the largest requested bin?
  use_cases <- mean(labels_01) <= max(bins)

  ## undefined comparison (empty or NA input): no sample size can be derived
  if (is.na(use_cases)) {
    return(NA_real_)
  }

  ## sample size calculations
  # NOTE(review): the branch is chosen by comparing the case proportion with
  # max(bins) rather than by taking the smaller of the two bounds below; for
  # some inputs the two rules give different sizes - confirm this is intended.
  if (use_cases) {
    # cases procedure: the largest bin uses all available cases
    samp_size <- tot_cases / max(bins)
  } else {
    # controls procedure: the smallest bin uses all available controls
    samp_size <- tot_controls / max(1 - bins)
  }
  round(samp_size)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.