R/null_gini.R

Defines functions null_gini

Documented in null_gini

#' null_gini
#'
#' @description
#' Return the curve for a gini curve for threshold in the prediction. Need to plot gini curve
#'
#' Note: Predictions should be annualised (independent of exposure)
#'
#' @param actual Array[Numeric] - Values we are aiming to predict.
#' @param predicted Array[Numeric] - Values that we have predicted.
#' @param weight Optional: Array[Numeric] - Weighting of predictions. If NUll even weighting is used
#' @param na.rm logical. Should missing values be removed?
#'
#' @return dataframe with columns
#' prediction - value of predictions (sorted)
#' cum_act - cumulative fraction of actuals
#' cum_pop - cumulative fraction of weight
#' @export
#'
#' @examples
#'
#' actual <- rnorm(100, mean=100, sd=10)
#' weight <- rep(1,100)
#' predicted <- actual + rnorm(100, mean=0, sd=1)
#'
#' null_gini(actual, predicted, weight)
#'
null_gini <- function(actual, predicted, weight=NULL, na.rm=FALSE){

  # Data prep and checking --------------------------------------------------
  checkmate::assert_numeric(actual, lower=0)
  checkmate::assert_numeric(predicted, len=length(actual))


  # Use no weighting if none given
  if (is.null(weight)){
    weight <- rep(1, length(actual))
  }else{
    checkmate::assert_numeric(weight, len=length(actual), lower=0)
  }

  # Combine actuals and predictions
  data <- data.frame(actual=actual, weight=weight, predicted=predicted)

  if (na.rm==TRUE){
    data <- data %>% dplyr::filter(!is.na(actual) & !is.na(weight) & !is.na(predicted))
  }


  # Calculate gini curve
  data_gini <- data %>%
    dplyr::group_by(predicted) %>%
    dplyr::summarise_all(sum) %>%
    dplyr::arrange(desc(predicted)) %>%
    dplyr::transmute(predicted=predicted,
              cum_act=cumsum(actual * weight)/sum(actual * weight),
              cum_pop=cumsum(weight)/sum(weight))

  # Add leading row of 0
  data_gini <- rbind(data.frame(predicted=NA, cum_act=0, cum_pop=0), data_gini)

  return(data_gini)

}
gloverd2/admr documentation built on Dec. 2, 2020, 11:16 p.m.