R/psi.R

Defines functions psi_table psi_vector

Documented in psi_table psi_vector

#' Function to calculate the vector psi given 2 vector of counts
#'
#' @param old A vector of original count distribution
#' @param new A vector with the new count distribution
#' @return A vector with psi index (value) a label and the table for with counts, percents, woe.
#' @examples
#'
#' o <- sample(letters[1:5], size = 9000, prob = 1:5, replace = TRUE)
#' n <- sample(letters[1:5], size = 1000, prob = 1:5 + 4, replace = TRUE)
#'
#' table(o)
#' table(n)
#'
#' psi_vector(
#'   table(o),
#'   table(n)
#' )
#'
#' p <- psi_vector(
#'   table(o),
#'   table(n)
#' )
#'
#' p
#'
#' sum(p)
#'
#' psi_label(sum(p))
#'
#' @export
psi_vector <- function(old, new) {
  new <- new / sum(new)

  old <- old / sum(old)

  out <- (new - old) * log(new / old)

  out
}

#' Function to calculate the vector table given 2 vector of counts
#'
#' @param old A vector of original distribution
#' @param new A vector with the new distribution
#' @return A table with psi index (value) a label and the table for with counts, percents, woe.
#' @examples
#'
#' o <- factor(sample(letters[1:5], size = 9000, prob = 1:5, replace = TRUE))
#' n <- factor(sample(letters[1:5], size = 1000, prob = 1:5 + 4, replace = TRUE))
#'
#' psi_table(o, n)
#' @importFrom dplyr full_join mutate
#' @importFrom tibble tibble
#' @importFrom rlang .data
#' @export
psi_table <- function(old, new) {

  # old <- o; new <- n

  d <- dplyr::full_join(
    dplyr::count(tibble::tibble("category" = old), .data$category, name = "count_old"),
    dplyr::count(tibble::tibble("category" = new), .data$category, name = "count_new"),
    by = "category"
  )

  d <- dplyr::mutate(
    d,
    percent_old = .data$count_old / sum(.data$count_old),
    percent_new = .data$count_new / sum(.data$count_new),
    psi = psi_vector(.data$count_old, .data$count_new)
  )

  d
}
jbkunst/risk3r documentation built on March 19, 2024, 10:49 p.m.