#' @include performance_class.R continuous_class.R discrete_class.R
NULL
#' Bin reference class generator
#'
#' @name Binary_Performance-class
#' @description Binary_Performance object generator class that implements
#' the Performance class methods
#' @field ones sum of \code{y == 0}
#' @field zeros sum of \code{y == 1}
#' @export Binary_Performance
#' @exportClass Binary_Performance
Binary_Performance <- setRefClass("Binary_Performance", fields = c(
ones = "numeric",
zeros = "numeric"),
contains = "Performance")
Binary_Performance$methods(initialize = function(y=.self$y, ...) {
callSuper(y=y, ...)
ones <<- sum((y == 1) * w)
zeros <<- sum((y == 0) * w)
})
#' @describeIn bin_ Bin a Continuous object using Binary_Performance
#' @param .self a Binary_Performance object
#' @param b a Continuous bin object
#' @param min.iv minimum information value to split
#' @param min.cnt minimum number of observations per bin
#' @param min.res minimum number of response counts per bin
#' @param max.bin maximum number of resulting bins
#' @param mono montoncity constraint of discretization process
#' @param exceptions numeric vector of values to exclude from discretization#'
#' @return modifies the Bin object in place
setMethod("bin_",
signature = c(.self="Binary_Performance", b="Continuous"),
function(.self, b, min.iv=0.01, min.cnt=10, min.res=0,
max.bin=10, mono=0, exceptions=numeric(0)) {
f <- !is.na(b$x)
b$tf@tf <- .Call("bin", as.double(b$x[f]), as.double(.self$y[f]),
as.double(.self$w[f]), as.double(min.iv), as.integer(min.cnt),
as.integer(min.res), as.integer(max.bin), as.integer(mono),
as.double(exceptions))
b$tf@exceptions <- setNames(rep(0, length(exceptions)), exceptions)
})
#' @describeIn bin_ Bin a Discrete object using Binary_Performance
#' @return modifies the Bin object in place
setMethod("bin_",
signature = c(.self="Binary_Performance", b="Discrete"),
function(.self, b, ...) {
b$tf@tf <- as.list(levels(b$x))
names(b$tf@tf) <- levels(b$x)
})
#' Subsequent call from the bin function passed to Binary_Performance object
#'
#' @name Binary_Performance_bin
#' @description This bin function should not be directly called by the user.
#' The Classing bin function is subsequently called from the
#' \code{\link{Bin_bin}} wrapper function.
NULL
Binary_Performance$methods(bin = bin_)
#' Summarize method implementation for Binary_Performance
#'
#' @name Binary_Performance_summarize
#' @param x discretized independent variable as a factor
#' @param y response variable
#' @param w weight variable
#' @description this function summarizes the relationship between the
#' independent and response variables. The inherited Performance class demands
#' that it be implemented.
#' @return a matrix of summary information for every level in the factor x
NULL
Binary_Performance$methods(summarize = function(x, y, w) {
N1 <- tapply((y == 1) * w, x, sum)
N0 <- tapply((y == 0) * w, x, sum)
N <- tapply(w, x, sum)
P1 <- N1 / ones
P0 <- N0 / zeros
WoE <- log(P1 / P0)
IV <- (P1 - P0) * WoE
res <- cbind(N = N, `#1` = N1, `#0` = N0, `%N` = N / sum(ones, zeros),
`%1` = N1 / ones, `%0` = N0 / zeros, `P(1)` = N1 / N,
WoE = WoE, IV = IV, Pred = WoE)
res[is.na(res) | is.infinite(res)] <- 0
res
})
#' Update method implementation for Binary_Performance
#'
#' @name Binary_Performance_update
#' @param b Bin object to update
#' @description update is called after every applicable bin operation that
#' modifies the Transform object. The result of calling update is used for
#' displaying the Bin matrix as well as plotting the Bin object.
#' @return a list of matrices with summarized information.
NULL
Binary_Performance$methods(update = function(b, ...) {
f <- b$factorize()
m <- summarize(f, y, w)
lvls <- levels(f)
ids <- list(
normal = NULL,
exceptions = match(names(b$tf@exceptions), lvls, 0),
missing = match("Missing", lvls, 0))
ids$normal <- seq.int(nrow(m))[-c(ids$missing, ids$exceptions)]
out <- lapply(ids, function(x) m[x,,drop=FALSE])
out$Total <- matrix(colSums(m, na.rm=TRUE), nrow=1,
dimnames = list("Total", colnames(m)))
out$Total[,c("WoE", "Pred")] <- 0
out$Total[,"P(1)"] <- out$Total[,"#1"]/out$Total[,"N"]
out
})
# internal helper function for plotting
make_bars_ <- function(v, width=0.70, ...) {
left <- pmin(v, 0)
right <- pmax(v, 0)
center <- seq_along(left)
top <- center - 0.5 * width
bottom <- center + 0.5 * width
rect(left, bottom, right, top, ...)
center
}
#' Plot method implementation for Binary_Performance
#'
#' @name Binary_Performance_plot
#' @param b Bin object to update
#' @description plot displays a horizontal bar chart showing summarized
#' performance information. The large bars represent the observed WoE within
#' each bin. The smaller bars within represent the WoE that will be substituted
#' during prediction. Record count percentages are shown on the right side of
#' the plot while a sequential index used for referencing bin levels is printed
#' along the left.
NULL
Binary_Performance$methods(plot = function(b, ...) {
on.exit(par(oma=rep(0, 4))) # restore them on exit
tmp <- head(b$as.matrix(), -1)
lbls <- rev(row.names(tmp))
woe <- rev(tmp[,"WoE"])
val <- rev(tmp[,"Pred"])
pctN <- sprintf("%0.1f%%", rev(tmp[, "%N"] * 100))
## find the max and min
xlim <- range(c(woe, val)) + c(-0.5, 0.5)
## set margin based on nchars
width <- max(nchar(lbls))
par(oma=c(0, width/6, 0, 0))
graphics::plot(NA, xlim=xlim, ylim=c(0.5, length(woe) + 0.5),
xlab = "Weight of Evidence", ylab=NA, yaxt="n", main = b$name)
abline(v = 0, lty=3)
center <- make_bars_(woe, col=rgb(0, 0, 0, alpha = 0.30))
center <- make_bars_(val, width=0.2, col="red")
text(x = min(xlim), y = center, labels = sprintf(" [%02d]",
rev(seq_along(lbls))), cex=0.80)
text(x = max(xlim) - 0.1, y = center, labels = pctN, cex=0.80)
axis(side = 2, labels = lbls, at = center, las = 2, lwd.ticks = 0,
cex.axis = 0.80)
})
#' Summary method implementation for Binary_Performance
#'
#' @name Binary_Performance_summary
#' @param tf Transform object to summarize
#' @description summary returns the appropriate pieces from the Transform object
#' to summarize the relationship in a conciese one-line entry.
#' @return a name vector of summary information
NULL
Binary_Performance$methods(summary = function(tf, ...) {
## return the information value of the bin
tot <- tf@repr$Total[,c("IV", "N", "#1", "#0", "P(1)")]
nas <- unname(colSums(tf@repr$missing[,"N", drop=F], na.rm=T))
exc <- unname(colSums(tf@repr$exception[,"N", drop=F], na.rm=T))
out <- c(tot, "N missing"=nas, "N Exceptions"=exc)
out[is.na(out)] <- 0
out
})
#' Sort_value method implementation for Binary_Performance
#'
#' @name Binary_Performance_sort_value
#' @param b Bin object to update
#' @description sort_value returns the information value for the requested Bin
#' @return the value to use for sorting bins using Binary_Performance
NULL
Binary_Performance$methods(sort_value = function(b, ...) {
b$tf@repr$Total[,"IV"]
})
#' Return an object that tells openxlsx how to print it to Excel
#'
#' @name Binary_Performance_get_excel_table
#' @param b Bin object to update
#' @return Returns a \code{openxlsx-table} object that \code{binnrtools} can
#' use to format a bin object for Excel
NULL
Binary_Performance$methods(get_excel_table = function(b, ...) {
structure(
list(
data = b$as.matrix(),
name = b$name,
format_info = list(
percent_cols = 4:7,
comma_cols = 1:3,
rounded_cols = 8:10,
bar_cols = 10
)
),
class="openxlsx-table"
)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.