#' @include bin_class.R
NULL
#' @title Continuous reference class generator
#'
#' @name Continuous-class
#' @description Continuous object generator class used to wrap binned
#' numeric variables.
#' @export Continuous
#' @exportClass Continuous
Continuous <- setRefClass("Continuous", contains = "Bin")
Continuous$methods(initialize = function(...) {
callSuper(...)
})
#' Collapse adjacent levels of a Continuous bin object
#'
#' @name Continuous_collapse
#' @param i numeric vector of bin levels to collapse. Must be adjacent.
#' @return modifies the transform object in place.
NULL
Continuous$methods(collapse = function(i) {
d <- unique(pmax(pmin(tail(i, -1), length(tf@tf) - 1), 2))
tf@tf <<- tf@tf[-d]
callSuper()
}
)
#'
#' a level of a Continuous bin into multiple new levels
#'
#' @name Continuous_expand
#' @param i numeric vector of length 1 indiicating bin level to expand.
#' @details The requested level is divided into quintiles if possible. Duplicate
#' levels are removed.
#' @return modifies the transform object in place.
NULL
Continuous$methods(expand = function(i) {
stopifnot(length(i) == 1)
f <- !(is.na(x) | x %in% as.numeric(names(tf@exceptions)))
a <- min(max(1, i), length(tf@tf)) # can't be smaller than 1
z <- max(min(i + 1, length(tf@tf)), a) # or larger than max els
vals <- x[x > tf@tf[a] & x <= tf@tf[z] & f]
nvals <- length(unique(vals))
if (nvals > 1) {
step_ <- max(0.20, (1/nvals))
q <- tail(head(quantile(vals, seq(0, 1, step_)), -1), -1)
} else {
q <- NULL
}
tf@tf <<- sort(unique(c(tf@tf, q)))
callSuper()
}
)
#' Helper function to format Continuous bin labels
#'
#' @name Continuous_fmt_numeric_cuts
#' @return character vector of bin labels
NULL
Continuous$methods(fmt_numeric_cuts = function() {
fmt <- sprintf("(%%%1$ds - %%%1$ds]", max(nchar(tf@tf))) ## get width of largest value
sprintf(fmt, head(tf@tf, -1), tail(tf@tf, -1))
})
#' Factorize for Continuous bins
#'
#' @name Continuous_factorize
#' @param newdata Numeric vector on which to apply the transformation. Defaults
#' to the \code{x} field of the Continuous object
#' @details \code{factorize} returns a list with two fields:
#' \itemize{
#' \item{factor }{ Factor with the bin labels applied to \code{x}}
#' \item{types }{ list of logical vectors for missing, exception, and normal}
#' }
#' @return \code{list} with two fields. See details.
NULL
Continuous$methods(factorize = function(newdata=.self$x) {
lbls <- fmt_numeric_cuts()
i <- findInterval(newdata, tf@tf, left.open = T)
out <- lbls[i]
out[is.na(out)] <- "Missing"
f <- newdata %in% as.numeric(names(tf@exceptions))
out[f] <- newdata[f]
lvls <- c(lbls, names(tf@exceptions), "Missing")
factor(out, levels = lvls)
})
#' Weight-of-Evidence subistitution for Continuous bins
#'
#' @name Continuous_predict
#' @param newdata numeric vector to apply performance substition. Defaults to
#' data used to create the Continuous object.
#' @return numeric variable with bin performance values substituted for
#' the inputs.
NULL
Continuous$methods(predict = function(newdata=.self$x) {
stopifnot(is.numeric(newdata))
callSuper(newdata=newdata)
})
#' Generate SAS code for Continuous object
#'
#' @name Continuous_gen_code_sas
#' @description generate SAS code representing the transformation from input
#' numeric values to the substituted performance values. Also generates code
#' calculating difference from min/max/neutral and adverse action code
#' assignments.
#' @param pfx character prefix to prepend to variable names
#' @param coef numeric coefficient to multiply performance values by. Passed in
#' by the Scorecard model object. Defaults to 1.
#' @param method method used for calculating the reference level for adverse
#' action codes. Three possible choices:
#' \itemize{
#' \item{"min" }{Calculate difference from minimum of perf values - default}
#' \item{"max" }{Calculate difference from maximum of perf values}
#' \item{"neutral" }{Calculate difference from zero}#'
#' }
#' @param i numeric value enumerating the variables. Passed in from other code.
#' @return a character vector of SAS code
NULL
Continuous$methods(gen_code_sas = function(pfx="", coef=1, method="min", i=1, ...) {
val <- tail(head(tf@tf, -1), -1)
p <- tf@subst * coef
ref <- switch(method,"min" = min(p), "max" = max(p), "neutral" = 0, min(p))
E <- names(tf@exceptions)
m <- if (length(tf@nas) == 0) 0 else tf@nas * coef
e <- if (length(tf@exceptions) == 0) 0 else tf@exceptions * coef
## WoE Substitution
c(sprintf("\n/*** %s ***/", name),
sprintf("if missing(%s)\n then %s_V%02d_w = %s;", name, pfx, i, m),
sprintf("else if %s = %s\n then %s_V%02d_w = %s;", name, E, pfx, i, e),
sprintf("else if %s <= %s\n then %s_V%02d_w = %s;", name, val, pfx, i,
head(p, -1)),
sprintf("else %s_V%02d_w = %s;" , pfx, i, tail(p, 1)),
## Reason Codes
sprintf("\nif missing(%s)\n then %s_AA_code_%02d = \"&%s_AA_%02d\";",
name, pfx, i, pfx, i),
sprintf("else if %s = %s\n then %s_AA_code_%02d = \"&%s_AA_%02d\";",
name, E, pfx, i, pfx, i),
sprintf("else if %s <= %s\n then %s_AA_code_%02d = \"&%s_AA_%02d\";",
name, val, pfx, i, pfx, i),
sprintf("else %s_AA_code_%02d = \"&%s_AA_%02d\";", pfx, i, pfx, i),
## Distance Calculations
sprintf("\n%s_AA_dist_%02d = %s - %s_V%02d_w;", pfx, i, ref, pfx, i))
})
#' Set monotoncity for Continuous bins
#'
#' @name Continuous_mono
#' @param m the monotonic relationship to enforce.
#' @description \code{mono} calls the \code{bin} function with the requested
#' monotoncity. The variable is discretized while enforcing the monotonicity.
#' The possible values are:
#' \itemize{
#' \item{0 }{ No monotoncity enforced - the default.}
#' \item{1 }{ Increasing monotoncically with the \code{y}}
#' \item{-1 } {Decreasing monotoncically with the \code{y}}
#' \item{2 }{ Either increasing or decreasing montonically with the \code{y}}
#' }
NULL
Continuous$methods(mono = function(m) {
args$mono <<- m
do.call(perf$bin, c(list(b=.self), args))
update()
})
#' Set the exception values for Continuous bins
#'
#' @name Continuous_exceptions
#' @param e numeric vector of exception values to withhold from binning
NULL
Continuous$methods(exceptions = function(e) {
stopifnot(is(e, "numeric"))
args$exceptions <<- e
tf@exceptions <<- setNames(rep(0, length(e)), e)
update()
})
#' Explicity set bin boundaries for Continuous objects
#'
#' @name Continuous_set_cutpoints
#' @param cuts space-separated list of numeric cutpoints
#' @description \code{set_equal} sets the performance summary value of \code{i1}
#' equal to that of \code{i2}. This can be used to force two bins to have the
#' same substituted value.
NULL
Continuous$methods(set_cutpoints = function(cuts) {
cuts <- sort(unique(c(-Inf, cuts, Inf)))
tf@tf <<- cuts
update()
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.