#' Extract a value for optimal number of bands.
#'
#' @param yVar A logical vector of dependent variable.
#' @param xVar A factor or logical vector of independent variable.
#' @param eps A number from 1 to 2 which manage how many cluster function returns.
#' @param distanceMethod A character, check ?dist.
#' @param clustMethod A character, check ?hclust.
#' @return A number of optimal bands.
#' @examples
#' data(lendclub)
#' getOptimalK(lendclub$loan_status, lendclub$purpose)
#' getOptimalK(lendclub$loan_status, lendclub$purpose, eps = 1.01)
#' getOptimalK(lendclub$loan_status, lendclub$purpose, eps = 4)
#' @export
#' @importFrom stats cutree
getOptimalK <- function (yVar, xVar, eps = 1.05, distanceMethod = "euclidean",
clustMethod = "ward.D2"){
# checking -----------------------------------
if(!is.logical(yVar)){
stop("yVar must be logical")
}
if(!is.factor(xVar) && !is.logical(xVar)){
stop("xVar must be factor or logical")
}
if(!is.numeric(eps)){
stop("eps must be a number")
}
# exit if IV too low -----------------
iv <- getIV(yVar, xVar)
if(sum(iv[,"iv"]) < 0.01){
return (1)
}
# calc ---------------------------------------
n <- length(unique(xVar)) - any(is.na(xVar))
badrate <- getBadRate(yVar, xVar)
hc <- getHclustObj(badrate, distanceMethod, clustMethod)
# recursive function for fiding optimal k fold ------------------------------
getOptimalKRec <- function (hc, k = 2, ivOld = 0){
hcGroup <- cutree(hc, k)
xVar2 <- as.factor(hcGroup[match(xVar,names(hcGroup))])
ivNew <- sum(getIV(yVar, xVar2)[,"iv"])
if(k>=n){return(c(k, ivNew))}
if(ivNew > ivOld * eps){
getOptimalKRec(hc, k+1, ivNew)}else{
c(k-1, ivOld)
}
}
k <- getOptimalKRec (hc)
if(k[2] < 0.01){ #when IV for banding very low then no sens to make a split
return (1)
}
k[1]
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.