#' Code WOE value for each band of variable.
#'
#' Replaces every element of \code{xVar} with the WOE (weight of evidence)
#' value of the band it belongs to. The band of a value is looked up by name
#' in \code{x$hcGroup}, and the WOE of that band is looked up by name in
#' \code{x$woe}.
#'
#' @details
#' Infinite WOE values are capped: \code{-Inf} is replaced with \code{-10} and
#' \code{Inf} with \code{10}. When \code{x$woe} holds a single value, or every
#' WOE value equals 1, a constant unnamed vector of 1s is returned instead of
#' a lookup. Elements of \code{xVar} that are not found among
#' \code{names(x$hcGroup)} are coded as \code{NA}.
#'
#' @param x An object of Clusterrr class (the examples obtain one from
#'   \code{doClustering}); its \code{woe} and \code{hcGroup} elements are used.
#' @param xVar A vector of variable to match with WOE
#' @return Vector with coded WOE values, one per element of \code{xVar}. In
#'   the lookup case the vector is named with the matched entries of
#'   \code{names(x$hcGroup)}.
#' @examples
#' data(lendclub)
#' x <- doClustering(lendclub, "grade", "loan_status")
#' codeWOE(x, lendclub$grade)
#' # How to build a fast model using the package?
#' # Assuming 2 characteristics were chosen to build a logistic regression model:
#' x <- doClustering(lendclub, "purpose", "loan_status")
#' purposeCoded <- codeWOE(x, lendclub$purpose)
#' x <- doClustering(lendclub, "grade", "loan_status")
#' gradeCoded <- codeWOE(x, lendclub$grade)
#' dt <- data.frame(y = lendclub$loan_status,
#' x1 = purposeCoded,
#' x2 = gradeCoded)
#' # Divide the population into 2 disjoint sets for learning and validation:
#' dt.train <- dt[1:500000,]
#' dt.test <- dt[500001:nrow(dt),]
#' # Build a fast model:
#' model <- glm(data = dt.train, formula = y ~ x1 + x2, family = "binomial")
#' dt.test$prediction <- predict(model, dt.test, type = "response")
#' # Check the strength of fit:
#' #pROC::auc(ifelse(dt.test$y,1,0), dt.test$prediction ) * 2 - 1 #gini
#' #pROC::plot.roc(ifelse(dt.test$y,1,0), dt.test$prediction ) # ROC curve
#' @export
codeWOE <- function(x, xVar) {
  # inherits() stays a scalar test even when the object carries several
  # classes; comparing class(x) with != would yield a vector condition.
  if (!inherits(x, "Clusterrr")) {
    stop("needs Clusterrr object")
  }

  woe <- x$woe
  hcGroup <- x$hcGroup

  # Band of every observation; values with no matching name become NA.
  bandOfObs <- hcGroup[match(xVar, names(hcGroup))]

  # If the WOE is too extreme in a band, assume it is equal to +/-10.
  woe[is.infinite(woe) & woe < 0] <- -10
  woe[is.infinite(woe) & woe > 0] <- 10

  # isTRUE() keeps the condition a definite TRUE/FALSE when woe contains NA.
  if (length(woe) == 1 || isTRUE(all(woe == 1))) {
    woeVect <- rep(1, length(xVar))
  } else {
    # Band identifiers are matched against the names of the WOE vector.
    woeVect <- woe[match(as.character(bandOfObs), names(woe))]
    names(woeVect) <- names(bandOfObs)
  }
  woeVect
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.