bin.R
In ldt: Automated Uncertainty Analysis

## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----setup--------------------------------------------------------------------
library(ldt)

## -----------------------------------------------------------------------------
data <- cbind(data.berka$y, data.berka$x[,1:7])

## ----datatail-----------------------------------------------------------------
tail(data)

## ----datasummary--------------------------------------------------------------
sapply(as.data.frame(data), summary)

## ----columndesc, echo=FALSE, results='asis'-----------------------------------
for (c in colnames(data)){
  cat(paste0("- ", c, ": ", data.berka$descriptions[which(sapply(data.berka$descriptions,function(d)d$id==c))][[1]]$description), "\n\n")
}

## ----search-------------------------------------------------------------------

search_res <- search.bin(data = get.data(data, endogenous = 1, weights = data.berka$w),
                         combinations = get.combinations(sizes = c(1,2,3),
                                                         numTargets = 1),
                         metric <- get.search.metrics(typesIn = c(), 
                                                      typesOut = c("auc"),
                                                      simFixSize = 20,
                                                      trainRatio = 0.8,
                                                      seed = 123),
                         items = get.search.items(bestK = 0,
                                                  inclusion = TRUE, 
                                                  type1 = TRUE, 
                                                  mixture4 = TRUE))
print(search_res)

## ----summary------------------------------------------------------------------
search_sum <- summary(search_res)

## ----prepareplot--------------------------------------------------------------

inclusion_mat <- search_res$results[sapply(search_res$results, function(a)a$typeName == "inclusion")][[1]]$value
inclusion_mat <- inclusion_mat[!(rownames(inclusion_mat) %in% c("label", "(Intercept)")), ]
sorted_inclusion_mat <- inclusion_mat[order(inclusion_mat[,1], decreasing = TRUE),]
selected_vars <- rownames(sorted_inclusion_mat)[1:4]

mixture_mat <- search_sum$results[sapply(search_sum$results, function(a)a$typeName == "mixture")][[1]]$value
moments <- lapply(selected_vars, function(v)mixture_mat[rownames(mixture_mat)==v,])
gld_parms <- lapply(moments, function(c) s.gld.from.moments(c[[1]], c[[2]],
                        c[[3]], c[[4]],
                        start = c(0.25, 0.25),
                        type = 4,
                        nelderMeadOptions = get.options.neldermead(100000,1e-6)))


## ----plot, fig.height=4, fig.subcap=selected_vars, fig.cap="Determinants of Loan Default: Variables with the highest inclusion weights, automatically selected. Each plot presents a combined distribution of all estimated coefficients, estimated by generalized lambda distribution.", out.width='.40\\linewidth', fig.ncol=2----
 
# plots
probs <- seq(0.01,0.99,0.01)
i <- 0
for (gld in gld_parms){
  i <- i + 1
  x <- s.gld.quantile(probs, gld[1],gld[2],gld[3],gld[4])
  y <- s.gld.density.quantile(probs, gld[1],gld[2],gld[3],gld[4])

  plot(x, y, type = "l", xaxt = "n", xlab = NA, ylab = NA, col = "blue", lwd = 2, 
       main = selected_vars[i])
  lower <- x[abs(probs - 0.05)<1e-10]
  upper <- x[abs(probs - 0.95)<1e-10] 
  axis(1, at = c(lower, 0, upper), labels = c(round(lower, 2), 0, round(upper, 2)))
  xleft <- x[x <= lower]
  xright <- x[x >= upper]
  yleft <- y[x <= lower]
  yright <- y[x >= upper]
  polygon(c(min(x), xleft, lower), c(0, yleft, 0), col = "gray", density = 30, angle = 45)
  polygon(c(max(x), upper, xright), c(0, 0, yright), col = "gray", density = 30, angle = -45)
  text(mean(x), mean(y), "90%", col = "gray")
}