# utility functions for binning numerical data
# written by Kenji Kondo @ 2019/1/1
# Inner function used in executeBinning
isBinningFeasible <- function(x_vec) {
return(is.integer(x_vec) | is.numeric(x_vec) | is.ordered(x_vec))
}
#' Create bins by Equal Width Binning
#'
#' @param left left value of the original interval.
#' @param right right value of the original interval.
#' @param nbin number of bins to be created.
#'
#' @return a numeric vector which indicates the boundaries of bins, with `nbin` elements.
#'
#' @export
#' @importFrom assertthat assert_that
createEqualWidthBins <- function(left, right, nbin){
nbin <- as.integer(nbin)
assert_that(length(nbin) == 1 & nbin > 1)
breaks <- (0:(nbin - 1)) * ((right - left) / (nbin - 1)) + left
return(breaks)
}
#' Create bins by Equal Freq Binning
#'
#' @param x_vec A reference integer or numeric or ordered vector to be binned.
#' @param nbin.max An integer value which indicates the maximum counts of bins.
#' Note that this function makes `min(nbin.max, length(x_vec))` counts of bins.
#'
#' @return a numeric vector which indicates the boundaries of bins, with `nbin` elements.
#'
#' @export
#' @importFrom assertthat assert_that
createEqualFreqBins <- function(x_vec, nbin.max) {
nbin.max <- as.integer(nbin.max)
assert_that(nbin.max > 0 & length(x_vec) > 0)
nbin <- min(nbin.max, length(x_vec))
percents <- seq(0, 1, 1 / (nbin - 1))
if (is.ordered(x_vec)) {
breaks <- unique(quantile(x_vec, percents, type=1))
names(breaks) <- NULL
} else { # integer or numeric cases
breaks <- unique(as.numeric(quantile(x_vec, percents)))
}
return(breaks)
}
#' Execute binning for numerical data.
#'
#' @param x_vec an integer or numeric or ordered vector, to be binned.
#' @param breaks a numeric vector which indicates the boundaries of bins, of length (number of bins - 1).
#' If NULL is set, bins are generated using the method which is specified by the `method` argument.
#' @param nbin.max a maximum number of bins which is generated by this function. Only used when `breaks` is not set.
#' @param method used for specifying binning method. "freq": equal freq binning (default), "width": equal width binning.
#' Ignore if `breaks` is set.
#'
#' @return a list which has two members `labels` and `breaks`.
#' * `labels`: an integer vector of `length(x_vec)`.
#' `(labels[i]==k)` indicates the i-th element of x_vec is in the k-th bin.
#' * `breaks`: a numeric vector which indicates the boundaries of bins, of length (number of bins - 1).
#'
#' @export
#' @importFrom assertthat assert_that
executeBinning <- function(x_vec, breaks=NULL, nbin.max=100, method="freq") {
# Check arguments
assert_that(isBinningFeasible(x_vec))
# If breaks is NULL, generate bins by self.
if (is.null(breaks)) {
left <- min(x_vec)
right <- max(x_vec)
if (method == "freq") {
breaks <- createEqualFreqBins(x_vec, nbin.max)
} else if (method == "width") {
breaks <- createEqualWidthBins(left, right, nbin.max)
} else {
assert_that(FALSE, msg="wrong 'method' argument.")
}
}
if (is.ordered(x_vec)) {
labels <- as.integer(rep(1, length(x_vec)))
for (i in seq(length(breaks))) {
labels[x_vec >= breaks[i]] <- i + 1
}
} else { # integer or numeric cases
# Calc labels for each element
labels <- cut(x_vec, breaks=c(-Inf, breaks, Inf), labels=FALSE, right=FALSE)
}
return(list(labels=labels, breaks=breaks))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.