#' Merge specific levels of a factor
#'
#' This function allows the user to programatically merge levels of a factor
#' together to create aggregate levels. This is designed to work for data that
#' has been discretized by, e.g. \code{\link[modellingTools]{vector_bin}},
#' but will work for arbitrary factors. In the case of factors with numeric
#' levels, the user can specify to merge the names of the old levels when
#' naming the new level
#' @param var Vector for which \code{\link[base]{is.factor}} returns
#' \code{TRUE}
#' @param lv Character vector containing 2 or more levels of \code{var} to
#' merge.
#' @param merge_names Logical. Should names of numeric factors be merged?
#' Default is \code{FALSE}.
#' @return A new factor level corresponding to \code{var}, with values that were
#' in \code{lv} replaced with a new factor level. The name of thre level depends
#' on \code{merge_names}.
#' @details
#' When not merging names, if levels \code{A}, \code{B}, and \code{C} are to be
#' merged, the resulting level will be \code{AxBxC}. If the factor levels
#' contain numbers, specifying \code{merge_names = TRUE} will cause
#' \code{merge_levels} to call \code{\link[modellingTools]{get_vector_cutpoints}}
#' on the new level, and then use only the minimum and maximum cutpoints- this
#' will result in the merge of \code{[1,2]}, \code{[2,3]}, and \code{[3,4]}
#' being named \code{[1,4]} instead of \code{[1,2]x[2,3]x[3,4]}. While this
#' seems like obviously what one would want the default behaviour to be, the
#' user must specify this explicitly to avoid unexpected results.
#' @family utility functions
#' @seealso woe_single
#' @examples
#' x <- factor(1:10)
#' merge_levels(x,c(1,2,3))
#' merge_levels(x,c(1,2,3),merge_names = TRUE)
#'
#' y <- modellingTools::vector_bin(x,bins = 3)
#' @export
merge_levels <- function(var,lv,merge_names = FALSE) {
# Check if var is a factor
if (!is.factor(var)) {
stop(stringr::str_c("Variable is not a factor"))
}
# Check if supplied levels actually exist
levels_exist <- lv %in% levels(var)
if (!any(levels_exist)) {
stop("None of the supplied levels exist within supplied variable")
} else if (!all(levels_exist)) {
which_dont_exist <- lv[which(!levels_exist)]
warning(stringr::str_c("The following level is not present in supplied variable: ",
which_dont_exist))
lv <- lv[which(levels_exist)]
}
# Convert it to a character
var <- as.character(var)
# Make sure > 1 levels provided
if (length(lv) < 2) stop("Please provide > 1 levels to combine")
new_level <- stringr::str_c(lv[1],"x")
if (length(lv) > 2) {
for (l in 2:(length(lv) - 1)) {
new_level <- stringr::str_c(new_level,lv[l],"x")
}
}
new_level <- stringr::str_c(new_level,lv[length(lv)])
if (merge_names) {
new_bin_cutpoints <- modellingTools::get_vector_cutpoints(new_level)
new_level <- stringr::str_c("[",
min(new_bin_cutpoints),
",",
max(new_bin_cutpoints),
"]")
}
for (i in 1:length(var)) {
if (var[i] %in% lv) var[i] <- new_level
}
return(factor(var))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.