#' Transform a quantitative variable into a qualitative variable
#'
#' This function transforms a quantitative variable into a qualitative
#' one by breaking it into classes with (approximately) the same frequencies.
#'
#' @param var numeric variable to transform
#' @param nbclass number of classes (a single number, at least 1)
#' @param include.lowest argument passed to the \code{cut} function
#' @param right argument passed to the \code{cut} function
#' @param dig.lab argument passed to the \code{cut} function
#' @param ... arguments passed to the \code{cut} function
#' @details
#' This is just a simple wrapper around the \code{cut} and \code{quantile}
#' functions. The class boundaries are the \code{nbclass + 1} evenly spaced
#' quantiles of \code{var}, computed after removing missing values.
#' Duplicated quantiles are dropped, so a variable with many ties can yield
#' fewer than \code{nbclass} classes.
#'
#' An error is raised when fewer than two distinct quantiles remain (for
#' example when \code{var} is constant or entirely missing), because
#' \code{cut} would otherwise interpret the single remaining value as a
#' number of intervals instead of a break point.
#' @return
#' The result is a factor.
#' @seealso
#' \code{\link{cut}}, \code{\link{quantile}}
#' @examples
#' data(iris)
#' sepal.width3cl <- quant.cut(iris$Sepal.Width, 3)
#' table(sepal.width3cl)
#' @export
`quant.cut` <-
function(var, nbclass, include.lowest = TRUE, right = FALSE, dig.lab = 5, ...) {
  if (!is.numeric(nbclass) || length(nbclass) != 1L || is.na(nbclass) ||
    nbclass < 1) {
    stop(
      "'nbclass' must be a single number greater than or equal to 1.",
      call. = FALSE
    )
  }
  # Evenly spaced probabilities give classes of (roughly) equal frequency.
  probs <- seq(0, 1, length.out = nbclass + 1)
  # Ties in the data can produce identical quantiles; cut() rejects
  # duplicated breaks, so keep only the distinct values.
  breaks <- unique(stats::quantile(var, probs = probs, na.rm = TRUE))
  # A length-one `breaks` is read by cut() as a *number of intervals*,
  # which would silently produce meaningless classes. Fail loudly instead.
  if (length(breaks) < 2L) {
    stop(
      "'var' has fewer than two distinct quantile values; ",
      "cannot cut it into classes.",
      call. = FALSE
    )
  }
  cut(
    var,
    breaks = breaks,
    dig.lab = dig.lab,
    right = right,
    include.lowest = include.lowest,
    ...
  )
}
#' Recode values of a variable to missing values, using exact or regular
#' expression matching.
#'
#' This function recodes selected values of a quantitative or qualitative
#' variable by matching its levels to exact or regular expression matches.
#'
#' @param x variable to recode. The variable is coerced to a factor if
#'   necessary.
#' @param ... levels to recode as missing in the variable. The values are
#'   coerced to character strings, meaning that you can pass numeric values
#'   to the function.
#' @param verbose print a table of missing levels before recoding them as
#'   missing. Defaults to \code{FALSE}.
#' @param regex use regular expressions to match values that include the
#'   "*" or "|" wildcards. Any "*" is removed from the value and the rest is
#'   used as a pattern matched anywhere in a level. Values without these
#'   characters, and all values when \code{regex = FALSE}, are matched
#'   exactly. Defaults to \code{TRUE}.
#' @param as.numeric coerce the recoded variable to \code{numeric}, using
#'   the numeric value of each level label. Labels that cannot be parsed as
#'   numbers become \code{NA} with a warning. The function recommends the
#'   option when the recode returns only numeric values. Defaults to
#'   \code{FALSE}.
#' @return
#' The result is a factor with properly encoded missing values, from which
#' the recoded levels have been removed. If \code{as.numeric = TRUE}, the
#' result is a \code{numeric} vector instead.
#' @seealso
#' \code{\link{regex}}
#' @author François Briatte <f.briatte@@gmail.com>
#' @examples
#' data(hdv2003)
#' ## With exact string matches.
#' hdv2003$nivetud <- recode.na(hdv2003$nivetud, "Inconnu")
#' ## With regular expressions.
#' hdv2003$relig <- recode.na(hdv2003$relig, "[A|a]ppartenance", "Rejet|NSP")
#' ## Showing missing values.
#' hdv2003$clso <- recode.na(hdv2003$clso, "Ne sait pas", verbose = TRUE)
#' ## Test results with freq.
#' freq(recode.na(hdv2003$trav.satisf, "Equilibre"))
#' ## Truncate a count variable (recommends numeric conversion).
#' freq(recode.na(hdv2003$freres.soeurs, 5:22))
#' @export recode.na
recode.na <- function(x, ..., verbose = FALSE, regex = TRUE, as.numeric = FALSE) {
  if (!is.factor(x)) x <- factor(x)
  lev <- levels(x)
  targets <- as.character(c(...))

  # Every target is tried as an exact level name ...
  matched <- lev %in% targets
  # ... and targets containing "*" or "|" are additionally used as regular
  # expressions (with "*" stripped) when `regex` is enabled.
  if (regex) {
    for (pattern in targets[grepl("\\*|\\|", targets)]) {
      matched <- matched | grepl(gsub("\\*", "", pattern), lev)
    }
  }
  to_drop <- lev[matched]

  # Observations hit by the recode; factor() drops levels with no hits so
  # the count table only lists levels that are actually present.
  hits <- factor(x[x %in% to_drop])
  counts <- matrix(table(hits))
  rownames(counts) <- levels(hits)
  colnames(counts) <- "n"
  n_recoded <- sum(counts)

  message("Recoded ", n_recoded, " values to NA.")
  if (n_recoded > 0) {
    if (verbose) print(counts)
    # Blank out the matched observations and remove their levels.
    x[x %in% to_drop] <- NA
    x <- factor(x, levels = setdiff(lev, levels(hits)))
  }

  # Detect whether every remaining level parses as a number. The argument
  # `as.numeric` shadows the function name, hence the explicit `base::`.
  remaining <- levels(x)
  looks_numeric <- length(remaining) > 0L &&
    !anyNA(suppressWarnings(base::as.numeric(remaining)))

  if (as.numeric) {
    # Convert through the labels: as.numeric() applied directly to a factor
    # returns the internal integer codes, not the values.
    x <- base::as.numeric(as.character(x))
  } else if (looks_numeric) {
    message(
      "Recoded variable contains only numeric characters. ",
      "Consider using as.numeric = TRUE."
    )
  }
  x
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.