#' Discretize a variable
#'
#' A wrapper for base-r's cut() with some improved features.
#' @param x (numeric vector) A vector of datapoints to discretize.
#' @param breaks (integer scalar or numeric vector) The number of levels to create, or a numeric vector of values to use as cut-offs.
#' @param equal_range (logical scalar) Whether to use equal ranges. If false, will use equal sizes. Defaults to TRUE.
#' @param labels (character scalar) Which labels to use. By default, it will use the intervals. Can also be "midpoint", which uses the interval midpoints, "numbers" will use integers.
#' @param include_end (logical scalar) Whether to include datapoints equal to the last break value. Default=TRUE.
#' @param right (logical scalar) Whether to use intervals that are closed on the right. Default=TRUE.
#' @param ordered_factor (logical scalar) Should the result be an ordered factor? Default=FALSE.
#' @param ... (additional arguments) Any other arguments passed to cut().
#' @export
#' @examples
#' x = discretize(rnorm(100), 5)
#' hist(x)
#' y = discretize(rnorm(100), 5, equal_range = F)
#' hist(y)
discretize = function(x, breaks, equal_range = T, labels = "interval", include_end = T, right = T, ordered_factor = F, ...) {
#init
midpoints = F
#handle breaks
if (length(breaks) == 1) {
#equal sizes
if (!equal_range) {
breaks = quantile(x, probs = seq(0, 1, length.out = breaks+1), na.rm = T)
}
}
#figure out input
labels = match.arg(labels, choices = c("interval", "midpoint", "integer", "number"))
if (labels == "number") labels = "integer"
#switch
if (labels == "interval") {
labels = NULL #the default for cut
} else if (labels == "integer") {
labels = F
} else if (labels == "midpoint") {
labels = NULL #the default for cut, then fix afterwards
midpoints = T
}
#cut
y = cut(x = x, breaks = breaks, labels = labels, include.lowest = include_end, right = right, ordered_result = ordered_factor, ...)
#fix labels
if (midpoints) {
#get the values from the intervals
interval_vals = stringr::str_match_all(string = levels(y), pattern = "[\\d\\.]+")
#get the midpoints
interval_mids = purrr::map_dbl(interval_vals, ~mean(as.numeric(.)))
#replace
y = plyr::mapvalues(y, from = levels(y), to = interval_mids)
}
y
}
#' Calculate mean absolute difference for values in a vector
#'
#' Calculates all the pairwise absolute differences, then averages them.
#' @param x (numeric vector) A vector of values.
#' @param na.rm (logical scalar) Whether to ignore missing values. Default=TRUE.
#' @export
#' @examples
#' mean_abs_diff(iris[[1]])
mean_abs_diff = function(x, na.rm = T) {
dist(unlist(as.vector(x))) %>% as.vector() %>% mean(., na.rm = na.rm)
}
#' Exclude missing datapoints
#'
#' Exclude datapoints that are NA, NULL or NaN.
#'
#' Does not remove NA etc. recursively. See the complex list example.
#' @param x (an interatable object) An object to subset based on missingness.
#' @param .NA (logical scalar) Whether to exclude NA (default TRUE).
#' @param .NULL (logical scalar) Whether to exclude NULL (default TRUE).
#' @param .NaN (logical scalar) Whether to exclude NaN (default TRUE).
#' @export
#' @examples
#' x = list(1, NA, 2, NULL, 3, NaN, 4, Inf)
#' exclude_missing(x)
#' exclude_missing(x, .NA = F)
#' exclude_missing(x, .NULL = F)
#' exclude_missing(x, .NaN = F)
#' exclude_missing(x, .Inf = F)
#' #complex list
#' x = list(1, NA, 2, NULL, 3, NaN, 4, Inf, 1:3, c(1, NA, 3))
#' exclude_missing(x) #does not remove NAs recursively
exclude_missing = function(x, .NA = T, .NULL. = T, .NaN = T, .Inf = T) {
#check empty
if (length(x) == 0) return(x)
#NULL
if (.NULL.) x = x[!sapply(x, is.null)]
#check empty
if (length(x) == 0) return(x)
#NA
if (.NA) x = x[sapply(x, function(y) {
if (is.null(y)) return(T)
if (!is_simple_vector(y)) return(T) #the functions below fail on list objects
if (length(y) != 1) return(T) #if length isn't 1, the below functions give errors
if (is.infinite(y)) return(T)
if (is.nan(y)) return(T)
!is.na(y)
})]
#check empty
if (length(x) == 0) return(x)
#NaN
if (.NaN) x = x[sapply(x, function(y) {
if (is.null(y)) return(T)
if (!is_simple_vector(y)) return(T) #the functions below fail on list objects
if (length(y) != 1) return(T) #if length isn't 1, the below functions give errors
!is.nan(y)
})]
#check empty
if (length(x) == 0) return(x)
#Inf
if (.Inf) x = x[sapply(x, function(y) {
if (is.null(y)) return(T)
if (!is_simple_vector(y)) return(T) #the functions below fail on list objects
if (length(y) != 1) return(T) #if length isn't 1, the below functions give errors
if (is.na(y)) return(T)
is.finite(y)
})]
x
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.