Nothing
#' Winsorize a numeric vector
#'
#' @param x A vector of values
#' @param cutpoints Cutpoints under and above which are defined outliers. Default is (median - five times interquartile range, median + five times interquartile range). Compared to bottom and top percentile, this takes into account the whole distribution of the vector.
#' @param probs A vector of probabilities that can be used instead of cutpoints. Quantiles are computed as the inverse of the empirical distribution function (type = 1)
#' @param replace Values by which outliers are replaced. Default to cutpoints. A frequent alternative is NA.
#' @param verbose Boolean. Should the percentage of replaced values printed?
#' @examples
#' v <- c(1:4, 99)
#' winsorize(v)
#' winsorize(v, replace = NA)
#' winsorize(v, probs = c(0.01, 0.99))
#' winsorize(v, cutpoints = c(1, 50))
#' @export
winsorize <- function(x, probs = NULL, cutpoints = NULL , replace = c(cutpoints[1], cutpoints[2]), verbose = TRUE){
dummy = is.integer(x)
if (!is.null(probs)){
stopifnot(is.null(cutpoints))
stopifnot(length(probs)==2)
cutpoints <- stats::quantile(x, probs, type = 1, na.rm = TRUE)
} else if (is.null(cutpoints)){
l <- stats::quantile(x, c(0.25, 0.50, 0.75), type = 1, na.rm = TRUE)
cutpoints <- c(l[2]-5*(l[3]-l[1]), l[2]+5*(l[3]-l[1]))
} else{
stopifnot(length(cutpoints)==2)
}
if (is.integer(x)) cutpoints <- round(cutpoints)
bottom <- x < cutpoints[1]
top <- x > cutpoints[2]
if (verbose){
length <- length(x)
message(paste(sprintf("%3.2f", 100*sum(bottom, na.rm = TRUE)/length),"% observations replaced at the bottom"))
message(paste(sprintf("%3.2f", 100*sum(top, na.rm = TRUE)/length),"% observations replaced at the top"))
}
x[bottom] <- replace[1]
x[top] <- replace[2]
if (dummy){
x <- as.integer(x)
}
x
}
#' @export
#' @rdname winsorize
winsorise <- winsorize
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.