# R/detect_outlier.R

#' Find outlier values
#'
#' Labels every element of a numeric vector as lying below ("Lower"), inside
#' ("Normal"), or above ("Upper") the range that the chosen detection method
#' treats as normal.
#'
#' ref: https://www.r-bloggers.com/outlier-detection-and-treatment-with-r/
#'
#' @param vec Numeric vector to examine. Missing values are ignored when the
#'   bounds are computed and stay \code{NA} in the result.
#' @param type Detection method, matched case-insensitively. One of
#'   \code{"iqr"} (beyond 1.5 * IQR outside the 1st/3rd quartiles),
#'   \code{"percentile"} (outside the \code{1 - threshold} and
#'   \code{threshold} quantiles), or \code{"standard_deviation"} (more than
#'   \code{threshold} standard deviations away from the mean).
#' @param threshold Single number tuning the method; ignored for
#'   \code{"iqr"}. For \code{"percentile"} it must lie strictly between 0 and
#'   1 (default 0.95); a value below 0.5 is mirrored to \code{1 - threshold}.
#'   For \code{"standard_deviation"} it must be positive (default 2).
#' @return An ordered factor with levels \code{Lower < Normal < Upper} and
#'   the same length as \code{vec}.
#' @examples
#' detect_outlier(c(1, 2, 3, 4, 5, 100))
#' detect_outlier(1:100, type = "percentile", threshold = 0.9)
#' @export
detect_outlier <- function(vec, type = "iqr", threshold = NULL) {
  # Start with every non-missing value labelled "Normal"; NAs stay NA.
  labels <- rep("Normal", length(vec))
  labels[is.na(vec)] <- NA_character_
  ret <- factor(labels,
                levels = c("Lower", "Normal", "Upper"),
                ordered = TRUE)

  type <- tolower(type)

  # Return the caller's threshold, or `default` when none was supplied.
  # Rejecting NA / non-numeric / non-scalar values here gives a readable
  # error instead of a failure inside the `if` conditions further down.
  resolve_threshold <- function(default) {
    if (is.null(threshold)) {
      return(default)
    }
    if (!is.numeric(threshold) || length(threshold) != 1L ||
        is.na(threshold)) {
      stop("threshold must be a single non-missing number")
    }
    threshold
  }

  # Each method yields the lower and upper cut-offs of the "normal" range.
  bounds <- switch(
    type,
    iqr = {
      # Default quantile() output is the 0%, 25%, 50%, 75%, 100% points.
      q <- quantile(vec, na.rm = TRUE)
      # Interquartile range: distance between the 75% and 25% points.
      iqr_width <- q[[4]] - q[[2]]
      list(lower = q[[2]] - 1.5 * iqr_width,
           upper = q[[4]] + 1.5 * iqr_width)
    },
    percentile = {
      cutoff <- resolve_threshold(0.95)
      if (cutoff <= 0 || cutoff >= 1) {
        stop("threshold must be between 0 and 1")
      }
      # A value such as 0.05 means the same band as 0.95.
      if (cutoff < 0.5) {
        cutoff <- 1 - cutoff
      }
      q <- quantile(vec, probs = c(1 - cutoff, cutoff), na.rm = TRUE)
      list(lower = q[[1]], upper = q[[2]])
    },
    standard_deviation = {
      n_sd <- resolve_threshold(2)
      if (n_sd <= 0) {
        stop("threshold must be larger than 0")
      }
      m <- mean(vec, na.rm = TRUE)
      s <- sd(vec, na.rm = TRUE)
      list(lower = m - n_sd * s, upper = m + n_sd * s)
    },
    # An unknown method used to fall through and report no outliers at all.
    stop("type must be one of \"iqr\", \"percentile\", ",
         "or \"standard_deviation\"")
  )

  # which() drops NA comparisons, so missing values keep their NA label.
  ret[which(vec < bounds$lower)] <- "Lower"
  ret[which(vec > bounds$upper)] <- "Upper"
  ret
}
# exploratory-io/exploratory_func documentation built on April 23, 2024, 9:15 p.m.