R/bagplot.R

Defines functions gg_bagplot

Documented in gg_bagplot

#' @title Bagplot
#' @description Produces a bivariate bagplot. A bagplot is analagous to a
#' univariate boxplot, except it is in two dimensions. Like a boxplot, it
#' shows the median, a region containing 50% of the observations, a region
#' showing the remaining observations other than outliers, and any outliers.
#'
#' @param data A data frame or matrix containing the data.
#' @param var1 The name of the first variable to plot (a bare expression).
#' @param var2 The name of the second variable to plot (a bare expression).
#' @param scatterplot A logical argument indicating if a regular bagplot is required
#' (\code{FALSE}), or if a scatterplot in the same colors is required (\code{TRUE}).
#' @param col The colors to use in the order: median, bag, loop and outliers.
#' @param ... Other arguments are passed to the \code{\link[aplpack]{compute.bagplot}} function.
#' @return A ggplot object showing a bagplot or scatterplot of the data.
#' @author Rob J Hyndman
#' @references Rousseeuw, P. J., Ruts, I., & Tukey, J. W. (1999).
#'   The bagplot: A bivariate boxplot. \emph{The American Statistician}, \bold{52}(4), 382–387.
#' @examples
#' gg_bagplot(n01, v1, v2)
#' gg_bagplot(n01, v1, v2, scatterplot = TRUE)
#' @rdname bagplot
#' @seealso
#'  \code{\link[aplpack]{bagplot}}
#' @importFrom aplpack compute.bagplot
#' @importFrom ggplot2 geom_polygon geom_point ggplot aes
#' @importFrom dplyr select filter
#' @export

gg_bagplot <- function(data, var1, var2,
  col = c(hdr_palette(color = "#00659e", prob = c(0.5, 0.99)), "#000000"),
  scatterplot = FALSE, ...) {
  data <- data |> select({{ var1 }}, {{ var2 }})
  bp <- aplpack::compute.bagplot(as.matrix(data), na.rm = TRUE, approx.limit = 1000, ...)
  cn <- colnames(data)
  p <- data |>
    ggplot(aes(x = {{ var1 }}, y = {{ var2 }}))
  if (scatterplot) {
    # Bag points
    if (!is.null(bp$pxy.bag)) {
      p <- p + geom_point(aes(x = {{ var1 }}, y = {{ var2 }}),
        data = as.data.frame(bp$pxy.bag), color = col[2]
      )
    }
    # Loop points
    if (!is.null(bp$pxy.outer)) {
      p <- p + geom_point(aes(x = {{ var1 }}, y = {{ var2 }}),
        data = as.data.frame(bp$pxy.outer), color = col[3]
      )
    }
    # Deepest point
    colnames(bp$xy) <- cn
    deep <- bp$xy |>
      as.data.frame() |>
      dplyr::filter(bp$hdepths == max(bp$hdepths))
    p <- p + geom_point(aes(x = {{ var1 }}, y = {{ var2 }}),
      data = deep, color = col[1]
    )
  } else {
    loop <- as.data.frame(bp$hull.loop)
    bag <- as.data.frame(bp$hull.bag)
    # Show loop polygon
    if (!is.null(loop)) {
      colnames(loop) <- cn
      p <- p + geom_polygon(aes(x = {{ var1 }}, y = {{ var2 }}), data = loop, fill = col[3])
    }
    # Show bag polygon
    if (!is.null(bag)) {
      colnames(bag) <- cn
      p <- p + geom_polygon(aes(x = {{ var1 }}, y = {{ var2 }}), data = bag, fill = col[2])
    }
  }
  if (!is.null(bp$pxy.outlier)) {
    outliers <- as.data.frame(as.matrix(bp$pxy.outlier))
    colnames(outliers) <- cn
    p <- p + geom_point(aes(x = {{ var1 }}, y = {{ var2 }}), data = outliers, col = col[4])
  }
  if (!scatterplot) {
    # Show median
    p <- p + geom_point(aes(x = bp$center[1], y = bp$center[2]), col = col[1], size = 2)
  }
  return(p)
}

Try the weird package in your browser

Any scripts or data that you put into this service are public.

weird documentation built on May 29, 2024, 1:24 a.m.