R/ggBoxplot.R

Defines functions ggBoxplot

Documented in ggBoxplot

#' Box plot using ggplot
#'
#' This function provides a simple interface to create a \code{\link{ggplot}}
#' box plot, organising different boxplots by levels of a factor if desired,
#' and showing row numbers of outliers.
#'
#' This function is based on JasonAizkalns' answer to a question on Stack
#' Overflow (see
#' \url{https://stackoverflow.com/questions/33524669/labeling-outliers-of-boxplots-in-r}).
#'
#' Outliers are the observations flagged by \code{iqrOutlier}; when \code{x}
#' is specified, they are determined separately within each level of \code{x}.
#' The labels are the row numbers (or vector positions) of the flagged
#' observations in \code{dat} as it was passed in.
#'
#' @param dat Either a numeric vector of at least five values (to display in
#' the box plot) or a dataframe containing variables to display in the box
#' plot.
#' @param y If \code{dat} is a dataframe, this is the name of the variable to
#' make the box plot of. If more than one name is passed, a warning is issued
#' and only the last one is used.
#' @param x If \code{dat} is a dataframe, this is the name of the variable
#' (normally a factor) to place on the X axis. Separate box plots will be
#' generated for each level of this variable. If the variable is not a factor,
#' it is converted to one. If more than one name is passed, a warning is
#' issued and only the last one is used.
#' @param labelOutliers Whether or not to label outliers.
#' @param outlierColor If labeling outliers, this is the color to use.
#' @param theme The theme to use for the box plot.
#' @param \dots Any additional arguments will be passed to
#' \code{\link{geom_boxplot}}.
#' @return A \code{\link{ggplot}} plot is returned.
#' @author Jason Aizkalns; implemented in this package (and tweaked a bit) by
#' Gjalt-Jorn Peters.
#'
#' Maintainer: Gjalt-Jorn Peters <gjalt-jorn@@userfriendlyscience.com>
#' @seealso \code{\link{geom_boxplot}}
#' @keywords hplot
#' @examples
#'
#' ### A box plot for miles per gallon in the mtcars dataset:
#' ggBoxplot(mtcars$mpg);
#'
#' ### And separate for each level of 'cyl' (number of cylinder):
#' ggBoxplot(mtcars, y='mpg', x='cyl');
#'
#' @export ggBoxplot
ggBoxplot <- function(dat, y = NULL, x = NULL,
                      labelOutliers = TRUE,
                      outlierColor = 'red',
                      theme = ggplot2::theme_bw(), ...) {

  ### Local helper: add the outlier labels to a plot (if requested). The
  ### 'outlier' column holds a row number for outliers and NA otherwise;
  ### the NA labels are dropped through na.rm.
  addOutlierLabels <- function(plot, labelMapping) {
    if (!labelOutliers) {
      return(plot)
    }
    plot +
      ggrepel::geom_text_repel(labelMapping,
                               na.rm = TRUE,
                               color = outlierColor)
  }

  ### Local helper: one box plot of column 'yName' in 'plotData', without a
  ### grouping variable. This helper has no '...' formal of its own, so the
  ### '...' below resolves to the arguments passed to ggBoxplot.
  singleBoxplot <- function(plotData, yName) {
    plotData$outlier <- ifelse(iqrOutlier(plotData[[yName]]),
                               seq_len(nrow(plotData)),
                               NA_real_)
    basePlot <-
      ggplot2::ggplot(plotData, ggplot2::aes_string(y = yName)) +
      ggplot2::geom_boxplot(ggplot2::aes(x = factor(yName)), ...) +
      ggplot2::xlab("") + theme +
      ggplot2::theme(axis.text.x = ggplot2::element_blank(),
                     axis.ticks.x = ggplot2::element_blank())
    addOutlierLabels(basePlot,
                     ggplot2::aes_string(x = '1', label = 'outlier'))
  }

  ### Case 1: no variable names given, so 'dat' must be a numeric vector
  if (is.null(x) && is.null(y)) {
    if (!is.numeric(dat)) {
      ### class() can return several classes; collapse them so that the
      ### message stays readable
      stop("If both arguments 'x' and 'y' are NULL, the first argument, 'dat', ",
           "should be a vector, but instead, it has class '",
           paste(class(dat), collapse = ", "),
           "'.")
    }
    if (length(dat) < 5) {
      stop("If both arguments 'x' and 'y' are NULL, the first argument, 'dat', ",
           "should be a vector of values, but it's only ", length(dat),
           " elements long, which isn't enough to generate a boxplot.")
    }
    varname <- deparse(substitute(dat))
    ### Take variable only in case a variable in a dataframe was specified
    varname <- extractVarName(varname)
    tmpDf <- data.frame(dat)
    names(tmpDf) <- varname
    return(singleBoxplot(tmpDf, varname))
  }

  ### Case 2 and 3: 'dat' is a dataframe and 'y' names the variable to plot
  if (is.null(y)) {
    stop("Argument 'y' should be a text string specifying a variable in the ",
         "dataframe specified by 'dat'; instead, no 'y' is specified.")
  }

  if (length(y) > 1) {
    warning("If argument 'y' is specified, it should be the name of a ",
            "variable in the dataframe specified by argument 'dat'. However, ",
            "'y' had ", length(y),
            " elements. Discarding all but the last one.")
    ### Actually discard, as announced; a condition of length > 1 in the
    ### check below would otherwise be an error in recent versions of R
    y <- y[length(y)]
  }

  if (!(y %in% names(dat))) {
    stop("Argument 'y' should be a text string specifying a variable in the ",
         "dataframe specified by 'dat', but '", y, "' isn't among ",
         "names(dat). Please check your spelling, and remember that R is ",
         "case sensitive: it matters whether you use capitals or not!")
  }

  ### Case 2: no grouping variable
  if (is.null(x)) {
    return(singleBoxplot(dat, y))
  }

  ### Case 3: separate box plots for each level of 'x'
  if (length(x) > 1) {
    warning("If argument 'x' is specified, it should be the name of a ",
            "variable (a factor, normally) in the dataframe",
            " specified by argument 'dat'. However, ",
            "'x' had ", length(x),
            " elements. Discarding all but the last one.")
    x <- x[length(x)]
  }

  if (!(x %in% names(dat))) {
    stop("Argument 'x' should be a text string specifying a variable in the ",
         "dataframe specified by 'dat', but '", x, "' isn't among ",
         "names(dat). Please check your spelling, and remember that R is ",
         "case sensitive: it matters whether you use capitals or not!")
  }
  if (!is.factor(dat[[x]])) {
    dat[[x]] <- factor(dat[[x]])
  }

  ### Based on JasonAizkalns' answer at
  ### https://stackoverflow.com/questions/33524669/labeling-outliers-of-boxplots-in-r

  ### Store the original row numbers before splitting: ddply returns the
  ### rows ordered by group, so numbering the rows afterwards would label
  ### outliers with positions that don't match the data that was passed in.
  dat$outlier <- seq_len(nrow(dat))
  dat <-
    plyr::ddply(dat, x,
                function(datF) {
                  ### Keep the row number for outliers in this group only;
                  ### as.numeric keeps the column type equal across groups
                  datF$outlier <-
                    as.numeric(ifelse(iqrOutlier(datF[[y]]),
                                      datF$outlier,
                                      NA_real_))
                  datF
                })

  resPlot <- ggplot2::ggplot(dat, ggplot2::aes_string(y = y, x = x)) +
    ggplot2::geom_boxplot(...) +
    theme

  addOutlierLabels(resPlot, ggplot2::aes_string(label = 'outlier'))
}

Try the ufs package in your browser

Any scripts or data that you put into this service are public.

ufs documentation built on July 9, 2023, 6:07 p.m.