R/box_mean_plot.R

Defines functions box_mean_plot

Documented in box_mean_plot

#' Box plots of grouped data with optional mean value
#'
#' @description
#' Creates a standard box plot of grouped data (line: median, box: 25 to 75 percentiles, whiskers: 1.5*IQR (interquartile range)).
#'
#' @details
#' To include the mean value for each group, set `means` argument to `TRUE`. To change between arithmetic or geometric
#' mean, set the `mean.type` argument to "arithmetic" or "geometric".
#'
#' The order of groups shown in the x axis and color groups defaults to alphabetical.
#' To change it, supply `x.order` or `group.order` arguments.
#' These should be character vectors with the desired order of each factor.
#' Do not include categories that don't exist in the supplied `data.frame`.
#' To bring just one category first, supply `x.first` or `group.first` arguments.
#' If `x.order` is supplied, `x.first` will be ignored. The same holds for `group.order` and `group.first`.
#'
#' Adjust other supplied arguments to customize the plot aesthetically.
#'
#' @param d `data.frame` with data to be plotted.
#' @param x Character. The name of the column to be used for the x axis (categorical data).
#' If `NULL`, `color.group` needs to be supplied. Defaults to `NULL`.
#' @param y Character. The name of the column to be used for the y axis (numeric data).
#' @param color.group Character. The name of the column to be used for color grouping (categorical data).
#' If `NULL`,`x` needs to be supplied. Defaults to `NULL`.
#' @param x.axis Character. Sets the title of the x axis. If `NULL`, x axis title is given the value of `x`.
#' Defaults to `NULL`.
#' @param y.axis Character. Sets the title of the y axis. If `NULL`, y axis title is given the value of `y`.
#' Defaults to `NULL`.
#' @param legend.title Character. Sets the title of the legend. If `NULL`, legend title is given the value of `color.group`.
#' Defaults to `NULL`.
#' @param x.order Character vector of length equal to the number of x categories.
#' Sets the order of categories in the x axis. If `NULL`, x categories are ordered alphabetically. Defaults to `NULL`.
#' @param group.order Character vector of length equal to the number of color grouping categories.
#' Sets the order of color groups. If `NULL`, color groups are ordered alphabetically. Defaults to `NULL`.
#' @param x.first Character. Places a specific x axis category first. Ignored if `x.order` is supplied.
#' Defaults to `NULL`.
#' @param group.first Character. Places a specific color group category first.
#' Ignored if `group.order` is supplied. Defaults to `NULL`.
#' @param means Logical. Sets whether or not to plot group means. Defaults to `FALSE`.
#' @param boxwidth Numeric. Sets the width of the bars. Defaults to 0.7.
#' @param whisker.width Numeric. Sets the width of the error bar whiskers. Defaults to 1.
#' @param mean.size Numeric. Sets the size of group mean points. Defaults to 1.
#' @param points Logical. Sets whether or not to plot individual data points. Defaults to `TRUE`.
#' Note that, even if set to `FALSE`, outliers will still be shown as individual data points, as is the norm in box plots.
#' @param jitterwidth Numeric. The horizontal dispersion of individual data points. Defaults to 1.
#' @param pointsize Numeric. Sets the size of individual data points. Defaults to 1.
#' @param mean.type Character. Sets from arithmetic to geometric mean. Defaults to "arithmetic".
#'
#' @return A plot based on `ggplot2`.
#'
#' @import ggplot2
#' @import ggthemes
#'
#' @export
#'
box_mean_plot <- function(d,x=NULL,y,color.group=NULL,x.axis=NULL,y.axis=NULL,legend.title=NULL,
                          x.order=NULL,group.order=NULL,x.first=NULL,group.first=NULL,
                          means=FALSE,boxwidth=0.7,whisker.width=1,mean.size=1,points=TRUE,jitterwidth=1,pointsize=1,
                          mean.type="arithmetic") {

  if(!is.data.frame(d)) {
    stop("Object supplied as d is not a data.frame")
  }

  if(!(y %in% names(d))) {
    stop(paste("Supplied",y,"field does not exist in d."))
  }

  if(!(class(d[[y]]) %in% c("numeric","integer"))) {
    stop(paste("Supplied",y,"is not a numeric field in d."))
  }

  if(is.null(x) & is.null(color.group)) {
    stop("Supplied x and color.group are both NULL. Please specify at least one of them.")
  }

  if(!is.null(x)) {
    if(!(x %in% names(d))) {
      stop(paste("Supplied",x,"field does not exist in d."))
    } else {
      if(!is.null(x.order)) {
        if(FALSE %in% (x.order %in% d[[x]])) {
          stop(paste("Categories in supplied x.order don't exist in",x,"field"))
        } else {
          d[[x]] <- factor(d[[x]],levels=x.order)
        }
      } else {
        d[[x]] <- factor(d[[x]])
      }
    }
  }

  if(!is.null(color.group)) {
    if(!(color.group %in% names(d))) {
      stop(paste("Supplied",color.group,"field does not exist in d."))
    } else {
      if(!is.null(group.order)) {
        if(FALSE %in% (group.order %in% d[[color.group]])) {
          stop(paste("Categories in supplied group.order don't exist in",color.group,"field"))
        } else {
          d[[color.group]] <- factor(d[[color.group]],levels=group.order)
        }
      } else {
        d[[color.group]] <- factor(d[[color.group]])
      }
    }
  }

  if(!is.null(x.first)) {
    if(!(x.first %in% d[[x]])) {
      stop(paste(x.first,"does not exist in",x,"field"))
    } else if(!is.null(x.order)) {
      warning("x.order was supplied. x.first will be ignored.")
    } else {
      d[[x]] <- relevel(d[[x]],x.first)
    }
  }

  if(!is.null(group.first)) {
    if(!(group.first %in% d[[color.group]])) {
      stop(paste(group.first,"does not exist in",color.group,"field"))
    } else if(!is.null(group.order)) {
      warning("group.order was supplied. group.first will be ignored.")
    } else {
      d[[color.group]] <- relevel(d[[color.group]],group.first)
    }
  }

  if(!(mean.type %in% c("arithmetic","geometric"))) {
    stop("Supplied mean.type is not 'arithmetic' or 'geometric'.")
  }

  show.outliers <- ifelse(points==TRUE,NA,1)

  if(is.null(x)) {
    mapping <- aes(x="constant",y=!!rlang::sym(y),fill=!!rlang::sym(color.group))

    box.position <- position_dodge(width=boxwidth)

    box.data <- geom_boxplot(position=box.position,
                             width=boxwidth,
                             colour="black",
                             outlier.shape=show.outliers,
                             outlier.size=1.5*pointsize)

    point.position <- position_jitterdodge(jitter.width=jitterwidth*0.5*boxwidth,dodge.width=boxwidth)

    aesthetics <- list(scale_fill_grey(limits=levels(d[[color.group]])),
                       theme(axis.title.x=element_blank(),axis.text.x=element_blank()),
                       labs(fill=ifelse(is.null(legend.title),color.group,legend.title)))

  } else if(is.null(color.group)) {
    mapping <- aes(x=!!rlang::sym(x),y=!!rlang::sym(y))

    box.position <- position_identity()

    box.data <- geom_boxplot(position=box.position,
                             width=boxwidth,
                             colour="black",fill="grey20",
                             outlier.shape=show.outliers,
                             outlier.size=1.5*pointsize)

    point.position <- position_jitter(width=jitterwidth*0.5*boxwidth)

    aesthetics <- xlab(ifelse(is.null(x.axis),x,x.axis))

  } else {
    mapping <- aes(x=!!rlang::sym(x),y=!!rlang::sym(y),fill=!!rlang::sym(color.group))

    box.position <- position_dodge(width=boxwidth)

    box.data <- geom_boxplot(position=box.position,
                             width=boxwidth,
                             colour="black",
                             outlier.shape=show.outliers,
                             outlier.size=1.5*pointsize)

    point.position <- position_jitterdodge(jitter.width=jitterwidth*0.5*boxwidth,dodge.width=boxwidth)

    aesthetics <- list(scale_fill_grey(limits=levels(d[[color.group]])),
                       xlab(ifelse(is.null(x.axis),x,x.axis)),
                       labs(fill=ifelse(is.null(legend.title),color.group,legend.title)))
  }


  if(points==TRUE) {
    point.data <- geom_point(size=1.5*pointsize,
                             position=point.position,
                             show.legend=FALSE,
                             shape=1)
  } else {
    point.data <- NULL
  }



  #-----------------------------CALCULATING MEANS AND MAXIMUM PLOTTED VALUES-------------------------------

  if(means==TRUE) {
    if(mean.type=="arithmetic") {
      # Arithmetic means
      mean.data <- stat_summary(fun=mean,
                                geom="point",
                                position=box.position,
                                shape=4,stroke=1.5*mean.size,colour="black")

    } else if(mean.type=="geometric") {
      # Geometric means
      mean.data <- stat_summary(fun=function(x) {log(x) |> mean(na.rm=TRUE) |> exp()},
                                geom="point",
                                position=box.position,
                                shape=4,stroke=1.5*mean.size,colour="black")

    } else {
      stop("Supplied mean.type is not 'arithmetic' or 'geometric'.")
    }
  } else {
    mean.data <- NULL
  }



  #-----------------------------------------START PLOTTING-----------------------------------------

  plot <- ggplot(data=d,mapping=mapping) +

    #use a theme for formatting
    theme_classic() +
    theme(axis.ticks.x=element_blank()) +

    #Stick the bars to the x axis and expand the y axis to +5% of the maximum data
    scale_y_continuous(expand=expansion(mult=c(0,0.075)),limits=c(0,NA)) +

    #add the axis titles
    ylab(ifelse(is.null(y.axis),y,y.axis)) +

    stat_boxplot(geom="errorbar",
                 width=whisker.width*boxwidth*0.2,
                 position=box.position) +

    box.data +

    point.data +

    mean.data +

    aesthetics

  return(plot)

}
dimitriskokoretsis/datavis documentation built on Oct. 14, 2022, 3:35 p.m.