R/bars.R

Defines functions bars

Documented in bars

#' Grouped Bar Plots with Error Bars
#'
#' @description Grouped bar plots with error bars
#'
#' @param formula A two-sided formula specifying the response variable and the grouping factors
#' @param data An optional data frame containing the variables
#' @param heightFun The function used to calculate the bar height for a group (default=mean)
#' @param errorFun The function used to calculate the error bar for a group (default=ciMean). No bars drawn if \code{errorFun=FALSE}
#' @param yLabel The y-axis label (defaults to the name of the response variable)
#' @param xLabels The x-axis bar labels (defaults to factor labels of the appropriate grouping variable)
#' @param main The plot title
#' @param ylim The y-axis limit: lower bound defaults to 0, default upper bound estimated
#' @param barFillColour The colours to fill the bars (defaults to a rainbow palette with saturation .3)
#' @param barLineWidth The width of the bar border lines (default=2)
#' @param barLineColour The colour of the bar border lines (default="black")
#' @param barSpaceSmall The size of the gap between bars within a cluster, as a proportion of bar width (default=.2)
#' @param barSpaceBig The size of the gap separating clusters of bars, as a proportion of bar width (default=1)
#' @param legendLabels The text for the legend (defaults to factor labels of the appropriate grouping variable). No legends drawn if \code{legendLabels=FALSE} or if only one grouping variable is specified
#' @param legendDownShift How far below the top is the legend, as proportion of plot height? (default=0)
#' @param legendLeftShift How far away from the right edge is the legend, as proportion of plot? (default=0)
#' @param errorBarLineWidth The line width for the error bars (default=1)
#' @param errorBarLineColour The colour of the error bars (default="grey40")
#' @param errorBarWhiskerWidth The width of error bar whiskers, as proportion of bar width (default=.2)
#'
#' @details Plots group means (or other function, if specified) broken down by
#' one or two grouping factors. Confidence intervals (or other function) are
#' plotted. User specifies a two sided formula of the form
#' \code{response ~ group1 + group2}, where \code{response} must be numeric
#' and \code{group1} and \code{group2} are factors. The \code{group1} variable
#' defines the primary separation on the x-axis, and the x-axis labels by
#' default print out the levels of this factor. The \code{group2} variable
#' defines the finer grain separation, and the legend labels correspond to
#' the levels of this factor. Note that \code{group2} is optional.
#'
#' @return Invisibly returns a data frame containing the factor levels, group
#' means and confidence intervals. Note that this function is usually called
#' for its side effects.
#'
#' @export
bars <- function(
  formula, # two-sided formula specifying the response variable and the grouping factors
  data=NULL, # optional data frame containing the variables
  heightFun=mean, # function used to calculate the bar height for a group
  errorFun=ciMean, # function used to calculate the error bar for a group
  yLabel=NULL, # y-axis label: defaults to the name of the response varialbe
  xLabels=NULL, # x-axis bar labels: defaults to the levels of the 1st factor
  main="", # plot title
  ylim=NULL, # y-axis limit: lower bound defaults to 0, default upper bound estimated
  barFillColour=NULL, # colours to fill the bars (defaults to ???)
  barLineWidth=2, # width of the bar border lines
  barLineColour="black", # colour of the bar border lines
  barSpaceSmall=.2, # size of the gap between bars within a cluster (as a proportion of bar width)
  barSpaceBig=1, # size of the gap separating clusters of bars (as a proportion of bar width)
  legendLabels=NULL, # text for the legend (defaults to the levels of the 2nd factor, if present)
  legendDownShift=0, # how far down is the legend? (as proportion of plot)
  legendLeftShift=0, # how far left is the legend? (as proportion of plot)
  errorBarLineWidth=1, # line width for the error bars
  errorBarLineColour="grey40", # colour of the error bars
  errorBarWhiskerWidth=.2 # width of error bar whiskers (as proportion of bar width)
  ){

  ####### handy functions for later #######

  # function to add a confidence interval
  addOneErrorBar <- function( x, y, ci, wwd, lwd, col  ) {
    graphics::lines( c(x,x),ci, col=col, lwd=lwd )
    graphics::lines( c(x-wwd/2,x+wwd/2), c(ci[2],ci[2]), col=col, lwd=lwd )
    graphics::lines( c(x-wwd/2,x+wwd/2), c(ci[1],ci[1]), col=col, lwd=lwd )
  }

  # function to get a factor level
  factorLevels <- function( j ) {
    levels(data[,gpName[j]])
  }

  # function checking that an input is numeric and of a specific length
  isNumbers <- function( x, n=NULL ) {
    out <- class(x) %in% c("numeric","integer")
    if( !is.null(n)) out <- out && length(x)==n
    return(out)
  }

#   TODO: support transparency (ie. 8-digit hex), check if I've missed anything
#   # function checking that a vector of inputs specify colours
#   isColours <- function (x) {
#     isOK <- rep.int(FALSE,length(x))
#     if( is(x,"character")) {
#       pattern <- paste0( "^#", paste(rep.int("[0-9A-Fa-f]",6),collapse=""), "$" )
#       isOK[ grep(pattern,x) ] <- TRUE
#       isOK <- isOK | (x %in% colours())
#     }
#     if( isNumbers(x) ){
#       isOK <- TRUE # numbers are rounded and plugged into colours()
#     }
#     return(all(isOK))
#   }

  ####### setup and input checking #######

  # CHECK that those inputs that should be exactly one number are in fact one number
  if( !isNumbers( barLineWidth, 1) ) stop( "'barLineWidth' must be a single number")
  if( !isNumbers( barSpaceSmall, 1) ) stop( "'barSpaceSmall' must be a single number")
  if( !isNumbers( barSpaceBig, 1) ) stop( "'barSpaceBig' must be a single number")
  if( !isNumbers( legendDownShift, 1) ) stop( "'legendDownShift' must be a single number")
  if( !isNumbers( legendLeftShift, 1) ) stop( "'legendLeftShift' must be a single number")
  if( !isNumbers( errorBarLineWidth, 1) ) stop( "'errorBarLineWidth' must be a single number")
  if( !isNumbers( errorBarWhiskerWidth, 1) ) stop( "'errorBarWhiskerWidth' must be a single number")

#  (currently doesn't work properly, so the check is removed)
#  # CHECK that the colour inputs are vectors of colours, but no more
#  if( !is.null(barFillColour) && !isColours( barFillColour )) stop( "'barFillColour' must specify colours" )
#  if( !isColours( barLineColour )) stop( "'barLineColour' must specify colours" )
#  if( !isColours( errorBarLineColour )) stop( "'errorBarLineColour' must specify colours" )

  # CHECK that titles & labels can be coerced to character
  if( methods::is( try( as.character(main), silent=TRUE), "try-error" ))
    stop( "'main' cannot be coerced to character" )
  if( !is.null(yLabel) && methods::is( try( as.character(yLabel), silent=TRUE), "try-error" ))
    stop( "'yLabel' cannot be coerced to character" )
  if( !is.null(xLabels) && methods::is( try( as.character(xLabels), silent=TRUE), "try-error" ))
    stop( "'xLabels' cannot be coerced to character" )
  if( !is.null(legendLabels) && methods::is( try( as.character(legendLabels), silent=TRUE), "try-error" ))
    stop( "'legendLabels' cannot be coerced to character" )

  # WARN if the title or y-axis label lengths don't make sense
  if( length( main ) !=1 ) warning( "'main' does not usually have more than one element" )
  if(  !is.null(yLabel) && length( yLabel ) !=1 ) warning( "'yLabel' does not usually have more than one element" )

  # CHECK that ylim is numeric of length 2
  if( !is.null(ylim) && !isNumbers( ylim, 2)) stop( "'ylim' must be a numeric vector with two elements")

  # CHECK that formula is a two sided formula
  if( !methods::is(formula,"formula")) stop( "'formula' input must be a formula" )
  if( length(formula) !=3 ) stop( "'formula' must be two sided")

  # get the variable names
  responseName <- all.vars(formula[[2]]) # string containing name of the response variable
  gpName <- all.vars(formula[[3]]) # vector of strings naming the grouping variables
  nFactors <- length( gpName ) # how many grouping variables were input

  # CHECK variable numbers in the formula: one reponse, one or two groups
  if( length( responseName ) != 1) stop( "'formula' must have one response variable")
  if( nFactors == 0) stop( "'formula' must have at least one grouping variable")
  if( nFactors > 2) stop( "'formula' cannot have more than two grouping variables")

  # CHECK data frame:
  if( !is.null(data) ) {

    # if it's specified, it needs to be data frame, because a matrix can't
    # contain both factors and numeric variables
    if( !methods::is(data,"data.frame") ) stop ( "'data' is not a data frame")

  } else {

    # copy variables into a data frame if none is specified, and
    # CHECK that the variables are appropriate for a data frame
    data <- try( eval( stats::model.frame( formula = formula ),
                       envir=parent.frame() ), silent=TRUE)
    if( methods::is(data,"try-error") ) {
      stop( "cannot create data frame from variables in 'formula'.
            Possible reason: variables are not all the same length")
    }
  }

  # CHECK that all grouping variables are factors
  if( !all(sapply( gpName, function(x){methods::is(data[,x],"factor")} )) ) {
    stop( 'grouping variables must be factors')
  }

  # CHECK that the response is numeric or integer
  if( !isNumbers( data[,responseName]) ) {
    stop( 'response variable must be numeric')
  }

  # CHECK that heightFun is a function that takes arguments
  if( !methods::is(heightFun,"function")) stop( "'heightFun' must be a function")
  if( length(formals(heightFun))==0) stop( "'heightFun' must accept inputs")

  # CHECK that errorFun is a function, or else equals FALSE
  if( !methods::is(errorFun,"function") ) { # if it's not a function
    if( length(errorFun) != 1 || errorFun != FALSE ) { # and it's not FALSE
      stop( "'errorFun' must be a function, or FALSE") # complain
    }
  } else { # okay it's a function, CHECK that it takes arguments:
    if( length(formals(errorFun))==0) stop( "'errorFun' must accept inputs")
  }

  # get number of levels for each grouping factor
  nLevels <- sapply( gpName, function(x){nlevels(data[,x])})

  # CHECK more label lengths, now that we know the number of levels
  if( !is.null(xLabels) && length(xLabels) != nLevels[1] )
    stop( paste0( "'xLabels' has ", length(xLabels), " elements, but '",
                    gpName[1], "' has ", nLevels[1], " levels" ))
  if( nFactors==2 && !is.null(legendLabels) && length(legendLabels) != nLevels[2] )
    stop( paste0( "'legendLabels' has ", length(legendLabels), " elements, but '",
                  gpName[2], "' has ", nLevels[2], " levels" ))

  # see if error bars and/or legend are to be plotted
  showErrorBars <- methods::is(errorFun,"function")
  showLegend <- is.null(legendLabels) || length(legendLabels) > 1 || legendLabels != FALSE
  showLegend <- showLegend & nFactors==2 # currently no legends for the one-factor case

  # REMOVE missing data and CHECK cell counts
  completeCases <- !apply( is.na( data[,c(gpName,responseName)] ), 1, any )
  data <- data[ completeCases, ]
  if( any(!completeCases)) {
    warning( paste( sum(!completeCases), "cases removed due to missing data" ))
  }
  counts <- table(data[,gpName])
  if( min(counts) < 2 ) stop( "at least 2 complete cases are needed for each group" )


  ####### compute bar heights and error bar locations #######

  # compute heights
  barHeights <- stats::aggregate( formula, data, heightFun )

  # CHECK that heightFun produced one number as output. Multiple outputs would
  # mean that barHeights[[nFactors+1]] would be a matrix/data frame. So we
  # check to see if it's a numeric vector instead
  if( !isNumbers( barHeights[[nFactors+1]] )) {
    stop( "'heightFun' must output a single number" )
  }

  # compute error bars if requested
  if( showErrorBars ) {
    errorBars <- stats::aggregate( formula, data, errorFun )

    # CHECK that errorFun produced two numbers as output
    dims <- dim(errorBars[[nFactors+1]])
    if( is.null(dims) || length(dims) !=2 || dims[2] !=2 ||
        !isNumbers(errorBars[[nFactors+1]][,1]) ||
        !isNumbers(errorBars[[nFactors+1]][,2]) ) {
      stop( "'errorFun' must output two numbers" )
    }
  }


  ####### compute default values where needed #######

  # customising the axes & title
  if( is.null( yLabel )) yLabel <- responseName
  if( is.null( xLabels )) xLabels <- factorLevels(1)
  if( is.null( ylim )) {
    ylim <- c(0, max(barHeights[[nFactors+1]])*1.2)
    if( showErrorBars ) ylim <- c(0, max( ylim[2], errorBars[[nFactors+1]]*1.1))
  }

  # customising the style of the bars
  if (is.null( barFillColour )) {
    if( nFactors==1 ) barFillColour <-grDevices::rainbow(nLevels[1], s=.3)
    if( nFactors==2 ) barFillColour <- grDevices::rainbow(nLevels[2], s=.3)
  }

  # legend
  if( is.null( legendLabels )) legendLabels <- factorLevels(nFactors)


  ####### bar plot #######

  old.lwd <- graphics::par()$lwd # store old graphics parameters
  graphics::par( lwd=barLineWidth ) # set new graphics parameters

  if( nFactors==2 ) { # format height data for the two-factor case
    h <- matrix( barHeights[[3]], nrow=nLevels[2],
                 ncol=nLevels[1], byrow=TRUE )
    b <- TRUE

  } else { # format height data for the one-factor case
    h <- barHeights[[2]]
    b <- FALSE
  }

  # barplot
  xloc <- graphics::barplot( height=h, beside=b, legend.text=FALSE, width=1,
                   ylab=yLabel, space=c(barSpaceSmall,barSpaceBig),
                   ylim=ylim, col=barFillColour, border=barLineColour,
                   main=main, font.main=1, names.arg=xLabels )

  ####### legend #######

  if( showLegend ){

    graphics::legend( x="topright", # default location
            inset=c(legendLeftShift,legendDownShift), # user specified shift
            xjust=0,yjust=0, # location refers to top right of legend area
            legend=legendLabels, # labels
            fill=barFillColour, # colours
            bty="n", # no box
            xpd=TRUE # allow legend to move outside plot region
          )  # barLineColour ???
  }


  ####### error bars #######

  if( showErrorBars ) {

    # match the row order of errorBars to the plot order!
    if(nFactors==2) { # two-factor case is awkward
      k <- 0
      ind <- vector( length=dim(errorBars)[1])
      for( i in 1:nLevels[1]) {
        for( j in 1:nLevels[2]) {
          k <- k+1
          ind[k] <- which( errorBars[,1]==factorLevels(1)[i] &
                           errorBars[,2]==factorLevels(2)[j] )
        }
      }
    } else { # one-factor case is easy
      ind <- 1:nLevels[1]
    }

    # recycle colours
    tmp <- as.numeric( gl( length(errorBarLineColour),1, length(xloc) ))
    errorBarLineColour <- errorBarLineColour[tmp]

    # draw the error bars
    for( k in 1:dim(errorBars)[1]) {
      addOneErrorBar( xloc[k],barHeights[[nFactors+1]][i],
                      errorBars[[nFactors+1]][ind[k],], errorBarWhiskerWidth,
                      errorBarLineWidth, errorBarLineColour[k] )
    }


  }


  ####### clean up #######

  # reset graphics
  #par( old.par )
  graphics::par( lwd=old.lwd )


  # concatenate for output
  out <- cbind( barHeights[,1:nFactors,drop=FALSE],
                barHeights[,nFactors+1,drop=FALSE] )
  if( showErrorBars ) {
    out <- cbind( out, errorBars[[nFactors+1]])
  }

  return(invisible(out))

}

Try the lsr package in your browser

Any scripts or data that you put into this service are public.

lsr documentation built on Dec. 11, 2021, 9:10 a.m.