R/wrap.hist.R

Defines functions wrap.hist

Documented in wrap.hist

#' Histogram plots
#'
#' @description Creates histogram plot for numerical dependent variables and prints
#' descriptive statistics in a summary table. The function delegates the primary
#' computations to \code{\link[ggplot2]{ggplot}}.
#'
#' @param dv1 Column vector containing the dependent variable
#' @param likert A logical argument: If \code{FALSE}, the function assumes a
#' continuous dependent variable; if \code{TRUE}, the function assumes a discrete
#' dependent variable that takes on only integer values
#' @param percent A logical argument: if \code{TRUE}, the plot displays percentages
#' along the y axis; if \code{FALSE}, the plot displays counts
#' @param binwidth Numeric argument representing bin width
#' @param xlim Numeric vector containing x-axis limits
#' @param xmajor Numeric argument representing spacing of x-axis labels
#' @param xlab String containing the x-axis label
#' @param ylim Numeric vector containing y-axis limits
#' @param ymajor Numeric argument representing spacing of y-axis limits
#' @param ylab String containing the y-axis label
#' @param title String containing plot title
#' @param size.axis.text.x,size.axis.text.y,size.title Numeric arguments
#' containing font sizes
#'
#' @seealso \code{\link[ggplot2]{ggplot}}
#'
#' @examples
#' ## Histogram for a continuous dependent variable
#' wrap.hist(dv1 = bdata$DV8, likert = FALSE)
#'
#' ## Histogram for a discrete dependent variable
#' wrap.hist(dv1 = bdata$DV1, likert = TRUE)
#'
#' @import stringr ggplot2
#' @importFrom scales percent_format
#' @export
wrap.hist <- function(dv1,likert=F,percent=T,binwidth=1,xlim=NULL,
                      xmajor=NULL,xlab=NULL,ylim=NULL,ymajor=NULL,
                      ylab=NULL,title=NULL,size.axis.text.y = 12,
                      size.axis.text.x=12,size.title=24) {

  # Error checks
  if(is.numeric(dv1)==F) {return("Error: dv1 must be numeric.")}
  if(likert==T&binwidth!=1) {return("Error: Must input binwidth=1 for Likert scale DVs.")}

  if(is.null(title)==T & grepl("\\$",toString(substitute(dv1)))==T) {title <- toString(substring(deparse(substitute(as.numeric(dv1))),str_locate_all(pattern=coll('$'),deparse(substitute(as.numeric(dv1))))[[1]][1]+1,nchar(deparse(substitute(as.numeric(dv1))))-1))}
  if(is.null(title)==T & grepl("\\[",toString(substitute(dv1)))==T) {df_temp <- get(substr(deparse(substitute(dv1)),1,which(strsplit(deparse(substitute(dv1)), "")[[1]]=="[")-1),envir = .GlobalEnv); title <- names(df_temp)[substitute(dv1)[[3]]]}
  dv1 <- dv1[!is.na(dv1)]
  df <- data.frame(dv1)
  if(is.null(binwidth)==F&is.null(xmajor)==T) {xmajor <- binwidth}

  if(is.null(xlim)==T&is.null(binwidth)==T) {xlim=c(min(dv1,na.rm=T),max(dv1,na.rm=T))}

  # Ensure that ggplot displays the uppermost bin in full
  if(is.null(xlim)==T&is.null(binwidth)==F) {
    xlim1 <- min(dv1,na.rm=T)
    xlim2 <- max(dv1,na.rm=T)
    xlim2 <- ceiling((xlim2-xlim1)/binwidth)*binwidth+xlim1
    xlim <- c(xlim1,xlim2)
  }

  max <- 0; for (i in 1:nlevels(as.factor(dv1))) {temp <- sum(as.factor(dv1)==levels(as.factor(dv1))[i],na.rm=T); if(temp>max) {max <- temp}}

  if(is.null(ylim)==T&percent==T) {ylim=c(0,1)}
  if(percent==T&is.null(ymajor)==T) {ymajor=.1}
  if(percent==F&is.null(ymajor)==T) {ymajor=ceiling(length(dv1)/((xlim[2]-xlim[1])/binwidth)/5)}
  if(likert==T) {offset=1/2}
  if(likert==F) {offset=0}

  # display percentages on y axis
  if(percent==T) {
    plot <- ggplot(data=df,aes(dv1))+labs(y=ylab,x=xlab,title=title)+geom_histogram(aes(y = (..count..)/sum(..count..)),binwidth=binwidth,breaks=seq(xlim[1]-offset,xlim[2]+offset,binwidth),col="black",fill="gray80")+scale_x_continuous(breaks=seq(xlim[1],xlim[2],xmajor),expand=c(0,0))+scale_y_continuous(expand=c(0,0),labels=percent_format(accuracy=1),breaks=seq(0,1,ymajor))+expand_limits(y=ylim)+theme(plot.title=element_text(face="bold",color="black",size=size.title))+theme(plot.title = element_text(color="black",hjust = 0.5))+theme(plot.background = element_rect(fill = "white", colour = "white"))+ theme(panel.grid.major.y = element_line(colour="black", size=0.5),panel.grid.major.x=element_blank(),panel.grid.minor.y=element_blank(),panel.grid.minor.x=element_blank())+theme(axis.text.y=element_text(color="black",size=size.axis.text.y))+theme(axis.text.x=element_text(color="black",size=size.axis.text.x))+ theme(strip.background = element_rect(fill="white"))+theme(strip.text.x = element_text(size = 12,face="bold"))+theme(panel.background = element_rect(colour = "black", fill = "white", size = 0))
  }

  # display counts on y axis
  if(percent==F) {
    if(is.null(ylim)==T) {
      ylim <- c(0,max(ggplot_build(ggplot(data=df,aes(dv1))+labs(y=ylab,x=xlab,title=title)+geom_histogram(aes(y = (..count..)),binwidth=binwidth,breaks=seq(xlim[1]-offset,xlim[2]+offset,binwidth),col="black",fill="gray80")+scale_x_continuous(breaks=seq(xlim[1],xlim[2],xmajor),expand=c(0,0))+scale_y_continuous(expand=c(0,0),breaks=seq(0,0,ymajor))+expand_limits(y=ylim)+theme(plot.title=element_text(face="bold",color="black",size=size.title))+theme(plot.title = element_text(color="black",hjust = 0.5))+theme(plot.background = element_rect(fill = "white", colour = "white"))+ theme(panel.grid.major.y = element_line(colour="black", size=0.5),panel.grid.major.x=element_blank(),panel.grid.minor.y=element_blank(),panel.grid.minor.x=element_blank())+theme(axis.text.y=element_text(color="black"))+theme(axis.text.x=element_text(color="black"))+ theme(strip.background = element_rect(fill="white"))+theme(strip.text.x = element_text(size = 12,face="bold"))+theme(panel.background = element_rect(colour = "black", fill = "white", size = 0)))$data[[1]]$ymax))
    }
    plot <- ggplot(data=df,aes(dv1))+labs(y=ylab,x=xlab,title=title)+geom_histogram(aes(y = (..count..)),binwidth=binwidth,breaks=seq(xlim[1]-offset,xlim[2]+offset,binwidth),col="black",fill="gray80")+scale_x_continuous(breaks=seq(xlim[1],xlim[2],xmajor),expand=c(0,0))+scale_y_continuous(expand=c(0,0),breaks=seq(ylim[1],ylim[2],ymajor))+expand_limits(y=ylim)+theme(plot.title=element_text(face="bold",color="black",size=size.title))+theme(plot.title = element_text(color="black",hjust = 0.5))+theme(plot.background = element_rect(fill = "white", colour = "white"))+ theme(panel.grid.major.y = element_line(colour="black", size=0.5),panel.grid.major.x=element_blank(),panel.grid.minor.y=element_blank(),panel.grid.minor.x=element_blank())+theme(axis.text.y=element_text(color="black",size=size.axis.text.y))+theme(axis.text.x=element_text(color="black",size=size.axis.text.x))+ theme(strip.background = element_rect(fill="white"))+theme(strip.text.x = element_text(size = 12,face="bold"))+theme(panel.background = element_rect(colour = "black", fill = "white", size = 0))
  }

  summary <- ggplot_build(plot)$data[[1]][c(1,2,4,5)]
  colnames(summary) <- c("Percentage","Count","Lower Bound","Upper Bound")
  summary$Percentage <- paste(round(summary$Percentage*100,2),"%",sep="")
  summary <- summary[c(3,4,1,2)]
  print(summary,row.names=F)
  if(sum(summary$Count)<length(dv1)) {print("WARNING: The total number of data points displayed in the histogram is less than the length of the dv1 vector, suggesting that one or more data points are not currently being displayed. Consider expanding x-axis limits.")}
  return(plot)
}
michaelkardas/behavioralwrappers documentation built on Jan. 2, 2020, 7:46 a.m.