# R/gg_prop_plot.R

# Defines functions: gg_prop_plot

# Documented in: gg_prop_plot

#' Creates a proportions ggplot of one or two variables.
#'
#' @description The function uses a data set and a variable (or two variables)
#' to generate a ggplot with proportions.
#' It uses the "original" data set, i.e., it assumes that no prior `count` or
#' aggregations were performed.
#' It detects if the data has a prior grouping variable. If a `group_by` of one
#' variable is detected, the function will use the grouping variable on the
#' x-axis and the input variable as the fill aesthetics.
#' If no prior grouping variable is detected, the function will use the input
#' variable as the x-axis and the proportions of values as y-axis.
#' The function fails if the data is grouped by more than one variable.
#'
#'
#' @param data The data set (ungrouped, or grouped by a single variable)
#' @param vari The variable on which proportions are calculated
#' @param title The plot's title
#' @param subtitle The plot's subtitle
#' @param caption The plot's caption. The number of rows in `data` is always
#'   appended as `n=<rows>` on a new line below it; when `caption` is `NULL`
#'   the caption consists of `n=<rows>` only.
#' @param arrange_desc Should the plot arrange the bars in descending order
#'   (relative to how frequently each value of `vari` appears in `data`)
#' @param add_labels Should labels be added on the bars (geom_label is used)
#'
#' @return A ggplot2 object
#'
#' @examples
#' library(dplyr)
#'
#' data <-
#'   tibble(fruit =
#'            c("apple", "orange", "orange", "pear", "mango", "mango", "mango"),
#'          gender =
#'            c("M", "M", "F", "F", "M", "F", "F"),
#'          favorite_color =
#'            c("Blue", "Red", "Green", "Blue", "Red", "Green", "Blue")
#'   )
#'
#' data %>%
#'   gg_prop_plot(fruit)
#'
#' data %>%
#'   gg_prop_plot(fruit, arrange_desc = FALSE)
#'
#' data %>%
#'   group_by(gender) %>%
#'   gg_prop_plot(fruit)
#'
#' # Two grouping variables are not supported, so this call raises an error:
#' try(
#'   data %>%
#'     group_by(favorite_color, gender) %>%
#'     gg_prop_plot(fruit)
#' )
#'
#' @importFrom magrittr %>%
#'
#' @export

gg_prop_plot <- function(data,
                         vari,
                         title = NULL,
                         subtitle = NULL,
                         caption = NULL,
                         arrange_desc = TRUE,
                         add_labels = TRUE) {

  # See if grouping variables exist (and which they are). Fail if too many.
  has_grouping <- dplyr::is.grouped_df(data)
  grouping_variables <- dplyr::group_vars(data)
  if (length(grouping_variables) > 1) {
    stop("More than one grouping variables detected, but this function works with up to a single grouping variable.")
  }

  # Decide the order of the levels of `vari` (bar order / fill order).
  vari_values <- dplyr::pull(data, {{ vari }})
  if (arrange_desc) {
    # most frequent value first
    level_order <- levels(forcats::fct_infreq(vari_values))
  } else {
    # default `factor()` level order (sorted values for character input)
    level_order <- levels(factor(vari_values))
  }

  # Create the tibble that will be used for plotting. The plotting code below
  # reads the `prop` and `n4prop` columns from the result of `saridr::prop()`.
  prep_data <- data %>%
    saridr::prop({{ vari }}, leave_n = TRUE) %>%
    dplyr::mutate({{ vari }} := factor({{ vari }}, levels = level_order))

  # Always report the sample size in the caption. When the caller supplied a
  # caption, keep it and put the sample size on a new line underneath it.
  sample_size <- paste0("n=", nrow(data))
  if (is.null(caption)) {
    updated_caption <- sample_size
  } else {
    updated_caption <- paste0(caption, "\n", sample_size)
  }

  # Build the part of the plot that depends on the grouping.
  if (has_grouping) {
    # Exactly one grouping variable: it goes on the x-axis and `vari` becomes
    # the fill. `group_vars()` returns a string, hence `sym()` (not `ensym()`,
    # which is meant for capturing function arguments).
    grouping_variable <- rlang::sym(grouping_variables)

    base_plot <-
      ggplot2::ggplot(prep_data,
                      ggplot2::aes(y = prop, x = !!grouping_variable,
                                   fill = {{ vari }},
                                   label = paste0(round(prop * 100), "% ",
                                                  "(", n4prop, ")"))) +
      ggplot2::geom_col(position = ggplot2::position_stack())
  } else {
    # No grouping: one bar per value of `vari`.
    base_plot <-
      ggplot2::ggplot(prep_data,
                      ggplot2::aes(y = prop, x = {{ vari }},
                                   label = paste0(round(prop * 100), "% ",
                                                  "(", n4prop, ")"))) +
      ggplot2::geom_col(fill = saridr::sarid_colors$light_blue)
  }

  # Axis titles, percent scale and plot labels are shared by both layouts.
  final_plot <- base_plot +
    ggplot2::ylab("Respondent rate [%]") +
    ggplot2::xlab("") +
    ggplot2::scale_y_continuous(labels = scales::percent_format(1)) +
    ggplot2::labs(title = title, subtitle = subtitle,
                  caption = updated_caption)

  # Optionally add the "<percent>% (<n>)" labels on the bars.
  if (add_labels) {
    final_plot <- final_plot +
      ggplot2::geom_label(position = ggplot2::position_stack(),
                          show.legend = FALSE)
  }

  final_plot
}
# sarid-ins/saridr documentation built on Nov. 10, 2020, 9:07 p.m.