# R/ggpareto.R

#' @title ggpareto
#'
#' @description Function to produce Pareto chart using ggplot2 for different
#' call subjects, split by call area. Call areas are drawn as bars ordered by
#' descending count, overlaid with the cumulative count line (labelled with the
#' cumulative percentage) and a hand-drawn percentage scale on the right.
#'
#' @param df Dataframe containing data to generate plot. It is passed through
#'   \code{df_checker()} and must provide a \code{Call_Area} column.
#' @param filter Filter string to pass to dplyr to filter df. Must be a single
#'   character string holding one R expression. The right-hand side of the
#'   first \code{ == } comparison in the first \code{ & }-separated clause
#'   (with single quotes removed) is used in the plot title.
#'
#' @return A list with two elements: \code{data}, the frequency table used for
#'   plotting (one row per call area with \code{frequency}, \code{cumfreq} and
#'   \code{cumperc}), and \code{plot}, the ggplot object.
#'
#' @export

ggpareto <- function(
  df,
  filter
) {

  if (!is.character(filter) || length(filter) != 1L ||
      is.na(filter) || !nzchar(filter)) {
    stop('`filter` must be a single non-empty character string.',
         call. = FALSE)
  }

  # dplyr::filter_() is deprecated; parse the string once and splice the
  # resulting expression into dplyr::filter() with `!!` instead.
  filter_expr <- parse(text = filter)[[1]]

  df <- df %>%
    df_checker() %>%
    dplyr::filter(!!filter_expr)

  # Title text: right-hand side of the first ' == ' in the first ' & ' clause,
  # with single quotes stripped.
  first_clause <- strsplit(filter, ' & ', fixed = TRUE)[[1]][1]
  title <- gsub('\'', '',
                strsplit(first_clause, ' == ', fixed = TRUE)[[1]][2])
  xlabel <- 'Call Area'
  ylabel <- 'Count of Complaints'

  # drop = FALSE keeps a one-column data frame even when `df` is a plain
  # data.frame, so the rename below always receives a data frame.
  x <- na.omit(df[, 'Call_Area', drop = FALSE]) %>%
    dplyr::rename(modality = Call_Area)

  if (nrow(x) == 0L) {
    stop('No rows with a non-missing `Call_Area` remain after filtering.',
         call. = FALSE)
  }

  # Frequency per call area, most frequent first.
  Df <- x %>%
    dplyr::group_by(
      modality
    ) %>%
    dplyr::summarise(
      frequency = dplyr::n()
    ) %>%
    dplyr::arrange(
      -frequency
    )

  # Fix the level order to the sorted row order so bars are drawn in
  # descending frequency.
  Df$modality <- ordered(
    Df$modality,
    levels = unlist(Df$modality, use.names = FALSE)
  )

  Df <- Df %>%
    dplyr::mutate(
      modality_int = as.integer(modality),
      cumfreq = cumsum(frequency),
      cumperc = cumfreq / nrow(x) * 100
    )

  nr <- nrow(Df)
  N <- sum(Df$frequency)

  # Eleven evenly spaced positions from 0 to N: they serve as the left axis
  # breaks and as the 0%-100% positions of the right-hand percentage scale.
  y_breaks <- seq(0, N, N / 10)
  y_limits <- c(-.02 * N, N * 1.02)
  # Ninth break is the 80% level, drawn as a horizontal reference line.
  y_80 <- y_breaks[9]

  Df_ticks <- data.frame(xtick0 = nr + .55,
                         xtick1 = nr + .59,
                         ytick = y_breaks)

  y2 <- c('  0%', ' 10%', ' 20%', ' 30%', ' 40%', ' 50%',
          ' 60%', ' 70%', ' 80%', ' 90%', ' 100%')

  g <- ggplot(
    Df,
    aes(x = modality, y = frequency)
  ) +
    # NOTE: 'red' inside aes() is a constant mapped through the fill scale,
    # not a literal colour; kept as-is to preserve the chart's appearance.
    geom_bar(
      stat = 'identity',
      aes(fill = 'red')
    ) +
    # Raw counts printed at the base of each bar.
    geom_text(
      aes(x = modality, y = 0, label = frequency, vjust = -1),
      size = 2.5
    ) +
    # Cumulative count line, points and percentage labels.
    geom_line(
      aes(x = modality_int, y = cumfreq)
    ) +
    geom_point(
      aes(x = modality_int, y = cumfreq),
      pch = 19
    ) +
    geom_text(
      aes(
        x = modality_int,
        y = cumfreq,
        label = paste0(round(cumperc, digits = 1), '%')
      ),
      vjust = -1,
      size = 2.5
    ) +
    scale_y_continuous(
      breaks = y_breaks,
      limits = y_limits,
      labels = round(y_breaks)
    ) +
    scale_x_discrete(
      breaks = Df$modality
    ) +
    # "none" replaces the deprecated `FALSE` for suppressing legends.
    guides(
      fill = 'none', color = 'none'
    ) +
    # Hand-drawn secondary (percentage) axis to the right of the last bar:
    # white background strip, labels, axis line and tick marks.
    annotate(
      "rect", xmin = nr + .55, xmax = nr + 1,
      ymin = y_limits[1], ymax = y_limits[2], fill = 'white'
    ) +
    annotate(
      "text", x = nr + .8, y = y_breaks,
      label = y2, size = 1.5
    ) +
    geom_segment(
      x = nr + .55, xend = nr + .55, y = y_limits[1],
      yend = y_limits[2], color = 'grey50'
    ) +
    geom_segment(
      data = Df_ticks,
      aes(x = xtick0, y = ytick, xend = xtick1, yend = ytick)
    ) +
    # Horizontal reference line at the 80% level (colour mapped via aes, as
    # with the bar fill above).
    geom_segment(
      aes(
        x = .5,
        xend = nr + .5,
        y = y_80,
        yend = y_80,
        color = 'red'
      )
    ) +
    labs(
      title = paste0('Pareto Chart of ', title),
      y = ylabel,
      x = xlabel
    ) +
    theme_bw() +
    theme(
      axis.text.x = element_text(angle = 90, size = 7, vjust = .5, hjust = 1),
      axis.text.y = element_text(size = 7),
      title = element_text(size = 7)
    )

  list(data = Df, plot = g)
}
# kimjam/srms documentation built on May 20, 2019, 10:21 p.m.