R/ggcc_gen.R

#' @title check_tests
#' @description checks control chart data for triggering of tests.
#'
#' @param df Dataframe from ggcc_gen.
#' @param avg Calculated average.
#' @param pseudo_sd Calculated pseudo sd.
#'
#' @return returns string indicated which tests were triggered to be used as footer.
check_tests <- function(df, avg, pseudo_sd) {
  zones <- as.numeric(scale(df$numeric, center = avg, scale = pseudo_sd))

  # last point outside 3 sd range
  test1 <- abs(tail(zones, 1)) > 3

  # 6 points in a row increasing or decreasing
  test3 <- any(sum(tail(diff(df$numeric), 6) > 0) == 6,
               sum(tail(diff(df$numeric), 6) < 0) == 6)

  # 2 out of last 3 points on same side of 2 sd range
  test5 <- any(sum(tail(zones, 3) < -2) > 1,
               sum(tail(zones, 3) > 2) > 1)

  # 4 out of last 5 points on same side of 1 sd range
  test6 <- any(sum(tail(zones, 5) < -1) > 3,
               sum(tail(zones, 5) > 1) > 3)

  # 8 points greater than 2 sd from center line
  test8 <- sum(abs(tail(zones, 8)) > 2) == 8

  tests <- paste(c(1, 3, 5, 6, 8)[c(test1, test3, test5, test6, test8)],
                 collapse = ', ')

  paste('Tests triggered :',
        ifelse(tests == '', 'None', tests))
}

#' @title ggcc_gen
#' @description Function to create control chart.
#'
#' @param df Dataframe of data. Preprocessing must be done beforehand to have one character column and one numeric column.
#' @param num_type Set equal to count or proportion. This will change the calculation of the limits.
#' @param sample_size If num_type is proportion, provide sample size.
#' @param title Chart title. If NULL, chart title will be Control Chart
#'
#' @return Returns list containing ggplot of control chart and footer indicating triggered tests.
#'
#' @export
ggcc_gen <- function(df,
                     num_type = c('count', 'proportion'),
                     sample_size,
                     title = NULL) {
  # make sure df is correctly formatted
  ncol_is_right <- function(x) {
    ncol(x) == 2
  }

  assertthat::on_failure(ncol_is_right) <- function(call, env) {
    paste0(deparse(call$x), ' must have 2 columns.')
  }

  assertthat::assert_that(ncol_is_right(df))

  types_are_right <- function(x) {
    all(any(sapply(x, is.numeric)),
        any(sapply(x, is.character)))
  }

  assertthat::on_failure(types_are_right) <- function(call, env) {
    paste0(deparse(call$x), ' must have one character column and one numeric column.')
  }

  assertthat::assert_that(types_are_right(df))

  num_type_valid <- function(x) {
    x %in% c('count', 'proportion')
  }

  assertthat::on_failure(num_type_valid) <- function(call, env) {
    paste0(deparse(call$x), ' must be count or proportion')
  }

  assertthat::assert_that(num_type_valid(num_type))
  # char_name <- names(df)[vapply(df, is.character, logical(1))]
  # num_name <- names(df)[vapply(df, is.numeric, logical(1))]

  if (is.null(title)) {
    title <- 'Control Chart'
  }

  names(df)[vapply(df, is.character, logical(1))] <- 'character'
  names(df)[vapply(df, is.numeric, logical(1))] <- 'numeric'

  df %<>%
    dplyr::mutate(range = c(NA, abs(diff(numeric))))

  if (num_type == 'count') {
    avgr <- round(mean(df$range, na.rm = TRUE), digits = 1)
    lclr <- 0
    uclr <- round(3.267*avgr, digits = 1)

    pseudo_sd <- avgr / 1.128
    avg <- round(mean(df$numeric), digits = 1)
    lcl <- round(mean(df$numeric) - 3*pseudo_sd, digits = 1)
    ucl <- round(mean(df$numeric) + 3*pseudo_sd, digits = 1)
    lzb <- round(mean(df$numeric) - 2*pseudo_sd, digits = 1)
    uzb <- round(mean(df$numeric) + 2*pseudo_sd, digits = 1)
    lzc <- round(mean(df$numeric) - pseudo_sd, digits = 1)
    uzc <- round(mean(df$numeric) + pseudo_sd, digits = 1)
  } else if (num_type == 'proportion') {
    avg <- round(mean(df$numeric), digits = 4)
    pseudo_sd <- sqrt(avg*(1 - avg) / sample_size)
    lcl <- max(c(0, round(mean(df$numeric) - 3*pseudo_sd, digits = 4)))
    ucl <- min(c(1, round(mean(df$numeric) + 3*pseudo_sd, digits = 4)))
    lzb <- max(c(0, round(mean(df$numeric) - 2*pseudo_sd, digits = 4)))
    uzb <- min(c(1, round(mean(df$numeric) + 2*pseudo_sd, digits = 4)))
    lzc <- max(c(0, round(mean(df$numeric) - pseudo_sd, digits = 4)))
    uzc <- min(c(1, round(mean(df$numeric) + pseudo_sd, digits = 4)))
  }
  rects <- data.frame(ystart = lcl, yend = ucl)

  footer <- check_tests(df, avg = avg, pseudo_sd = pseudo_sd)

  control_chart <- ggplot() +
    # frequency points
    geom_point(
      data=df,
      aes(x = character, y = numeric),
      size = 1
    ) +
    geom_line(
      data=df,
      aes(x = character, y = numeric, group = 1),
      alpha = 0.4
    ) +
    #point labels
    geom_text(
      data=df,
      aes(x = character, y = numeric,label = numeric),
      vjust = 'inward',
      hjust = 'inward'
    ) +
    #mean line
    geom_hline(
      data=df,
      aes(yintercept = avg),
      color = 'black',
      alpha = .5
    ) +
    # LCL line
    geom_hline(
      data=df,
      aes(yintercept = lcl),
      color = 'black',
      linetype = 'dashed',
      alpha = 0.25
    ) +
    # UCL line
    geom_hline(
      data=df,
      aes(yintercept = ucl),
      color = 'black',
      linetype = 'dashed',
      alpha = 0.25
    ) +
    # lzb line
    geom_hline(
      data=df,
      aes(yintercept = lzb),
      color = 'black',
      linetype = 'dashed',
      alpha = 0.25
    ) +
    # uzb line
    geom_hline(
      data=df,
      aes(yintercept = uzb),
      color = 'black',
      linetype = 'dashed',
      alpha = 0.25
    ) +
    # lzc line
    geom_hline(
      data=df,
      aes(yintercept = lzc),
      color = 'black',
      linetype = 'dashed',
      alpha = 0.25
    ) +
    # uzc line
    geom_hline(
      data=df,
      aes(yintercept = uzc),
      color = 'black',
      linetype = 'dashed',
      alpha = 0.25
    ) +
    # blue shading
    geom_rect(
      data=rects,
      aes(xmin = -Inf, xmax = Inf, ymin = ystart, ymax = yend),
      alpha = 0.4,
      fill = 'lightblue'
    ) +
    ylab(ifelse(num_type == 'count',
                'Frequency Count', 'Proportion')) +
    xlab('') +
    ggtitle(title)

  return(list(control_chart = control_chart,
              footer = footer))
}
kimjam/srms documentation built on May 20, 2019, 10:21 p.m.