R/qs_preprocess.R

#' @title qs_preprocess
#' @description Function to preprocess Quality Indicator Metrics
#'
#' @param inpath Path to raw .xlsx
#' @param outpath Path to write cleaned dataset to.
#' @param write Boolean to indicate whether to write to outpath. Defaults to FALSE
#'
#' @return Invisibly returns cleaned dataframe.
#' @export
qs_preprocess <- function(
  inpath = '~/metrics/Quality Indicator Metricx.xlsx',
  outpath = '~/metrics/qs_metrics.xlsx',
  write = FALSE
) {
  df <- openxlsx::read.xlsx(
    xlsxFile = inpath,
    sheet = paste0('Quality Indicators ', lubridate::year(last_month()))
  ) %>% df_checker()

  df %<>%
    dplyr::select(-Status.vs.YE.Target, -Commentary, -`3-5.year.Aspiration`)

  fillblanks <- c('Process.Area', 'Metric', 'Metric.Owner', 'Metric.Definition',
                  'Methodology', 'Frequency', 'Customer.Impact', 'Function')
  df[fillblanks] %<>% lapply(zoo::na.locf)

  targetname <- paste0(lubridate::year(last_month()), '.Target')
  df$target.numeric <- as.numeric(stringr::str_match(df[, targetname], '\\d+'))

  ind <- match('Jan', names(df))
  df %<>%
    dplyr::mutate(Metric = gsub('\\r|\\n', '', Metric)) %>%
    tidyr::gather(
      range,
      value,
      -c(1:(ind-1), ncol(.))
    ) %>%
    dplyr::mutate(
      date = as.POSIXct(
        strptime(paste0(range, '-01-',
                        lubridate::year(last_month())),
                 format = '%b-%d-%Y')
      ),
      value = stringr::str_match(value, '^\\d+(\\s)?(\\.\\d{1,2})?$')[, 1] %>%
        gsub('\\s', '', .) %>%
        as.numeric(),
      display.value = ifelse(Metric == 'Avg. # Audit Observation / Inspection',
                             as.character(round(value, 2)),
                             as.character(round(value)))
    ) %>%
    dplyr::filter(date <= last_month()) %>%
    dplyr::mutate(
      projection = `2017.YTD` * 12 / lubridate::month(last_month()),
      status = NA
    )

  mask <- !is.na(df$target.numeric)
  df$status[mask] <- ifelse(df$`2017.YTD`[mask] > df$target.numeric[mask],
                                              'Off Target',
                                              ifelse(df$projection[mask] > df$target.numeric[mask],
                                                     'At Risk to Miss Target',
                                                     'On Target'))


  mask <- df$Metric == 'Avg. # Audit Observation / Inspection'
  df[, grep('Baseline', names(df))][mask] %<>%  as.character(round(., 2))
  df[, grep('YTD', names(df))][mask] %<>%  as.character(round(., 2))

  colnames <- names(df)
  colnames[grep('Baseline', colnames)] <- paste0('Baseline (',
                                                 lubridate::year(last_month()) - 1,
                                                 ')')
  colnames[grep('YTD', colnames)] <- paste0('YTD (',
                                            lubridate::year(last_month()),
                                            ')')
  colnames[grep('Target', colnames)] <- paste0('Target (',
                                               lubridate::year(last_month()),
                                               ')')

  names(df) <- colnames

  if (write) {
    names(df) %<>% gsub('\\.', ' ', .)
    df$date <- as.Date(df$date)
    openxlsx::write.xlsx(x = df, file = outpath)
  }

  invisible(df)
}
kimjam/srms documentation built on May 20, 2019, 10:21 p.m.