R/qo_preprocess.R

#' @title qo_preprocess
#' @description Function clean ScorecardData.xlsx for Quality Operations scorecard.
#'
#' @param inpath File path to ScorecardData.xlsx file.
#' @param outpath Path to write cleaned dataframe to.
#' @param write Boolean to indicate if dataframe should be written to outpath. Defaults to TRUE.
#'
#' @return Writes to outpath. Invisibly returns cleaned dataframe.
#' @export
qo_preprocess <- function(
  inpath = '~/metrics/ScorecardData.xlsx',
  outpath = '~/metrics/qo_new_format.xlsx',
  write = TRUE
) {

  ops <- openxlsx::read.xlsx(xlsxFile = inpath, startRow = 3) %>%
    df_checker()

  ops[c('Category', 'Process.Area', 'Metric')] %<>% lapply(., zoo::na.locf)

  # columns to keep
  cols <- c(
    match(c('Category', 'Process.Area', 'Metric', 'Metric.Definition',
            'Customer.Rationale', 'Site.Source', 'Function'),
          names(ops)),
    grep('Baseline', names(ops)), grep('YTD', names(ops)),
    grep('Target', names(ops)),
    (1:length(names(ops)))[!is.na(as.numeric(names(ops)))]
  )

  ops <- ops[cols]
  ops[c(grep('Baseline', names(ops)), grep('YTD', names(ops)),
        grep('Target', names(ops)))] %<>% lapply(., as.numeric)

  # duplicate row for EM-DQ, currently not used
  dupe <- min(
    (1:nrow(ops))[ops$Metric == '%First Pass Acceptance (External Manufacturing Finished Goods)']
  )
  ops <- ops[c(1:nrow(ops), dupe), ]
  ops$Site.Source[nrow(ops)] <- 'External Manufacturing'
  ops$Function[nrow(ops)] <- 'Supplier Responsible'

  dates <- xlsx2posix(as.numeric(names(ops)[!is.na(as.numeric(names(ops)))]))

  ind <- match(
    lubridate::floor_date(Sys.Date(), 'month') - months(1),
               as.Date(dates)
    ) + sum(stringr::str_detect(names(ops), '[:alpha:]'))

  ind_value <- names(ops)[ind]

  ops %<>%
    dplyr::mutate(Current.Month = as.numeric(ops[, ind])) %>%
    tidyr::gather(
      date,
      value,
      !! -c(1:(sum(stringr::str_detect(names(.), '[:alpha:]'))-1), ncol(.))
    ) %>%
    dplyr::filter(date <= ind_value) %>%
    dplyr::mutate(date = xlsx2posix(as.numeric(date)),
                  Site.Source = zoo::na.locf(Site.Source),
                  Metric.Definition = zoo::na.locf(Metric.Definition),
                  Customer.Rationale = zoo::na.locf(Customer.Rationale),
                  value = as.numeric(value))

  # find trend for path six months and associated p values
  trending <- ops %>%
    dplyr::filter(
      date >= lubridate::floor_date(lubridate::with_tz(Sys.time(), 'GMT'),
                                    'month') - months(6)
    ) %>%
    dplyr::group_by(Metric,
                    Site.Source,
                    Function) %>%
    tidyr::nest() %>%
    dplyr::mutate(model = purrr::map_lgl(data, ~ !all(is.na(.x$value)))) %>%
    dplyr::filter(model) %>%
    dplyr::select(-model) %>%
    dplyr::mutate(
      data = purrr::map(data, ~ .x %>% dplyr::mutate(dummy = 1:6)),
      fit = purrr::map(data, ~ broom::tidy(lm(data = .x, value ~ dummy))),
      slope = purrr::map_dbl(fit,
                             ~ ifelse('dummy' %in% .x$term,
                                      .x$estimate[.x$term == 'dummy'],
                                      NA)),
      pval = purrr::map_dbl(fit,
                            ~ ifelse('dummy' %in% .x$term,
                                     .x$p.value[.x$term == 'dummy'],
                                     NA))
    )

  ops %<>%
    dplyr::left_join(
      y = trending %>%
        dplyr::select(-data, -fit),
      by = c('Metric', 'Site.Source', 'Function')
    ) %>%
    dplyr::mutate(
      Metric.Type = ifelse(grepl('%', Metric),
                           'Proportion',
                           'Numeric'),
      Display.Value = ifelse(Metric.Type == 'Proportion',
                             paste0(round(value * 100, 1), '%'),
                             paste(round(value))),
      trend.dir = ifelse(grepl('*First Pass*', Metric),
                         'up', 'down'),
      Trend = ifelse(trend.dir == 'up',
                     ifelse(pval > .1,
                            '0',
                            ifelse(slope > 0,
                                   '1', '-1')),
                     ifelse(pval > .1,
                            '0',
                            ifelse(slope < 0,
                                   '1', '-1'))),
      Trend = replace(Trend, is.na(Current.Month), NA),
      Trend = replace(
        Trend,
        Metric %in% c('# Supplier NCs Created', '# Open Supplier NCs',
                      '# Aged Supplier NCs') &
          Trend == '-1' &
          Current.Month < 5,
        '0'
      )
    )

  # handle indicators for Nypro Helathcare Baja, Inc.
  cond1 <- ops$Site.Source %in% c('Ortho', 'Rochester')
  cond2 <- ops$Metric %in% c('# Supplier NCs Created', '# Open Supplier NCs',
                             '# Aged Supplier NCs',
                             'Cycle time for Supplier NC Closure')
  cond3 <- ops$date > strptime('2016-09-01', format = '%Y-%m-%d')
  cond3[is.na(cond3)] <- FALSE

  ops$Display.Value[cond1 & cond2 & cond3] %<>% paste0(., '*')

  baseline_name <- paste0('Baseline (',
                          lubridate::year(last_month()) - 1,
                          ')')
  ytd_name <- paste0('YTD (',
                     lubridate::year(last_month()),
                     ')')

  ops[, baseline_name] <- ifelse(
    ops$Metric.Type == 'Proportion',
    paste0(round(ops[, grep('Baseline', names(ops))] * 100, 1), '%'),
    paste(round(ops[, grep('Baseline', names(ops))]))
  )
  ops[, baseline_name][grepl('#|Stop', ops$Metric)] %<>% paste(., '*')
  ops[, ytd_name] <- ifelse(
    ops$Metric.Type == 'Proportion',
    paste0(round(ops[, grep('YTD', names(ops))] * 100, 1), '%'),
    paste(round(ops[, grep('YTD', names(ops))]))
  )
  ops[, ytd_name][grepl('#|Stop', ops$Metric)] %<>% paste(., '*')
  ops %<>% dplyr::mutate(
    Current.Month = ifelse(
      Metric.Type == 'Proportion',
      paste0(round(Current.Month * 100, 1), '%'),
      paste(round(Current.Month)))
  )

  if (write) {
    openxlsx::write.xlsx(x = ops %>%
                           dplyr::mutate(date = as.Date(date)) %>%
                           setNames(gsub('\\.', ' ', names(.))),
                         file = outpath)
  }

  invisible(ops %>% dplyr::rename(`6 Month Trend` = Trend))
}
kimjam/srms documentation built on May 20, 2019, 10:21 p.m.