R/ib_trending_parser.R

#' @title ib_trending_parser
#'
#' @description A function to parse install base trending file.
#'
#' @param filepath Filepath to Excel Workbook.
#' @param type Parse WW or COUNTRY sheet in install base function.
#' @param years Years in installbase file.
#' @param write Defaults to FALSE. Writes to Excel workbook if TRUE.
#' @param outpath Path to write dataframe to.
#'
#' @return Returns parsed dataframe. WW is grouped by Region and Equipment.
#' COUNTRY is grouped by Country and Equipment.
#'
#' @export

ib_trending_parser <- function(
  filepath,
  type = c('WW', 'COUNTRY'),
  years = 2013:2017,
  write = FALSE,
  outpath = NA
) {
  type_is_valid <- function(x) {
    x %in% c('WW', 'COUNTRY')
  }

  assertthat::on_failure(type_is_valid) <- function(call, env) {
    paste0(deparse(call$x), " must be 'WW', or 'COUNTRY'.")
  }

  assertthat::assert_that(type_is_valid(type))

  ib <- openxlsx::read.xlsx(
    filepath,
    sheet = type,
    startRow = switch(type,
                      WW = 6,
                      COUNTRY = 5)
  )

  if (type == 'WW') {
    names(ib)[3:ncol(ib)] %<>%
      paste0(rep(years, each = 12), .) %>%
      stringr::str_extract(., '[0-9]{6}')

    ib %<>%
      # dplyr::rename(Region = X1) %>%
      # .[!grepl('X', names(.))] %>%
      # fragile, hopefully will be unnecessary once access to SSAS cube is granted
      dplyr::slice(1:match('CANADA Total', Region)) %>%
      dplyr::mutate(Region = zoo::na.locf(Region)) %>%
      dplyr::filter(!is.na(Equipment)) %>%
      dplyr::mutate(
        Region = dplyr::case_when(
          .$Region == 'ASIA PACIFIC' ~ 'ASPAC',
          .$Region == 'GREATER CHINA' ~ 'CHINA',
          .$Region == 'LATIN AMERICA' ~ 'LAR',
          .$Region == 'US & BERMUDA' ~ 'NAR',
          .$Region == 'WW' ~ 'GLOBAL',
          TRUE ~ .$Region),
        Equipment = dplyr::case_when(
          .$Equipment == '350' ~ '250',
          .$Equipment == 'IMMUNO SYS'  ~ '3600',
          .$Equipment == 'INTEGRATED SYS' ~ '5600',
          .$Equipment == 'ECQ' ~ 'ECI',
          .$Equipment == 'LABAUT' ~ 'enGen',
          .$Equipment %in% c('AUTOVUE I', 'AUTOVUE U') ~ 'AUTOVUE IU',
          .$Equipment == 'VISION BV' ~ 'VISION MAX BV',
          TRUE ~ .$Equipment
        )) %>%
      dplyr::group_by(Region, Equipment) %>%
      dplyr::summarise_each(dplyr::funs(sum)) %>%
      tidyr::gather(key = YYYYMM, value = installbase, c(-Region, -Equipment))
  } else if (type == 'COUNTRY') {
    countries <- srms::srms_table('country_codes')

    names(ib)[4:ncol(ib)] %<>%
      paste0(rep(years, each = 12), .) %>%
      stringr::str_extract(., '[0-9]{6}')

    ib %<>%
      dplyr::select(-WW.Region) %>%
      dplyr::filter(Equipment != '0') %>%
      dplyr::mutate(Country = replace(Country, Country == '0', NA),
                    Country = zoo::na.locf(Country)) %>%
      dplyr::mutate(
        Equipment = dplyr::case_when(
          .$Equipment == '350' ~ '250',
          .$Equipment == 'IMMUNO SYS'  ~ '3600',
          .$Equipment == 'INTEGRATED SYS' ~ '5600',
          .$Equipment == 'ECQ' ~ 'ECI',
          .$Equipment == 'LABAUT' ~ 'enGen',
          .$Equipment %in% c('AUTOVUE I', 'AUTOVUE U') ~ 'AUTOVUE IU',
          .$Equipment == 'VISION BV' ~ 'VISION MAX BV',
          TRUE ~ .$Equipment
        )) %>%
      dplyr::group_by(Country, Equipment) %>%
      dplyr::summarise_each(dplyr::funs(sum)) %>%
      tidyr::gather(key = YYYYMM, value = installbase,
                    c(-Country, -Equipment)) %>%
      dplyr::left_join(
        y = countries %>%
          dplyr::select(-Region),
        by = 'Country'
      )
  }

  if (write) {
    if (is.na(outpath)) {
      time <- time_vals()
      outpath <- paste0('ib_parsed_', type, '_',
                        time$year, time$last_month, '.xlsx')
    }

    openxlsx::write.xlsx(x = ib, file = outpath)
  }

  invisible(ib)
}
kimjam/srms documentation built on May 20, 2019, 10:21 p.m.