R/ib_parser.R

#' @title ib_parser
#'
#' @description A function to clean and manipulate installbase data to make it
#' usable for data analysis / Tableau.
#'
#' @param ib_df dataframe of raw installbase data obtained from Information Anywhere.
#' Can be read manually or using \link[srms]{ib_reader}.
#' @param use 'general', 'region', or 'reburbished'. Changes results based on end use.
#' \itemize{
#'  \item{general: shows total installbase by country and analyzer}
#'  \item{region: shows total installbase by region and analyzer}
#'  \item{refurbished: shows total installbase by country, analyzer, and refurbished vs. regular}
#' }
#' @param write Defaults to TRUE. If TRUE, will write to Excel Workbook.
#' @param xlsxfile Name of file to write results to. Defaults to 'cleaned_ib_MMDDYYYY.xlsx'
#' where MMDDYYYY is the date.
#'
#' @return Returns Cleaned installbase dataframe and writes results to Excel
#' Workbook in current working directory if write = TRUE
#'
#' @export

ib_parser <- function(
  ib_df,
  use = c('general', 'region', 'refurbished'),
  write = TRUE,
  xlsxfile = NA
) {
  # point to zip directory for file writing
  Sys.setenv(R_ZIPCMD = 'C:/Rtools/bin/zip')

  # test that parameter use is valid
  use_is_valid <- function(x) {
    x %in% c('general', 'region', 'refurbished')
  }

  assertthat::on_failure(use_is_valid) <- function(call, env) {
    paste0(deparse(call$x), " must be 'general', 'region', or 'refurbished'.")
  }

  assertthat::assert_that(use_is_valid(use))

  # remove column and row totals
  ib_df <- ib_df[1:(nrow(ib_df)-1), 1:(ncol(ib_df)-1)]

  names(ib_df)[1:2] <- c('Analyzer', 'Instrument')

  # change Analyzers to be compatible with complaint data
  ib_df$Analyzer[ib_df$Analyzer == '350'] <- '250'
  ib_df$Analyzer[ib_df$Analyzer == 'ECQ'] <- 'ECI'
  ib_df$Analyzer[ib_df$Analyzer == 'INTEGRATED SYS'] <- '5600'
  ib_df$Analyzer[ib_df$Analyzer == 'IMMUNO SYS'] <- '3600'
  ib_df$Analyzer[ib_df$Analyzer == 'LABAUT'] <- 'enGen'
  ib_df$Analyzer[ib_df$Analyzer == 'INSTMGR'] <- 'DT'

  # fill Analyzer blanks with previous value
  ib_df$Analyzer <- zoo::na.locf(ib_df$Analyzer)

  # fill blanks with zeroes
  ib_df[3:ncol(ib_df)] <- lapply(
    ib_df[3:ncol(ib_df)],
    function(x) ifelse(is.na(x), 0, x)
  )

  if (use %in% c('general', 'region')) {
    ib_df %<>%
      dplyr::filter(
        !grepl('*Total', Instrument)
      ) %>%
      reshape2::melt(
        .,
        id = c('Analyzer', 'Instrument')
      ) %>%
      dplyr::mutate(
        Country_Code = as.character(variable)
      ) %>%
      dplyr::group_by(
        Analyzer,
        Country_Code
      ) %>%
      dplyr::summarise(
        Total = sum(value)
      ) %>%
      dplyr::select(
        Analyzer,
        Country_Code,
        Total
      )

    if(use == 'region') {
      ib_df %<>%
        dplyr::left_join(
          y = ib_regions %>%
            dplyr::select(
              Country.Code,
              Region
            ),
          by = c('Country_Code' = 'Country.Code')
        )

      ib_df$Region[ib_df$Region == 'EMEA'] <- 'EUR'
      ib_df$Region[ib_df$Region == 'ASPAC' | ib_df$Region == 'JP'] <- 'APR'
      ib_df$Region[ib_df$Region == 'NA'] <- 'NAR'

      ib_df %<>%
        dplyr::group_by(
          Region,
          Analyzer
        ) %>%
        dplyr::summarise(
          Installbase = sum(Total)
        )
    }
  }

  if (use == 'refurbished') {
    ib_df <- ib_df[!grepl('*AT SYSTEM*', ib_df$Instrument), ]
    ib_df <- ib_df[!grepl('*AT SYSTEM', ib_df$Instrument), ]
    ib_df <- ib_df[!grepl('*Total', ib_df$Instrument), ]
    ib_df <- ib_df[!grepl('*RECERTIFIED*', ib_df$Instrument), ]

    ib_df %<>%
      dplyr::filter(
        Analyzer %in% c('250', 'FS', 'ECI', '5600', '3600', 'PROVUE')
      ) %>%
      reshape2::melt(
        .,
        id = c('Analyzer', 'Instrument')
      ) %>%
      dplyr::mutate(
        Refurbished = ifelse(
          grepl('*REFURB', toupper(Instrument)),
          'Refurbished',
          'Regular'
        ),
        Country_Code = as.character(variable)
      ) %>%
      dplyr::group_by(
        Analyzer,
        Refurbished,
        Country_Code
      ) %>%
      dplyr::summarise(
        Total = sum(value)
      ) %>%
      dplyr::select(
        Analyzer,
        Country_Code,
        Total,
        Refurbished
      )
  }

  if (write) {
    if (is.na(xlsxfile)) {
      time <- time_vals()
      xlsxfile <- paste0(
        'cleaned_ib_',
        time$month,
        time$day,
        time$year,
        '.xlsx'
      )
    }
    openxlsx::write.xlsx(
      x = ib_df,
      file = xlsxfile,
      asTable = TRUE
    )
  }

  return(ib_df)
}
kimjam/srms documentation built on May 20, 2019, 10:21 p.m.