R/quarterly_top_n.R

#' @title topn_sales_parser
#'
#' @description Function to get quarterly sales averages.
#'
#' @param df Dataframe containing sales data contained in list "sales" in sysdata.
#'
#' @return Returns dataframe with quarterly sales average and joining columns.
topn_sales_parser <- function(df) {
  num_inds <- (1:ncol(df))[sapply(df, class) == 'numeric']
  num_inds <- num_inds[num_inds > 2][1:12]

  df$AverageQ <- round(rowSums(df[num_inds]) / 4, digits = 0)
  df$AverageQ[df$AverageQ == 0] <- NA
  df <- df[c(1:(min(num_inds)-1), ncol(df))]

  return(df)
}

#' @title top_n_desc
#'
#' @description A function to generate description column for
#' \link[srms]{quarterly_top_n} summary table.
#'
#' @param tech One of MicroSlide, MicroTip, MicroWell for CL or one of
#' Donor Screening Reagent, Immunohematology Reagent, or MTS Reagent for TM.
#' @param df A dataframe of top CL complaints for previous quarter.
#'
#' @return Returns description.

top_n_desc <- function(
  tech = c('MicroSlide', 'MicroWell', 'MicroTip', 'Donor Screening Reagent',
           'Immunohematology Reagent', 'MTS Reagent'),
  df
) {
  tech_is_valid <- function(x) {
    x %in% c('MicroSlide', 'MicroWell', 'MicroTip', 'Donor Screening Reagent',
             'Immunohematology Reagent', 'MTS Reagent')
  }

  assertthat::on_failure(tech_is_valid) <- function(call, env) {
    paste(deparse(call$x), "must be 'MicroSlide', 'MicroWell', 'MicroTip',",
          "'Donor Screaning Reagent', 'Immunohematology Reagent', or 'MTS Reagent'.")
  }

  assertthat::assert_that(tech_is_valid(tech))

  df %<>%
    dplyr::filter(
      Technology == tech
    )

  desc <- paste(
    apply(df, 1, function(x) paste0(x[2], ' (n=', x[5], ')')),
    collapse = ', '
  )

  return(desc)
}

#' @title quarterly_top_n
#'
#' @description Function to summarise top 5/10 complaints on a quarterly basis.
#'
#' @param n Number for Clinical Chemistry. Defaults to 5.
#' @param norm_n Number for normalized. Defaults to 10.
#' @param filepath Filepath to excel workbook containing complaints, sales, and
#' normalized data.
#' @param clre_path Filepath to CLRE file as a .csv
#' @param dsre_path Filepath to DSRE file as a .csv
#' @param ihre_path Filepath to IHRE file as a .csv
#' @param mtsre_path Filepath to MTSRE file as a .csv
#' @param xlsxname Name of Excel workbook to write to. Defaults to top_n_YYYYQX.xlsx
#' where YYYY is the year and X is the quarter number.
#' @param write Defaults to TRUE. Writes to Excel Workbook if TRUE.
#'
#' @return Returns a list of dataframes.
#' \itemize{
#'  \item{Summary}
#'  \item{CL top 5 complaints}
#'  \item{CL top 10 normalized}
#'  \item{TM top 5 complaints}
#'  \item{TM top 10 normalized}
#' }
#'
#' @export

quarterly_top_n <- function(
  n = 5,
  norm_n = 10,
  filepath,
  clre_path,
  dsre_path,
  ihre_path,
  mtsre_path,
  xlsxname = NA,
  write = TRUE
) {

  Sys.setenv(R_ZIPCMD = 'C:/Rtools/bin/zip')
  top <- . %>%
    dplyr::arrange(
      -count
    ) %>%
    head(n)

  top_norm <- . %>%
    dplyr::arrange(
      -normalized
    ) %>%
    head(norm_n)

  clre_grouper <- . %>%
    dplyr::group_by(
      Technology,
      str_CallSubject
    ) %>%
    dplyr::summarise(
      count = n()
    )

  tm_grouper <- . %>%
    dplyr::group_by(
      Category
    ) %>%
    dplyr::summarise(
      count = n()
    )

  mts_grouper <- . %>%
    dplyr::group_by(
      Call_Subject
    ) %>%
    dplyr::summarise(
      count = n()
    )

  clnorms <- openxlsx::read.xlsx(
    xlsxFile = filepath,
    sheet = 4
  )[, 1:2]

  dsnorms <- openxlsx::read.xlsx(
    xlsxFile = filepath,
    sheet = 9
  )[1]

  ihnorms <- openxlsx::read.xlsx(
    xlsxFile = filepath,
    sheet = 10
  )[1]

  mtsnorms <- openxlsx::read.xlsx(
    xlsxFile = filepath,
    sheet = 11
  )[1]

  clnorms %<>% df_checker()
  dsnorms %<>% df_checker()
  ihnorms %<>% df_checker()
  mtsnorms %<>% df_checker()

  cl_sales <- sales$cl %>%
    dplyr::mutate(
      Material_Code = as.numeric(Material_Code)
    ) %>%
    dplyr::left_join(
      y = top_n_clcs %>%
        dplyr::select(
          Technology,
          Material_Long_Cd,
          Call_subject
        ),
      by = c('Material_Code' = 'Material_Long_Cd')
    ) %>%
    dplyr::select(
      -Material_Code,
      -Product_Desc
    ) %>%
    dplyr::mutate(
      Technology = ifelse(Technology == 'DT60', 'MicroSlide', Technology)
    ) %>%
    dplyr::group_by(
      Technology,
      Call_subject
    ) %>%
    dplyr::summarise_each(
      dplyr::funs(sum)
    ) %>%
    topn_sales_parser()

  ih_sales <- sales$ih[-1] %>%
    dplyr::select(
      -callsubject,
      -description
    ) %>%
    dplyr::group_by(
      dataset,
      category
    ) %>%
    dplyr::summarise_each(
      dplyr::funs(sum)
    ) %>%
    topn_sales_parser()

  ds_sales <- sales$ds[-1] %>%
    dplyr::select(
      -callsubject,
      -description
    ) %>%
    dplyr::group_by(
      dataset,
      category
    ) %>%
    dplyr::summarise_each(
      dplyr::funs(sum)
    ) %>%
    topn_sales_parser()

  mts_sales <- sales$mts[-1] %>%
    dplyr::select(
      -description,
      -category
    ) %>%
    dplyr::group_by(
      dataset,
      callsubject
    ) %>%
    dplyr::summarise_each(
      dplyr::funs(sum)
    ) %>%
    topn_sales_parser() %>%
    dplyr::mutate(
      callsubject = ifelse(nchar(callsubject) == 9,
                           gsub('S', '', callsubject),
                           callsubject)
    )
  # read in raw data and pre process
  clre <- read.csv(
    file = clre_path,
    stringsAsFactors = FALSE,
    strip.white = TRUE
  ) %>%
    dplyr::filter(
      !(str_CallSubject %in% c('MULT', 'UDA'))
    )

  clre$Technology[clre$Technology == 'DT60'] <- 'MicroSlide'
  clre$str_CallSubject[is.na(clre$str_CallSubject)] <- 'NA'

  dsre <- read.csv(
    file = dsre_path,
    stringsAsFactors = FALSE,
    strip.white = TRUE
  )

  ihre <- read.csv(
    file = ihre_path,
    stringsAsFactors = FALSE,
    strip.white = TRUE
  )

  mtsre <- read.csv(
    file = mtsre_path,
    stringsAsFactors = FALSE,
    strip.white = TRUE
  )
  # clre top n by complaint counts
  clre_top <- dplyr::do(
    clre %>%
      clre_grouper(),
    top(.)
  ) %>%
    dplyr::left_join(
      y = top_n_recats %>%
        dplyr::select(
          Assay,
          str_callsubject,
          Group
        ) %>%
        unique(),
      by = c('str_CallSubject' = 'str_callsubject')
    ) %>%
    dplyr::select(
      Technology,
      str_CallSubject,
      Assay,
      Group,
      count
    )

  # top norm_n clre by normalized complaint rate
  clre_norms <- dplyr::do(
    clre %>%
      clre_grouper() %>%
      dplyr::left_join(
        y = cl_sales %>%
          dplyr::select(
            Technology,
            Call_subject,
            AverageQ
          ),
        by = c('Technology', 'str_CallSubject' = 'Call_subject')
      ) %>%
      dplyr::mutate(
        normalized = round((count / AverageQ) * 1e6, digits = 0)
      ),
    top_norm(.)
  ) %>%
    dplyr::left_join(
      y = top_n_recats %>%
        dplyr::select(
          Assay,
          str_callsubject,
          Group
        ) %>%
        unique(),
      by = c('str_CallSubject' = 'str_callsubject')
    ) %>%
    dplyr::select(
      Technology,
      str_CallSubject,
      Assay,
      Group,
      count,
      AverageQ,
      normalized
    )

  dsre_top <- dplyr::do(
    dsre %>%
      tm_grouper(),
    top(.)
  ) %>%
    dplyr::left_join(
      y = top_n_tmrecats %>%
        dplyr::select(
          Category,
          Group
        ),
      by = 'Category'
    ) %>%
    dplyr::mutate(
      Technology = 'Donor Screening Reagent',
      Description = Category
    ) %>%
    dplyr::select(
      Technology,
      Category,
      Description,
      Group,
      count
    )

  dsre_norms <- dplyr::do(
    dsre %>%
      tm_grouper() %>%
      dplyr::left_join(
        y = ds_sales %>%
          dplyr::select(
            category,
            AverageQ
          ),
        by= c('Category' = 'category')
      ) %>%
      dplyr::mutate(
        normalized = round((count / AverageQ) * 1e6, digits = 0)
      ),
    top_norm(.)
  ) %>%
    dplyr::filter(
      !is.na(normalized)
    ) %>%
    dplyr::left_join(
      y = top_n_tmrecats %>%
        dplyr::select(
          Category,
          Group
        ),
      by = 'Category'
    ) %>%
    dplyr::mutate(
      Technology = 'Donor Screening Reagent',
      Description = Category
    ) %>%
    dplyr::select(
      Technology,
      Category,
      Description,
      Group,
      count,
      AverageQ,
      normalized
    )

  ihre_top <- dplyr::do(
    ihre %>%
      tm_grouper(),
    top(.)
  ) %>%
    dplyr::left_join(
      y = top_n_tmrecats %>%
        dplyr::select(
          Category,
          Group
        ),
      by = 'Category'
    ) %>%
    dplyr::mutate(
      Technology = 'Immunohematology Reagent',
      Description = Category
    ) %>%
    dplyr::select(
      Technology,
      Category,
      Description,
      Group,
      count
    )

  ihre_norms <- dplyr::do(
    ihre %>%
      tm_grouper() %>%
      dplyr::left_join(
        y = ih_sales %>%
          dplyr::select(
            category,
            AverageQ
          ),
        by = c('Category' = 'category')
      ) %>%
      dplyr::mutate(
        normalized = round((count / AverageQ) * 1e6, digits = 0)
      ),
    top_norm(.)
  ) %>%
    dplyr::filter(
      !is.na(normalized)
    ) %>%
    dplyr::left_join(
      y = top_n_tmrecats %>%
        dplyr::select(
          Category,
          Group
        ),
      by = 'Category'
    ) %>%
    dplyr::mutate(
      Technology = 'Immunohematology Reagent',
      Description = Category
    ) %>%
    dplyr::select(
      Technology,
      Category,
      Description,
      Group,
      count,
      AverageQ,
      normalized
    )

  mtsre_top <- dplyr::do(
    mtsre %>%
      mts_grouper(),
    top(.)
  ) %>%
    dplyr::left_join(
      y = top_n_tmrecats %>%
        dplyr::select(
          Category,
          Product_Description,
          Group
        ),
      by = c('Call_Subject' = 'Category')
    ) %>%
    dplyr::mutate(
      Technology = 'MTS Reagent',
      Description = Product_Description,
      Category = Call_Subject
    ) %>%
    dplyr::select(
      Technology,
      Category,
      Description,
      Group,
      count
    )

  mtsre_norms <- dplyr::do(
    mtsre %>%
      mts_grouper() %>%
      dplyr::left_join(
        y = mts_sales %>%
          dplyr::select(
            callsubject,
            AverageQ
          ),
        by = c('Call_Subject' = 'callsubject')
      ) %>%
      dplyr::mutate(
        normalized = round((count / AverageQ) * 1e6, digits = 0)
      ),
    top_norm(.)
  ) %>%
    dplyr::filter(
      !is.na(normalized)
    ) %>%
    dplyr::left_join(
      y = top_n_tmrecats %>%
        dplyr::select(
          Category,
          Product_Description,
          Group
        ),
      by = c('Call_Subject' = 'Category')
    ) %>%
    dplyr::mutate(
      Technology = 'MTS Reagent',
      Description = Product_Description,
      Category = Call_Subject
    ) %>%
    dplyr::select(
      Technology,
      Category,
      Description,
      Group,
      count,
      AverageQ,
      normalized
    )

  tm_top <- rbind(dsre_top, ihre_top, mtsre_top)
  tm_norms <- rbind(dsre_norms, ihre_norms, mtsre_norms)

  summary_cl <- clre %>%
    dplyr::group_by(
      Technology
    ) %>%
    dplyr::summarise(
      total = n()
    ) %>%
    dplyr::mutate(
      LOB = 'Clinical Chemistry'
    ) %>%
    dplyr::left_join(
      y = clre_top %>%
        dplyr::group_by(
          Technology
        ) %>%
        dplyr::summarise(
          top5 = sum(count)
        ),
      by = 'Technology'
    ) %>%
    dplyr::mutate(
      Group = Technology,
      Perc_by_top_5 = round(top5 / total, digits = 2),
      Top5_Desc = c(
        top_n_desc(tech = 'MicroSlide', df = clre_top),
        top_n_desc(tech = 'MicroTip', df = clre_top),
        top_n_desc(tech = 'MicroWell', df = clre_top)
      )
    ) %>%
    dplyr::select(
      LOB,
      Group,
      total,
      top5,
      Perc_by_top_5,
      Top5_Desc
    )

  summary_tm <- data.frame(
    LOB = rep('Transfusion Medicine', 3),
    Group = c('Donor Screening', 'Immunohematology', 'MTS Reagent')
  ) %>%
    dplyr::mutate(
      total = c(
        nrow(dsre),
        nrow(ihre),
        nrow(mtsre)
      ),
      top5 = c(
        sum(dsre_top$count),
        sum(ihre_top$count),
        sum(mtsre_top$count)
      ),
      Perc_by_top_5 = round(top5 / total, digits = 2),
      Top5_Desc = c(
        top_n_desc(tech = 'Donor Screening Reagent', df = tm_top),
        top_n_desc(tech = 'Immunohematology Reagent', df = tm_top),
        top_n_desc(tech = 'MTS Reagent', df = tm_top)
      )
    )

  summary <- rbind(summary_cl, summary_tm)

  if (write) {
    time <- time_vals()
    if (is.na(xlsxname)) {
      xlsxname <- paste0('top_n_', time$year, time$quarter, '.xlsx')
    }

    wb <- openxlsx::createWorkbook()

    openxlsx::addWorksheet(
      wb = wb,
      sheetName = 'summary'
    )

    openxlsx::addWorksheet(
      wb = wb,
      sheetName = 'CL complaints'
    )

    openxlsx::addWorksheet(
      wb = wb,
      sheetName = 'CL normalized'
    )

    openxlsx::addWorksheet(
      wb = wb,
      sheetName = 'TM complaints'
    )

    openxlsx::addWorksheet(
      wb = wb,
      sheetName = 'TM normalized'
    )

    openxlsx::writeDataTable(
      wb = wb,
      sheet = 'summary',
      x = summary,
      withFilter = FALSE
    )

    openxlsx::writeDataTable(
      wb = wb,
      sheet = 'CL complaints',
      x = clre_top,
      withFilter = FALSE
    )

    openxlsx::writeDataTable(
      wb = wb,
      sheet = 'CL normalized',
      x = clre_norms,
      withFilter = FALSE
    )

    openxlsx::writeDataTable(
      wb = wb,
      sheet = 'TM complaints',
      x = tm_top,
      withFilter = FALSE
    )

    openxlsx::writeDataTable(
      wb = wb,
      sheet = 'TM normalized',
      x = tm_norms,
      withFilter = FALSE
    )

    openxlsx::saveWorkbook(
      wb = wb,
      file = xlsxname,
      overwrite = TRUE
    )
  }

  return(
    list(summary, clre_top, clre_norms, tm_top, tm_norms)
  )
}
kimjam/srms documentation built on May 20, 2019, 10:21 p.m.