hisreportr: hisreportr: Automating MSF OCA EMR reporting

Documented in drug_tx_by_visit

#' Drug treatment by visit
#' 
#' Treatment prescribed at any specific clinic visit may be represented on one
#'  row where either one drug is prescribed, or where a FDC is prescribed, or over
#'  more than one row where multiple seperate drugs are prescribred. This function 
#'  takes single or multi-line data, converts to one line per patient clinic visit 
#'  and generates drug abbreviations. 
#'
#' @param x data frame containing drug prescription data - checked with HIV and HCV
#' treatment data
#'
#' @return data frame with patient_id, visit_date, tx_decision and tx_prescribed containing
#' drugs started during a particular visit. 
#' 
#' @importFrom assertthat assert_that
#' @importFrom dplyr select %>% group_by ungroup mutate slice distinct
#' @importFrom purrr map
#' @importFrom stringr str_extract_all str_flatten
#' @importFrom rlang .data
#'

drug_tx_by_visit <- function(x) {
  
  # check args
  assertthat::assert_that(is.data.frame(x),
                          all(c("patient_id", "visit_date", "tx_prescribed") %in% names(x)))
  
  # Might be easier to keep all treatment decisions and then link to timepoints later
  # keep visits where treatment was initiated
  x_start <- x[x$tx_decision %in% c(
    "Initiation/ Re-initiation",
    "Modification of regimen",
    "Modification due to adverse event",
    "Modification due to other reasons"
  ) | is.na(x$tx_decision), ]
  
  # remove duplicate id, date and tx_prescribed
  x_dup_removed <- dplyr::distinct(x_start, 
                                   .data$patient_id, .data$visit_date, .data$tx_prescribed, 
                                   .keep_all = TRUE)
  
  # remove '(FDC)' from all rows
  x_dup_removed$tx_clean <- gsub(" \\(FDC\\)", x_dup_removed$tx_prescribed, replacement = "")
  
  
  # extract all drug names from within brackets
  x_dup_removed$temp1 <- stringr::str_extract(x_dup_removed$tx_clean, pattern = "(?<=\\().+?(?=\\))")
  
  # use bare capitalised drugs where not inside brackets
  x_dup_removed$temp2 <- ifelse(is.na(x_dup_removed$temp1), x_dup_removed$tx_clean, x_dup_removed$temp1)
  
  # convert drug names to factor to help with ordering
  y <- x_dup_removed %>% 
    dplyr::group_by(.data$patient_id, .data$visit_date) %>% 
    dplyr::mutate(temp2 = factor(.data$temp2, levels = drug_formulations))
  
  # split to help with ordering
  y_lst <- split(y, list(y$patient_id, y$visit_date), drop = TRUE)
  
  # apply ordering using hiv_formulations (internal data)
  y_lst_ordered <- lapply(y_lst, FUN = function(x) x[order(x$temp2), ])
  
  # rbind ordered list
  y_ordered <- do.call(rbind, y_lst_ordered)
  
  # merge grouped treatment information
  y_out <- dplyr::mutate(y_ordered, drug_tx_prescribed = paste0(.data$temp2, collapse = "/")) 
  
  y_out$temp1 <- NULL
  y_out$temp2 <- NULL
  y_out$tx_prescribed <- NULL
  y_out$tx_clean <- NULL
  y_out$tx_decision <- NULL
  
  # return one row per patient per visit
  y_out %>% dplyr::slice(1) %>% dplyr::ungroup()
  
}