hisreportr: hisreportr: Automating MSF OCA EMR reporting

Documented in time_cohort_outcomes

#' @title Time cohort outcomes
#'
#' @param df data frame of treatment status over time
#' @param start_var character name of start treament date
#' @param time_months numeric vector defining which time cohorts to include
#' @param status_var character name of HIV status variable in data frame
#' @param id_var character name of ID variable in data frame
#' @param date_var character name of status change date variable in data frame
#' @param reporting_date date of reporting time - typically the first 
#' day of the reporting month 
#' @param convert_to_df flag defining whether to reduce the final output
#' from a list to a wide data frame - defaults to FALSE
#' 
#' @importFrom assertthat assert_that
#' @importFrom lubridate `%m-%` 
#' @importFrom purrr map reduce
#' 
#'
#' @return data frame where each row represents one patient
#' and HIV treatment status at specified time points are 
#' represented by variables
#'
#' @examples
#' \dontrun{
#' status_calculator(df = x,
#'                   time_months = c(6, 12, 24),
#'                   status_var = "hiv_status",
#'                   id_var = "id",
#'                   start_var = "starttre"
#'                   date_var = "date", 
#'                   reporting_date = lubridate::dmy("01/09/2010"),
#'                   convert_to_df = TRUE)
#' }

time_cohort_outcomes <- function(df,
                                 start_var,
                                 time_months,
                                 status_var,
                                 id_var,
                                 date_var,
                                 reporting_date,
                                 convert_to_df = FALSE) {
  
  # Check args ----
  assertthat::assert_that(is.data.frame(df),
                          is.numeric(time_months),
                          is.character(start_var),
                          is.character(status_var),
                          is.character(id_var),
                          is.character(date_var),
                          is.logical(convert_to_df))
  
  # Remove rows with no information ----
  df <- df[! is.na(df[[date_var]]), ]
  df <- df[! is.na(df[[status_var]]), ]
  df <- df[! is.na(df[[start_var]]), ]
  
  # Remove rows before start treatment date ----
  filtered_left <- df[df[[date_var]] >= df[[start_var]], ]
  
  # Remove rows after longest follow-up time defined in time_months ----
  filtered_both <- filtered_left[
    lubridate::`%m-%`(filtered_left[[date_var]], months(max(time_months))) <= 
      filtered_left[[start_var]], ]
  
  # Map outcomes over reporting months ----
  out_lst <- purrr::map(.x = time_months,
             .f = ~ outcome_by_followup_time(followup_month = .x, 
                                        df = filtered_both,
                                        reporting_date = reporting_date,
                                        start_var = start_var,
                                        date_var = date_var,
                                        id_var = id_var,
                                        status_var = status_var))
  
  # Reduce output list to data frame ----
  if (convert_to_df){

        purrr::reduce(out_lst, left_join, by = id_var)  
  
    } else {

    return(out_lst)
  }
  
}