hisreportr: hisreportr: Automating MSF OCA EMR reporting

Documented in outcome_by_followup_time

#' @title Cohort outcome by follow-up time
#' 
#' @description Calculate each patient's cohort status at any 
#' timepoint since treatment initiation by defining a reporting date
#' and follow-up time in months
#'
#' @param followup_month integer defining the follow-up time in months
#' @param df data frame of cohort status changes
#' @param reporting_date date defining the reporting date
#' @param start_var character defining the name of the start treatment variable
#' @param date_var character defining the status change date variable
#' @param id_var character defining the patient ID variable
#' @param status_var character definintg the status change variable
#'
#' @importFrom assertthat assert_that
#' @importFrom lubridate `%m-%`
#' @importFrom dplyr left_join


outcome_by_followup_time <- function(followup_month,
                                df,
                                reporting_date,
                                start_var,
                                date_var,
                                id_var, 
                                status_var) {
  
  # Check args ----
  assertthat::assert_that(is.numeric(followup_month),
                          length(followup_month) == 1,
                          is.data.frame(df),
                          is.character(start_var),
                          is.character(date_var),
                          is.character(id_var),
                          is.character(status_var))
  
  # Check that data is pre-filtered ----
  assertthat::assert_that(! all(df[[date_var]] > df[[start_var]]))
  
  # Generate output variable name
  fu_var_name <- paste0("fu_", followup_month, "_months")
  
  ## Keep records with sufficient follow up time ----
  fup <- df[df[[start_var]] <= 
                      lubridate::`%m-%`(reporting_date, 
                                        months(followup_month)), ]
  
  ## Remove records after time in months ----
  status_df <- fup[lubridate::`%m-%`(fup[[date_var]],
                                                months(followup_month)) <=
                                fup[[start_var]], ]
  
  # If no records remaining after filter return empty data frame ----
  if (nrow(status_df) == 0) {
    empty_df <- data.frame(a = unique(df[[id_var]]),
                           b = NA_character_,
                           stringsAsFactors = FALSE)
    names(empty_df) <- c(id_var, fu_var_name)  
    return(empty_df)
  }
  
  ## Split data frame to find last status in period
  full_status_lst <- split(status_df, status_df[[id_var]])
  
  ## Filter latest status for each ID
  final_status_lst <- lapply(full_status_lst, 
                             FUN = function(x) {
                               # retain latest status by date
                               y <- x[x[[date_var]] == max(x[[date_var]]), ]
                               # keep only 1 status per patient
                               y[1, ]
                             })
  
  ## Merge all ID statuses into data frame
  last_fu_df <- do.call(rbind, final_status_lst)
  
  # Keep last follow-up status ----
  # last_fu_df <- dplyr::ungroup(dplyr::top_n(filtered_df2, 1, {{ date_prom }}))
  
  # Remove variables ----
  last_fu_df[[date_var]] <- NULL
  last_fu_df[[start_var]] <- NULL
  
  # Check for duplicates ----
  assertthat::assert_that(length(last_fu_df[[id_var]]) == 
                            length(unique(last_fu_df[[id_var]])))
  
  # Rename output variable ----
  names(last_fu_df)[names(last_fu_df) == status_var] <- fu_var_name
  
  
  # Merge with starting IDs ----
  merged_df <- dplyr::left_join(df[id_var],
                                last_fu_df,
                                by = id_var)
  
  unique(merged_df)
}