R/recalculate_age_tx_start.R

Defines functions recalculate_age_tx_start

Documented in recalculate_age_tx_start

#' Adjust age to treatment episode
#' 
#' In the OCA EMR, age is recorded once when people are registered
#' in the database. Over time, people may initiate treatment for 
#' different diseases at different time points. Since age at treatment 
#' initiation is often required, the recorded value must be adjusted 
#' to represent the age when treatment starts. 
#' 
#' This function is designed to generate an additional variable, `age_recalculated`,
#' by adding time from registration to treatment initiation to the 
#' recorded age.
#'
#' @param x data frame including age at registration, treatment initiation date
#' and patient registration date
#' @param start_var define variable representing treatment initiation date
#' @param categorise logical argument to flag whether to add a categorised 
#' age variable to simplify reporting
#' 
#' @importFrom assertthat assert_that
#' @importFrom lubridate interval years
#'
#' @return data frame with additional variable `age_recalculated` alongside
#' recorded age

recalculate_age_tx_start <- function(x,
                                     start_var,
                                     categorise = TRUE) {
  
  # character string var of start_var
  start_var_chr <- deparse(substitute(start_var))
  
  # check args
  assertthat::assert_that(is.data.frame(x),
                          class(x[[start_var_chr]]) == "Date",
                          "adm_date" %in% names(x),
                          is.logical(categorise))
  
  # fix for age variable class == character
  if (is.character(x$age)) {
    x$age <- as.numeric(x$age)
  }
  
  # calculate years between age recorded date and episode start date
  x$duration <- lubridate::interval(x$adm_date,
                                    x[[start_var_chr]]) / lubridate::years(1)
  
  # message for ages that are registered after treatment initiation
  count_neg <- sum(x$duration < 0,
                   na.rm = TRUE)
  
  if (count_neg > 0) {
    message(paste0(count_neg, " patient ages were registered after treatment initiation"))
  }
  
  # add duration to recorded age
  count_na <- sum(is.na(x$duration))
  
  if (count_na > 0) {
    message(paste0(count_na, " patient ages could not be adjusted due to missing data"))
  }
  
  x$age_recalculated <- ifelse(is.na(x$duration), 
                               x$age,
                               x$age + x$duration)
  
  # categorise age variable
  if (categorise) {
    x$age_cat <- cut(x$age_recalculated,
                     breaks = c(0, 15, 45, max(x$age_recalculated, na.rm = TRUE) + 1),
                     labels = c(1:3))
      
    x$age_cat <- factor(x$age_cat,
                         levels = c(1:3),
                         labels = c("< 15y",
                                    "15-44y",
                                    "\u2265 45y"))
  }
  
  # tidy up
  x$duration <- NULL
  
  x
  
}
JayAchar/hisreportr documentation built on March 18, 2020, 5:57 a.m.