R/prepare_tb_report_data.R

Defines functions prepare_tb_report_data

Documented in prepare_tb_report_data

#' Prepare raw TB data
#'
#' @param x list of data frames generated by access_postgresql()
#' for TB reports
#' @param categorise_age logical argument to flag whether to add a categorised 
#' age variable to simplify reporting
#'
#' @importFrom dplyr inner_join left_join
#' @importFrom assertthat assert_that
#' @importFrom purrr map2_df


prepare_tb_report_data <- function(x, 
                                   categorise_age = TRUE) {
  
  patient_id <- NULL
  starttre <- NULL
  hx_review_date <- NULL
  end_date <- NULL
  assessment_date <- NULL
  
  # Check arg ----
  assertthat::assert_that(is.list(x),
                          is.data.frame(x$tb_start))
  
  # no missing start date or patient ID values
  assertthat::are_equal(sum(is.na(x$patient_id)), 0)
  
  # Impute missing start tx dates ----
  # use consult date where starttre is missing
  x$tb_start$starttre <- as.Date(ifelse(is.na(x$tb_start$starttre),
                                x$tb_start$consult_date,
                                x$tb_start$starttre), 
                                origin = "1970-01-01")
  
  # remove consult date variable
  x$tb_start$consult_date <- NULL
  
  # Clean and adjust variables ----
  
  # * Age and gender ----
  age_gender_df <- clean_adm_demographics(x$tb_characteristics)
  
  # * merge age/gender data with start treatment data ----
  age_gender_start <- dplyr::left_join(x$tb_start, 
                                       age_gender_df, 
                                       by = "patient_id")
  
  # * adjust age according to start treatment date ----
  start_tx_age_gender <- recalculate_age_tx_start(age_gender_start, 
                                                  start_var = starttre, 
                                                  categorise = TRUE)
  
  # * Baseline weight and height ----
  base_weight_height <- merge_to_episode(df_episode = x$tb_start[, c("patient_id", "starttre")],
                                         df_info = x$tb_weight_height,
                                         key_var = patient_id,
                                         start_date = starttre, 
                                         eval_date = assessment_date,
                                         post_start = FALSE)
  
  # * Baseline registration data ----
  base_registration <- merge_to_episode(df_episode = x$tb_start[, c("patient_id", "starttre")],
                                        df_info = x$tb_registration,
                                        key_var = patient_id, 
                                        start_date = starttre,
                                        eval_date = hx_review_date, 
                                        post_start = FALSE)
  
  # * End of treatment outcome ----
  end_outcome <- merge_to_episode(df_episode = x$tb_start[, c("patient_id", "starttre")],
                                  df_info = x$tb_end,
                                  key_var = patient_id, 
                                  start_date = starttre,
                                  eval_date = end_date, 
                                  post_start = TRUE)
  
  # Merge data frames ----
  # * Merge start, registration & outcome ----
  start_reg_out <- dplyr::left_join(base_registration, 
                                    end_outcome,
                                    by = c("patient_id", "starttre"))
  
  # * Merge adjusted age ----
  start_reg_out_age <- dplyr::left_join(start_reg_out,
                                        start_tx_age_gender,
                                        by = c("patient_id", "starttre"))

  # * Merge weight and height ----
  start_reg_out_age_wh <- dplyr::left_join(start_reg_out_age,
                                           base_weight_height,
                                           by = c("patient_id", "starttre"))
  # merge baseline data with tb treatment episode
  # m1 <- merge_to_episode(df_episode = x$tb_start,
  #                        df_info = x$tb_registration,
  #                        key_var = patient_id, 
  #                        start_date = starttre,
  #                        eval_date = hx_review_date, 
  #                        post_start = FALSE)
  
  # merge end of treatment outcome with treatment episode
  # m2 <- merge_to_episode(df_episode = x$tb_start,
  #                        df_info = x$tb_end,
  #                        key_var = patient_id, 
  #                        start_date = starttre,
  #                        eval_date = end_date, 
  #                        post_start = TRUE)
  
  
  
  # remove duplicate regimen variable
  # m2$regimen <- NULL
  

  
  
  # # merge age and gender with treatment episode df
  # m4 <- dplyr::left_join(m3, age_gender, by = "patient_id")
  # 
  # # recalculate baseline age at start of treatment episode
  # m5 <- recalculate_age_tx_start(m4,
  #                                start_var = starttre,
  #                                categorise = categorise_age)
  
  # Generate variables ----
  new <- start_reg_out_age_wh
  
  # * Convert variables to factors ----
  new[tb_levels$var_names] <- purrr::map2_df(.x = new[tb_levels$var_names],
                                            .y = tb_levels$var_levels,
                                            .f = ~ factor(.x, 
                                                          levels = .y))
  
  # * Create start month variable ----
  new$start_month <- factor(month.name[as.numeric(format(new$starttre, "%m"))],
                           levels = month.name)
  
  attr(new$start_month, "label") <- "Treatment start month"
  
  # * BMI ----
  new$bmi <- new$weight / (new$height / 100) ^ 2
  
  # * BMI factor ----
  new$bmi_factor <- factor(ifelse(new$bmi >= 18.5, 0, 1),
                          levels = c(0, 1),
                          labels = c("\u2265 18.5", "< 18.5"))
  
  # * New start treatment quarter and semester variable ----
  new$start_quarter <- lubridate::quarter(new$starttre, with_year = TRUE)
  new$start_semester <- lubridate::semester(new$starttre, with_year = TRUE)
  
  # * Add reporting time ----
  new_ds <- calculate_reporting_period(new, disease = "ds-tb")
  
  new_ds_dr <- calculate_reporting_period(new_ds, disease = "dr-tb")
  
  
  
  
  assertthat::assert_that(nrow(x$tb_start) == nrow(new_ds_dr))
  
  new_ds_dr
}
  
  
  
  
  
  
  
  
JayAchar/hisreportr documentation built on March 18, 2020, 5:57 a.m.