R/prepare_hiv_report_data.R

Defines functions prepare_hiv_report_data

Documented in prepare_hiv_report_data

#' Prepare HIV report data
#'
#' @inheritParams prepare_tb_report_data
#'
#' @importFrom assertthat assert_that are_equal 
#' @importFrom dplyr left_join
#'

prepare_hiv_report_data <- function(x, 
                                    categorise_age = TRUE) {
  starttre <- NULL
  patient_id <- NULL
  visit_date <- NULL
  sample_date <- NULL
  assessment_date <- NULL
  
  # Check args ----
  assertthat::assert_that(is.list(x),
                          is.data.frame(x$hiv_start))
  
  # no missing start date or patient ID values
  assertthat::are_equal(sum(is.na(x$hiv_start$patient_id)), 0)
  
  # Impute missing start tx dates ----
  # use consult date where starttre is missing
  x$hiv_start$starttre <- as.Date(ifelse(is.na(x$hiv_start$starttre),
                                         x$hiv_start$consult_date,
                                         x$hiv_start$starttre), 
                                  origin = "1970-01-01")
  
  # remove consult date variable
  x$hiv_start$consult_date <- NULL
  
  # Clean and adjust age and gender ----
  # * clean age and gender ----
  age_gender_df <- clean_adm_demographics(x$hiv_characteristics)
  
  # * merge age/gender data with start treatment data ----
  age_gender_start <- dplyr::left_join(x$hiv_start, 
                        age_gender_df, 
                        by = "patient_id")
  
  # * adjust age according to start treatment date ----
  start_tx_age_gender <- recalculate_age_tx_start(age_gender_start, 
                           start_var = starttre, 
                           categorise = categorise_age)
  
  # Baseline WHO staging ----
  base_who_stage <- merge_to_episode(df_episode = x$hiv_start[, c("patient_id", "starttre")],
                                     df_info = x$hiv_who_stage,
                                     key_var = patient_id,
                                     start_date = starttre,
                                     eval_date = visit_date,
                                     post_start = FALSE)
  
  # Baseline CD4 count ----
  base_cd4 <- merge_to_episode(df_episode = x$hiv_start[, c("patient_id", "starttre")],
                               df_info = x$hiv_vl_cd4[, c("patient_id", "sample_date", "cd4")],
                               key_var = patient_id,
                               start_date = starttre,
                               eval_date = sample_date,
                               post_start = FALSE)
  
  # Baseline weight and height ----
  base_weight_height <- merge_to_episode(df_episode = x$hiv_start[, c("patient_id", "starttre")],
                                         df_info = x$hiv_weigth_height,
                                         key_var = patient_id,
                                         start_date = starttre, 
                                         eval_date = assessment_date,
                                         post_start = FALSE)

  # Baseline drug treatment regimen ----
  drug_tx <- drug_tx_by_visit(x$hiv_drug_tx)
  
  # use regimen information from closest post-treatment start data 
  base_drug_tx <- merge_to_episode(df_episode = x$hiv_start[, c("patient_id", "starttre")],
                                   df_info = drug_tx[, c("patient_id", "visit_date", "drug_tx_prescribed")],
                                   key_var = patient_id,
                                   start_date = starttre,
                                   eval_date = visit_date,
                                   post_start = TRUE)
  
  # Merge data frames ----
  # * Merge start & WHO stage ----
  m1 <- dplyr::left_join(start_tx_age_gender, 
                         base_who_stage,
                         by = c("patient_id", "starttre"))
  
  # * Merge with baseline CD4 count ----
  m2 <- dplyr::left_join(m1, 
                         base_cd4,
                         by = c("patient_id", "starttre"))
  
  # * Merge with weight and height ----
  m3 <- dplyr::left_join(m2, 
                         base_weight_height, 
                         by = c("patient_id", "starttre"))
  
  # Remove variables ----
  m3$visit_date <- NULL
  m3$sample_date <- NULL
  m3$adm_date <- NULL
  m3$age <- NULL
  
  # * Merge with baseline drug treatment ----
  m4 <- dplyr::left_join(m3,
                          base_drug_tx,
                          by = c("patient_id", "starttre"))
  
  m4$visit_date <- NULL
  
  # Create variables ----
  # * Start month ----
  m4$start_month <- factor(month.name[as.numeric(format(m4$starttre, "%m"))],
                            levels = month.name)
  
  # * BMI ----
  m4$bmi <- m4$weight / (m4$height / 100) ^ 2
  
  # * CD4 ----
  m4$cd4_factor <- cut(m4$cd4, breaks = c(0, 50, 200, 350, 500, max(m4$cd4, na.rm = TRUE)),
                       labels = c("0-50", "51-200", "201-350",
                                  "351-500", "\u2265 501"))
  
  # * New start treatment quarter variable ----
  m4$start_quarter <- lubridate::quarter(m4$starttre, with_year = TRUE)
  
  # Clean variables ----
  # * Convert to factors ----
  m4[hiv_levels$var_names] <- purrr::map2_df(.x = m4[hiv_levels$var_names],
                                              .y = hiv_levels$var_levels,
                                              .f = ~ factor(.x, 
                                                            levels = .y))
  
  # * BMI factor ----
  m4$bmi_factor <- factor(ifelse(m4$bmi >= 18.5, 0, 1),
                          levels = c(0, 1),
                          labels = c("\u2265 18.5", "< 18.5"))
  
  # * Drug treatment ----
  m4$drug_factor <- ifelse(is.na(m4$drug_tx_prescribed), 
                           NA_character_,
                           ifelse(m4$drug_tx_prescribed %in% core_hiv_formulations, 
                                  m4$drug_tx_prescribed, 
                                  "Other"))
  m4$drug_factor <- factor(m4$drug_factor, levels = core_hiv_formulations)
  
  
  # * Clean treatment status data ----
  x$hiv_status_changes <- clean_hiv_status(x$hiv_status_changes, 
                                           .drop = TRUE)
  
  # * Merge start treatment date with HIV status changes ----
  x$hiv_status_changes <- dplyr::left_join(x$hiv_status_changes,
                                           x$hiv_start[, c("patient_id", "starttre")],
                                           by = "patient_id")
  
  # * Clean VL data ----
  x$vl_results <- clean_cd4_vl(x$hiv_vl_cd4)
  
  # * Merge start treamtent date with HIV VL results ----
  x$vl_results <- dplyr::left_join(x$vl_results, 
                                   x$hiv_start[, c("patient_id", "starttre")],
                                   by = "patient_id")
  
  # Check for duplicate variable names ----
  assertthat::assert_that(isFALSE(any(grepl("\\.", names(m4)))))
  
  # Output list ----
  list(baseline = m4, 
       status_changes = x$hiv_status_changes, 
       vl_results = x$vl_results)
}
JayAchar/hisreportr documentation built on March 18, 2020, 5:57 a.m.