#' Prepare HIV report data
#'
#' @inheritParams prepare_tb_report_data
#'
#' @importFrom assertthat assert_that are_equal
#' @importFrom dplyr left_join
#'
prepare_hiv_report_data <- function(x,
categorise_age = TRUE) {
starttre <- NULL
patient_id <- NULL
visit_date <- NULL
sample_date <- NULL
assessment_date <- NULL
# Check args ----
assertthat::assert_that(is.list(x),
is.data.frame(x$hiv_start))
# no missing start date or patient ID values
assertthat::are_equal(sum(is.na(x$hiv_start$patient_id)), 0)
# Impute missing start tx dates ----
# use consult date where starttre is missing
x$hiv_start$starttre <- as.Date(ifelse(is.na(x$hiv_start$starttre),
x$hiv_start$consult_date,
x$hiv_start$starttre),
origin = "1970-01-01")
# remove consult date variable
x$hiv_start$consult_date <- NULL
# Clean and adjust age and gender ----
# * clean age and gender ----
age_gender_df <- clean_adm_demographics(x$hiv_characteristics)
# * merge age/gender data with start treatment data ----
age_gender_start <- dplyr::left_join(x$hiv_start,
age_gender_df,
by = "patient_id")
# * adjust age according to start treatment date ----
start_tx_age_gender <- recalculate_age_tx_start(age_gender_start,
start_var = starttre,
categorise = categorise_age)
# Baseline WHO staging ----
base_who_stage <- merge_to_episode(df_episode = x$hiv_start[, c("patient_id", "starttre")],
df_info = x$hiv_who_stage,
key_var = patient_id,
start_date = starttre,
eval_date = visit_date,
post_start = FALSE)
# Baseline CD4 count ----
base_cd4 <- merge_to_episode(df_episode = x$hiv_start[, c("patient_id", "starttre")],
df_info = x$hiv_vl_cd4[, c("patient_id", "sample_date", "cd4")],
key_var = patient_id,
start_date = starttre,
eval_date = sample_date,
post_start = FALSE)
# Baseline weight and height ----
base_weight_height <- merge_to_episode(df_episode = x$hiv_start[, c("patient_id", "starttre")],
df_info = x$hiv_weigth_height,
key_var = patient_id,
start_date = starttre,
eval_date = assessment_date,
post_start = FALSE)
# Baseline drug treatment regimen ----
drug_tx <- drug_tx_by_visit(x$hiv_drug_tx)
# use regimen information from closest post-treatment start data
base_drug_tx <- merge_to_episode(df_episode = x$hiv_start[, c("patient_id", "starttre")],
df_info = drug_tx[, c("patient_id", "visit_date", "drug_tx_prescribed")],
key_var = patient_id,
start_date = starttre,
eval_date = visit_date,
post_start = TRUE)
# Merge data frames ----
# * Merge start & WHO stage ----
m1 <- dplyr::left_join(start_tx_age_gender,
base_who_stage,
by = c("patient_id", "starttre"))
# * Merge with baseline CD4 count ----
m2 <- dplyr::left_join(m1,
base_cd4,
by = c("patient_id", "starttre"))
# * Merge with weight and height ----
m3 <- dplyr::left_join(m2,
base_weight_height,
by = c("patient_id", "starttre"))
# Remove variables ----
m3$visit_date <- NULL
m3$sample_date <- NULL
m3$adm_date <- NULL
m3$age <- NULL
# * Merge with baseline drug treatment ----
m4 <- dplyr::left_join(m3,
base_drug_tx,
by = c("patient_id", "starttre"))
m4$visit_date <- NULL
# Create variables ----
# * Start month ----
m4$start_month <- factor(month.name[as.numeric(format(m4$starttre, "%m"))],
levels = month.name)
# * BMI ----
m4$bmi <- m4$weight / (m4$height / 100) ^ 2
# * CD4 ----
m4$cd4_factor <- cut(m4$cd4, breaks = c(0, 50, 200, 350, 500, max(m4$cd4, na.rm = TRUE)),
labels = c("0-50", "51-200", "201-350",
"351-500", "\u2265 501"))
# * New start treatment quarter variable ----
m4$start_quarter <- lubridate::quarter(m4$starttre, with_year = TRUE)
# Clean variables ----
# * Convert to factors ----
m4[hiv_levels$var_names] <- purrr::map2_df(.x = m4[hiv_levels$var_names],
.y = hiv_levels$var_levels,
.f = ~ factor(.x,
levels = .y))
# * BMI factor ----
m4$bmi_factor <- factor(ifelse(m4$bmi >= 18.5, 0, 1),
levels = c(0, 1),
labels = c("\u2265 18.5", "< 18.5"))
# * Drug treatment ----
m4$drug_factor <- ifelse(is.na(m4$drug_tx_prescribed),
NA_character_,
ifelse(m4$drug_tx_prescribed %in% core_hiv_formulations,
m4$drug_tx_prescribed,
"Other"))
m4$drug_factor <- factor(m4$drug_factor, levels = core_hiv_formulations)
# * Clean treatment status data ----
x$hiv_status_changes <- clean_hiv_status(x$hiv_status_changes,
.drop = TRUE)
# * Merge start treatment date with HIV status changes ----
x$hiv_status_changes <- dplyr::left_join(x$hiv_status_changes,
x$hiv_start[, c("patient_id", "starttre")],
by = "patient_id")
# * Clean VL data ----
x$vl_results <- clean_cd4_vl(x$hiv_vl_cd4)
# * Merge start treamtent date with HIV VL results ----
x$vl_results <- dplyr::left_join(x$vl_results,
x$hiv_start[, c("patient_id", "starttre")],
by = "patient_id")
# Check for duplicate variable names ----
assertthat::assert_that(isFALSE(any(grepl("\\.", names(m4)))))
# Output list ----
list(baseline = m4,
status_changes = x$hiv_status_changes,
vl_results = x$vl_results)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.