#' Prepare raw TB data
#'
#' @param x list of data frames generated by access_postgresql()
#' for TB reports
#' @param categorise_age logical argument to flag whether to add a categorised
#' age variable to simplify reporting
#'
#' @importFrom dplyr inner_join left_join
#' @importFrom assertthat assert_that
#' @importFrom purrr map2_df
prepare_tb_report_data <- function(x,
categorise_age = TRUE) {
patient_id <- NULL
starttre <- NULL
hx_review_date <- NULL
end_date <- NULL
assessment_date <- NULL
# Check arg ----
assertthat::assert_that(is.list(x),
is.data.frame(x$tb_start))
# no missing start date or patient ID values
assertthat::are_equal(sum(is.na(x$patient_id)), 0)
# Impute missing start tx dates ----
# use consult date where starttre is missing
x$tb_start$starttre <- as.Date(ifelse(is.na(x$tb_start$starttre),
x$tb_start$consult_date,
x$tb_start$starttre),
origin = "1970-01-01")
# remove consult date variable
x$tb_start$consult_date <- NULL
# Clean and adjust variables ----
# * Age and gender ----
age_gender_df <- clean_adm_demographics(x$tb_characteristics)
# * merge age/gender data with start treatment data ----
age_gender_start <- dplyr::left_join(x$tb_start,
age_gender_df,
by = "patient_id")
# * adjust age according to start treatment date ----
start_tx_age_gender <- recalculate_age_tx_start(age_gender_start,
start_var = starttre,
categorise = TRUE)
# * Baseline weight and height ----
base_weight_height <- merge_to_episode(df_episode = x$tb_start[, c("patient_id", "starttre")],
df_info = x$tb_weight_height,
key_var = patient_id,
start_date = starttre,
eval_date = assessment_date,
post_start = FALSE)
# * Baseline registration data ----
base_registration <- merge_to_episode(df_episode = x$tb_start[, c("patient_id", "starttre")],
df_info = x$tb_registration,
key_var = patient_id,
start_date = starttre,
eval_date = hx_review_date,
post_start = FALSE)
# * End of treatment outcome ----
end_outcome <- merge_to_episode(df_episode = x$tb_start[, c("patient_id", "starttre")],
df_info = x$tb_end,
key_var = patient_id,
start_date = starttre,
eval_date = end_date,
post_start = TRUE)
# Merge data frames ----
# * Merge start, registration & outcome ----
start_reg_out <- dplyr::left_join(base_registration,
end_outcome,
by = c("patient_id", "starttre"))
# * Merge adjusted age ----
start_reg_out_age <- dplyr::left_join(start_reg_out,
start_tx_age_gender,
by = c("patient_id", "starttre"))
# * Merge weight and height ----
start_reg_out_age_wh <- dplyr::left_join(start_reg_out_age,
base_weight_height,
by = c("patient_id", "starttre"))
# merge baseline data with tb treatment episode
# m1 <- merge_to_episode(df_episode = x$tb_start,
# df_info = x$tb_registration,
# key_var = patient_id,
# start_date = starttre,
# eval_date = hx_review_date,
# post_start = FALSE)
# merge end of treatment outcome with treatment episode
# m2 <- merge_to_episode(df_episode = x$tb_start,
# df_info = x$tb_end,
# key_var = patient_id,
# start_date = starttre,
# eval_date = end_date,
# post_start = TRUE)
# remove duplicate regimen variable
# m2$regimen <- NULL
# # merge age and gender with treatment episode df
# m4 <- dplyr::left_join(m3, age_gender, by = "patient_id")
#
# # recalculate baseline age at start of treatment episode
# m5 <- recalculate_age_tx_start(m4,
# start_var = starttre,
# categorise = categorise_age)
# Generate variables ----
new <- start_reg_out_age_wh
# * Convert variables to factors ----
new[tb_levels$var_names] <- purrr::map2_df(.x = new[tb_levels$var_names],
.y = tb_levels$var_levels,
.f = ~ factor(.x,
levels = .y))
# * Create start month variable ----
new$start_month <- factor(month.name[as.numeric(format(new$starttre, "%m"))],
levels = month.name)
attr(new$start_month, "label") <- "Treatment start month"
# * BMI ----
new$bmi <- new$weight / (new$height / 100) ^ 2
# * BMI factor ----
new$bmi_factor <- factor(ifelse(new$bmi >= 18.5, 0, 1),
levels = c(0, 1),
labels = c("\u2265 18.5", "< 18.5"))
# * New start treatment quarter and semester variable ----
new$start_quarter <- lubridate::quarter(new$starttre, with_year = TRUE)
new$start_semester <- lubridate::semester(new$starttre, with_year = TRUE)
# * Add reporting time ----
new_ds <- calculate_reporting_period(new, disease = "ds-tb")
new_ds_dr <- calculate_reporting_period(new_ds, disease = "dr-tb")
assertthat::assert_that(nrow(x$tb_start) == nrow(new_ds_dr))
new_ds_dr
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.