#' Merge to episode
#'
#' Merge information back to treatment episodes based on patient ID numbers and dates.
#'
#' @param df_episode data frame containing episode data - each row is expected to have
#' a unique treatment episode
#' @param df_info data frame containing baseline information related to specific
#' treatment episodes. Each row should have a unique collection date for each patient
#' @param key_var bare name of the key variable linking the two data frames - normally patient_id
#' @param start_date bare name of start episode variable in df_episode
#' @param eval_date bare name of collection date variable in df_info
#' @param post_start boolean flag to indicate whether information is entered after start
#' of episode - e.g. treatment outcome.
#'
#' @importFrom assertthat assert_that
#' @importFrom dplyr left_join mutate filter select arrange group_by ungroup
#' @importFrom rlang .data
#'
#' @return data frame with information attached to each treatment episode. Information
#' is selected based on date of collection such that the closest information prior to
#' the episode starting is selected
#'
#' @examples
#' \dontrun{
#' merge_to_episode(df_episode = start,
#' df_info = reg,
#' key_var = id,
#' start_date = sdate,
#' eval_date = reg_date)
#' }
#'
merge_to_episode <- function(df_episode,
df_info,
key_var,
start_date,
eval_date,
post_start = FALSE) {
. <- NULL
# check args
assertthat::assert_that(is.data.frame(df_episode),
is.data.frame(df_info),
is.logical(post_start))
# convert specific function args to strings for left_join
by_var <- deparse(substitute(key_var))
start_by_var <- deparse(substitute(start_date))
# remove additional variables from df_episode to avoid duplication during
# final merge
selected_episode <- df_episode[c(by_var, start_by_var)]
# merge two data frames
m <- dplyr::left_join(selected_episode,
df_info,
by = by_var)
# filter any information if before or after start date
if (post_start) {
m <- dplyr::filter(m, {{ start_date }} <= {{ eval_date }})
} else {
m <- dplyr::filter(m, {{ start_date }} >= {{ eval_date }})
}
m <- m %>%
# count how many days prior to start of episode information collected
dplyr::mutate(days = abs(as.numeric( {{ start_date}} - {{ eval_date }}))) %>%
dplyr::arrange({{ key_var }}, {{ start_date }}, .data$days) %>%
dplyr::group_by( {{ key_var }}, {{ start_date }}) %>%
# keep information which is closest to episode start
dplyr::slice(1) %>%
dplyr::ungroup() %>%
# merge remaining information back to original episode data
dplyr::left_join(df_episode, ., by = c(by_var, start_by_var)) %>%
dplyr::select(-.data$days)
# check that output has same rows as df_episode
assertthat::assert_that(nrow(m) == nrow(df_episode))
# check that no new rows have been generated
no_new_rows <- identical(unique(m[c(by_var, start_by_var)]),
unique(df_episode[c(by_var, start_by_var)]))
# generate warning if episode details don't match between input and output
if (! no_new_rows) {
warning("Treatment episode (key_var and start date) are different between input and output")
}
m
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.