Nothing
#' Derive LOCF (Last Observation Carried Forward) Records
#'
#' Adds LOCF records as new observations for each 'by group' when the dataset
#' does not contain observations for missed visits/time points and when analysis
#' value is missing.
#'
#' @param dataset
#' `r roxygen_param_dataset(expected_vars = c("by_vars", "analysis_var", "order", "keep_vars"))`
#'
#' @param dataset_ref Expected observations dataset
#'
#' Data frame with all the combinations of `PARAMCD`, `PARAM`, `AVISIT`,
#' `AVISITN`, ... which are expected in the dataset is expected.
#'
#'
#' @param by_vars Grouping variables
#'
#' For each group defined by `by_vars` those observations from `dataset_ref`
#' are added to the output dataset which do not have a corresponding observation
#' in the input dataset or for which `analysis_var` is `NA` for the corresponding observation
#' in the input dataset.
#'
#' `r roxygen_param_by_vars()`
#'
#'
#' @param id_vars_ref Grouping variables in expected observations dataset
#'
#' The variables to group by in `dataset_ref` when determining which observations should be
#' added to the input dataset.
#'
#' `r roxygen_param_by_vars()`
#'
#' @default All the variables in `dataset_ref`
#'
#'
#' @param analysis_var Analysis variable.
#'
#' @permitted a variable
#'
#' @param imputation Select the mode of imputation:
#'
#' `add`: Keep all original records and add imputed records for missing
#' timepoints and missing `analysis_var` values from `dataset_ref`.
#'
#' `update`: Update records with missing `analysis_var` and add imputed records
#' for missing timepoints from `dataset_ref`.
#'
#' `update_add`: Keep all original records, update records with missing `analysis_var`
#' and add imputed records for missing timepoints from `dataset_ref`.
#'
#'
#' @permitted One of these 3 values: `"add"`, `"update"`, `"update_add"`
#'
#' @param order Sort order
#'
#' The dataset is sorted by `order` before carrying the last observation
#' forward (e.g. `AVAL`) within each `by_vars`.
#'
#' `r roxygen_order_na_handling()`
#'
#' @param keep_vars Variables that need carrying the last observation forward
#'
#' Keep variables that need carrying the last observation forward other than `analysis_var`
#' (e.g., `PARAMN`, `VISITNUM`). If by default `NULL`, only variables specified in
#' `by_vars` and `analysis_var` will be populated in the newly created records.
#'
#' @author G Gayatri
#'
#' @details For each group (with respect to the variables specified for the
#' by_vars parameter) those observations from `dataset_ref` are added to
#' the output dataset
#' - which do not have a corresponding observation in the input dataset or
#' - for which `analysis_var` is `NA` for the corresponding observation in the input dataset.
#'
#' For the new observations, `analysis_var` is set to the non-missing `analysis_var` of the
#' previous observation in the input dataset (when sorted by `order`) and
#' `DTYPE` is set to "LOCF".
#'
#' The `imputation` argument decides whether to update the existing observation when
#' `analysis_var` is `NA` (`"update"` and `"update_add"`), or to add a new observation from
#' `dataset_ref` instead (`"add"`).
#'
#' @return The input dataset with the new "LOCF" observations added for each
#' `by_vars`, based on the value passed to the `imputation` argument.
#'
#' @keywords der_prm_bds_findings
#' @family der_prm_bds_findings
#'
#' @export
#'
#' @examples
#'
#' library(dplyr)
#' library(tibble)
#'
#' advs <- tribble(
#' ~STUDYID, ~USUBJID, ~VSSEQ, ~PARAMCD, ~PARAMN, ~AVAL, ~AVISITN, ~AVISIT,
#' "CDISC01", "01-701-1015", 1, "PULSE", 1, 65, 0, "BASELINE",
#' "CDISC01", "01-701-1015", 2, "DIABP", 2, 79, 0, "BASELINE",
#' "CDISC01", "01-701-1015", 3, "DIABP", 2, 80, 2, "WEEK 2",
#' "CDISC01", "01-701-1015", 4, "DIABP", 2, NA, 4, "WEEK 4",
#' "CDISC01", "01-701-1015", 5, "DIABP", 2, NA, 6, "WEEK 6",
#' "CDISC01", "01-701-1015", 6, "SYSBP", 3, 130, 0, "BASELINE",
#' "CDISC01", "01-701-1015", 7, "SYSBP", 3, 132, 2, "WEEK 2"
#' )
#'
#' # A dataset with all the combinations of PARAMCD, PARAM, AVISIT, AVISITN, ...
#' # which are expected.
#' advs_expected_obsv <- tribble(
#' ~PARAMCD, ~AVISITN, ~AVISIT,
#' "PULSE", 0, "BASELINE",
#' "PULSE", 6, "WEEK 6",
#' "DIABP", 0, "BASELINE",
#' "DIABP", 2, "WEEK 2",
#' "DIABP", 4, "WEEK 4",
#' "DIABP", 6, "WEEK 6",
#' "SYSBP", 0, "BASELINE",
#' "SYSBP", 2, "WEEK 2",
#' "SYSBP", 4, "WEEK 4",
#' "SYSBP", 6, "WEEK 6"
#' )
#'
#' # Example 1: Add imputed records for missing timepoints and for missing
#' # `analysis_var` values (from `dataset_ref`), keeping all the original records.
#' derive_locf_records(
#' dataset = advs,
#' dataset_ref = advs_expected_obsv,
#' by_vars = exprs(STUDYID, USUBJID, PARAMCD),
#' imputation = "add",
#' order = exprs(AVISITN, AVISIT),
#' keep_vars = exprs(PARAMN)
#' ) |>
#' arrange(USUBJID, PARAMCD, AVISIT)
#'
#'
#' # Example 2: Add imputed records for missing timepoints (from `dataset_ref`)
#' # and update missing `analysis_var` values.
#' derive_locf_records(
#' dataset = advs,
#' dataset_ref = advs_expected_obsv,
#' by_vars = exprs(STUDYID, USUBJID, PARAMCD),
#' imputation = "update",
#' order = exprs(AVISITN, AVISIT),
#' ) |>
#' arrange(USUBJID, PARAMCD, AVISIT)
#'
#'
#' # Example 3: Add imputed records for missing timepoints (from `dataset_ref`) and
#' # update missing `analysis_var` values, keeping all the original records.
#' derive_locf_records(
#' dataset = advs,
#' dataset_ref = advs_expected_obsv,
#' by_vars = exprs(STUDYID, USUBJID, PARAMCD),
#' imputation = "update_add",
#' order = exprs(AVISITN, AVISIT),
#' ) |>
#' arrange(USUBJID, PARAMCD, AVISIT)
derive_locf_records <- function(dataset,
dataset_ref,
by_vars,
id_vars_ref = NULL,
analysis_var = AVAL,
imputation = "add",
order,
keep_vars = NULL) {
# Input Checking
analysis_var <- assert_symbol(enexpr(analysis_var))
# Check if input parameters is a valid list of variables
assert_vars(by_vars, optional = TRUE)
assert_vars(keep_vars, optional = TRUE)
assert_vars(id_vars_ref, optional = TRUE)
assert_expr_list(order)
imputation <-
assert_character_scalar(
imputation,
values = c("add", "update", "update_add"),
case_sensitive = FALSE
)
# Check by_vars and order variables in input datasets
assert_data_frame(dataset_ref)
assert_data_frame(
dataset,
required_vars = expr_c(
by_vars, analysis_var, extract_vars(order), keep_vars,
chr2vars(colnames(dataset_ref))
)
)
# Setting id_vars_ref to all the variables of dataset_ref when not specified by user #
if (is.null(id_vars_ref)) {
id_vars_ref <- lapply(names(dataset_ref), sym)
}
# Prepping 'dataset_ref'
# Get the IDs from input dataset for which the expected observations are to be added
ids <- dataset %>%
select(!!!setdiff(by_vars, chr2vars(colnames(dataset_ref)))) %>%
distinct()
exp_obsv <- ids %>%
crossing(dataset_ref)
# Add LOCF records
# Get the variable names to join by
exp_obs_by_vars <- as.character(union(by_vars, id_vars_ref))
tmp_missing_avar <- get_new_tmp_var(exp_obsv, prefix = "tmp_missing_avar")
# Flag the original missing analysis_var records
dataset <- dataset %>%
mutate(!!tmp_missing_avar := if_else(is.na(!!analysis_var), "missing", NA_character_))
# Get unique combination of visits/timepoints per parameter per subject
# from the input dataset
advs_unique_original <- dataset %>%
filter(!(is.na(!!analysis_var))) %>%
select(all_of(exp_obs_by_vars)) %>%
distinct()
tmp_dtype <- get_new_tmp_var(exp_obsv, prefix = "tmp_dtype")
tmp_new_records <- get_new_tmp_var(exp_obsv, prefix = "tmp_new_records")
# Get the missing analysis_var (e.g., AVAL) records
if (imputation %in% c("add", "update_add")) {
aval_missing <- dataset %>%
filter(is.na(!!analysis_var)) %>%
remove_tmp_vars()
} else {
aval_missing <- NULL
}
if (imputation %in% c("update", "update_add")) {
data_fill <- dataset
# Get all the expected observations that are to be added to the input dataset
exp_obsv_to_add <- exp_obsv %>%
anti_join(dataset, by = exp_obs_by_vars) %>%
mutate(!!tmp_new_records := "new")
} else {
# Get the records with missing analysis_var (e.g., AVAL) to impute
data_fill <- dataset %>%
filter(!is.na(!!analysis_var))
# Get all the expected observations that are to be added to the input dataset
exp_obsv_to_add <- exp_obsv %>%
anti_join(advs_unique_original, by = exp_obs_by_vars) %>%
mutate(!!tmp_new_records := "new")
}
# Add the expected observations to the input dataset
# Arrange the dataset by 'order' and group it by 'by_vars'
# Use fill() to fill the 'analysis_var' from the previous observation for the newly
# added records
aval_locf <- bind_rows(data_fill, exp_obsv_to_add) %>%
mutate(!!tmp_dtype := if_else(is.na(!!analysis_var), "LOCF", NA_character_))
if ("DTYPE" %in% colnames(aval_locf)) {
aval_locf <- aval_locf %>%
mutate(DTYPE = if_else(!!tmp_dtype == "LOCF", "LOCF", DTYPE, missing = DTYPE)) %>%
select(-!!tmp_dtype)
} else {
aval_locf <- rename(aval_locf, DTYPE = !!tmp_dtype)
}
aval_locf <- aval_locf %>%
arrange(!!!by_vars, !!!order) %>%
group_by(!!!by_vars) %>%
fill(!!analysis_var, !!!keep_vars) %>%
ungroup() %>%
filter(!(!is.na(!!tmp_missing_avar) & is.na(!!tmp_new_records) & is.na(DTYPE))) %>%
remove_tmp_vars()
# When imputation = 'add', keep all variables other than analysis_var, by_vars,
# order, id_vars_ref, keep_vars and 'DTYPE' missing. Else, keep all variables populated
if (imputation == "add") {
# Non-imputed records
non_locf <- aval_locf %>%
filter(!(DTYPE %in% c("LOCF")))
# imputed records
locf <- aval_locf %>%
filter(DTYPE %in% c("LOCF"))
aval_locf <- locf %>%
mutate(across(
.cols = -as.character(c(analysis_var, by_vars, order, id_vars_ref, keep_vars, "DTYPE")),
.fns = ~ vector(typeof(.), 1)[NA]
)) %>%
bind_rows(non_locf)
}
# Output dataset:
# If imputation == 'add' or 'update_add', add the missing analysis_var records
# with non-missing + newly added LOCF records
# If imputation == 'update', keep non-missing + newly added LOCF records
bind_rows(aval_locf, aval_missing)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.