Nothing
#' @title Standardize data
#' @export
#' @family data
#' @description Standardize a tidy input dataset.
#' @details Users do not normally need to call this function.
#' It mainly serves exposes the indexing behavior of
#' studies and group levels to aid in interpreting
#' summary tables.
#' @return A standardized tidy data frame with one row per patient
#' and the following columns:
#' * `response`: continuous response/outcome variable. (Should be
#' change from baseline of an outcome of interest.)
#' * `study_label`: human-readable label of the study.
#' * `study`: integer study index with the max index equal to the
#' current study (at `study_reference`).
#' * `group_label`: human-readable group label (e.g. treatment arm name).
#' * `group`: integer group index with an index of 1 equal to the control
#' group (at `group_reference`).
#' * `patient_label`: original patient ID.
#' * `patient`: integer patient index.
#' * `covariate_*`: baseline covariate columns.
#' @param data A tidy data frame or `tibble` with the data.
#' @param response Character of length 1,
#' name of the column in `data` with the response/outcome variable.
#' `data[[response]]` must be a continuous variable,
#' and it *should* be the change from baseline of a
#' clinical endpoint of interest, as opposed to just
#' the raw response. Treatment differences
#' are computed directly from this scale, please supply
#' change from baseline unless you are absolutely certain
#' that treatment differences computed directly from
#' this quantity are clinically meaningful.
#' @param study Character of length 1,
#' name of the column in `data` with the study ID.
#' @param study_reference Atomic of length 1,
#' element of the `study` column that indicates
#' the current study.
#' (The other studies are historical studies.)
#' @param group Character of length 1,
#' name of the column in `data` with the group ID.
#' @param group_reference Atomic of length 1,
#' element of the `group` column that indicates
#' the control group.
#' (The other groups may be treatment groups.)
#' @param patient Character of length 1,
#' name of the column in `data` with the patient ID.
#' @param covariates Character vector of column names
#' in `data` with the columns with baseline covariates.
#' These can be continuous, categorical, or binary.
#' Regardless, `historicalborrow` derives the appropriate
#' model matrix.
#' @examples
#' data <- hb_sim_independent(n_continuous = 1, n_study = 2)$data
#' data <- dplyr::select(
#' data,
#' study,
#' group,
#' patient,
#' response,
#' tidyselect::everything()
#' )
#' colnames(data) <- c("trial", "arm", "subject", "change", "cov1", "cov2")
#' data$trial <- paste0("trial", data$trial)
#' data$arm <- paste0("arm", data$arm)
#' hb_data(
#' data = data,
#' response = "change",
#' study = "trial",
#' study_reference = "trial1",
#' group = "arm",
#' group_reference = "arm1",
#' patient = "subject",
#' covariates = c("cov1", "cov2")
#' )
hb_data <- function(
data,
response,
study,
study_reference,
group,
group_reference,
patient,
covariates
) {
true(is.data.frame(data))
true(response, !anyNA(.), is.character(.), length(.) == 1)
true(study, !anyNA(.), is.character(.), length(.) == 1)
true(group, !anyNA(.), is.character(.), length(.) == 1)
true(patient, !anyNA(.), is.character(.), length(.) == 1)
true(covariates, !anyNA(.), is.character(.))
true(group_reference, !anyNA(.), length(.) == 1)
true(study_reference, !anyNA(.), length(.) == 1)
true(dim(data), length(.) == 2, . > 0)
true(response %in% colnames(data))
true(study %in% colnames(data))
true(group %in% colnames(data))
true(patient %in% colnames(data))
true(all(covariates %in% colnames(data)))
true(study_reference %in% data[[study]])
true(group_reference %in% data[[group]])
true(is.numeric(data[[response]]))
true(data[[response]], is.atomic(.), is.vector(.))
true(data[[study]], is.atomic(.), is.vector(.), !anyNA(.))
true(data[[group]], is.atomic(.), is.vector(.), !anyNA(.))
true(data[[patient]], is.atomic(.), is.vector(.), !anyNA(.))
for (covariate in covariates) {
true(data[[covariate]], is.atomic(.), is.vector(.), !anyNA(.))
}
out <- tibble::tibble(
response = data[[response]],
study_label = data[[study]],
group_label = data[[group]],
patient_label = data[[patient]],
study = as_index_max(data[[study]], max = study_reference),
group = as_index_min(data[[group]], min = group_reference),
patient = as_index_min(data[[patient]])
)
for (x in covariates) {
name <- if_any(grepl("^covariate", x), x, paste0("covariate_", x))
out[[name]] <- data[[x]]
}
dplyr::arrange(out, study, group, patient)
}
as_index_min <- function(x, min = base::min(x)) {
levels <- c(min, setdiff(sort(unique(x)), min))
out <- as.integer(ordered(x, levels = levels))
out - min(out) + 1L
}
as_index_max <- function(x, max = base::max(x)) {
levels <- c(setdiff(sort(unique(x)), max), max)
out <- as.integer(ordered(x, levels = levels))
out - min(out) + 1L
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.