Nothing
#' Build specifications for covariate effect simulation/visualization
#'
#' @name build_spec_coveff
#' @inheritParams sim_coveff
#' @param n_sigfig Number of significant figures to form value_label of
#' continuous variables. See [gt::vec_fmt_number()] for details.
#' @param use_seps Whether to use separators for thousands in printing numbers.
#' See [gt::vec_fmt_number()] for details.
#' @param drop_trailing_dec_mark Whether to drop the trailing decimal mark
#' (".") in value_label of continuous variables. See [gt::vec_fmt_number()]
#' for details.
#'
#' @examplesIf BayesERtools:::.if_run_ex_coveff()
#' \donttest{
#' data(d_sim_binom_cov_hgly2)
#'
#' ermod_bin <- dev_ermod_bin(
#' data = d_sim_binom_cov_hgly2,
#' var_resp = "AEFLAG",
#' var_exposure = "AUCss_1000",
#' var_cov = c("BHBA1C_5", "RACE"),
#' )
#'
#' spec_coveff <- build_spec_coveff(ermod_bin)
#' plot_coveff(ermod_bin, spec_coveff = spec_coveff)
#' }
#'
NULL
#' @export
#' @rdname build_spec_coveff
#' @return
#' `spec_coveff` (return object) is a data frame for the specification
#' of the covariate effects to be visualized. This is internally generated by
#' [build_spec_coveff()] if you run [sim_coveff()] or [plot_coveff()]
#' directly. Alternatively, you can develop your own or modify the one
#' generated by [build_spec_coveff()] and supply it to [sim_coveff()] or
#' [plot_coveff()]. The data frame should have the following columns (but
#' it's probably easier to try [build_spec_coveff()] and see the structure):
#'
#' - `var_order`: The order of the covariate in the forest plot. The exposure
#' variable is always the first one and the covariates are ordered by the
#' order they are supplied in the `var_cov` argument of the `dev_ermod_*`
#' function. If you used a model from [dev_ermod_bin_cov_sel()], then the
#' order is determined by the variable selection process.
#' - `var_name`: The name of the variable.
#' - `var_label`: The label of the variable to be used for plot.
#' This is the same as `var_name` by default.
#' - `value_order`: The order of the value of the variable to be evaluated.
#' - `value_annot`: The annotation of the value of the variable to be
#' evaluated. This appears on the right hand side of the forest plot.
#' - `value_label`: The label of the value of the variable to be evaluated.
#' - `value_cont`: The value for continuous variables.
#' - `value_cat`: The value for categorical variables.
#' - `is_ref_value`: Whether the value is the reference value.
#' - `show_ref_value`: Whether to show the reference value in the plot and
#' table. This is TRUE by default for is_ref_value == TRUE, otherwise NA
#' (and ignored).
#' - `is_covariate`: Whether the variable is a covariate (TRUE) or exposure
#' variable (FALSE).
#'
build_spec_coveff <- function(
ermod,
data = NULL,
qi_width_cov = 0.9,
n_sigfig = 3,
use_seps = TRUE,
drop_trailing_dec_mark = TRUE) {
stopifnot(inherits(ermod, "ermod"))
if (is.null(data)) {
data <- ermod$data
}
stopifnot(is.data.frame(data))
var_exposure <- extract_var_exposure(ermod)
var_cov <- extract_var_cov(ermod)
check_data_columns(
data = data,
var_exposure = var_exposure,
var_cov = var_cov
)
data <- data |>
dplyr::select(
dplyr::all_of(var_exposure),
dplyr::all_of(var_cov)
)
spec_var_exp <-
calc_summary_col(
data[[var_exposure]],
col_name = var_exposure, var_order = 1, qi_width_cov = qi_width_cov
) |>
dplyr::mutate(is_covariate = FALSE)
spec_var_cov <-
purrr::pmap(list(
dplyr::select(data, dplyr::all_of(var_cov)),
var_cov, seq_along(var_cov) + 1
), function(.x, .y, .z) {
calc_summary_col(.x, .y, .z,
qi_width_cov = qi_width_cov,
n_sigfig = n_sigfig, use_seps = use_seps,
drop_trailing_dec_mark = drop_trailing_dec_mark
)
}) |>
purrr::list_rbind() |>
dplyr::mutate(is_covariate = TRUE)
# Create dummy value_cat column when there were no categorical covariates
if (!"value_cat" %in% colnames(spec_var_cov)) {
spec_var_cov <- spec_var_cov |>
dplyr::mutate(value_cat = NA_character_)
}
spec_coveff <-
dplyr::bind_rows(spec_var_exp, spec_var_cov) |>
dplyr::mutate(
var_label = var_name, show_ref_value = ifelse(is_ref_value, TRUE, NA)
) |>
dplyr::select(
var_order, var_name, var_label, value_order, value_annot, value_label,
value_cont, value_cat, is_ref_value, show_ref_value, is_covariate
)
return(spec_coveff)
}
calc_summary_col <- function(
x, col_name, var_order, qi_width_cov = 0.9,
n_sigfig = 3, use_seps = TRUE,
drop_trailing_dec_mark = TRUE) {
if (is.numeric(x)) {
calc_summary_col_cont(x,
qi_width_cov = qi_width_cov,
n_sigfig = n_sigfig, use_seps = use_seps,
drop_trailing_dec_mark = drop_trailing_dec_mark
) |>
dplyr::mutate(var_name = col_name, var_order = var_order)
} else {
calc_summary_col_categ(x) |>
dplyr::mutate(var_name = col_name, var_order = var_order)
}
}
calc_summary_col_cont <- function(
x, qi_width_cov = 0.9, n_sigfig = 3,
use_seps = TRUE, drop_trailing_dec_mark = TRUE) {
rlang::check_installed("gt")
summary_col_cont <-
dplyr::tibble(
value_cont = c(
as.numeric(stats::quantile(x, probs = 0.5 - qi_width_cov / 2)),
stats::median(x),
as.numeric(stats::quantile(x, probs = 0.5 + qi_width_cov / 2))
),
value_order = 1:3,
value_annot = get_val_annot_cont(qi_width_cov),
is_ref_value = c(FALSE, TRUE, FALSE)
) |>
dplyr::mutate(value_label = gt::vec_fmt_number(value_cont,
n_sigfig = n_sigfig,
use_seps = use_seps
))
# For some reason drop_trailing_dec_mark is not working, so manually
# remove trailing decimal mark
if (drop_trailing_dec_mark) {
summary_col_cont <-
summary_col_cont |>
dplyr::mutate(value_label = sub("\\.$", "", value_label))
}
}
get_val_annot_cont <- function(qi_width_cov = 0.9) {
c(
paste0(
as.character(round((0.5 - qi_width_cov / 2) * 100, digits = 2)), "th"
),
"median",
paste0(
as.character(round((0.5 + qi_width_cov / 2) * 100, digits = 2)), "th"
)
)
}
calc_summary_col_categ <- function(x) {
# Replicating forcats::fct_infreq
x_fct_ord <- factor(x, levels = names(sort(table(x), decreasing = TRUE)))
dplyr::tibble(value_cat = levels(x_fct_ord)) |>
dplyr::mutate(
value_order = dplyr::row_number(),
value_annot = paste0(add_ordinal_suffix(value_order), " freq"),
is_ref_value = value_order == 1
) |>
dplyr::mutate(value_label = value_cat)
}
add_ordinal_suffix <- function(x) {
paste0(
x,
c("th", "st", "nd", "rd", rep("th", 6))[1 + x %% 10 * !x %% 100 == 11]
)
}
# TODO
# edit_spec_coveff_raw(var_name, values_vec) function to replace the spec
# edit_spec_coveff_summary(var_name, value_cont, value_cat, value_order,
# value_label (optional), value_annot (optional?), is_ref_value, is_covariate)
# function to replace the spec
#' Customize specifications for covariate effect simulations/visualizations
#'
#' @description
#' - [build_spec_coveff_one_variable()] is a helper function to create a new
#' specification for a single variable. This is useful when you want to
#' customize the specification for a single variable.
#' - [replace_spec_coveff()] is used to replace the specification for some (or
#' all) variables in the original specification data frame. If you want to
#' replace multiple variables, you can just stack the specifications
#' together.
#'
#' @export
#' @name edit_spec_coveff
#' @inheritParams build_spec_coveff
#' @param var_name The name of the variable for which a new spec is to be
#' created.
#' @param values_vec The vector of the values for creating a new spec.
#' @param var_label The label of the variable to be used for plot. If NULL
#' (default), it is set to `var_name`.
#' @param show_ref_value Whether to show the reference value in the plot and
#' table. Setting this results in the `show_ref_value` column in the
#' specification data frame.
#' @return See [build_spec_coveff()] for the structure of the return object.
#' [build_spec_coveff_one_variable()] returns a data frame corresponding to
#' the specification for a single variable, which can be used as an input to
#' [replace_spec_coveff()].
#'
#' @examplesIf BayesERtools:::.if_run_ex_coveff()
#' \donttest{
#' set.seed(1234)
#' data(d_sim_binom_cov_hgly2)
#'
#' ermod_bin <- suppressWarnings(dev_ermod_bin(
#' data = d_sim_binom_cov_hgly2, var_resp = "AEFLAG",
#' var_exposure = "AUCss_1000", var_cov = c("BGLUC", "RACE"),
#' verbosity_level = 0,
#' # Below option to make the example run fast
#' chains = 2, iter = 1000
#' ))
#'
#' spec_coveff <- build_spec_coveff(ermod_bin)
#' spec_new_bgluc <- build_spec_coveff_one_variable(
#' "BGLUC", seq(4, 8, by = 0.1),
#' var_label = "Baseline Glucose (mmol/L)",
#' qi_width_cov = 0.8, show_ref_value = FALSE
#' )
#' spec_coveff_new <- replace_spec_coveff(spec_coveff, spec_new_bgluc)
#' plot_coveff(ermod_bin, spec_coveff = spec_coveff_new)
#' }
#'
build_spec_coveff_one_variable <- function(
var_name, values_vec,
var_label = NULL,
qi_width_cov = 0.9, n_sigfig = 3, use_seps = TRUE,
drop_trailing_dec_mark = TRUE, show_ref_value = TRUE) {
stopifnot(is.vector(values_vec))
spec_one_var <-
calc_summary_col(values_vec, var_name,
var_order = 1,
qi_width_cov = qi_width_cov,
n_sigfig = n_sigfig, use_seps = use_seps,
drop_trailing_dec_mark = drop_trailing_dec_mark
) |>
dplyr::mutate(
var_label = var_name,
show_ref_value = ifelse(is_ref_value, show_ref_value, NA)
) |>
dplyr::select(
var_name, var_label, value_order, value_annot, value_label,
dplyr::any_of(c("value_cont", "value_cat")), is_ref_value, show_ref_value
)
if (!is.null(var_label)) spec_one_var$var_label <- var_label
return(spec_one_var)
}
#' @export
#' @name edit_spec_coveff
#' @param spec_orig Original specification data frame.
#' @param spec_new New specification data frame. It can be generated by
#' [build_spec_coveff_one_variable()] or manually crafting with the
#' following variables: `var_name`, `var_label`, `value_order`, `value_annot`,
#' `value_label`, `value_cont` or `value_cat`, `is_ref_value`,
#' `show_ref_value`. You can have multiple variables stacked together.
#' @param replace_ref_value Whether to replace the reference values from the
#' original specification data frame. Default is FALSE; in this case,
#' show_ref_value is set to FALSE as it can be confusing.
#' If you set replace_ref_value to TRUE,
#' the reference calculation for the forest plot is also done with the one
#' in spec_new.
replace_spec_coveff <- function(
spec_orig, spec_new,
replace_ref_value = FALSE) {
stopifnot(inherits(spec_orig, "data.frame"))
stopifnot(inherits(spec_new, "data.frame"))
# Check columns
## Check if value_cont and/or value_cat columns are present
value_col_to_expect <- c()
if ("value_cont" %in% colnames(spec_new)) {
value_col_to_expect <- c(value_col_to_expect, "value_cont")
}
if ("value_cat" %in% colnames(spec_new)) {
value_col_to_expect <- c(value_col_to_expect, "value_cat")
}
if (length(value_col_to_expect) == 0) {
stop("At least one of value_cont or value_cat columns must be present.")
}
## Make sure spec_new has the following columns
col_names_expect_spec_new <-
c(
"var_name", "var_label", "value_order", "value_annot", "value_label",
value_col_to_expect, "is_ref_value", "show_ref_value"
)
cond1 <- all(col_names_expect_spec_new %in% colnames(spec_new))
cond2 <- all(colnames(spec_new) %in% col_names_expect_spec_new)
if (!(cond1 && cond2)) {
stop(
"`spec_new` expected to have the following columns:\n",
paste0(col_names_expect_spec_new, collapse = ", "),
".\nThe following columns are found:\n",
paste0(colnames(spec_new), collapse = ", "),
# Show the difference
".\nDifference: ",
paste0(
dplyr::symdiff(col_names_expect_spec_new, colnames(spec_new)),
collapse = ", "
)
)
}
# Check var_name
## Make sure all var_name in spec_new exist in spec_orig
if (any(!spec_new$var_name %in% spec_orig$var_name)) {
## Which var_name in spec_new does not exist in spec_orig?
var_name_mismatch <-
spec_new$var_name[!spec_new$var_name %in% spec_orig$var_name] |>
unique()
stop(
"Following var_name in spec_new do not exist in spec_orig: ",
paste0(var_name_mismatch, collapse = ", ")
)
}
map_name_label <-
spec_new |>
dplyr::distinct(var_name, var_label)
if (nrow(map_name_label) != length(unique(spec_new$var_name))) {
stop("var_label should be the same for the same var_name.")
}
add_to_spec_new <- spec_orig |>
dplyr::filter(var_name %in% spec_new$var_name) |>
dplyr::select(var_order, var_name, is_covariate) |>
dplyr::distinct()
if (replace_ref_value) {
spec_new_2 <-
spec_new |>
dplyr::left_join(add_to_spec_new, by = "var_name")
spec_updated <-
spec_orig |>
dplyr::filter(!var_name %in% spec_new$var_name) |>
dplyr::bind_rows(spec_new_2) |>
dplyr::arrange(var_order, value_order)
} else {
spec_keep <- spec_orig |>
dplyr::filter(!var_name %in% spec_new$var_name)
spec_ref_orig <- spec_orig |>
dplyr::filter(is_ref_value, var_name %in% spec_new$var_name) |>
dplyr::select(
var_name, value_order, value_annot, value_label,
dplyr::any_of(c("value_cont", "value_cat")), is_ref_value,
show_ref_value
) |>
dplyr::left_join(map_name_label, by = "var_name") |>
dplyr::mutate(
show_ref_value = FALSE,
value_annot = NA
)
spec_new_2 <-
spec_new |>
dplyr::arrange(value_order) |>
dplyr::mutate(is_ref_value = FALSE, show_ref_value = NA)
spec_updated <-
dplyr::bind_rows(spec_ref_orig, spec_new_2) |>
dplyr::mutate(value_order = dplyr::row_number(), .by = var_name) |>
dplyr::left_join(add_to_spec_new, by = "var_name") |>
dplyr::bind_rows(spec_keep) |>
dplyr::arrange(var_order, value_order)
}
return(spec_updated)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.