R/build_spec_coveff.R

Defines functions replace_spec_coveff build_spec_coveff_one_variable add_ordinal_suffix calc_summary_col_categ get_val_annot_cont calc_summary_col_cont calc_summary_col build_spec_coveff

Documented in build_spec_coveff build_spec_coveff_one_variable replace_spec_coveff

#' Build specifications for covariate effect simulation/visualization
#'
#' @name build_spec_coveff
#' @inheritParams sim_coveff
#' @param n_sigfig Number of significant figures to form value_label of
#' continuous variables. See [gt::vec_fmt_number()] for details.
#' @param use_seps Whether to use separators for thousands in printing numbers.
#' See [gt::vec_fmt_number()] for details.
#' @param drop_trailing_dec_mark Whether to drop the trailing decimal mark
#' (".") in value_label of continuous variables. See [gt::vec_fmt_number()]
#' for details.
#'
#' @examplesIf BayesERtools:::.if_run_ex_coveff()
#' \donttest{
#' data(d_sim_binom_cov_hgly2)
#'
#' ermod_bin <- dev_ermod_bin(
#'   data = d_sim_binom_cov_hgly2,
#'   var_resp = "AEFLAG",
#'   var_exposure = "AUCss_1000",
#'   var_cov = c("BHBA1C_5", "RACE"),
#' )
#'
#' spec_coveff <- build_spec_coveff(ermod_bin)
#' plot_coveff(ermod_bin, spec_coveff = spec_coveff)
#' }
#'
NULL

#' @export
#' @rdname build_spec_coveff
#' @return
#' `spec_coveff` (return object) is a data frame for the specification
#' of the covariate effects to be visualized. This is internally generated by
#' [build_spec_coveff()] if you run [sim_coveff()] or [plot_coveff()]
#' directly. Alternatively, you can develop your own or modify the one
#' generated by [build_spec_coveff()] and supply it to [sim_coveff()] or
#' [plot_coveff()]. The data frame should have the following columns (but
#' it's probably easier to try [build_spec_coveff()] and see the structure):
#'
#' - `var_order`: The order of the covariate in the forest plot. The exposure
#'   variable is always the first one and the covariates are ordered by the
#'   order they are supplied in the `var_cov` argument of the `dev_ermod_*`
#'   function. If you used a model from [dev_ermod_bin_cov_sel()], then the
#'   order is determined by the variable selection process.
#' - `var_name`: The name of the variable.
#' - `var_label`: The label of the variable to be used for plot.
#'    This is the same as `var_name` by default.
#' - `value_order`: The order of the value of the variable to be evaluated.
#' - `value_annot`: The annotation of the value of the variable to be
#'   evaluated. This appears on the right hand side of the forest plot.
#' - `value_label`: The label of the value of the variable to be evaluated.
#' - `value_cont`: The value for continuous variables.
#' - `value_cat`: The value for categorical variables.
#' - `is_ref_value`: Whether the value is the reference value.
#' - `show_ref_value`: Whether to show the reference value in the plot and
#'   table. This is TRUE by default for is_ref_value == TRUE, otherwise NA
#'   (and ignored).
#' - `is_covariate`: Whether the variable is a covariate (TRUE) or exposure
#'   variable (FALSE).
#'
build_spec_coveff <- function(
    ermod,
    data = NULL,
    qi_width_cov = 0.9,
    n_sigfig = 3,
    use_seps = TRUE,
    drop_trailing_dec_mark = TRUE) {
  stopifnot(inherits(ermod, "ermod"))
  if (is.null(data)) {
    data <- ermod$data
  }
  stopifnot(is.data.frame(data))

  var_exposure <- extract_var_exposure(ermod)
  var_cov <- extract_var_cov(ermod)

  check_data_columns(
    data = data,
    var_exposure = var_exposure,
    var_cov = var_cov
  )

  data <- data |>
    dplyr::select(
      dplyr::all_of(var_exposure),
      dplyr::all_of(var_cov)
    )

  spec_var_exp <-
    calc_summary_col(
      data[[var_exposure]],
      col_name = var_exposure, var_order = 1, qi_width_cov = qi_width_cov
    ) |>
    dplyr::mutate(is_covariate = FALSE)

  spec_var_cov <-
    purrr::pmap(list(
      dplyr::select(data, dplyr::all_of(var_cov)),
      var_cov, seq_along(var_cov) + 1
    ), function(.x, .y, .z) {
      calc_summary_col(.x, .y, .z,
        qi_width_cov = qi_width_cov,
        n_sigfig = n_sigfig, use_seps = use_seps,
        drop_trailing_dec_mark = drop_trailing_dec_mark
      )
    }) |>
    purrr::list_rbind() |>
    dplyr::mutate(is_covariate = TRUE)

  # Create dummy value_cat column when there were no categorical covariates
  if (!"value_cat" %in% colnames(spec_var_cov)) {
    spec_var_cov <- spec_var_cov |>
      dplyr::mutate(value_cat = NA_character_)
  }

  spec_coveff <-
    dplyr::bind_rows(spec_var_exp, spec_var_cov) |>
    dplyr::mutate(
      var_label = var_name, show_ref_value = ifelse(is_ref_value, TRUE, NA)
    ) |>
    dplyr::select(
      var_order, var_name, var_label, value_order, value_annot, value_label,
      value_cont, value_cat, is_ref_value, show_ref_value, is_covariate
    )

  return(spec_coveff)
}



calc_summary_col <- function(
    x, col_name, var_order, qi_width_cov = 0.9,
    n_sigfig = 3, use_seps = TRUE,
    drop_trailing_dec_mark = TRUE) {
  if (is.numeric(x)) {
    calc_summary_col_cont(x,
      qi_width_cov = qi_width_cov,
      n_sigfig = n_sigfig, use_seps = use_seps,
      drop_trailing_dec_mark = drop_trailing_dec_mark
    ) |>
      dplyr::mutate(var_name = col_name, var_order = var_order)
  } else {
    calc_summary_col_categ(x) |>
      dplyr::mutate(var_name = col_name, var_order = var_order)
  }
}

calc_summary_col_cont <- function(
    x, qi_width_cov = 0.9, n_sigfig = 3,
    use_seps = TRUE, drop_trailing_dec_mark = TRUE) {
  rlang::check_installed("gt")

  summary_col_cont <-
    dplyr::tibble(
      value_cont = c(
        as.numeric(stats::quantile(x, probs = 0.5 - qi_width_cov / 2)),
        stats::median(x),
        as.numeric(stats::quantile(x, probs = 0.5 + qi_width_cov / 2))
      ),
      value_order = 1:3,
      value_annot = get_val_annot_cont(qi_width_cov),
      is_ref_value = c(FALSE, TRUE, FALSE)
    ) |>
    dplyr::mutate(value_label = gt::vec_fmt_number(value_cont,
      n_sigfig = n_sigfig,
      use_seps = use_seps
    ))

  # For some reason drop_trailing_dec_mark is not working, so manually
  # remove trailing decimal mark
  if (drop_trailing_dec_mark) {
    summary_col_cont <-
      summary_col_cont |>
      dplyr::mutate(value_label = sub("\\.$", "", value_label))
  }
}

get_val_annot_cont <- function(qi_width_cov = 0.9) {
  c(
    paste0(
      as.character(round((0.5 - qi_width_cov / 2) * 100, digits = 2)), "th"
    ),
    "median",
    paste0(
      as.character(round((0.5 + qi_width_cov / 2) * 100, digits = 2)), "th"
    )
  )
}

calc_summary_col_categ <- function(x) {
  # Replicating forcats::fct_infreq
  x_fct_ord <- factor(x, levels = names(sort(table(x), decreasing = TRUE)))

  dplyr::tibble(value_cat = levels(x_fct_ord)) |>
    dplyr::mutate(
      value_order = dplyr::row_number(),
      value_annot = paste0(add_ordinal_suffix(value_order), " freq"),
      is_ref_value = value_order == 1
    ) |>
    dplyr::mutate(value_label = value_cat)
}

add_ordinal_suffix <- function(x) {
  paste0(
    x,
    c("th", "st", "nd", "rd", rep("th", 6))[1 + x %% 10 * !x %% 100 == 11]
  )
}



# TODO
# edit_spec_coveff_raw(var_name, values_vec) function to replace the spec
# edit_spec_coveff_summary(var_name, value_cont, value_cat, value_order,
# value_label (optional), value_annot (optional?), is_ref_value, is_covariate)
# function to replace the spec

#' Customize specifications for covariate effect simulations/visualizations
#'
#' @description
#' - [build_spec_coveff_one_variable()] is a helper function to create a new
#'   specification for a single variable. This is useful when you want to
#'   customize the specification for a single variable.
#' - [replace_spec_coveff()] is used to replace the specification for some (or
#'   all) variables in the original specification data frame. If you want to
#'   replace multiple variables, you can just stack the specifications
#'   together.
#'
#' @export
#' @name edit_spec_coveff
#' @inheritParams build_spec_coveff
#' @param var_name The name of the variable for which a new spec is to be
#' created.
#' @param values_vec The vector of the values for creating a new spec.
#' @param var_label The label of the variable to be used for plot. If NULL
#' (default), it is set to `var_name`.
#' @param show_ref_value Whether to show the reference value in the plot and
#' table. Setting this results in the `show_ref_value` column in the
#' specification data frame.
#' @return See [build_spec_coveff()] for the structure of the return object.
#' [build_spec_coveff_one_variable()] returns a data frame corresponding to
#' the specification for a single variable, which can be used as an input to
#' [replace_spec_coveff()].
#'
#' @examplesIf BayesERtools:::.if_run_ex_coveff()
#' \donttest{
#' set.seed(1234)
#' data(d_sim_binom_cov_hgly2)
#'
#' ermod_bin <- suppressWarnings(dev_ermod_bin(
#'   data = d_sim_binom_cov_hgly2, var_resp = "AEFLAG",
#'   var_exposure = "AUCss_1000", var_cov = c("BGLUC", "RACE"),
#'   verbosity_level = 0,
#'   # Below option to make the example run fast
#'   chains = 2, iter = 1000
#' ))
#'
#' spec_coveff <- build_spec_coveff(ermod_bin)
#' spec_new_bgluc <- build_spec_coveff_one_variable(
#'   "BGLUC", seq(4, 8, by = 0.1),
#'   var_label = "Baseline Glucose (mmol/L)",
#'   qi_width_cov = 0.8, show_ref_value = FALSE
#' )
#' spec_coveff_new <- replace_spec_coveff(spec_coveff, spec_new_bgluc)
#' plot_coveff(ermod_bin, spec_coveff = spec_coveff_new)
#' }
#'
build_spec_coveff_one_variable <- function(
    var_name, values_vec,
    var_label = NULL,
    qi_width_cov = 0.9, n_sigfig = 3, use_seps = TRUE,
    drop_trailing_dec_mark = TRUE, show_ref_value = TRUE) {
  stopifnot(is.vector(values_vec))

  spec_one_var <-
    calc_summary_col(values_vec, var_name,
      var_order = 1,
      qi_width_cov = qi_width_cov,
      n_sigfig = n_sigfig, use_seps = use_seps,
      drop_trailing_dec_mark = drop_trailing_dec_mark
    ) |>
    dplyr::mutate(
      var_label = var_name,
      show_ref_value = ifelse(is_ref_value, show_ref_value, NA)
    ) |>
    dplyr::select(
      var_name, var_label, value_order, value_annot, value_label,
      dplyr::any_of(c("value_cont", "value_cat")), is_ref_value, show_ref_value
    )

  if (!is.null(var_label)) spec_one_var$var_label <- var_label

  return(spec_one_var)
}

#' @export
#' @name edit_spec_coveff
#' @param spec_orig Original specification data frame.
#' @param spec_new New specification data frame. It can be generated by
#' [build_spec_coveff_one_variable()] or manually crafting with the
#' following variables: `var_name`, `var_label`, `value_order`, `value_annot`,
#' `value_label`, `value_cont` or `value_cat`, `is_ref_value`,
#' `show_ref_value`. You can have multiple variables stacked together.
#' @param replace_ref_value Whether to replace the reference values from the
#' original specification data frame. Default is FALSE; in this case,
#' show_ref_value is set to FALSE as it can be confusing.
#' If you set replace_ref_value to TRUE,
#' the reference calculation for the forest plot is also done with the one
#' in spec_new.
replace_spec_coveff <- function(
    spec_orig, spec_new,
    replace_ref_value = FALSE) {
  stopifnot(inherits(spec_orig, "data.frame"))
  stopifnot(inherits(spec_new, "data.frame"))

  # Check columns
  ## Check if value_cont and/or value_cat columns are present
  value_col_to_expect <- c()
  if ("value_cont" %in% colnames(spec_new)) {
    value_col_to_expect <- c(value_col_to_expect, "value_cont")
  }
  if ("value_cat" %in% colnames(spec_new)) {
    value_col_to_expect <- c(value_col_to_expect, "value_cat")
  }
  if (length(value_col_to_expect) == 0) {
    stop("At least one of value_cont or value_cat columns must be present.")
  }

  ## Make sure spec_new has the following columns
  col_names_expect_spec_new <-
    c(
      "var_name", "var_label", "value_order", "value_annot", "value_label",
      value_col_to_expect, "is_ref_value", "show_ref_value"
    )

  cond1 <- all(col_names_expect_spec_new %in% colnames(spec_new))
  cond2 <- all(colnames(spec_new) %in% col_names_expect_spec_new)
  if (!(cond1 && cond2)) {
    stop(
      "`spec_new` expected to have the following columns:\n",
      paste0(col_names_expect_spec_new, collapse = ", "),
      ".\nThe following columns are found:\n",
      paste0(colnames(spec_new), collapse = ", "),
      # Show the difference
      ".\nDifference: ",
      paste0(
        dplyr::symdiff(col_names_expect_spec_new, colnames(spec_new)),
        collapse = ", "
      )
    )
  }

  # Check var_name
  ## Make sure all var_name in spec_new exist in spec_orig
  if (any(!spec_new$var_name %in% spec_orig$var_name)) {
    ## Which var_name in spec_new does not exist in spec_orig?
    var_name_mismatch <-
      spec_new$var_name[!spec_new$var_name %in% spec_orig$var_name] |>
      unique()
    stop(
      "Following var_name in spec_new do not exist in spec_orig: ",
      paste0(var_name_mismatch, collapse = ", ")
    )
  }

  map_name_label <-
    spec_new |>
    dplyr::distinct(var_name, var_label)

  if (nrow(map_name_label) != length(unique(spec_new$var_name))) {
    stop("var_label should be the same for the same var_name.")
  }

  add_to_spec_new <- spec_orig |>
    dplyr::filter(var_name %in% spec_new$var_name) |>
    dplyr::select(var_order, var_name, is_covariate) |>
    dplyr::distinct()

  if (replace_ref_value) {
    spec_new_2 <-
      spec_new |>
      dplyr::left_join(add_to_spec_new, by = "var_name")

    spec_updated <-
      spec_orig |>
      dplyr::filter(!var_name %in% spec_new$var_name) |>
      dplyr::bind_rows(spec_new_2) |>
      dplyr::arrange(var_order, value_order)
  } else {
    spec_keep <- spec_orig |>
      dplyr::filter(!var_name %in% spec_new$var_name)

    spec_ref_orig <- spec_orig |>
      dplyr::filter(is_ref_value, var_name %in% spec_new$var_name) |>
      dplyr::select(
        var_name, value_order, value_annot, value_label,
        dplyr::any_of(c("value_cont", "value_cat")), is_ref_value,
        show_ref_value
      ) |>
      dplyr::left_join(map_name_label, by = "var_name") |>
      dplyr::mutate(
        show_ref_value = FALSE,
        value_annot = NA
      )

    spec_new_2 <-
      spec_new |>
      dplyr::arrange(value_order) |>
      dplyr::mutate(is_ref_value = FALSE, show_ref_value = NA)

    spec_updated <-
      dplyr::bind_rows(spec_ref_orig, spec_new_2) |>
      dplyr::mutate(value_order = dplyr::row_number(), .by = var_name) |>
      dplyr::left_join(add_to_spec_new, by = "var_name") |>
      dplyr::bind_rows(spec_keep) |>
      dplyr::arrange(var_order, value_order)
  }

  return(spec_updated)
}

Try the BayesERtools package in your browser

Any scripts or data that you put into this service are public.

BayesERtools documentation built on June 8, 2025, 1:26 p.m.