#' Grouped Forest Plots
#'
#' @param object A `forge` object that contains or represents model data, with
#' required grouping variables (such as strata or interaction terms).
#'
#' @param formula Identifies the relationships of interest, with LHS
#' representing the outcome, and RHS representing the exposure.
#'
#' @param vars Character vector of the variables of interest. They can either be
#' grouping variables, or can be performed as interaction terms (based on the
#' models included originally). If the parameter __interaction__ is changed to
#' `TRUE`, then will create joint confidence intervals for the presence or
#' absence of the interaction term.
#'
#' @param type Character vector to identify if the model was using subgroups or
#' interaction terms. The difference is that confidence intervals for
#' interaction can be calculated in multiple ways, and the effect size is
#' dependent on the presence or absence of the interaction variable.
#'
#' * __interaction__ = assumes an interaction term that is binary/dichotomous
#'
#' * __subgroup__ = assumes a categorical variable that was used to group the
#' original dataset
#'
#' @param level List of formulas. Each list-element is a formula with the LHS
#' reflecting either the variable to re-label or a specific level, and the RHS
#' reflecting what the new level should be called (for display). If there are
#' conflicting labels, the most recent will be used.
#'
#' For example, `list(am ~ c("Manual", "Automatic")` would take, from the
#' `mtcars` dataset, the `am` variable, which consists of `c(0, 1)`, and
#' relabel them in the order described. They are sorted in ascendiing order
#' prior to re-labeling.
#'
#' The alternative approach is to use the specific level itself and have it
#' re-labeled. `list(0 ~ "Absent")` would take all levels that are zero, and
#' change their value.
#'
#' @param columns Additional columns that help to describe the subgroup models.
#' At least one column should be selected from this list. The sequence listed
#' will reflect the sequence shown in the table. The current options are:
#'
#' * beta = point estimate value, such as odds ratio or hazard ratio
#'
#' * conf = inclusion of the confidence interval (presumed to be 95%-ile)
#'
#' * n = number of observations in each model group
#'
#' * p = p.value for model or interaction term
#'
#' For example: `list(beta ~ "Hazard", conf ~ "95% CI" n ~ "No.")"`
#'
#' @param flip Determine if the odds or hazard ratio should be shown as the
#' reciprocal values. Instead of a decreasing hazard for every unit increase,
#' it describes an increasing hazard for every unit decrease.
#'
#' @param axis Argument to help modify the forest plot itself. This is a
#' list-formula of the following parameters. If they are not named, the
#' function will attempt to "guess" the optimal parameters. The options are:
#'
#' * title = label or title for the column describing the forest plot
#'
#' * lim = x-axis limits
#'
#' * breaks = x-axis tick marks or break points that should be numbers
#'
#' * int = x-axis intercept
#'
#' * lab = label for the x-axis
#'
#' * scale = defaults to continuous, but may also use a log transformation as
#' well `c("continuous", "log")`
#'
#' For example: `list(title ~ "Decreasing Hazard", lab ~ "HR (95% CI))`
#'
#' @param width Describes the width of each column in a list of two-sided
#' formulas. The RHS is a decimal reflecting the percent each column should
#' take of the entire table. The forest plot is usually given 30% of the
#' width.
#'
#' For example: `list(n ~ .1, forest ~ 0.3)`
#'
#' @import ggplot2
#' @import gt
#' @export
tbl_group_forests <- function(object,
formula,
type,
vars,
level = list(),
columns = list(beta ~ "Estimate",
conf ~ "95% CI",
n ~ "No."),
flip = FALSE,
axis = list(scale ~ "continuous"),
width = list()) {
# TODO revise how this function works for forge objects versus standard data tables
# TODO add ability to extract or filter by formulas from forge objects
# TODO how to decide which LEVEL or number to pick in terms of adjusted models
validate_class(object, "forge")
# Validate formula
if (!inherits(formula, "formula")) {
stop("Object should be of type `formula`, not `", class(formula)[1], "`")
}
y <- lhs(formula)
x <- rhs(formula)
# Validate type and if strata variables are appropriate
if (!type %in% c("interaction", "subgroup")) {
stop("A single string for type of 'interaction' or 'subgroup' must be selected.")
} else {
vars_in_models <- vars %in% unlist(object[c("strata", "interaction")], use.names = FALSE)
if (!any(vars_in_models)) {
stop("The variables selected for ", type, " are not available in the model objects.")
} else if (!all(vars_in_models)) {
message("The variables `", paste0(vars[!vars_in_models], collapse = ", "), "` are not avaiilable in the model object.")
}
}
# Get labels and/or levels
lvl <- list()
if (length(level) > 0) {
lvl <- formula_to_named_list(level)
}
# Rename selecting columns (for both parameter estimates and model info)
cols <- formula_to_named_list(columns)
est_vars <- character()
mod_vars <- character()
if ("beta" %in% names(cols)) {
est_vars <- append(est_vars, "estimate")
}
if ("conf" %in% names(cols)) {
est_vars <- append(est_vars, c("conf.low", "conf.high"))
}
if ("p" %in% names(cols)) {
est_vars <- append(est_vars, "p.value")
}
if ("n" %in% names(cols)) {
mod_vars <- append(mod_vars, "nobs")
}
if (type == "interaction") {
# Basic table by interaction
tbl_present <-
object |>
dplyr::filter(interaction %in% vars) |>
dplyr::filter(outcome == y & exposure == x) |>
dplyr::mutate(joint_term = paste0(exposure, ":", interaction)) |>
dplyr::rowwise() |>
dplyr::mutate(p.value = parameter_estimates$p.value[parameter_estimates$term == joint_term]) |>
dplyr::mutate(pars = list(interaction_estimates(model, exposure, interaction, present = TRUE)))
tbl_absent <-
object |>
dplyr::filter(interaction %in% vars) |>
dplyr::filter(outcome == y & exposure == x) |>
dplyr::mutate(joint_term = paste0(exposure, ":", interaction)) |>
dplyr::rowwise() |>
dplyr::mutate(p.value = parameter_estimates$p.value[parameter_estimates$term == joint_term]) |>
dplyr::mutate(pars = list(interaction_estimates(model, exposure, interaction, present = FALSE)))
tbl <-
dplyr::bind_rows(tbl_present, tbl_absent) |>
dplyr::mutate(
estimate = pars[["estimate"]],
conf.low = pars[["conf.low"]],
conf.high = pars[["conf.high"]],
nobs = pars[["nobs"]],
level = pars[["level"]]
) |>
dplyr::select(exposure, interaction, level, exposure, terms, all_of(est_vars), all_of(mod_vars)) |>
dplyr::rename(strata = interaction,
term = exposure) |>
dplyr::group_by(strata, level) |>
dplyr::arrange(strata)
# Relabel if needed
if (length(lvl) > 0) {
for (i in seq_along(lvl)) {
if (names(lvl)[i] %in% tbl$strata) {
tbl$level[tbl$strata == names(lvl[i])] <- lvl[[i]]
} else if (names(lvl)[i] %in% tbl$level) {
tbl$level[tbl$level == names(lvl)[i]] <- lvl[[i]]
}
}
}
} else {
# Basic table by strata
tbl <-
object |>
temper() |>
dplyr::filter(strata %in% vars) |>
dplyr::filter(outcome == y & exposure == x) |>
dplyr::group_by(strata, level) |>
dplyr::filter(number == max(number)) |>
dplyr::filter(term == x) |>
dplyr::ungroup() |>
dplyr::select(strata, level, term, terms,
all_of(est_vars), all_of(mod_vars))
}
# Reciprocal odds or hazard if needed
if (flip) {
tbl <-
dplyr::mutate(tbl, across(
c(all_of(est_vars), -p.value),
~ 1 / .x
))
if ("conf.low" %in% est_vars) {
tbl <-
dplyr::rename(tbl, conf.high = conf.low, conf.low = conf.high)
}
}
# Setup plotting variables from the axis argument
x_vars <- formula_to_named_list(axis)
if ("lim" %in% names(x_vars)) {
lim_val <- eval(x_vars$lim)
xmin <- min(lim_val)
xmax <- max(lim_val)
} else {
xmin <- min(tbl$conf.low, na.rm = TRUE)
xmax <- min(tbl$conf.high, na.rm = TRUE)
}
if ("int" %in% names(x_vars)) {
xint <- eval(x_vars$int)
} else {
xint <- dplyr::case_when(
xmin < -1 & xmax <= 0 ~ -1,
xmin > -1 & xmax <= 0 ~ 0,
xmin < 0 & xmax > 0 ~ 0,
xmin >= 0 & xmax <= 1 ~ 0,
xmin >= 0 & xmax > 1 ~ 1
)
}
if ("breaks" %in% names(x_vars)) {
breaks <- eval(x_vars$breaks)
} else {
breaks <- ggplot2::waiver()
}
if ("lab" %in% names(x_vars)) {
lab <- x_vars$lab
} else {
lab <- NULL
}
if ("scale" %in% names(x_vars)) {
scale <- x_vars$scale
} else {
scale <- "continuous"
}
# Make appropriate plots
plots <-
tbl |>
dplyr::group_by(strata, level) |>
tidyr::nest() |>
dplyr::mutate(gg = purrr::map(data, ~ {
ggplot(.x, aes(x = estimate, y = 0)) +
geom_point(size = 50) +
geom_linerange(aes(xmax = conf.high, xmin = conf.low, size = 5)) +
geom_vline(xintercept = xint, linetype = 3, linewidth = 5) +
theme_minimal() +
theme(
axis.text.y = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_blank(),
axis.title.x = element_blank(),
axis.line.x = element_blank(),
legend.position = "none",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()
) +
{
if(scale == "log") {
scale_x_continuous(
trans = scales::pseudo_log_trans(sigma = 0.1, base = exp(1)),
breaks = breaks,
limits = c(xmin, xmax),
oob = scales::oob_squish
)
} else {
scale_x_continuous(
breaks = breaks,
limits = c(xmin, xmax),
oob = scales::oob_squish
)
}
}
})) |>
tidyr::unnest(data) |>
dplyr::ungroup() |>
dplyr::add_row()
tmp <- plots$gg[[1]]
tmp$layers[1:2] <- NULL
btm_axis <-
tmp +
xlab(lab) +
theme(
axis.text.x = element_text(size = 100, margin = margin(10, 0 , 0 , 0)),
axis.ticks.x = element_line(linewidth = 5),
axis.ticks.length.x = unit(30, "pt"),
axis.title.x = element_text(size = 150, margin = margin(10, 0, 0 , 0)),
axis.line.x = element_line(
linewidth = 5,
arrow = grid::arrow(
length = grid::unit(50, "pt"),
ends = "both",
type = "closed"
)
)
)
plots$gg[nrow(plots)] <- list(btm_axis)
# To use to help filter for which variables to modify in grouped rows
lowest_lvls <-
subset(tbl, select = c(strata, level)) |>
dplyr::group_by(strata) |>
dplyr::slice_tail() |>
dplyr::pull(level) |>
unique()
# Create table
tbl |>
dplyr::rowwise() |>
dplyr::mutate(strata = dplyr::case_when(
strata %in% names(labels(terms)) ~ vec_data(get_runes(terms, field = "runes", value = strata))$label,
!(strata %in% names(labels(terms))) ~ strata
)) |>
dplyr::ungroup() |>
dplyr::mutate(ggplots = NA) |>
dplyr::add_row() |>
dplyr::select(level, strata, all_of(mod_vars), all_of(est_vars), ggplots) |>
gt(rowname_col = "level", groupname_col = "strata") |>
# Estimates and confidence intervals
{\(.) {
if (all(c("estimate", "conf.low", "conf.high") %in% est_vars)) {
. |>
cols_merge(columns = est_vars[1:3],
pattern = "{1} ({2}, {3})") |>
cols_width(estimate ~ pct(40)) |>
cols_label(estimate = cols$beta)
} else {
.
}
}}() |>
# Number of observations
{\(.) {
if (all(c("nobs") %in% mod_vars)) {
. |>
cols_width(nobs ~ pct(10)) |>
cols_label(nobs = cols$n)
} else {
.
}
}}() |>
# P.value is included for interactions
{\(.) {
if (all(c("p.value") %in% est_vars & isTRUE(type == "interaction"))) {
. |>
cols_move_to_end(p.value) |>
tab_style(
style = cell_text(color = "white", size = px(0)),
locations = cells_body(columns = p.value,
rows = level %in% lowest_lvls)
) |>
tab_style(
style = cell_text(v_align = "bottom"),
locations = cells_body(columns = p.value,
rows = level > min(level, na.rm = TRUE))
) |>
tab_style(
style = cell_text(weight = "bold"),
locations = cells_body(columns = p.value,
rows = p.value < 0.05)
) |>
cols_label(p.value = cols$p)
} else {
.
}
}}() |>
# P value included for general groups
{\(.) {
if (all(c("p.value") %in% est_vars)) {
. |>
cols_move_to_end(p.value) |>
tab_style(
style = cell_text(weight = "bold"),
locations = cells_body(columns = p.value,
rows = p.value < 0.05)
) |>
cols_label(p.value = cols$p)
} else {
.
}
}}() |>
tab_style(
style = list(
cell_borders(sides = "all", color = NULL)
),
locations = list(
cells_body(columns = c(all_of(mod_vars), all_of(est_vars))),
cells_stub(rows = everything())
)
) |>
fmt_number(
columns = where(is.numeric),
drop_trailing_zeros = TRUE,
n_sigfig = 2
) |>
cols_width(ggplots ~ pct(50)) |>
opt_vertical_padding(scale = 0) |>
opt_table_outline(style = "none") |>
tab_options(
data_row.padding = px(0),
table_body.border.bottom.width = px(0),
table_body.border.top.width = px(0),
column_labels.border.top.width = px(0)
) |>
tab_style(
style = list(
cell_text(color = "white", size = px(0)),
cell_borders(sides = "all", color = NULL)
),
locations = list(
cells_body(columns = ggplots),
cells_row_groups(groups = "NA"),
cells_stub(rows = is.na(level))
)
) |>
tab_style(
style = list(
cell_text(color = "white", size = px(0))
),
locations = list(
cells_body(columns = c(all_of(mod_vars), all_of(est_vars)),
rows = is.na(level))
)
) |>
cols_label(
ggplots = x_vars$title,
) |>
text_transform(
locations = cells_body(columns = ggplots),
fn = function(x) {
purrr::map(plots$gg,
ggplot_image,
height = px(50),
aspect_ratio = 5)
}
)
}
#' Compact and minimal theme for `gt` tables
#'
#' This theme was used for placing somewhat larger tables into `xaringan` slides
#' by making the spacing more compact and decreasing the font size. The exposed
#' variables are to control font size and table width, but any option from the
#' `gt` package is allowed.
#'
#' @inheritParams gt::tab_options
#' @param ... For passing additional arguments to the [tab_options()]
#' function
#' @family visualizers
#' @importFrom gt tab_options px pct
#' @export
theme_gt_compact <- function(data,
table.font.size = pct(80),
table.width = pct(90),
...) {
validate_class(data, "gt_tbl")
data |>
tab_options(
# Preset
table.margin.left = px(1),
table.margin.right = px(1),
row_group.padding = px(1),
data_row.padding = px(1),
footnotes.padding = px(1),
source_notes.padding = px(1),
stub.border.width = px(1),
# User supplied
table.width = table.width,
table.font.size = table.font.size,
...
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.