#' Forest Plot
#'
#' This function creates the data for and plots a forest plot comparing hazard rates for categorical variables by a 2 level by variable.
#'
#' @param data A dataset containing the necessary survival variables (event, time, other covariables).
#' @param stat The event survival variable, numeric, with 1 coded as an event and 0 a censor.
#' @param time The time survival variable, numeric.
#' @param byvar The by variable with 2 distinct levels.
#' @param covars A vector of categorical covariates that are desired for analysis.
#' @param ref Reference by variable level (optional). Use this if specific by variable level is desired as the reference.
#' @param title Title for the plot.
#' @param labels An optional vector, soon to be list, of labels for the covariates in the 'covars' variable. Currently must be the same length as the 'covars' vector.
#' @param graph.pos Column position for the graph.
#' @param xaxis.breaks Distance between the x-axis breaks of the forest plot.
#' @param x.lab Label for x-axis of plot.
#' @param plot.include Logical to have plot return from function call.
#' @param x.max Max value for x-axis. CI bands for intervals that pass this value will have an arrow on the right indicating a continuing band.
#' @param ... Additional arguments for the 'forestplot' function.
#'
#' @return This function returns a list of the forestplot produced and tables necessary to reproduce that plot.
#' @export
#'
#' @examples
#'
#' library(arsenal)
#' data(mockstudy)
#' mockstudy %>%
#' mutate(fu.stat = fu.stat - 1) %>%
#' filter(arm != "A: IFL" & age.ord != "10-19" & race != "Other") %>%
#' forest_plot(data = ., stat = "fu.stat", time = "fu.time", byvar = "arm",
#' covars = c("sex", "age.ord"), title = "Overall Survival",
#' labels = c("Gender", "Age Groups"),
#' xaxis.breaks = .5)
#'
forest_plot <- function(data, stat, time, byvar, covars, ref = as.character(levels(data2$arm)[1]), labels = NULL, graph.pos = 3,
xaxis.breaks = .25, title = "Hazard Rates", x.lab = "Hazard Ratios", plot.include = TRUE, x.max = max(values$upper, na.rm = TRUE),
...)
{
data2 <- data.frame(time=data[[time]], stat=data[[stat]], arm=factor(data[[byvar]])) %>%
bind_cols(data[covars] %>%
mutate_all(as.character))
data2$arm <- relevel(data2$arm, ref=ref)
nevent <- c(); hr <- c(); var <- c(); ci <- c(); upper <- c(); lower <- c(); pval <- c(); hrCI <- c()
i <- 0
for(variable in covars)
{
i <- i + 1
nevent = c(nevent, "")
hr = c(hr, "")
if(!is.null(labels) & length(labels) == length(covars))
{
var = c(var, labels[i])
}
else{
var = c(var, variable)
}
ci = c(ci, "")
upper = c(upper, "")
lower = c(lower, "")
pval = c(pval, "")
hrCI = c(hrCI, "")
for(level in unique(data2[[variable]]))
{
if(!is.na(level))
{
data3 <- data2[data2[[variable]] == level, ]
fit <- survival::coxph(survival::Surv(time, stat) ~ arm, data = data3)
# tidy_fit = broom::tidy(fit, exponentiate = TRUE, conf.int = TRUE)
s <- summary(fit, conf.int = .95)
co <- stats::coef(s)
nn <- c("term", "estimate", "std.error", "statistic", "p.value")
ret <- dplyr::bind_cols(tibble::tibble(term = rownames(co)), tibble::as_tibble(co[, -2, drop = FALSE]))
names(ret) <- nn
ret$estimate <- exp(ret$estimate)
ci_mod <- s$conf.int[, 3:4, drop = FALSE]
rownames(ci_mod) <- NULL
CI <- as.matrix(ci_mod)
colnames(CI) <- c("conf.low", "conf.high")
ret <- cbind(ret, CI)
tidy_fit <- tibble::as_tibble(ret)
nevent = c(nevent, paste0(fit$nevent, "/", fit$n))
hr = c(hr, round(tidy_fit$estimate, 2))
ci = c(ci, paste0("(", round(tidy_fit$conf.low, 3), ", ", round(tidy_fit$conf.high, 3), ")"))
hrCI = c(hrCI, paste0(round(tidy_fit$estimate, 2), " ", "(", round(tidy_fit$conf.low, 3), ", ", round(tidy_fit$conf.high, 3), ")"))
upper = c(upper, round(tidy_fit$conf.high, 3))
lower = c(lower, round(tidy_fit$conf.low, 3))
var = c(var, paste0(" ", level))
pval = c(pval, round(tidy_fit$p.value, 3))
}
}
}
forest_data <- data.frame(
variables = c(paste0(levels(data2$arm)[2], " vs ", levels(data2$arm)[1], " (ref)"), "Subgroup", var),
nevent = c("", "Events/N", nevent),
hazard = c("", "Hardard Rate", hr),
ci = c("", "95% CI", ci),
lower = c("", "Lower bound", lower),
upper = c("", "Upper bound", upper),
pval = c("", "P-value", pval),
hrCI = c("", "HR (95% CI)", hrCI)
) %>%
filter(upper != "Inf")
values <- forest_data[3:nrow(forest_data), c("hazard", "lower", "upper")] %>%
mutate_all(as.character) %>%
mutate_all(as.numeric)
table_text <- forest_data[, c("variables", 'nevent', "hrCI", "pval")]
if(plot.include == TRUE)
{
forestplot::forestplot(labeltext = table_text, graph.pos = graph.pos,
mean=c(NA, NA, values$hazard),
lower=c(NA, NA,values$lower), upper=c(NA, NA, values$upper),
title = title,
hrzl_lines = TRUE,
zero = 1,
clip = c(0, x.max),
xticks = seq(plyr::round_any(min(values$lower, na.rm = TRUE), xaxis.breaks),
plyr::round_any(max(pmin(x.max, values$upper, na.rm = TRUE)), xaxis.breaks), by = xaxis.breaks),
xticks.digits = 2,
txt_gp = forestplot::fpTxtGp(label=grid::gpar(cex=.8),
ticks=grid::gpar(cex=.8),
xlab=grid::gpar(cex = 1.2),
title=grid::gpar(cex = 1.2)),
col=forestplot::fpColors(box="black", lines="black", zero = "gray50"),
cex=0.9,
lineheight = "auto",
boxsize=0.15,
colgap=unit(1,"mm"),
lwd.ci=2,
ci.vertices=TRUE,
ci.vertices.height = 0.1,
xlab = x.lab,
...
)
}
out <- list(table = table_text, values = values)
class(out) <- "forest_plot"
invisible(out)
}
# @export
# print.forest_plot <- function(x, ...)
# {
# print(x$table)
# invisible(x)
# }
#
#
# s <- summary(x, conf.int = .95)
# co <- stats::coef(s)
# nn <- c("term", "estimate", "std.error", "statistic", "p.value")
# ret <- dplyr::bind_cols(tibble::tibble(term = rownames(co)), tibble::as_tibble(co[, -2, drop = FALSE]))
# names(ret) <- nn
# ret$estimate <- exp(ret$estimate)
# ci <- s$conf.int[, 3:4, drop = FALSE]
# rownames(ci) <- NULL
# CI <- as.matrix(ci)
# colnames(CI) <- c("conf.low", "conf.high")
# ret <- cbind(ret, CI)
# tidy_fit <- tibble::as_tibble(ret)
#
#
#
#
# if (is.numeric(conf.int)) {
# conf.level <- conf.int
# conf.int <- TRUE
# }
# if (conf.int) {
# s <- summary(x, conf.int = conf.level)
# } else {
# s <- summary(x, conf.int = FALSE)
# }
# co <- stats::coef(s)
# if (!is.null(x$frail)) {
# nn <- c("estimate", "std.error", "statistic", "p.value")
# } else if (s$used.robust) {
# nn <- c("estimate", "std.error", "robust.se", "statistic",
# "p.value")
# } else {
# nn <- c("estimate", "std.error", "statistic", "p.value")
# }
# if (is.null(x$frail)) {
# ret <- as_tidy_tibble(co[, -2, drop = FALSE], new_names = nn)
# } else {
# ret <- as_tidy_tibble(co[, -c(3, 5), drop = FALSE], new_names = nn)
# }
# if (exponentiate) {
# ret$estimate <- exp(ret$estimate)
# }
# if (!is.null(s$conf.int)) {
# CI <- as.matrix(unrowname(s$conf.int[, 3:4, drop = FALSE]))
# colnames(CI) <- c("conf.low", "conf.high")
# if (!exponentiate) {
# CI <- log(CI)
# }
# ret <- cbind(ret, CI)
# }
# as_tibble(ret)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.