R/forest_bygroup.R
In PropensitySub: Treatment Effect Estimate in Strata with Missing Data

Documented in forest_bygroup

#' Forest plot: colored by groups
#' @param data data frame. plotting order will be following the order in the data frame.
#' @param summarystat column name that indicates the summary statistics
#' column (e.g. HR, ORR)
#' @param upperci,lowerci column names that indicate the lower and upper CI
#' of the summary statistics
#' @param population.name column name of the column which indicates population names of the
#' summary statistics
#' @param group.name column name of the column which indicates which group each population is in.
#' group names will be shown in the left panel of the forest plot
#' @param color.group.name column name of the column which indicates how to color different groups
#' @param stat.label Y axis label text
#' @param text.column column name of the column which provides texts to be display on the right side
#' of the forest plot. If it is NULL, the text will be generated by pasting summary stat column,
#' the lowerci column and the upperci column
#' @param text.column.addition additional columns to put (will be placed on the right of the figures), could be
#' a vector of multiple column names
#' @param color customized colors to different groups.
#' @param stat.type.hr whether the summary statistics is HR. If so, the forest plot will be centered at 1
#' @param extra.stat column name of the column which indicates the extra statistics to be drawn on
#' the forest plot. The extra statistics will be drawn by "+". Could be NULL
#' @param extra.stat.label label of the extra.stat (to be shown on y axis)
#' @param endpoint.name,study.name text to be shown in title
#' @param draw whether to draw
#' @param shape.column column to pass to adjust shape. Use NULL for none
#' @param shape.vec a vector to sepecify shapes, e.g. c(15, 16).
#' @param log.scale whether show summary statistics in log scale
#'
#' @return return forest plot of class `grob`.
#' @examples  
#' library(dplyr)
#' clinical_1 <- clinical %>% mutate( 
#'   indicator = case_when(
#'     STRATUM == "strata_1" ~ 0, 
#'     STRATUM == "strata_2" ~ 1,
#'     is.na(STRATUM) & ARM == "experimental" ~ 1,
#'     TRUE ~ -1 
#'   ),
#'   ARM  = factor(ARM, levels = c("control","experimental")),
#'   BNLR = case_when(
#'     is.na(BNLR) ~ median(BNLR, na.rm = TRUE),
#'     TRUE ~ BNLR
#'   )
#' ) 
#' ipw_res <- ipw_strata(
#'   data.in = clinical_1, formula = indicator ~ BECOG + SEX + BNLR,
#'   indicator.var = "indicator", tte = "OS_MONTH", event = "OS_EVENT", trt = "ARM",
#'   class.of.int = list("strata_1" = 1, "strata_2" = 0)
#'  )
#' boot_ipw <- bootstrap_propen(
#'   data.in = clinical_1, formula = indicator ~ BECOG + SEX + BNLR,
#'   indicator.var = "indicator", tte = "OS_MONTH", event = "OS_EVENT", trt = "ARM",
#'   class.of.int = list("strata_1" = 1, "strata_2" = 0),
#'   estimate.res = ipw_res, method = "ipw", n.boot = 5
#' )
#' ps_res <- ps_match_strata(
#'   data.in = clinical_1, formula = indicator ~ BECOG + SEX + BNLR,
#'   indicator.var = "indicator", tte = "OS_MONTH", event = "OS_EVENT", trt = "ARM",
#'   class.of.int = list("strata_1" = 1, "strata_2" = 0)
#'  )
#' boot_ps <- bootstrap_propen(
#'   data.in = clinical_1, formula = indicator ~ BECOG + SEX + BNLR, 
#'   indicator.var = "indicator", tte = "OS_MONTH", event = "OS_EVENT", trt = "ARM",
#'   class.of.int = list("strata_1" = 0, "strata_2" = 1),
#'   estimate.res = ps_res, method = "ps", n.boot = 5
#' )
#' boot.out.ipw <- boot_ipw$boot.out.est
#' boot.out.ps <- boot_ps$boot.out.est
#' ipw.ci.mat <- boot_ipw$est.ci.mat
#' ps.ci.mat <- boot_ps$est.ci.mat
#' 
#' data.fp <- data.frame(
#'   HR = round(exp(c(ipw.ci.mat[, 1], ps.ci.mat[, 1])), 2),
#'   LOWER = round(exp(c(ipw.ci.mat[, 2], ps.ci.mat[, 2])), 2),
#'   UPPER = round(exp(c(ipw.ci.mat[, 3], ps.ci.mat[, 3])), 2),
#'   ADA_Group = rep(rownames(ipw.ci.mat), 2),
#'   n = paste("n =", rep(table(clinical_1$indicator)[c("0", "1")], 2)),
#'   Methods_ADA = paste(
#'     rep(c("IPW", "PS"), each = 2), rep(rownames(ipw.ci.mat), 2)
#'   ),
#'   Methods = rep(c("IPW", "PS"), each = 2),
#'   bootstrapHR = c(
#'     boot.out.ipw[grep("HR", rownames(boot.out.ipw)), "Median"],
#'     boot.out.ps[grep("HR", rownames(boot.out.ps)), "Median"]
#'   )
#' )
#' forest_bygroup(
#'   data = data.fp, summarystat = "HR", upperci = "UPPER", lowerci = "LOWER",
#'   population.name = "Methods_ADA", group.name = "Methods",
#'   color.group.name = "ADA_Group", text.column.addition = "n",
#'   stat.label = "Hazard Ratio", text.column = NULL,
#'   stat.type.hr = TRUE, log.scale = FALSE, extra.stat = "bootstrapHR",
#'   extra.stat.label = "bootstrap median",
#'   endpoint.name = "OS", study.name = "Example Study", draw = TRUE
#'  ) 
#' @import grid
#' @import ggplot2
#' @importFrom scales hue_pal
#' @importFrom gtable gtable_add_cols gtable_add_grob
#' @importFrom gridExtra ttheme_default tableGrob
#' @export
#' 


forest_bygroup <- function(data, summarystat, upperci, lowerci,
                           population.name, group.name = population.name, color.group.name = population.name,
                           stat.label = "Hazard Ratio", text.column = NULL, text.column.addition = NULL, color = NULL,
                           stat.type.hr = TRUE, log.scale = FALSE, extra.stat = NULL, extra.stat.label = NULL,
                           endpoint.name = "OS", study.name = "", draw = TRUE,
                           shape.column = NULL, shape.vec = NULL) {
  stopifnot(class(data) == "data.frame")
  data[, population.name] <- factor(data[, population.name], levels = rev(unique(data[, population.name])))
  # make sure factor order follows data point order
  data$statuse <- data[, summarystat]
  if (is.null(color)) color <- hue_pal()(length(unique(data[, color.group.name])))
  if (stat.type.hr) {
    y_upper <- max(data[, upperci], na.rm = TRUE) %>%
      round(1) %>%
      +0.1
    y_lower <- min(data[, lowerci], na.rm = TRUE) %>%
      round(1) %>%
      -0.1
    if ((y_upper - y_lower) <= 2) {
      breaks <- c(seq(y_lower, y_upper, 0.1)) %>% round(1)
    } else {
      breaks <- sort(c(seq(y_lower, y_upper, length.out = 5), 1)) %>% round(1)
    }
  } else {
    y_upper <- ceiling(max(data[, upperci], na.rm = TRUE))
    y_lower <- floor(min(data[, upperci], na.rm = TRUE))
    if (y_lower >= 0) {
      # breaks = c(0, seq(y_lower, y_upper, length.out = 5))
      breaks <- seq(0, y_upper, 5)
    } else {
      brk1 <- seq(0, y_upper, by = 10)
      brk2 <- seq(0, abs(y_lower), by = 10)
      brks <- sort(unique(c(brk1, -brk2, 0)))
      breaks <- brks
      if (y_upper - max(brks) > 3) breaks <- c(brks, y_upper)
      if (min(brks) - y_lower > 3) breaks <- c(y_lower, breaks)
    }
  }


  if (is.null(text.column)) {
    text.column <- paste0(summarystat, " (CI)")
    data[, text.column] <- paste0(
      data[, summarystat], "(", data[, lowerci],
      "-", data[, upperci], ")"
    )
  }


  p <- ggplot(data, aes_string(y = summarystat, x = population.name, color = color.group.name)) +
    # label = format(round(data[,summarystat], 3), nsmall = 3))) +
    geom_point(size = 2, shape = 16) +
    geom_errorbar(aes_string(ymax = upperci, ymin = lowerci), width = 0.4, size = 1) +
    geom_hline(yintercept = ifelse(stat.type.hr, 1.0, 0.0), color = "black", lty = "dotted", size = 1) +
    scale_y_continuous(breaks = breaks) +
    # geom_text(nudge_x = 0.4, size = 3, show.legend = FALSE) +
    theme(
      axis.title.y = element_blank(), text = element_text(size = 14),
      legend.position = "top", legend.title = element_blank(),
      axis.text.y = element_blank(), axis.title = element_text(size = 14, face = "bold")
    ) +
    ggtitle(paste0(endpoint.name, " results of ", study.name)) +
    theme(plot.title = element_text(hjust = 0.5, size = 20)) +
    labs(y = paste0(
      stat.label, " (", endpoint.name, ")",
      ifelse(is.null(extra.stat), "", paste0("\n +: ", extra.stat.label))
    )) +
    coord_flip(xlim = c(1, nrow(data) + 1)) +
    scale_color_manual(values = color)

  if (!is.null(shape.column)) {
    p <- p +
      # show caption instead of legend
      geom_point(aes_string(shape = shape.column),
        size = 5,
        show.legend = FALSE
      )
    if (!is.null(shape.vec)) {
      p <- p +
        scale_shape_manual(values = shape.vec)
    }
  }

  if (!is.null(extra.stat)) p <- p + geom_point(aes_string(x = population.name, y = extra.stat), colour = "black", shape = 3)
  if (log.scale) p <- p + scale_y_log10()

  t_left <- tableGrob(data[, group.name, drop = FALSE],
    rows = NULL,
    theme = ttheme_default(
      core = list(
        bg_params = list(fill = NA, col = NA),
        fg_params = list(cex = 1.0)
      ),
      colhead = list(
        bg_params = list(fill = NA, col = NA),
        fg_params = list(cex = 1.0)
      )
    )
  )
  t_left$heights <- unit(rep(1 / nrow(t_left), nrow(t_left)), "null")
  t_right <- tableGrob(data[, c(text.column, text.column.addition), drop = FALSE],
    rows = NULL,
    theme = ttheme_default(
      core = list(
        bg_params = list(fill = NA, col = NA),
        fg_params = list(cex = 1.0)
      ),
      colhead = list(
        bg_params = list(fill = NA, col = NA),
        fg_params = list(cex = 1.0)
      )
    )
  )
  t_right$heights <- unit(rep(1 / nrow(t_right), nrow(t_right)), "null")
  g <- ggplotGrob(p)
  g <- gtable_add_cols(g, sum(t_left$widths), 0)
  g <- gtable_add_cols(g, sum(t_right$widths), -1)
  # gtable_show_layout(g)
  g <- gtable_add_grob(g, t_left, t = g$layout[g$layout$name == "panel", 1], l = 1)
  g <- gtable_add_grob(g, t_right, t = g$layout[g$layout$name == "panel", 1], l = 10, r = 11)


  if (draw) {
    grid.newpage()
    grid.draw(g)
  }
  invisible(g)
}