R/01_sl_summary.R

Defines functions get_bar_stack get_bar_binom get_box check_length sl_summary

Documented in sl_summary

#' A subject level graphical summary
#'
#' Some systematic visualizations to display the characteristics of the study population
#'
#' @param data dataset with variables to plot. It generally is a subject level data (`adsl`).
#' @param vars the vector of baseline variables of interest to display.
#' @param kinds the type of graphical visualization corresponding to each element of `vars`.
#'   It may be a length one string means that graph will be used for all the varialbes of `vars`.
#'   Otherwise, `kinds` should have the same length of `vars`. In this case, each element of `kinds`, will
#'   be applied to its corresponding element of `vars`. `kinds` may take three options for each element:
#'   \itemize{
#'     \item{"box"}{special box plot, combining with rug plot, for `continuous` variables}
#'     \item{"bar_binom"}{bar plot for categorical variables. It shows only one of the levels.
#'        By default, it selects the high frequency level.
#'        For selecting the other level, you can follow the format
#'        of `list("var" = "level")` in the `vars`. Check the examples to see how it works}
#'     \item{"bar_stack"}{The stack bar chart for binomial or multinomial variables.}
#'    }
#' @param show_plots logical If `TRUE`, it combined all the plots and shows them using \code{\link[patchwork]{wrap_plots}}.
#'   For controlling the number of columns, check `ncol`
#' @param ncol numeric. If `show_plots` is `TRUE`, `ncol` controls the number of column. Default is 1.
#' @param titles the vector of main titles for each graph. By default all is `NA` which means no title.
#' @param trt the column name for planned treatment. Default is `TRT01P`.
#' @param relevel_list A list of vectors that applies a relevel on the `var` levels. It affect the order
#'   of stacked bar plots. It only affects `bar_stack` kinds.
#' @param breaks_vec A list of vectors that breaks the x axis of plots.
#' @param digits integer indicating the number of decimal places to be used. Default is `0`.
#' @return A list of ggplot objects
#' @export
#'
#' @examples
#' \dontrun{
#'
#' load("data/adsl.rda")
#'
#' sl_summary(adsl,
#'           vars = "SEX",
#'           kinds = "bar_binom",
#'           trt = "TRT01P")
#'
#' sl_summary(adsl,
#'           vars = "SEX",
#'           kinds = "bar_binom",
#'           titles = "Female Subjects (%)",
#'           trt = "TRT01P")
#'
#' sl_summary(adsl,
#'           vars = list("SEX" = "M"),  # using `list()` style only works for `bar_binom` kind
#'           kinds = "bar_binom",
#'           titles = "Male Subjects (%)",
#'           trt = "TRT01P")
#'
#'
#' plots <- sl_summary(adsl, vars = c(list("SEX" = "M"), "SEX"),
#'           kinds = c("bar_binom"),
#'           trt = "TRT01P",
#'           show_plots = TRUE,
#'           ncol = 2)
#'
#' plots <- sl_summary(adsl,
#'             vars = c("RACE", "SEX"),
#'             kinds = c("bar_stack"),        # `bar_stack` visualization will be applied for both `SEX` and `RACE`
#'             titles = c("RACE (%)", NA),    # Put `NA` when you don't need to have a main title for your plot
#'             trt = "TRT01P")
#'
#' sl_summary(adsl,
#'            vars = "AGEGR1",
#'            relevel_vec = list(adsl %>% pull(AGEGR1) %>% levels() %>% rev()),
#'            kinds = "bar_stack",        # `bar_stack` visualization will be applied for both `SEX` and `RACE`
#'            titles = "AGE Group 1 (%)",  # Put `NA` when you don't need to have a main title for your plot
#'            trt = "TRT01P")
#'
#' plots <- sl_summary(adsl,
#'             vars = c("SEX", "AGE"),
#'             kinds = c("bar_stack", "box"),
#'             titles = c("Female Subjects (%)", "AGE Groups"),
#'             trt = "TRT01A",
#'             show_plots = FALSE)
#'
#' plots[[1]]
#' patchwork::wrap_plots(plots, ncol = 1)
#'
#' ggplot2::ggsave(
#'        plots[[1]],
#'        filename = here::here("Your_File", "Figure01_sex_f.png"),
#'        width = 4,
#'        height = 2
#'      )
#'
#' }
sl_summary <- function(data, vars, kinds, trt = "TRT01P", titles = NULL, show_plots = TRUE, ncol = 1, relevel_vec = list(NULL), breaks_vec = list(NULL), base_size = 18, digits = 0, ...){


  check_length(vars, kinds)
  check_length(vars, titles)

  if(length(kinds) == 1){
    kinds = rep(kinds, length(vars))
  }

  if(length(titles) == 1){
    titles = rep(titles, length(vars))
  }

  if(length(trt) == 1){
    trt = rep(trt, length(vars))
  }

  if(length(relevel_vec) == 1){
    relevel_vec = rep(relevel_vec, length(vars))
  }

  if(length(breaks_vec) == 1){
    breaks_vec = rep(breaks_vec, length(vars))
  }

  output_objects <- rep(list(NA), length(vars))


  for(i in 1:length(vars)){

    if(kinds[i] == 'box'){
      output_objects[[i]] <- get_box(data, var = vars[i], trt = trt[i], title = titles[i], breaks_vec = breaks_vec[[i]], base_size, digits = digits)
    } else if(kinds[i] == 'bar_binom'){
      output_objects[[i]] <- get_bar_binom(data, var = vars[i], trt = trt[i], title = titles[i], base_size, digits = digits)
    } else if(kinds[i] == 'bar_stack'){
      output_objects[[i]] <- get_bar_stack(data, var = vars[i], trt = trt[i], title = titles[i], base_size, relevel_vec = relevel_vec[[i]], digits = digits)
    }

  }

  output <- if(length(vars) == 1){
    output_objects[[1]]
  } else {
    output_objects
  }

  if(show_plots){
    return(patchwork::wrap_plots(output, ncol = ncol))
  }

  return(output)

}



# utilities =================================================================================


#' Check the length of variables and kinds in the \code{sl_summary()} functions are matched
#' @keywords internal
#' @noRd
check_length <- function(vars, alts){
  if(!is.null(alts) && length(alts)!= 1 && (length(alts) != length(vars))){
    stop(glue::glue("`{alts}` should have length 1 or same as length(vars)."))
  } else {
    return(invisible(NULL))
  }
}



#' Build a ggplot boxplot object for a continuous variable
#'
#' @param data dataframe with variables to plot. It generally is a subject level data (`adsl`).
#' @param var the baseline variable of interest to display
#' @param trt the column name for planned treatment. Default is `TRT01P`.
#' @param title the main title of plot.
#' @param breaks_vec the vector of breaks showing in the x axis.
#' @param digits integer indicating the number of decimal places to be used. Default is `0`.
#'
#' @return ggplot object
#' @keywords internal
#' @noRd
get_box <- function(data, var, trt, title, breaks_vec, base_size = 18, digits = 1) {

  ## Get the variable label for plot axis
  #------------------------------------------------
  all <- sjlabelled::get_label(data)
  #selection <- deparse(substitute(var))
  selection <- var
  xlab <- all[[selection]]

  data %>%
    dplyr::group_by(!!sym(trt), !!sym(var)) %>%
    dplyr::summarize(N = dplyr::n()) %>%
    dplyr::mutate(
      freq = N / sum(N),
      pct = round((freq * 100), digits),
      axis_lab = pct #paste0(pct, "%")
    ) %>%
    group_by(!!sym(trt)) %>%
    mutate(nTRT = sum(N)) %>%
    ungroup() %>%
    # dplyr::mutate(trt_with_label =  paste0(!!sym(trt), "\n", "(n", substr(!!sym(trt),1, 1) %>% toupper(), "=", nTRT,")")) %>%
    dplyr::mutate(trt_with_label =  paste0(!!sym(trt), "\n", "(n", "=", nTRT,")")) %>%
    dplyr::ungroup()


  data_added_labels <- data %>%
    dplyr::group_by(!!sym(trt)) %>%
    dplyr::summarise(nTRT = dplyr::n()) %>%
    dplyr::ungroup() %>%
    #dplyr::mutate(trt_with_label =  paste0(!!sym(trt), "\n", "(n", substr(!!sym(trt),1, 1) %>% toupper(), "=", nTRT,")"))
    dplyr::mutate(trt_with_label =  paste0(!!sym(trt), "\n", "(n", "=", nTRT,")"))


  gg <- data %>%
    select(!!sym(trt), !!sym(var)) %>%
    left_join(data_added_labels %>% select(!!sym(trt), trt_with_label)) %>%
    ggplot(aes(x = !!sym(var),
               y = trt_with_label )) +
    geom_boxplot(alpha = 0.9, width = 0.4) +
    stat_summary(
      fun.y = "mean",
      geom = "point",
      shape = 20,
      size = 4,
      color = "red",
      fill = "white",
      alpha = 0.5
    ) +
    #geom_rug(sides = "b") +
    #theme_light(base_size = 10) +
    theme_minimal(base_size = base_size) +
    # scale_x_continuous(limit = c(min(data[[selection]]), max(data[[selection]])),
    #                    breaks = round(fivenum(data[[selection]]), 0)) +
    xlab(xlab) +
    theme(
      panel.grid.major.y = element_blank(),
      panel.grid.minor = element_blank(),
      axis.text.x = element_text(color = "black"),
      axis.text.y = element_text(color = "black"),
      axis.title = element_blank(),
      plot.title = element_text(hjust = 0.1)
    )

  if(is.null(title)){
    gg <- gg +
          ggtitle(xlab)
  } else {
    gg <- gg +
      ggtitle(title)
  }


  if(is.null(breaks_vec)){
    gg <- gg +
      scale_x_continuous(limit = c(min(data[[selection]]), max(data[[selection]])),
                         breaks = round(fivenum(data[[selection]]), 0))
  } else {
    gg <- gg +
      scale_x_continuous(breaks = breaks_vec)
  }




  return(gg)

}




# sl_summary(adsl,
#            vars = "SEX",
#            kinds = "bar_binom",
#            titles = "Female Subjects (%)",
#            trt = "TRT01P")


#' Build a ggplot bar plot object for a binomial variable
#'
#' @param data dataframe with variables to plot. It generally is a subject level data (`adsl`).
#' @param var the baseline variable of interest to display
#' @param trt the column name for planned treatment. Default is `TRT01P`.
#' @param title the main title of the ggplot
#' @param base_size the \code{base_size} of the ggplot \code{theme_minimal}. Default is 11.
#' @param digits integer indicating the number of decimal places to be used. Default is `0`.
#'
#' @return ggplot object
#' @keywords internal
#' @noRd
get_bar_binom <- function(data, var, trt, title = NA, base_size = 18, digits = 1){

  if(class(var) == 'list' && names(var) != ""){
    var_filter = var[[1]]
    var = names(var)
  } else {
    var <- var %>% unlist()
    var_filter = (data %>% dplyr::count(!!sym(var), sort = T))[1,1] %>% as.character()
  }

  title = if(is.null(title) || is.na(title)){
    glue::glue("{var_filter} subjects (%)")
  } else{
    title
  }


  if(data %>% dplyr::pull(!!sym(var)) %>% unique() %>% length() != 2){
    stop(glue::glue("{var} is not a binary variable."))
  }

  if(is.na(title)){
    title <- NULL
  }


  data %>%
    dplyr::group_by(!!sym(trt), !!sym(var)) %>%
    dplyr::summarize(N = dplyr::n()) %>%
    #ungroup() %>%
    dplyr::mutate(
      freq = N / sum(N),
      pct = round((freq * 100), digits)
    ) %>%
    dplyr::mutate(axis_lab = paste0(pct)) %>%
    group_by(!!sym(trt)) %>%
    mutate(nTRT = sum(N)) %>%
    ungroup() %>%
    #dplyr::mutate(trt_with_label =  paste0(!!sym(trt), "\n", "(n", substr(!!sym(trt),1, 1) %>% toupper(), "=", nTRT,")")) %>%
    dplyr::mutate(trt_with_label =  paste0(!!sym(trt), "\n", "(n", "=", nTRT,")")) %>%
    dplyr::ungroup() %>%
    dplyr::filter(!!sym(var) == var_filter) %>%
    ggplot(aes(x = trt_with_label, y = pct, fill = !!sym(var), label = axis_lab)) +
    geom_hline(yintercept = 0, colour = "wheat4", linetype=1, size=0.6)+
    geom_bar(stat = "identity", size = 0.5, position = position_dodge(width = 1), alpha = 0.8) +
    geom_text(aes(y = pct), position = position_dodge(width = 1), size = 5, vjust = 0, hjust=1.6, color = "white") +
    scale_y_continuous(limit = c(0, 100))+
    theme_minimal(base_size = base_size) +
    coord_flip() +
    xlab("")+
    # ylab("Percentage of subjects (%)") +
    labs(title = title) +
    scale_fill_grey() +
    theme(legend.position = "none",
          panel.grid.major.y = element_blank(),
          panel.grid.minor.y = element_blank(),
          panel.grid.minor.x = element_blank(),
          axis.text.x = element_text(color = "black"),
          axis.text.y = element_text(color = "black"),
          axis.title = element_blank(),
          plot.title = element_text(hjust = 0.1))

}


#' Build a ggplot stacked bar plot object for a binomial or multinomial variable
#'
#' @param data dataframe with variables to plot. It generally is a subject level data (`adsl`).
#' @param var the baseline variable of interest to display
#' @param trt the column name for planned treatment. Default is `TRT01P`.
#' @param title the main title of the ggplot
#' @param base_size the \code{base_size} of the ggplot \code{theme_minimal}. Default is 10.
#' @param relevel_vec A vector of characters that shoes a relevel of the `var` levels. It affect the order
#'   of stacked bar plots.
#' @param digits integer indicating the number of decimal places to be used. Default is `0`.
#'
#' @return ggplot object
#' @keywords internal
#' @noRd
get_bar_stack <- function(data, var, trt, title = NA, base_size = 10, relevel_vec = NULL, digits = 1){

  if(is.na(title)){
    title <- NULL
  }

  if(!is.null(relevel_vec)){
    data <- data %>%
      dplyr::mutate(!!sym(var) := factor(!!sym(var), levels = relevel_vec))
  }


  plot <- data %>%
    dplyr::group_by(!!sym(trt), !!sym(var)) %>%
    dplyr::summarize(N = dplyr::n()) %>%
    dplyr::mutate(
      freq = N / sum(N),
      pct = round((freq * 100), digits),
      axis_lab = pct #paste0(pct, "%")
      ) %>%
    group_by(!!sym(trt)) %>%
    mutate(nTRT = sum(N)) %>%
    ungroup() %>%
    #dplyr::mutate(trt_with_label =  paste0(!!sym(trt), "\n", "(n", substr(!!sym(trt),1, 1) %>% toupper(), "=", nTRT,")")) %>%
    dplyr::mutate(trt_with_label =  paste0(!!sym(trt), "\n", "(n", "=", nTRT,")")) %>%
    dplyr::ungroup() %>%
    ggplot(aes(x = trt_with_label, y = pct, fill = !!sym(var), label = axis_lab, order = !!sym(var))) +
    geom_bar(stat = "identity", position = "stack") +
    scale_y_continuous(limit = c(0, 100))+
    theme_minimal(base_size = base_size) +
    coord_flip() +
    xlab("")+
    ylab("Percentage of subjects (%)") +
    labs(title = title, fill = "") +
    scale_fill_brewer(direction = -1) +
    theme(legend.position = "right",
          panel.grid.major.y = element_blank(),
          panel.grid.minor.y = element_blank(),
          panel.grid.minor.x = element_blank(),
          axis.text.x = element_text(color = "black"),
          axis.text.y = element_text(color = "black"),
          axis.title = element_blank(),
          plot.title = element_text(hjust = 0.5))

  plot

}
DMC21cent/DMC21cent documentation built on May 14, 2023, 5:53 p.m.