esci2: Effect Sizes and Confidence Intervals

Documented in plot_mdiff_contrast_bs

#' Makes a plot from an esci_estimate object
#'
#' This function takes an esci_estimate object and makes a plot emphasizing
#' effect sizes
#'
#' @param estimate A esci_estimate object
#' @param rope - list giving reference value, upper, lower, and units for
#'   plotting a rope
#' @param plot_attributes - a esci_plot_attributes object; obtain one with
#'   my_plot_attributes <- esci_plot_attributes()
#' @param spacing - optional number indicating space ot be left between groups,
#'   defaults to 1
#' @param starting_point - optional number indicating starting point on x axis,
#'   defaults to 1
#' @param difference_gap - optional number indicating distance left between last
#'   group and difference arena
#' @param data_layout - optional; if raw data included specifies ggbeeswarm
#'   layout: random (default), swarm, or none
#' @param data_spread - optional; if raw data included controls spacing in
#'   ggbeeswarm layout, higher values more spread out
#' @param error_layout - optional, tells shape to use for expected error
#'   distribution: halfeye (default), eye, gradient, or none
#' @param error_scale - optional number giving width allocated to expected error
#'   distribution
#' @param error_nudge - optional; if raw data, this number specifies offset for
#'   expected error distribution
#' @param error_normalize - optional; the way error distribution height is
#'   normalized in the graph: groups (default), all, panels
#' @param ylim - optional vector of two numeric elements for desired min and max
#'   on y axis, defaults to c(NA, NA)
#' @param breaks - optional suggested number of breaks desired for y axis.
#'   Defaults to 5
#' @param difference_axis_units - plot difference axis in original units or sds
#' @param difference_axis_breaks - otional suggested number of breaks for
#'   the difference axis.  Defaults to 5.
#' @param y.axis.text - optional number giving font size for y-axis markings,
#'   defaults to 10pt
#' @param y.axis.title - optional number giving font size for y-axis labels,
#'   defaults to 12pt
#' @param x.axis.text - optional number giving font size for x-axis markings,
#'   defaults to 10pt
#' @param x.axis.title - optional number giving font size for x-axis labels,
#'   defaults to 12pt
#' @param ylab - optional label for y axis - defaults to outcome_variable_name
#'   if present in estimate$properties
#' @param xlab - optional label for x axis - defaults to grouping_variable_name
#'   if present in estimate$properties
#' @param ggtheme - an optional ggtheme object to apply to the graph.  Defaults
#'   to theme_classic
#'
#' @return Returns an ggplot2 object
#'
#' @export
plot_mdiff_contrast_bs <- function(
  estimate,
  rope = list(
    reference = NULL, 
    upper = NULL, 
    lower = NULL, 
    rope_units = "raw"
  ),
  plot_attributes = NULL,
  spacing = 1,
  starting_point = 1,
  difference_gap = 2,
  data_layout = c("random", "swarm", "none"), 
  data_spread = 0.25,
  error_layout = c("halfeye", "eye", "gradient", "none"),
  error_scale = 0.3,
  error_nudge = 0.35,
  error_normalize = c("groups", "all", "panels"),
  ylim = c(NA, NA),
  breaks = 5,
  difference_axis_units = c("sd", "raw"),
  difference_axis_breaks = 5,
  y.axis.text = 10,
  y.axis.title = 12,
  x.axis.text = 10,
  x.axis.title = 12,
  ylab = "default",
  xlab = "default",
  ggtheme = NULL
) {
  
  # To do ---------------------------------------------------------------
  # Update all data checks
  # Check that effect_size_number is an integer and valid
  # Check that the contrast number is within the number of contrasts in estimate
  # Check all the color parameters 
  
  
  # Input checks ---------------------------------------------------------------
  esci_assert_type(estimate, "is.estimate")
  data_layout <- match.arg(data_layout)
  error_layout <- match.arg(error_layout)
  error_normalize <- match.arg(error_normalize)
  difference_axis_units <- match.arg(difference_axis_units)
  
  plot_attributes <- esci_plot_attributes(plot_attributes)
  
  if(is.null(ggtheme)) { ggtheme <- ggplot2::theme_classic()}
  
  if (is.null(ylab)) {ylab = "default"}
  if (is.null(xlab)) {xlab = "default"}
  
  
  
  # Type of graphs -----------------------------------------------------------
  # Difference graph:
  #  Shows an effect size as a difference, plotting a reference group,
  #    a comparison group, and an estimated difference
  #  Simple difference - only 2 levels in overview, so no need to plot
  #    overview data because fully represented by reference and comparison
  #    groups
  #    Raw data is plotted with effect_size groups
  #    If raw data, summary of effect size offset by error_nudge
  #  Complex difference - more than 2 levels in overview, so plot overview
  #    as well, organizing by type (unused, reference, or comparison)
  #    Raw data is plotted by the overview level
  #    If raw data, summary of overview offset by error_nudge
  # Simple graph:
  #   Just shows what is in effect size table, no group data, no diff axis
  #   Effect sizes are offset by error_nudge if raw data available
  is_difference <- (estimate$properties$effect_size_category == "Difference")
  is_complex_difference <- (is_difference & nrow(estimate$overview) > 2)
  is_means <- (estimate$properties$effect_size_name == "M")
  plot_raw <- (!is.null(estimate$raw_data) & data_layout != "none")
  
  
  # Data prep -----------------------------------------------------------
  # Data prep: effect_size data
  es_data <- estimate$effect_sizes
  
  
  if (is_difference) {
    # Handles and calculations for plotting a difference in effect size
    weights <- estimate$properties$contrast
    
    if(is.null(weights)) {
      comparison_levels <- es_data[es_data$type == "Comparison", "effect"]
      reference_levels <- NULL
    } else {
      comparison_levels <- names(weights)[which(weights > 0)]
      reference_levels <- names(weights)[which(weights < 0)]
    }
    comparison_count <- length(comparison_levels)
    reference_count <- length(reference_levels)
    
        
    # Store comparison and reference value
    #  and difference CI limits and pooled SD
    comparison_value <- 
      es_data[es_data$type == "Comparison", "effect_size"]
    reference_value <- 
      es_data[es_data$type == "Reference", "effect_size"]
    errorLower <- 
      es_data[es_data$type == "Difference", "lower"]
    errorUpper <- 
      es_data[es_data$type == "Difference", "upper"]
    pooled_sd <- 
      es_data[es_data$type == "Difference", "variability_component"]
    
    # Adjust different effect sizes for sake of plotting
    es_data[es_data$type == "Difference", "effect_size"] <- 
      comparison_value
  } else {
    comparison_levels <- NULL
    reference_levels <- NULL
  }
  
  if(!is.null(es_data)) {
    # Reduce down to just the needed columns
    es_data <- data.frame(
      type = es_data$type,
      level = es_data$effect,
      effect_size = es_data$effect_size,
      df = es_data$df,
      se = es_data$se,
      line_end = NA
    )
    
    es_data$nudge <- 0
    if(plot_raw) {
      es_data[es_data$type != "Difference", ]$nudge <- error_nudge
    }
    
    # Set x values
    if(is_difference) {
      if(is_complex_difference) {
        # Reference will come after overview data
        reference_position <- starting_point + 
          (spacing*(nrow(estimate$overview)-1)) +
          difference_gap
      } else {
        # Reference will be at starting position
        reference_position <- starting_point
      }
      # After reference comes comparison and difference
      comparison_position <- reference_position + spacing
      difference_position <- comparison_position + spacing
      es_data$x <- c(
        comparison_position, 
        reference_position, 
        difference_position
      )
    } else {
      # Not plotting a difference, so just a simple sequence
      es_data$x <- seq(
        from = starting_point, 
        by = spacing, 
        length.out = nrow(es_data)
      )
    }
  }
  
  # Data prep: overview data
  if (is_complex_difference | is_means) {
    # For complex differences, we plot overview data as well
    # We also plot overview if it is means with no contrast
    
    # We won't nudge the contrast data over because raw data won't go with it
    if(!is.null(es_data)) es_data$nudge <- 0
    
    
    # Handle and reduction down to needed columns
    group_data <- data.frame(
      type = NA,
      level = estimate$overview$group,
      effect_size = estimate$overview$m,
      df = estimate$overview$df,
      se = estimate$overview$se,
      line_end = NA,
      nudge = ifelse(plot_raw, error_nudge, 0)
    )

    # Kludge to handle single-sample comparison
    group_data[is.na(group_data$df), "se"] <- .Machine$double.xmin
    group_data[is.na(group_data$df), "df"] <- 1
        
    # Set positions for group data, it will come first
    group_data$x <- seq(
      from = starting_point, 
      by = spacing, 
      length.out = nrow(group_data)
    )

    # Set type: Unused (not in contrast), Comparison, or Reference      
    group_data$type <- "Unused"
    if(is_complex_difference) {
      group_data[group_data$level %in% comparison_levels, ]$type <- "Comparison"
      group_data[group_data$level %in% reference_levels, ]$type <- "Reference"
      # Set line end
      group_data[group_data$type == "Reference", ]$line_end  <- 
        reference_position - ifelse(reference_count > 1, spacing/2, 0)
      group_data[group_data$type == "Comparison", ]$line_end <- 
        comparison_position - ifelse(comparison_count > 1, spacing/2, 0)
      
    } 
    
    if(is_means) {
      group_data[nrow(group_data)+1, names(group_data)] <-
        data.frame(type = "Reference", level = "Reference", 
                   effect_size = NA, df = 1, se = .Machine$double.xmin, 
                   line_end = NA, nudge = 0, 
                   x = max(group_data$x)+difference_gap
                   )
      group_data[nrow(group_data)+1, names(group_data)] <-
        data.frame(type = "Comparison", level = "Comparison", 
                   effect_size = NA, df = 1, se = .Machine$double.xmin, 
                   line_end = NA, nudge = 0, 
                   x = max(group_data$x)+difference_gap
        )
      group_data[nrow(group_data)+1, names(group_data)] <-
        data.frame(type = "Difference", level = "Difference", 
                   effect_size = NA, df = 1, se = .Machine$double.xmin, 
                   line_end = NA, nudge = 0, 
                   x = max(group_data$x)+difference_gap
        )
    }
    
    # Now merge with es_data
    es_data <- rbind(group_data, es_data)  
  }
  
  
  # Data prep: raw data
  if(plot_raw) {
    raw_data <- estimate$raw_data
    # Set x position
    raw_data$x <- es_data$x[
      match(unlist(raw_data$grouping_variable), es_data$level)
    ]
    
    if(is_difference) {
      raw_data$type <- "Unused"
      if (!is.null(comparison_levels)) {
        raw_data[
          raw_data$grouping_variable %in% comparison_levels, ]$type <- 
          "Comparison"
      }
      if (!is.null(reference_levels)) {
        raw_data[raw_data$grouping_variable %in% reference_levels, ]$type <- 
          "Reference"
      }
    } else {
      raw_data$type <- "Comparison"
    }
    
    raw_data$type <- as.factor(raw_data$type)
  }
  
  
  # Data prep: Set graph attributes
  for (myattrib in names(plot_attributes)) {
    es_data[myattrib] <- esci_plot_match_attributes(
      es_data$type, plot_attributes[[myattrib]]
    )
    if (plot_raw) {
      raw_data[myattrib] <- esci_plot_match_attributes(
        raw_data$type, plot_attributes[[myattrib]]
      )
    }
  }
  
  
  # Wrapping up data prep, store x limits
  # Store the beginning and end of the x axis
  x_ending <- max(es_data$x, na.rm = TRUE) + 
    spacing + 
    ifelse(plot_raw, error_nudge, 0)
  x_starting <- min(es_data$x, na.rm = TRUE) - spacing
  

  # The actual graph! -------------------------------------------------------
  
  # Baseplot 
  myplot <- ggplot2::ggplot() + ggtheme
  
  
  # For differences, lines to floating axis 
  if(is_difference) {
    # Draw lines to the floating axis
    myplot <- myplot + ggplot2::geom_segment(
      linetype = "dotted",
      colour = "black",
      ggplot2::aes(
        x = reference_position,
        xend = x_ending,
        y = reference_value,
        yend = reference_value
      )
    )
    myplot <- myplot + ggplot2::geom_segment(
      linetype = "dashed",
      colour = "black",
      ggplot2::aes(
        x = comparison_position,
        xend = x_ending,
        y = comparison_value,
        yend = comparison_value
      )
    )
  } 
  
  # Y axis ------------------------------------------
  # Set expansion factor to default of 0.15 if no ylim passed or 0 if passed
  myexpansion <- c(
    ifelse(is.na(ylim[1]), 0.15, 0),
    ifelse(is.na(ylim[2]), 0.15, 0)
  )
  
  # This is a kludge for ggbeeswarm, which breaks if one of the ylims
  #   is NA!
  if (data_layout == "swarm") {
    if (is.na(ylim[1])) {
      ylim[1] <- min(
        c(es_data$effect_size, get0('raw_data$outcome_variable', ifnotfound = NULL)), 
        na.rm = TRUE
      )
      ylim[1] <- ylim[1] - .10*abs(ylim[1])
    } 
    if (is.na(ylim[2])) {
      ylim[2] <- max(
        c(es_data$effect_size, get0('raw_data$outcome_variable', ifnotfound = NULL)),
        na.rm = TRUE
      )
      ylim[2] <- ylim[2] + .1*abs(ylim[2])
    }
  }
  
  
  # For differences, floating axis -----------------------------------------
  if(is_difference) {
    lowest <- min(
      c(
        es_data$effect_size, 
        ylim[1], ylim[2],
        errorLower + reference_value, errorUpper + reference_value,
        get0('raw_data$outcome_variable', ifnotfound = NULL)
      ), 
      na.rm = TRUE
    )
    highest <-  max(
      c(
        es_data$effect_size, 
        ylim[1], ylim[2],
        errorLower + reference_value, errorUpper + reference_value,
        get0('raw_data$outcome_variable', ifnotfound = NULL)
      ),
      na.rm = TRUE
    )
    
    
    
    if ( (errorUpper + reference_value) > reference_value) {
      rawEnd <- errorUpper
      saxisEnd <- ceiling(errorUpper/pooled_sd)
      if (saxisEnd < 1) saxisEnd = 1
    } else {
      rawEnd <- 0
      saxisEnd <- 0
    }
    
    if ( (errorLower + reference_value) < reference_value) {
      rawStart <- errorLower
      saxisStart <- floor(errorLower/pooled_sd)
      if (saxisStart > -1) saxisStart = -1
    } else {
      rawStart <- 0
      saxisStart <- 0
    }
    

    if (difference_axis_units == "raw") {
      saxisBreaks <- pretty(c(rawStart,rawEnd), n = difference_axis_breaks)
      if(! 0 %in% saxisBreaks) {
        saxisBreaks <- sort(c(0, saxisBreaks))
      }
      slabels <- esci_scaleFUN(saxisBreaks)
    } else {
      saxisBreaks <- pretty(c(saxisStart,saxisEnd), n = difference_axis_breaks)
      slabels <- esci_scaleFUN(saxisBreaks)
      saxisBreaks <- saxisBreaks * pooled_sd
    }

    saxisStart <- reference_value + (saxisBreaks[1])
    saxisEnd <- reference_value + (saxisBreaks[length(saxisBreaks)])
    
    daxis_name <- if(difference_axis_units == "raw")
      "<i>M</i><sub>diff</sub>"
    else
      estimate$standardized_effect_size_properties$d_name_html
    # epart <- 
    daxis_name <- gsub("<sub>", "[", daxis_name)
    daxis_name <- gsub("</sub>", "]", daxis_name)
    daxis_name <- gsub("<i>", "italic(", daxis_name)
    daxis_name <- gsub("</i>", ")", daxis_name)
    daxis_name <- parse(text = daxis_name)
    
    # Make the secondary axis floating
    myplot <- myplot + ggplot2::geom_segment(color="black", 
                                             linetype="solid", 
                                             ggplot2::aes(x=x_ending,
                                                          xend=x_ending,
                                                          y=saxisStart,
                                                          yend=saxisEnd
                                             ), 
                                             size=1
    )
    
    label_placement <- (highest-((saxisStart + saxisEnd)/2)) / (highest - lowest)
    
    # Now define the y-axis
    myplot <- myplot + ggplot2::scale_y_continuous(
      limits = ylim,
      n.breaks = breaks,
      expand = ggplot2::expansion(mult = myexpansion),
      sec.axis = ggplot2::sec_axis(
        name = daxis_name,
        trans = ~.-reference_value,
        breaks = saxisBreaks,
        labels = slabels
      )
    )
    
  } else {
    myplot <- myplot + ggplot2::scale_y_continuous(
      limits = ylim,
      n.breaks = breaks,
      expand = ggplot2::expansion(mult = myexpansion)
    )
  }
  
  
  # Contrast lines, for non-simple differences
  if (is_complex_difference) {
    # First, we plot lines from overview to effect size data
    myplot <- myplot + ggplot2::geom_segment(
      data = es_data[!is.na(es_data$line_end), ], 
      ggplot2::aes(x = x + nudge,
                   xend = line_end,
                   y = effect_size,
                   yend = effect_size,
                   colour = I(summary_colour)
      ),
      linetype = "solid"
    )
    # Next, if needed, join these up
    if(comparison_count > 1) {
      comparison_min <- min(
        es_data[es_data$type == "Comparison", ]$effect_size,
        na.rm = TRUE
      )
      comparison_max <- max(
        es_data[es_data$type == "Comparison", ]$effect_size,
        na.rm = TRUE
      )
      comparison_line_end <- comparison_position - spacing/2
      myplot <- myplot + ggplot2::geom_segment(ggplot2::aes(
        x = c(comparison_line_end, comparison_line_end),
        xend = c(comparison_line_end, comparison_position),
        y = c(comparison_min, comparison_value),
        yend = c(comparison_max, comparison_value),
      ),
      linetype = "solid",
      colour = plot_attributes$summary_colour$Comparison
      )
    }
    if(reference_count > 1) {
      reference_min <- min(
        es_data[es_data$type == "Reference", ]$effect_size,
        na.rm = TRUE
      )
      reference_max <- max(
        es_data[es_data$type == "Reference", ]$effect_size,
        na.rm = TRUE
      )
      reference_line_end <- reference_position - spacing/2
      myplot <- myplot + ggplot2::geom_segment(ggplot2::aes(
        x = c(reference_line_end, reference_line_end),
        xend = c(reference_line_end, reference_position),
        y = c(reference_min, reference_value),
        yend = c(reference_max, reference_value)
      ),
      linetype = "solid",
      colour = plot_attributes$summary_colour$Reference
      )
    }
  }
  
  
  # Effect size locations and sampling error 
  e_dist <- estimate$properties$error_distribution
  if (e_dist == "dist_normal") {
    dist_call <- "dist = distributional::dist_normal(
      mu = effect_size, 
      sigma = se
     )"
  } else {
    dist_call <- "dist = distributional::dist_student_t(
      df = df, 
      mu = effect_size, 
      sigma = se
     )"
  }
  
  error_glue <- 
    "myplot <- myplot + {error_call}(
    data = es_data,
    orientation = 'vertical',
    ggplot2::aes(
      x = x+nudge, 
      y = effect_size,
      dist = distributional_wrapper(
        call = e_dist,
        mu = effect_size,
        sigma = se,
        df = df
      ),
      shape = I(summary_shape),
      point_size = I(summary_size),
      point_colour = I(summary_colour),
      point_fill = I(summary_fill),
      point_alpha = I(summary_alpha),
      interval_colour = I(ci_colour),
      interval_alpha = I(ci_alpha),
      interval_size = I(ci_size),
      interval_linetype = I(ci_linetype),
      fill = I(error_fill),
      alpha = I(error_alpha)
    ),
    scale={error_scale},
    .width = c(estimate$properties$conf_level),
    normalize = '{error_normalize}'
    )"
  e_dist <- estimate$properties$error_distribution
  
  error_call <- esci_plot_error_layouts(error_layout)
  error_expression <- parse(text = glue::glue(error_glue))
  myplot <- try(eval(error_expression))
  
  
  # Raw data 
  if (plot_raw) {
    raw_glue <- 
      "myplot <- myplot + {raw_call$call}(
        data = raw_data, 
        ggplot2::aes(
          x = x,
          y = outcome_variable,
          shape = I(data_shape),
          size = I(data_size),
          colour = I(data_colour),
          fill = I(data_fill),
          alpha = I(data_alpha)
        )
        {raw_call$extras}
      )"
    raw_call <- esci_plot_data_layouts(data_layout, data_spread)
    raw_expression <- parse(text = glue::glue(raw_glue))
    myplot <- try(eval(raw_expression))
  }  
  
  
  
  # Remove legend
  myplot <- myplot + ggplot2::theme(
    legend.position = "none", 
    axis.title.x = ggplot2::element_text(size = x.axis.title),
    axis.title.y = ggplot2::element_text(size = y.axis.title),
    axis.text.x = ggplot2::element_text(size = x.axis.text),
    axis.text.y = ggplot2::element_text(size = y.axis.text),
    axis.line.y.right = ggplot2::element_blank(),
    axis.title.y.right = ggplot2::element_text(hjust = label_placement)
  )
  
  # X labels
  if (is_difference) {
    group_labels_spaced <- gsub(" and ", "\nand\n", es_data$level)
    group_labels_spaced[[length(group_labels_spaced)]] <- "Difference"
  } else {
    group_labels_spaced <- es_data$level
  }
  myplot <- myplot + ggplot2::scale_x_continuous(
    limits = c(x_starting, x_ending),
    breaks = es_data$x, 
    labels = group_labels_spaced,
    expand = ggplot2::expansion(add = c(0, 0))
  )
  
  # Axis labels
  myx <- if(xlab == "default") 
    estimate$properties$grouping_variable_name 
  else 
    xlab
  myy <- if(ylab != "default") {
    ylab    
  } else if (is_difference | is_means) {
    estimate$properties$outcome_variable_name 
  } else {
    estimate$properties$effect_size_name
  }
  
  myplot <- myplot + ggplot2::labs(x = myx, y = myy)
  
  return(myplot)
}