#' Function produces a scatter plot of median versus spread across a variable's
#' factor levels.
#'
#' The spread is defined as the difference between the 75th and 25th quartiles.
#' Function returns a named list with a data.table of each level's median, quartile values,
#' and a ggplot2 scatter plot with medians along the x axis and spreads along the y axis.
#'
#' @param df The required target data frame with a measure variable and a factor variable
#' with multiple levels.
#' @param meas_var A required string that names the measure variable from \code{df}.
#' @param factor_var A required string that names the factor variable from \code{df}.
#' @param plot_line_fit A logical which if TRUE plots a line fit between median and spread values.
#' @param plot_log10 A logical which if TRUE will plot log10 values of median versus spread
#' instead of raw values.
#' @param title A string that sets the plot title.
#' @param subtitle A string that sets the plot subtitle.
#' @param caption A string that sets the plot caption
#' @param center_titles A logical which if \code{TRUE} centers both the \code{title} and \code{subtitle}.
#' @param x_title A string that sets the x axis title. If NULL then the x axis title does not appear.
#' @param y_title A string that sets the y axis title. If NULL then the y axis title does not appear.
#' @param x_limits A numeric 2 element vector that sets the minimum
#' and maximum for the x axis.
#' @param x_major_breaks A numeric vector or function that defines
#' the exact major tic locations along the x axis.
#' @param y_limits A numeric 2 element vector that sets the minimum and maximum for the y axis.
#' @param y_major_breaks A numeric vector or function that defines the exact major tic locations along the y axis.
#' @param label_pts A logical which if TRUE will label the plot points.
#'
#' @import ggplot2
#' @importFrom data.table as.data.table
#'
#' @return A named list object with a data.table and ggplot2 scatter plot.
#'
#' @author Rick Dean
#'
#' @export
spread_level_plot <- function(
df,
meas_var,
factor_var,
plot_line_fit = TRUE,
plot_log10 = TRUE,
title = NULL,
subtitle = NULL,
caption = NULL,
center_titles = FALSE,
x_title = "Median",
y_title = "Spread",
x_limits = NULL,
x_major_breaks = waiver(),
y_limits = NULL,
y_major_breaks = waiver(),
label_pts = TRUE
){
dt <- data.table::as.data.table(df)
ans_dt <- dt[,.(
M = stats::median(get(meas_var)),
q_lo = stats::quantile(get(meas_var), probs = 0.25),
q_hi = stats::quantile(get(meas_var), probs = 0.75)),
by = mget(factor_var)]
ans_dt[, `:=`(
spread = q_hi - q_lo,
median_log = log10(M),
spread_log = log10(q_hi - q_lo)
)]
spread_level_lm <- lm(spread_log ~ 0 + median_log, data = ans_dt)
ans_dt[, fitted_val := spread_level_lm$fitted.values]
aes_x <- "M"
aes_y <- "spread"
aes_label <- NULL
if(plot_log10){
aes_x <- "median_log"
aes_y <- "spread_log"
}
if(label_pts){
aes_label = factor_var
}
scatter_plot <- RplotterPkg::create_scatter_plot(
df = ans_dt,
aes_x = aes_x,
aes_y = aes_y,
aes_label = aes_label,
aes_label_size = 3,
aes_label_nudge_y = 0.015,
title = title,
subtitle = subtitle,
caption = caption,
center_titles = center_titles,
x_title = x_title,
y_title = y_title,
x_limits = x_limits,
x_major_breaks = x_major_breaks,
y_limits = y_limits,
y_major_breaks = y_major_breaks
)
if(plot_line_fit){
scatter_plot <- scatter_plot + geom_line(aes(y = fitted_val), color = "red")
}
return(
list(
df = ans_dt,
spread_level_lm = spread_level_lm,
scatter_plot = scatter_plot
)
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.