Nothing
#' Produces the paired probability plot for two groups
#'
#' The paired probability plot maps the probability of obtaining a specific
#' score for each of two groups. The area under the curve
#' (\code{\link{auc}}) corresponds to the probability that a randomly
#' selected observation from the x-axis group will have a higher score than
#' a randomly selected observation from the y-axis group. This function
#' extends the basic pp-plot by allowing multiple curves and faceting to
#' facilitate a variety of comparisons. Note that because the plotting is
#' built on top of \link[ggplot2]{ggplot2}, additional customization can
#' be made on top of the plots, as illustrated in the examples.
#'
#' @param data The data frame to be plotted
#' @param formula A formula of the type \code{out ~ group} where \code{out} is
#' the outcome variable and \code{group} is the grouping variable. Note this
#' variable can include any arbitrary number of groups. Additional variables
#' can be included with \code{+} to produce separate plots by the secondary or
#' tertiary variable of interest (e.g., \code{out ~ group + characteristic1 +
#' characteristic2}). No more than two additional characteristics can be
#' supplied at this time.
#' @param ref_group Optional character vector (of length 1) naming the
#' reference group. Defaults to the group with the highest mean score.
#' @param cuts Integer. Optional vector (or single number) of scores used to
#' annotate the plot. If supplied, line segments will extend from the
#' corresponding x and y axes and meet at the PP curve.
#' @param cut_labels Logical. Should the reference lines corresponding to
#' \code{cuts} be labeled? Defaults to \code{TRUE}.
#' @param cut_label_x The x-axis location of the cut labels. Defaults to 0.02.
#' @param cut_label_size The size of the cut labels. Defaults to 3.
#' @param lines Logical. Should the PP Lines be plotted? Defaults to
#' \code{TRUE}.
#' @param linetype The \link[ggplot2]{linetype} for the PP lines. Defaults to
#' "solid".
#' @param linewidth The width of the PP lines. Defaults to 1.1 (just
#' marginally larger than the default ggplot2 lines).
#' @param shade Logical. Should the area under the curve be shaded? Defaults to
#' \code{TRUE}.
#' @param shade_alpha Transparency of the shading. Defaults to 0.2.
#' @param refline Logical. Should a diagonal reference line be plotted,
#' representing the value at which no difference is observed between the
#' reference and focal distributions? Defaults to \code{TRUE}.
#' @param refline_col Color of the reference line. Defaults to a dark gray.
#' @param refline_type The \link[ggplot2]{linetype} for the reference line.
#' Defaults to "dashed".
#' @param refline_width The width of the reference line. Defaults to 1, or
#' just slightly thinner than the PP lines.
#' @return A \link[ggplot2]{ggplot2} object displaying the specified PP plot.
#' @export
#' @examples
#' # PP plot examining differences by condition
#' pp_plot(star, math ~ condition)
#'
#' # The sample size gets very small in the above within cells (e.g., wild
#' # changes within the "other" group in particular). Overall, the effect doesn't
#' # seem to change much by condition.
#'
#' # Look at something a little more interesting
#' \dontrun{
#' pp_plot(benchmarks, math ~ ell + season + frl)
#' }
#' # Add some cut scores
#' pp_plot(benchmarks, math ~ ell, cuts = c(190, 210, 215))
#'
#' ## Make another interesting plot. Use ggplot to customize
#' \dontrun{
#' library(tidyr)
#' library(ggplot2)
#' benchmarks %>%
#' gather(subject, score, reading, math) %>%
#' pp_plot(score ~ ell + subject + season,
#' ref_group = "Non-ELL") +
#' scale_fill_brewer(name = "ELL Status", palette = "Pastel2") +
#' scale_color_brewer(name = "ELL Status", palette = "Pastel2") +
#' labs(title = "Differences among English Language Learning Groups",
#' subtitle = "Note crossing of reference line") +
#' theme_minimal()
#' }
#'
pp_plot <- function(data, formula, ref_group = NULL, cuts = NULL,
cut_labels = TRUE, cut_label_x = 0.02, cut_label_size = 3,
lines = TRUE, linetype = "solid", linewidth = 1.1,
shade = TRUE, shade_alpha = 0.2, refline = TRUE,
refline_col = "gray40", refline_type = "dashed",
refline_width = 1.1) {
rhs <- labels(terms(formula))
lhs <- all.vars(formula)[1]
if(is.null(ref_group)) {
group_means <- tapply(data[[lhs]], data[[rhs[1]]], mean, na.rm = TRUE)
ref_group <- names(group_means)[which.max(group_means)]
}
d <- paired_ecdf(data, formula, cuts) %>%
unnest(cols = .data$matched) %>%
filter(!!sym(rhs[1]) == ref_group)
if(length(rhs) == 2) {
d <- filter(d, !!sym(rhs[2]) == !!sym(paste0(rhs[2], 1)))
}
if(length(rhs) == 3) {
d <- filter(d,
!!sym(rhs[2]) == !!sym(paste0(rhs[2], 1)),
!!sym(rhs[3]) == !!sym(paste0(rhs[3], 1)))
}
p <- ggplot(d, aes_(quote(y_foc), quote(y_ref)))
if(shade) {
p <- p +
geom_ribbon(aes_(fill = as.name(paste0(rhs[1], 1)),
ymin = -Inf,
ymax = quote(y_ref)),
alpha = shade_alpha)
}
if(refline) {
p <- p + geom_abline(intercept = 0,
slope = 1,
color = refline_col,
linetype = refline_type,
size = refline_width)
}
if(lines) {
p <- p + geom_line(aes_(color = as.name(paste0(rhs[1], 1))),
linetype = linetype,
size = linewidth)
}
if(!is.null(cuts)) {
cut_data <- d %>%
filter(.data$x %in% cuts)
p <- p +
geom_segment(aes_(x = quote(y_foc),
xend = quote(y_foc),
y = -Inf,
yend = quote(y_ref),
color = as.name(paste0(rhs[1], 1))),
cut_data) +
geom_segment(aes_(x = -Inf,
xend = quote(y_foc),
y = quote(y_ref),
yend = quote(y_ref),
color = as.name(paste0(rhs[1], 1))),
cut_data)
if(cut_labels) {
p <- p +
geom_label(aes_(x = 0.02,
y = quote(y_ref),
label = quote(x)),
cut_data,
size = 3)
}
}
if(length(rhs) == 2) {
p <- p + facet_wrap(as.formula(paste0("~", rhs[2])))
}
if(length(rhs) == 3) {
p <- p + facet_grid(as.formula(paste0(rhs[2], "~", rhs[3])))
}
p + labs(x = "Focal Group",
y = ref_group)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.