R/plot_plans.R

Defines functions redist.plot.trace redist.plot.plans redist.plot.distr_qtys redist.plot.scatter hist.redist_plans redist.plot.hist plot.redist_plans is_const_num

Documented in hist.redist_plans plot.redist_plans redist.plot.distr_qtys redist.plot.hist redist.plot.plans redist.plot.scatter redist.plot.trace

##############################################
## Author: Cory McCartan
## Institution: Harvard University
## Date Created: 2021/01/28
## Purpose: redist functions for a tidy workflow
##############################################


# helper: TRUE if `x is constant within `grps`
is_const_num <- function(x, grps) {
    all(tapply(x, grps, FUN = function(y) diff(range(y)) == 0))
}

#' Summary plots for \code{\\link{redist_plans}}
#'
#' If no arguments are passed, defaults to plotting the sampling weights for
#' the \code{\link{redist_plans}} object. If no weights exist, plots district
#' populations.
#'
#' @param x the \code{redist_plans} object.
#' @param ... passed on to the underlying function
#' @param type the name of the plotting function to use. Will have
#' \code{redist.plot.}, prepended to it; e.g., use \code{type="plans"} to call
#' \code{\link{redist.plot.plans}}.
#'
#' @concept plot
#' @export
plot.redist_plans <- function(x, ..., type = "distr_qtys") {
    if (rlang::dots_n(...) == 0) {
        wgts <- get_plans_weights(subset_sampled(x, matrix = FALSE))
        if (is.null(wgts))
            return(redist.plot.distr_qtys(x, total_pop, size = 0.1))
        n <- length(wgts)
        iqr <- IQR(wgts)
        bins <- min(max(round(diff(range(wgts))/(2*iqr/n^(1/3))), 3), 100)
        if (iqr == 0) bins <- 3

        ggplot(NULL, aes(x = wgts)) +
            geom_histogram(bins = bins) +
            ggplot2::scale_x_continuous(name = "Weights", trans = "log10") +
            ggplot2::labs(y = NULL, title = "Plan weights")
    } else {
        get(paste0("redist.plot.", type))(x, ...)
    }
}

#' Plot a histogram of a summary statistic
#'
#' Plots a histogram of a statistic of a \code{\link{redist_plans}} object,
#' with a reference line for each reference plan, if applicable.
#'
#' @param plans the \code{redist_plans} object.
#' @param qty \code{\link[dplyr:dplyr_data_masking]{<data-masking>}} the statistic.
#' @param bins the number of bins to use in the histogram. Defaults to Freedman-Diaconis rule.
#' @param ... passed on to \code{\link[ggplot2]{geom_histogram}}
#'
#' @returns A ggplot
#'
#' @examples
#' library(dplyr)
#' data(iowa)
#'
#' iowa <- redist_map(iowa, existing_plan = cd_2010, pop_tol = 0.05)
#' plans <- redist_smc(iowa, nsims = 100, silent = TRUE)
#' group_by(plans, draw) %>%
#'     summarize(pop_dev = max(abs(total_pop/mean(total_pop) - 1))) %>%
#'     redist.plot.hist(pop_dev)
#'
#' @concept plot
#' @export
redist.plot.hist <- function(plans, qty, bins = NULL, ...) {
    if (!inherits(plans, "redist_plans")) cli_abort("{.arg plans} must be a {.cls redist_plans}")
    if (missing(qty))
        cli_abort("Must provide a {.arg qty} to make the histogram from.")

    val <- rlang::eval_tidy(rlang::enquo(qty), plans)
    rg <- diff(range(val, na.rm = TRUE))
    is_int <- isTRUE(all.equal(as.integer(val), val)) && rg <= 100
    if (is.null(bins)) {
        if (is_int) {
            bins <- 3*rg + 1
        } else {
            n <- length(val)
            if (is_const_num(val, plans$draw)) {
                n <- n/nrow(get_plans_matrix(plans))
            }
            bins <- ceiling(2.4*n^(1/3)) # Modified Rice rule
        }
    }

    percent <- function(x) sprintf("%1.0f%%", 100*x)
    p <- ggplot(subset_sampled(plans, matrix = FALSE), aes({{ qty }})) +
        ggplot2::geom_histogram(aes(y = ggplot2::after_stat(density*width)), ...,
            boundary = 0.5*is_int, bins = bins) +
        ggplot2::scale_y_continuous(expand = ggplot2::expansion(mult = c(0, 0.05)),
            labels = percent) +
        labs(y = "Fraction of plans")
    if (get_n_ref(plans) > 0)
        p <- p + labs(color = "Plan") +
            ggplot2::geom_vline(aes(xintercept = {{ qty }}, color = .data$draw),
                linewidth = 1.15, data = subset_ref(plans))
    p
}

#' @rdname redist.plot.hist
#' @param x \code{\link[dplyr:dplyr_data_masking]{<data-masking>}} the statistic.
#' @export
hist.redist_plans <- function(x, qty, ...) {
    if (missing(qty))
        cli_abort("Must provide a {.arg qty} to make the histogram from.")
    qty <- rlang::enquo(qty)
    redist.plot.hist(x, !!qty, ...)
}

#' Scatter plot of plan summary statistics
#'
#' Makes a scatterplot of two quantities of interest across districts or plans.
#'
#' @param plans the \code{redist_plans} object.
#' @param x \code{\link[dplyr:dplyr_data_masking]{<data-masking>}} the
#' quantity to plot on the horizontal axis.
#' @param y \code{\link[dplyr:dplyr_data_masking]{<data-masking>}} the
#' quantity to plot on the vertical axis.
#' @param ... passed on to \code{\link[ggplot2:geom_point]{geom_point}}.
#' @param bigger if TRUE, make the point corresponding to the reference plan larger.
#'
#' @returns A ggplot
#'
#' @examples
#' library(dplyr)
#' data(iowa)
#'
#' iowa <- redist_map(iowa, existing_plan = cd_2010, pop_tol = 0.05, total_pop = pop)
#' plans <- redist_smc(iowa, nsims = 100, silent = TRUE)
#' plans %>%
#'     mutate(comp = distr_compactness(iowa)) %>%
#'     group_by(draw) %>%
#'     summarize(pop_dev = max(abs(total_pop/mean(total_pop) - 1)),
#'         comp = comp[1]) %>%
#'     redist.plot.scatter(pop_dev, comp)
#'
#' @concept plot
#' @export
redist.plot.scatter <- function(plans, x, y, ..., bigger = TRUE) {
    if (!inherits(plans, "redist_plans")) cli_abort("{.arg plans} must be a {.cls redist_plans}")

    p <- ggplot(subset_sampled(plans, matrix = FALSE), aes(x = {{ x }}, y = {{ y }})) +
        ggplot2::geom_point(...)
    if (get_n_ref(plans) > 0) {
        p <- p + labs(color = "Plan")
        if (bigger) {
            p <- p + ggplot2::geom_point(aes(color = .data$draw), shape = 15,
                size = 3, data = subset_ref(plans))
        } else {
            p <- p + ggplot2::geom_point(aes(color = .data$draw), shape = 15,
                data = subset_ref(plans))
        }
    }

    p
}

#' Plot quantities by district
#'
#' Plots a boxplot of a quantity of interest across districts, with districts
#' optionally sorted by this quantity. Adds reference points for each reference
#' plan, if applicable.
#'
#' @param plans the \code{redist_plans} object.
#' @param qty \code{\link[dplyr:dplyr_data_masking]{<data-masking>}} the
#' quantity of interest.
#' @param sort set to \code{"asc"} to sort districts in ascending order of
#' \code{qty} (the default), \code{"desc"} for descending order, or
#' \code{FALSE} or \code{"none"} for no sorting.
#' @param geom the `ggplot2` geom to use in plotting the simulated districts: either
#' \code{"jitter"} or \code{"boxplot"}. Can also take in a function, so long as the
#' function accepts `...`.
#' @param color_thresh if a number, the threshold to use in coloring the points.
#' Plans with quantities of interest above the threshold will be colored
#' differently than plans below the threshold.
#' @param size The dot size for \code{geom="jitter"}.
#' @param ref_geom The reference plan geometry type. \code{"line"} or \code{"point"}
#' can be passed for reasonable defaults. Can also take in a function, so long as the
#' function accepts `...`.
#' @param ref_label A human-readable name for the reference plan. By default
#' the name in the `plan` column is used. This can also take in a function which returns
#' a call to `ggplot2::labs()`.
#' @param ... passed on to \code{\link[ggplot2]{geom_boxplot}}
#'
#' @returns A ggplot
#'
#' @section Using `ggdist`:
#'
#' For custom functions in `geom`, we can also create more complicated things like rainclouds
#' using the `ggdist` package. For example:
#'
#' ```r
#' raincloud <- function(...) {
#' list(
#'     ggdist::stat_slab(aes(thickness = ggplot2::after_stat(pdf*n)), scale = 0.7),
#'    ggdist::stat_dotsinterval(side = "bottom", scale = 0.7,
#'                              slab_size = NA, quantiles = 200)
#' )
#' }
#' ```
#' These functions can be then passed to `geom`.
#'
#' @examples
#' library(dplyr)
#' data(iowa)
#'
#' iowa <- redist_map(iowa, existing_plan = cd_2010, pop_tol = 0.05, total_pop = pop)
#' plans <- redist_smc(iowa, nsims = 100, silent = TRUE)
#' plans <- plans %>% mutate(pct_dem = group_frac(iowa, dem_08, tot_08))
#' redist.plot.distr_qtys(plans, pct_dem)
#'
#' # It also takes custom functions:
#' redist.plot.distr_qtys(plans, pct_dem, geom = ggplot2::geom_violin)
#'
#' # With the raincloud example, if you have `ggdist`, you can run:
#' # redist.plot.distr_qtys(plans, pct_dem, geom = raincloud)
#'
#' # The reference geom can also be changed via `reg_geom`
#' r_geom <- function(...) ggplot2::geom_segment(ggplot2::aes(as.integer(.data$.distr_no) - 0.5,
#'                           xend = as.integer(.data$.distr_no) + 0.5,
#'                           yend = pct_dem,
#'                           color = .data$draw),
#'                       linewidth = 1.2, ...)
#'
#'
#'
#' # Finally, the `ref_label` argument can also be swapped for a function, like so:
#' redist.plot.distr_qtys(plans, pct_dem, geom = ggplot2::geom_violin, ref_geom = r_geom,
#'     ref_label = function() ggplot2::labs(color = 'Ref.'))
#'
#' @concept plot
#' @export
redist.plot.distr_qtys <- function(plans, qty, sort = "asc", geom = "jitter",
                                   color_thresh = NULL, size = 0.1, ref_geom,
                                   ref_label, ...) {
    if (!inherits(plans, "redist_plans")) cli_abort("{.arg plans} must be a {.cls redist_plans}")

    if (isFALSE(sort) || sort == "none") {
        plans <- dplyr::group_by(plans, .data$draw) %>%
            dplyr::mutate(.distr_no = as.factor(.data$district))
    } else {
        ord <- if (sort == "asc") 1  else if (sort == "desc") -1 else
            cli_abort("{.arg sort} not recognized: {.code {sort}}")
        plans <- dplyr::group_by(plans, .data$draw) %>%
            dplyr::mutate(.distr_no = as.factor(rank(ord*{{ qty }},
                ties.method = "first")))
    }

    val <- eval_tidy(enquo(qty), plans)
    if (is_const_num(val, plans$draw)) {
        cli_warn(c("{.arg {rlang::as_label(enquo(qty))}} is constant across districts. ",
            "Consider using {.fun hist} instead."))
    }

    pl_samp <- as.data.frame(subset_sampled(plans, matrix = FALSE))
    if (is.null(color_thresh)) {
        p <- ggplot(pl_samp, aes(.data$.distr_no, {{ qty }}))
    } else {
        if (!is.numeric(color_thresh)) cli_abort("{.arg color_thresh} must be numeric.")
        p <- ggplot(pl_samp, aes(.data$.distr_no, {{ qty }},
            color = {{ qty }} >= color_thresh)) +
            ggplot2::guides(color = "none")
    }

    if (is.character(geom)) {
        if (geom == "jitter") {
            geom <- function(...) ggplot2::geom_jitter(size = size, ...)
            if (missing(ref_geom)) {
                if (is.null(color_thresh)) {
                    ref_geom <- function(...) {
                        ggplot2::geom_segment(aes(as.integer(.data$.distr_no) - 0.5,
                                                  xend = as.integer(.data$.distr_no) + 0.5,
                                                  yend = {{ qty }},
                                                  color = .data$draw),
                                              linewidth = 1.2, ...)
                    }
                   if (missing(ref_label))
                       ref_label <- function() labs(color = "Plan", shape = "Plan")
                } else{
                    ref_geom <- function(...) {
                        ggplot2::geom_point(aes(color = .data$draw), shape = 15,
                                            size = 2, ...)
                    }
                    if (missing(ref_label))
                        ref_label <- function() labs(color = "Plan", shape = "Plan")
                }
            }
        } else if (geom == "boxplot") {
           geom <- function(...) ggplot2::geom_boxplot(..., outlier.size = 1)
           if (missing(ref_geom)) {
               if (is.null(color_thresh)) {
                   ref_geom <- function(...) {
                       ggplot2::geom_segment(
                           aes(x = as.integer(.data$.distr_no) - 0.5,
                               xend = as.integer(.data$.distr_no) + 0.5,
                               yend = {{ qty }},
                               color = .data$draw
                           ), ...,
                           linewidth = 1.2)
                   }
                   if (missing(ref_label))
                       ref_label <- function() labs(color = "Plan")
               } else{
                   ref_geom <- function(...) {
                       ggplot2::geom_point(aes(shape = .data$draw), color = "black",
                                           size = 2, ...)
                   }
                   if (missing(ref_label))
                       ref_label <- function() labs(shape = "Plan")
               }
           }
        } else {
            cli_abort("{.arg geom} must be either \"jitter\" or \"boxplot\"")
        }
    }

    p <- p + geom(...)

    if (isFALSE(sort) || sort == "none")
        p <- p + labs(x = "District")
    else
        p <- p + labs(x = "Ordered district")

    if (get_n_ref(plans) > 0) {
        pl_ref <- as.data.frame(subset_ref(plans, matrix = FALSE))
        if (missing(ref_geom))
            ref_geom <- function(...) ggplot2::geom_point(
                aes(color = .data$draw), shape = 15, size = 2, ...
                )
        if (missing(ref_label)) ref_label <- function() labs(color = "Plan", shape = "Plan")
        p <- p + ref_geom(data = pl_ref) + ref_label()
    }

    p
}

#' Plot a district assignment
#'
#' @param plans a \code{redist_plans} object.
#' @param draws the plan(s) to plot. Will match the \code{draw} column of \code{x}.
#' @param geom,shp the \code{redist_map} geometry to use (`geom` is deprecated).
#' @param qty the quantity to plot. Defaults to the district assignment.
#' @param interactive if \code{TRUE}, show an interactive map in the viewer
#' rather than a static map. Only uses the first element of \code{draws}
#' @param ... additional arguments passed to the plotting functions.
#'
#' @returns A ggplot
#'
#' @examples
#' library(dplyr)
#' data(iowa)
#'
#' iowa <- redist_map(iowa, existing_plan = cd_2010, pop_tol = 0.05, total_pop = pop)
#' plans <- redist_smc(iowa, nsims = 100, silent = TRUE)
#' redist.plot.plans(plans, c(1, 2, 3, 4), iowa)
#'
#' @concept plot
#' @export
redist.plot.plans <- function(plans, draws, shp, qty = NULL, interactive = FALSE, ..., geom = NULL) {
    if (!missing(geom)) {
        .Deprecated("shp", old = "geom")
        if (missing(shp)) shp <- geom
    }
    if (!inherits(plans, "redist_plans")) cli_abort("{.arg plans} must be a {.cls redist_plans}")
    m <- get_plans_matrix(plans)
    if (nrow(shp) != nrow(m))
        cli_abort("{.arg plans} and {.arg shp} must have the same number of precincts.")

    if (interactive) {
        if (length(draws) > 1)
            cli_warn("Only first of {.arg draws} plotted when {.arg interactive = TRUE}")

        draw <- draws[1]
        draw_idx <- match(as.character(draw), levels(plans$draw))
        qty <- eval_tidy(enquo(qty), plans[plans$draw == as.character(draw), ])
        if (is.null(qty)) {
            qty <- as.factor(m[, draw_idx])
        } else {
            qty <- qty[m[, draw_idx]]
        }
        return(redist.plot.interactive(shp, fill = qty, ...))
    }

    plot_single <- function(draw) {
        draw_idx <- match(as.character(draw), levels(plans$draw))
        lab <- rlang::quo_text(enquo(qty))
        title <- if (suppressWarnings(is.na(as.numeric(draw)))) draw else paste0("Plan #", draw)

        qty <- eval_tidy(enquo(qty), plans[plans$draw == as.character(draw), ])
        if (is.null(qty)) {
            qty <- as.factor(m[, draw_idx])
        } else {
            qty <- qty[m[, draw_idx]]
        }

        redist.plot.map(shp, fill = qty, fill_label = lab, ...) +
            ggplot2::labs(title = title)
    }

    if (length(draws) == 1) {
        plot_single(draws)
    } else {
        plots <- lapply(draws, plot_single)
        patchwork::wrap_plots(plots)
    }
}

#' Make a traceplot for a summary statistic
#'
#' For a statistic in a \code{\link{redist_plans}} object,
#' make a traceplot showing the evolution of the statistic over MCMC iterations.
#'
#' @param plans the \code{redist_plans} object.
#' @param qty \code{\link[dplyr:dplyr_data_masking]{<data-masking>}} the statistic.
#' @param district for \code{redist_plans} objects with multiple districts,
#' which \code{district} to subset to for plotting. Set to \code{NULL} to
#' perform no subsetting.
#' @param ... passed on to \code{\link[ggplot2]{geom_line}}
#'
#' @returns A ggplot
#'
#' @examples
#' library(dplyr)
#' data(iowa)
#'
#' iowa_map <- redist_map(iowa, existing_plan = cd_2010, pop_tol = 0.05)
#' plans <- redist_mergesplit_parallel(iowa_map, nsims = 200, chains = 2, silent = TRUE) %>%
#'     mutate(dem = group_frac(iowa_map, dem_08, dem_08 + rep_08)) %>%
#'     number_by(dem)
#' redist.plot.trace(plans, dem, district = 1)
#'
#' @concept plot
#' @export
redist.plot.trace <- function(plans, qty, district = 1L, ...) {
    if (!"chain" %in% names(plans)) plans$chain <- 1
    plans <- as.data.frame(plans) %>%
        filter(!is.na(.data$chain)) %>%
        mutate(chain = as.factor(.data$chain)) %>%
        arrange(.data$draw)
    if (!is.null(district) && "district" %in% names(plans)) {
        plans <- plans[plans$district == district, ]
    }

    plans <- group_by(plans, .data$chain) %>%
        mutate(.draw = seq_len(n()))

    ggplot(plans, aes(.data$.draw, {{ qty }}, color = .data$chain)) +
        ggplot2::geom_line(...) +
        ggplot2::labs(x = "Iteration", color = "Chain")
}

utils::globalVariables(c("density", "width", "total_pop"))

Try the redist package in your browser

Any scripts or data that you put into this service are public.

redist documentation built on April 3, 2023, 5:46 p.m.