R/redist_plans.R

Defines functions print.redist_plans rbind.redist_plans dplyr_reconstruct.redist_plans dplyr_row_slice.redist_plans get_mh_acceptance_rate subset_ref subset_sampled get_sampling_info get_n_ref weights.redist_plans get_plans_weights set_plan_matrix as.matrix.redist_plans get_plans_matrix redist_plans reconstruct.redist_plans validate_redist_plans new_redist_plans

Documented in as.matrix.redist_plans get_mh_acceptance_rate get_plans_matrix get_plans_weights get_sampling_info print.redist_plans rbind.redist_plans redist_plans subset_ref subset_sampled weights.redist_plans

##############################################
## Author: Cory McCartan
## Institution: Harvard University
## Date Created: 2021/01/28
## Purpose: redist functions for a tidy workflow
##############################################


# constructors and reconstructors -----------------------------------------


# plans has n_precinct columns and n_sims rows
# map is a redist_map
# algorithm is one of "smc" or "mcmc"
# wgt is the weights before any resampling or truncation
# ... will depend on the algorithm
new_redist_plans <- function(plans, map, algorithm, wgt, resampled = TRUE, ndists = attr(map, "ndists"), ...) {
    n_sims <- ncol(plans)
    if (n_sims < 1) cli_abort("Need at least one simulation draw.")

    n_prec <- nrow(plans)
    map_dists <- attr(map, "ndists")
    partial <- ndists < map_dists

    prec_pop <- map[[attr(map, "pop_col")]]
    if (!partial) {
        distr_range <- 1:ndists
        distr_pop <- pop_tally(plans, prec_pop, ndists)
    } else {
        distr_range <- 1:ndists - 1L
        distr_pop <- pop_tally(plans, prec_pop, ndists)
        pl_tmp <- plans + 1L
        distr_pop <- pop_tally(pl_tmp, prec_pop, ndists)
    }

    attr_names <- c("redist_attr", "plans", "ndists", "algorithm", "wgt",
        "resampled", "ndists", "merge_idx", "prec_pop",
        names(list(...)))

    structure(tibble(draw = rep(as.factor(1:n_sims), each = ndists),
        district = rep(distr_range, n_sims),
        total_pop = as.numeric(distr_pop)),
    plans = plans, ndists = ndists, algorithm = algorithm, wgt = wgt,
    resampled = resampled, merge_idx = attr(map, "merge_idx"),
    prec_pop = prec_pop, redist_attr = attr_names, ...,
    class = c("redist_plans", "tbl_df", "tbl", "data.frame"))
}

validate_redist_plans <- function(x) {
    if (names(x)[1] != "draw") cli_abort("First column must be named \"{.field draw}\"")
    if (!is.factor(x$draw)) cli_abort("{.field draw} column must be a factor")

    plan_m <- attr(x, "plans")
    if (is.null(plan_m)) cli_abort("Missing plans matrix")

    min_distr <- colmin(plan_m)
    max_distr <- colmax(plan_m)
    if (any(min_distr != 1) || any(diff(max_distr) != 0))
        cli_abort("District numbers must start at 1 and run sequentially to the number of districts.")

    x
}

reconstruct.redist_plans <- function(data, old) {
    if (!("draw" %in% names(data)))
        return(data)

    if (!missing(old)) {
        for (name in attr(old, "redist_attr")) {
            if (is.null(attr(data, name)))
                attr(data, name) <- attr(old, name)
        }
    }

    classes <- c("tbl_df", "tbl", "data.frame")
    if (inherits(data, "grouped_df"))
        classes <- c("grouped_df", classes)

    if (inherits(data, "rowwise_df"))
        classes <- c("rowwise_df", classes)

    class(data) <- c("redist_plans", classes)

    data
}

#' A set of redistricting plans
#'
#' A \code{redist_plans} object is essentially a data frame of summary
#' information on each district and each plan, along with the matrix of district
#' assignments and information about the simulation process used to generate the
#' plans.
#'
#' The first two columns of the data frame will be \code{draw}, a factor indexing
#' the simulation draw, and \code{district}, an integer indexing the districts
#' within a plan. The data frame will therefore have \code{n_sims*ndists} rows.
#' As a data frame, the usual \code{\link{dplyr}} methods will work.
#'
#' Other useful methods for \code{redist_plans} objects:
#' * \code{\link{summary.redist_plans}}
#' * \code{\link{add_reference}}
#' * \code{\link{subset_sampled}}
#' * \code{\link{subset_ref}}
#' * \code{\link{pullback}}
#' * \code{\link{number_by}}
#' * \code{\link{match_numbers}}
#' * \code{\link{is_county_split}}
#' * \code{\link{prec_assignment}}
#' * \code{\link{plan_distances}}
#' * \code{\link{get_plans_matrix}}
#' * \code{\link{get_plans_weights}}
#' * \code{\link{get_sampling_info}}
#' * \code{\link{as.matrix.redist_plans}}
#' * \code{\link{plot.redist_plans}}
#'
#' @param plans a matrix with \code{n_precinct} columns and \code{n_sims} rows,
#' or a single vector of precinct assignments.
#' @param map a \code{\link{redist_map}} object
#' @param algorithm the algorithm used to generate the plans (usually "smc" or "mcmc")
#' @param wgt the weights to use, if any.
#' @param ... Other named attributes to set
#'
#' @returns a new \code{redist_plans} object.
#'
#' @examples
#' data(iowa)
#'
#' iowa <- redist_map(iowa, existing_plan = cd_2010, pop_tol = 0.05, total_pop = pop)
#' rsg_plan <- redist.rsg(iowa$adj, iowa$pop, ndists = 4, pop_tol = 0.05)$plan
#' redist_plans(rsg_plan, iowa, "rsg")
#'
#' @md
#' @concept analyze
#' @export
redist_plans <- function(plans, map, algorithm, wgt = NULL, ...) {
    if (is.numeric(plans) && length(plans) == nrow(map)) {
        plans <- matrix(as.integer(plans), ncol = 1)
    }
    if (!is.matrix(plans)) cli_abort("{.arg plans} must be a matrix.")
    if (nrow(plans) != nrow(map)) cli_abort("{.arg plans} matrix must have as many rows as {.arg map} has precincts.")
    if (!inherits(map, "redist_map")) cli_abort("{.arg map} must be a {.cls redist_map}")

    if (min(plans) == 0L) plans <- plans + 1L
    storage.mode(plans) <- "integer"

    obj <- new_redist_plans(plans, map, algorithm, wgt = wgt,
        resampled = FALSE, ...)
    validate_redist_plans(obj)
}


# getters / setters ------------------------------------------------------------


#' Extract the matrix of district assignments from a redistricting simulation
#'
#' @param x the \code{redist_plans} object
#' @param ... ignored
#' @return matrix
#' @concept analyze
#' @export
get_plans_matrix <- function(x) {
    if (!inherits(x, "redist_plans")) cli_abort("Not a {.cls redist_plans}")
    attr(x, "plans")
}
#' @rdname get_plans_matrix
#' @method as.matrix redist_plans
#' @return matrix
#' @export
as.matrix.redist_plans <- function(x, ...) get_plans_matrix(x)

# internal -- no check performed!
set_plan_matrix <- function(x, mat) {
    attr(x, "plans") <- mat
    x
}

#' Extract the sampling weights from a redistricting simulation.
#'
#' May be \code{NULL} if no weights exist (MCMC or optimization methods).
#'
#' @param plans,object the \code{redist_plans} object
#'
#' @returns A numeric vector of weights, with an additional attribute
#' \code{resampled} indicating whether the plans have been resampled according
#' to these weights. If weights have been resampled, this returns the weights
#' before resampling (i.e., they do not correspond to the resampled plans).
#'
#' @concept analyze
#' @export
get_plans_weights <- function(plans) {
    if (!inherits(plans, "redist_plans")) cli_abort("Not a {.cls redist_plans}")
    wgt <- attr(plans, "wgt")
    if (!is.null(wgt))
        attr(wgt, "resampled") <- attr(plans, "resampled")
    wgt
}

#' @rdname get_plans_weights
#' @param ... Ignored.
#' @importFrom stats weights
#' @method weights redist_plans
#' @return numeric vector
#' @export
weights.redist_plans <- function(object, ...) {
    get_plans_weights(object)
}

get_n_ref <- function(x) {
    if (!inherits(x, "redist_plans")) cli_abort("Not a {.cls redist_plans}")
    plans_m <- get_plans_matrix(x)
    if (is.null(colnames(plans_m))) return(0)
    sum(nchar(colnames(plans_m)) > 0)
}

#' Extract the sampling information from a redistricting simulation
#'
#' @param plans the \code{redist_plans} object
#'
#' @returns a list of parameters and information about the sampling problem.
#'
#' @concept analysis
#' @export
get_sampling_info <- function(plans) {
    if (!inherits(plans, "redist_plans")) cli_abort("Not a {.cls redist_plans}")
    all_attr <- attributes(plans)

    all_attr$names <- NULL
    all_attr$row.names <- NULL
    all_attr$class <- NULL
    all_attr$plans <- NULL
    all_attr$redist_attr <- NULL

    all_attr
}

#' Subset to sampled or reference draws
#'
#' @param plans the \code{redist_plans} object
#' @param matrix if \code{TRUE}, the default, also subset the plans matrix. If
#' the plans matrix is not needed, turning this off may save some time.
#'
#' @returns a \code{redist_plans} object, with only rows corresponding to
#' simulated (or reference) draws remaining.
#'
#' @concept analyze
#' @export
subset_sampled <- function(plans, matrix = TRUE) {
    plans_m <- get_plans_matrix(plans)
    if (is.null(colnames(plans_m))) return(plans)

    nm_lengths <- nchar(colnames(plans_m))
    draw_ints <- as.integer(plans$draw)
    idxs <- which(nm_lengths[draw_ints] == 0)

    out <- vctrs::vec_slice(plans, idxs)
    out$draw <- droplevels(out$draw)

    idxs <- which(nm_lengths[unique(draw_ints)] == 0)
    attr(out, "wgt") <- attr(out, "wgt")[idxs]
    if (isTRUE(matrix)) {
        out <- set_plan_matrix(out, plans_m[, idxs, drop = FALSE])
    }

    out
}

#' @rdname subset_sampled
#' @export
subset_ref <- function(plans, matrix = TRUE) {
    plans_m <- get_plans_matrix(plans)
    if (is.null(colnames(plans_m))) return(plans)

    nm_lengths <- nchar(colnames(plans_m))
    draw_ints <- as.integer(plans$draw)
    idxs <- which(nm_lengths[draw_ints] > 0)

    out <- vctrs::vec_slice(plans, idxs)
    out$draw <- droplevels(out$draw)

    idxs <- which(nm_lengths[unique(draw_ints)] > 0)
    attr(out, "wgt") <- attr(out, "wgt")[idxs]
    out <- set_plan_matrix(out, plans_m[, idxs, drop = FALSE])

    out
}

#' Extract the Metropolis Hastings Acceptance Rate
#'
#' @param plans the \code{redist_plans} object
#'
#' @returns a numeric acceptance rate
#'
#' @concept analysis
#' @export
get_mh_acceptance_rate <- function(plans) {
    if (!inherits(plans, "redist_plans")) cli_abort("Not a {.cls redist_plans}")
    alg <- attr(plans, "algorithm")

    if (alg %in% c("flip", "mergesplit")) {
        attr(plans, "mh_acceptance")
    } else {
        NA_real_
    }
}

# generics ----------------------------------------------------------------


#' @method dplyr_row_slice redist_plans
#' @export
dplyr_row_slice.redist_plans <- function(data, i, ...) {
    if (is.logical(i)) i <- which(i)

    draws <- rle(as.integer(data$draw))
    draws$values <- seq_along(draws$values)
    draws_left <- unique(inverse.rle(draws)[i])
    y <- vctrs::vec_slice(data, i)
    plans_m <- get_plans_matrix(data)

    # if we don't have every district present in every row
    # this check is necessary but not sufficient for what we want
    if (length(i) > 0 && "district" %in% colnames(y)) {
        distrs <- table(as.integer(y$district))
        ndists <- max(plans_m[, 1])
        if (any(distrs != distrs[1]) || length(distrs) != ndists)
            cli_warn(c("Some districts may have been dropped. This will prevent summary statistics from working correctly.",
                ">" = "To avoid this message, coerce using {.fun as_tibble}."))
    }

    if (length(draws_left) != ncol(plans_m)) {
        attr(y, "wgt") <- attr(y, "wgt")[draws_left]
        y <- set_plan_matrix(y, plans_m[, draws_left, drop = FALSE])
    }

    if (is.factor(y$draw)) {
        y$draw <- droplevels(y$draw)
    }

    y
}

# 'template' is the old df
#' @method dplyr_reconstruct redist_plans
#' @export
dplyr_reconstruct.redist_plans <- function(data, template) {
    reconstruct.redist_plans(data, template)
}

#' Combine multiple sets of redistricting plans
#'
#' Only works when all the sets are compatible---generated from the same map,
#' with the same number of districts.  Sets of plans will be indexed by the
#' `chain` column.
#'
#' @param ... The [`redist_plans`] objects to combine.  If named arguments are
#' provided, the names will be used in the `chain` column; otherwise, numbers
#' will be used for the `chain` column.
#' @param deparse.level Ignored.
#'
#' @return A new [`redist_plans`] object.
#'
#' @md
#' @concept analyze
#' @export
rbind.redist_plans <- function(..., deparse.level = 1) {
    objs <- rlang::list2(...)
    n_obj <- length(objs)
    if (n_obj == 1) return(objs[[1]])

    # check types
    n_prec <- nrow(get_plans_matrix(objs[[1]]))
    prec_pop <- attr(objs[[1]], "prec_pop")
    ndists <- attr(objs[[1]], "ndists")
    constr <- attr(objs[[1]], "constraints")
    resamp <- attr(objs[[1]], "resampled")
    comp <- attr(objs[[1]], "compactness")
    distr_ord <- is.ordered(objs[[1]]$district)
    for (i in 2:n_obj) {
        if (nrow(get_plans_matrix(objs[[i]])) != n_prec)
            cli_abort("Number of precincts must match for all sets of plans.")
        if (!identical(attr(objs[[i]], "prec_pop"), prec_pop))
            cli_abort("Precinct populations must match for all sets of plans.")
        if (!identical(attr(objs[[i]], "ndists"), ndists))
            cli_abort("Number of districts must match for all sets of plans.")
        if (attr(objs[[i]], "resampled") != resamp)
            cli_abort("Some sets of plans are resampled while others are not.")
        if (!is.null(comp)) {
            if (attr(objs[[i]], "compactness") != comp) {
                cli_warn("Compactness values differ across sets of plans.")
                comp <- NA
            }
        } else {
            if (!is.null(attr(objs[[i]], "compactness"))) {
                cli_warn("Some compactness values were non-NULL. Set to {.val NA}.")
                comp <- NA
            }
        }
        if (!identical(attr(objs[[i]], "constraints"), constr)) {
            cli_inform("Constraints may not match for all sets of plans.")
            constr <- NA
        }
        if (is.ordered(objs[[i]]$district) != distr_ord) {
            cli_abort(c("Some sets of plans have had district numbers matched to a reference plan,
                         while others have not. This may cause problems in analysis.",
                        "i"="Do one of the following:",
                        ">"="Match the district labels on the unmatched plans with
                            {.fn match_numbers} [recommended]",
                        ">"="Convert the matched plans district labels to integers with
                            {.code as.integer(district)}")
            )
        }
    }

    ret <- bind_rows(lapply(objs, dplyr::as_tibble), .id = "chain")
    ret$chain <- as.integer(ret$chain)
    ret <- reconstruct.redist_plans(ret, objs[[1]])
    attr(ret, "compactness") <- comp
    attr(ret, "constraints") <- constr
    attr(ret, "ndists") <- ndists
    attr(ret, "prec_pop") <- prec_pop
    attr(ret, "diagnostics") <- do.call(c, lapply(objs, function(x) attr(x, "diagnostics")))
    attr(ret, "plans") <- do.call(cbind, lapply(objs, function(x) get_plans_matrix(x)))
    attr(ret, "wgt") <- do.call(c, lapply(objs, function(x) get_plans_weights(x)))
    attr(ret, "n_eff") <- sum(do.call(c, lapply(objs, function(x) attr(x, "n_eff"))))

    ret
}


#' Print method for \code{redist_plans}
#'
#' @param x a [redist_plans] object
#' @param \dots additional arguments (ignored)
#'
#' @method print redist_plans
#' @return The original object, invisibly.
#' @export
print.redist_plans <- function(x, ...) {
    plans_m <- get_plans_matrix(x)
    n_ref <- get_n_ref(x)
    n_samp <- ncol(plans_m) - n_ref
    nd <- attr(x, "ndists")
    if (is.null(nd)) nd <- max(plans_m[, 1])

    if (n_ref > 0)
        cli_text("A {.cls redist_plans} containing {n_samp} sampled plan{?s} and
                 {n_ref} reference plan{?s}")
    else
        cli_text("A {.cls redist_plans} containing {n_samp} sampled plan{?s}")

    if (ncol(plans_m) == 0) return(invisible(x))

    alg_name <- c(mcmc = "Flip Markov chain Monte Carlo",
        smc = "Sequential Monte Carlo",
        mergesplit = "Merge-split Markov chain Monte Carlo",
        rsg = "random seed-and-grow",
        crsg = "compact random seed-and-grow",
        enumpart = "Enumpart",
        shortburst = "short bursts",
        none = "a custom collection")[attr(x, "algorithm")]
    if (is.na(alg_name)) alg_name <- "an unknown algorithm"

    cli_text("Plans have {nd} districts from a {nrow(plans_m)}-unit map,
             and were drawn using {alg_name}.")

    merge_idx <- attr(x, "merge_idx")
    if (!is.null(merge_idx))
        cat("Merged from another map with reindexing:",
            utils::capture.output(str(merge_idx, vec.len = 2)), "\n", sep = "")

    if (!is.null(attr(x, "wgt"))) {
        if (attr(x, "resampled"))
            cat("With plans resampled from weights\n")
        else
            cat("With plans not resampled from weights\n")
    }

    cat("Plans matrix:", utils::capture.output(str(plans_m, give.attr = FALSE)),
        "\n", sep = "")

    utils::getS3method("print", "tbl")(x)

    invisible(x)
}

Try the redist package in your browser

Any scripts or data that you put into this service are public.

redist documentation built on April 3, 2023, 5:46 p.m.