R/summary.R

Defines functions .print_summary_internal .summary_internal plot.summary.optweight print.summary.optweight.svy summary.optweight.svy print.summary.optweightMV summary.optweightMV print.summary.optweight summary.optweight

Documented in plot.summary.optweight summary.optweight summary.optweightMV summary.optweight.svy

#' Summarize, print, and plot information about estimated weights
#'
#' These functions summarize the weights resulting from a call to
#' [optweight()] or [optweight.svy()]. `summary()` produces summary statistics on the distribution of weights, including their
#' range and variability, and the effective sample size of the weighted sample
#' (computing using the formula in McCaffrey, Rudgeway, & Morral, 2004). `plot()` creates a histogram of the weights.
#'
#' @param object An `optweight`, `optweightMV`, or `optweight.svy` object; the output of a call to [optweight()] or [optweight.svy()].
#' @param top How many of the largest and smallest weights to display. Default
#' is 5.
#' @param ignore.s.weights Whether or not to ignore sampling weights when
#' computing the weight summary. If `FALSE`, the default, the estimated
#' weights will be multiplied by the sampling weights (if any) before values
#' are computed.
#' @param weight.range `logical`; whether display statistics about the range of weights and the highest and lowest weights for each group. Default is `TRUE`.
#' @param x A `summary.optweight`, `summary.optweightMV`, or `summary.optweight.svy` object; the output of a call to `summary.optweight()`, `summary.optweightMV()`, or ()`summary.optweight.svy`.
#' @param ...  Additional arguments. For `plot()`, additional arguments
#' passed to [graphics::hist()] to determine the number of bins,
#' though [ggplot2::geom_histogram()] from \pkg{ggplot2} is actually
#' used to create the plot.
#'
#' @returns
#' For point treatments (i.e., `optweight` objects),
#' `summary()` returns a `summary.optweight` object with the following
#' elements:
#' \item{weight.range}{The range (minimum and maximum) weight for
#' each treatment group.}
#' \item{weight.top}{The units with the greatest weights
#' in each treatment group; how many are included is determined by `top`.}
#' \item{l2}{The square root of the L2 norm of the estimated weights from the base weights, weighted by the sampling weights (if any): \eqn{\sqrt{\frac{1}{n}\sum_i {s_i(w_i - b_i)^2}}}}
#' \item{l1}{The L1 norm of the estimated weights from the base weights, weighted by the sampling weights (if any): \eqn{\frac{1}{n}\sum_i {s_i \vert w_i - b_i \vert}}}
#' \item{linf}{The L\eqn{\infty} norm (maximum absolute deviation) of the estimated weights from the base weights: \eqn{\max_i {\vert w_i - b_i \vert}}}
#' \item{rel.ent}{The relative entropy between the estimated weights and the base weights (entropy norm), weighted by the sampling weights (if any): \eqn{\frac{1}{n}\sum_i {s_i w_i \log\left(\frac{w_i}{b_i}\right)}}. Only computed if all weights are positive.}
#' \item{num.zeros}{The number of units with a weight equal to 0.}
#' \item{effective.sample.size}{The effective sample size for each treatment group before and after weighting.}
#'
#' For multivariate treatments (i.e., `optweightMV` objects), a list of
#' the above elements for each treatment.
#'
#' For `optweight.svy` objects, a list of the above elements but with no
#' treatment group divisions.
#'
#' `plot()` returns a `ggplot` object with a histogram displaying the
#' distribution of the estimated weights. If the estimand is the ATT or ATC,
#' only the weights for the non-focal group(s) will be displayed (since the
#' weights for the focal group are all 1). A dotted line is displayed at the
#' mean of the weights (the mean of the base weights, or 1 if not supplied).
#'
#' @seealso
#' [plot.optweight()] for plotting the values of the dual variables.
#'
#' @references
#' McCaffrey, D. F., Ridgeway, G., & Morral, A. R. (2004).
#' Propensity Score Estimation With Boosted Regression for Evaluating Causal
#' Effects in Observational Studies. *Psychological Methods*, 9(4), 403–425. \doi{10.1037/1082-989X.9.4.403}
#'
#' @examplesIf requireNamespace("cobalt", quietly = TRUE)
#' library("cobalt")
#' data("lalonde", package = "cobalt")
#'
#' #Balancing covariates between treatment groups (binary)
#' (ow1 <- optweight(treat ~ age + educ + married +
#'                     nodegree + re74, data = lalonde,
#'                   tols = .001,
#'                   estimand = "ATT"))
#'
#' (s <- summary(ow1))
#'
#' plot(s, breaks = 12)

#' @exportS3Method summary optweight
summary.optweight <- function(object, top = 5, ignore.s.weights = FALSE, weight.range = TRUE, ...) {

  chk::chk_flag(ignore.s.weights)
  chk::chk_flag(weight.range)

  if (weight.range) {
    chk::chk_count(top)
  }

  sw <- {
    if (ignore.s.weights || is_null(object$s.weights)) rep_with(1, object$weights)
    else object$s.weights
  }

  bw <- {
    if (is_null(object$b.weights)) rep_with(1, object$weights)
    else object$b.weights
  }

  t <- object$treat

  out <- .summary_internal(t, attr(t, "treat.type"),
                           object$weights, sw, bw,
                           top, weight.range)

  w <- object$weights * sw
  if (is_not_null(object$focal)) {
    w <- w[t != object$focal]
    attr(w, "focal") <- object$focal
  }

  attr(out, "weights") <- w

  class(out) <- "summary.optweight"
  out
}

#' @exportS3Method print summary.optweight
print.summary.optweight <- function(x, ...) {
  cat("Summary of weights:\n\n")

  .print_summary_internal(x, ...)

  invisible(x)
}

#' @rdname summary.optweight
#' @exportS3Method summary optweightMV
summary.optweightMV <- function(object, top = 5, ignore.s.weights = FALSE, weight.range = TRUE, ...) {
  chk::chk_flag(ignore.s.weights)
  chk::chk_flag(weight.range)

  if (weight.range) {
    chk::chk_count(top)
  }

  sw <- {
    if (ignore.s.weights || is_null(object$s.weights)) rep_with(1, object$weights)
    else object$s.weights
  }

  bw <- {
    if (is_null(object$b.weights)) rep_with(1, object$weights)
    else object$b.weights
  }

  out.list <- lapply(object$treat.list, function(t) {
    .summary_internal(t, attr(t, "treat.type"),
                      object$weights, sw, bw,
                      top, weight.range)
  })

  attr(out.list, "weights") <- object$weights * sw

  class(out.list) <- c("summary.optweightMV", "summary.optweight")

  out.list
}

#' @exportS3Method print summary.optweightMV
print.summary.optweightMV <- function(x, ...) {
  only.one <- length(x) == 1L || all_apply(x, identical, x[[1L]])

  cat("Summary of weights:\n\n")
  for (ti in seq_along(x)) {
    if (!only.one) {
      cat(sprintf(" - - - - - - - - - - Treatment %s - - - - - - - - - -\n", ti))
    }

    .print_summary_internal(x[[ti]], ...)

    if (only.one) {
      break
    }
  }

  invisible(x)
}

#' @rdname summary.optweight
#' @exportS3Method summary optweight.svy
summary.optweight.svy <- function(object, top = 5, ignore.s.weights = FALSE, weight.range = TRUE, ...) {
  chk::chk_flag(ignore.s.weights)
  chk::chk_flag(weight.range)

  if (weight.range) {
    chk::chk_count(top)
  }

  sw <- {
    if (ignore.s.weights || is_null(object$s.weights)) rep_with(1, object$weights)
    else object$s.weights
  }

  bw <- {
    if (is_null(object$b.weights)) rep_with(1, object$weights)
    else object$b.weights
  }

  out <- .summary_internal(NULL, "svy", object$weights, sw, bw, top, weight.range)

  attr(out, "weights") <- object$weights * sw

  class(out) <- c("summary.optweight.svy", "summary.optweight")

  out
}

#' @exportS3Method print summary.optweight.svy
print.summary.optweight.svy <- function(x, ...) {
  cat("Summary of weights:\n\n")

  .print_summary_internal(x, ...)

  invisible(x)
}

#' @rdname summary.optweight
#' @exportS3Method plot summary.optweight
plot.summary.optweight <- function(x, ...) {
  w <- attr(x, "weights")
  focal <- attr(w, "focal")

  subtitle <- if (is_not_null(focal)) sprintf("For Units Not in Treatment Group %s", add_quotes(focal))

  ggplot(mapping = aes(x = w)) +
    geom_histogram(breaks = graphics::hist(w, plot = FALSE, ...)$breaks,
                   color = "black",
                   fill = "gray", alpha = .8) +
    scale_y_continuous(expand = expansion(c(0, .05))) +
    geom_vline(xintercept = mean(w), linetype = "12") +
    labs(x = "Weight", y = "Count", title = "Distribution of Weights",
         subtitle = subtitle) +
    theme_bw()
}

.summary_internal <- function(t, treat.type, w, sw, bw, top, weight.range) {
  outnames <- c("weight.range", "weight.top", "weight.ratio",
                "l2", "l1", "linf", "rel.ent", "num.zeros",
                "effective.sample.size")

  out <- make_list(outnames)

  treat.type[treat.type == "multinomial"] <- "multi-category"

  ww <- w * sw

  if (treat.type == "binary") {
    tx <- list(treated = which(t == 1),
               control = which(t == 0))
  }
  else if (treat.type == "multi-category") {
    tx <- lapply(levels(t), function(i) which(t == i)) |>
      setNames(levels(t))
  }
  else {
    tx <- list(all = seq_along(w))
  }

  if (weight.range) {
    out$weight.range <- lapply(tx, function(ti) c(min(ww[ti]), max(w[ti]))) |>
      setNames(names(tx))

    top.weights <- lapply(tx, function(ti) sort(ww[ti], decreasing = TRUE)[seq_len(top)]) |>
      setNames(names(tx))

    out$weight.top <- lapply(names(tx), function(i) {
      sort(setNames(top.weights[[i]], which(ww[tx[[i]]] %in% top.weights[[i]])[seq_len(top)]))
    }) |>
      setNames(names(tx))
  }

  out$l2 <- vapply(tx, function(ti) rms_dev(w[ti], bw = bw[ti], sw = sw[ti]), numeric(1L))
  out$l1 <- vapply(tx, function(ti) mean_abs_dev(w[ti], bw = bw[ti], sw = sw[ti]), numeric(1L))
  out$linf <- vapply(tx, function(ti) max_abs_dev(w[ti], bw = bw[ti], sw = sw[ti]), numeric(1L))
  out$rel.ent <- if (all(w > 0)) vapply(tx, function(ti) rel_ent(w[ti], bw = bw[ti], sw = sw[ti]), numeric(1L))
  out$num.zeros <- vapply(tx, function(ti) sum(ww[ti] == 0), numeric(1L))

  if (treat.type == "binary") {
    nn <- make_df(c("Control", "Treated"),
                  c("Unweighted", "Weighted"))

    nn[["Control"]] <- c(ESS(sw[tx$control]), ESS(ww[tx$control]))
    nn[["Treated"]] <- c(ESS(sw[tx$treated]), ESS(ww[tx$treated]))
  }
  else if (treat.type == "multi-category") {
    nn <- make_df(levels(t),
                  c("Unweighted", "Weighted"))

    for (i in levels(t)) {
      nn[[i]] <- c(ESS(sw[tx[[i]]]), ESS(ww[tx[[i]]]))
    }
  }
  else {
    nn <- make_df("Total",
                  c("Unweighted", "Weighted"))
    nn[["Total"]] <- c(ESS(sw), ESS(ww))
  }

  out$effective.sample.size <- nn

  out
}

.print_summary_internal <- function(x, ...) {
  if (is_not_null(x$weight.range)) {
    cat("- Weight ranges:\n")

    x$weight.range |>
      text_box_plot(width = 28L) |>
      round_df_char(digits = 4, pad = " ") |>
      print.data.frame(...)

    cat(sprintf("\n- Units with %s greatest weights by group:\n",
                length(x$weight.top[[1L]])))

    top <- max(lengths(x$weight.top))
    data.frame(unlist(lapply(names(x$weight.top), function(y) c(" ", y))),
               matrix(unlist(lapply(x$weight.top, function(y) c(names(y), character(top - length(y)),
                                                                round(y, 4), character(top - length(y))))),
                      byrow = TRUE, nrow = 2 * length(x$weight.top))) |>
      setNames(character(1L + top)) |>
      print.data.frame(row.names = FALSE)

    cat("\n")
  }

  matrix(c(x$l2, x$l1, x$linf, x$rel.ent, x$num.zeros),
         nrow = length(x$l2),
         byrow = FALSE,
         dimnames = list(names(x$l2),
                         c("L2", "L1", "L\u221E",
                           "Rel Ent"[is_not_null(x$rel.ent)],
                           "# Zeros"))) |>
    as.data.frame() |>
    round_df_char(digits = 3, pad = " ") |>
    print.data.frame(...)

  cat("\n- Effective Sample Sizes:\n")
  x$effective.sample.size |>
    round_df_char(digits = 2, pad = " ") |>
    print.data.frame(...)
  cat("\n")

  invisible(x)
}

Try the optweight package in your browser

Any scripts or data that you put into this service are public.

optweight documentation built on Sept. 9, 2025, 5:42 p.m.