R/circ.psa.R
In PSAgraphics: Propensity Score Analysis Graphics

Documented in circ.psa

#' Generates a Propensity Score Assessment Plot
#'
#' Displays a graphic that summarizes outcomes in a propensity score analysis,
#' based on strata that have been defined in the first Phase of a propensity
#' score analysis (PSA).  The graphic displays contributions of individual
#' strata to the overall effect, weighing contributions of individual strata
#' according to the relative sizes of the respective strata. The overall effect
#' is plotted as a heavy dashed diagonal line that runs parallel to the
#' identity diagonal.
#'
#' A circle is plotted for each stratum, centered on the means for the
#' treatment and control groups (for the \code{X} and \code{Y} axes)
#' respectively. The sizes of the circles correspond to the relative sizes of
#' the strata. A diagonal line (lower left to upper right) shows the identity,
#' \code{X=Y}, so that circles on, say, the lower side of this line show that
#' the corresponding \code{X} mean is larger than the \code{Y} mean for that
#' stratum, and vice-versa. Parallel projections are made from the centers of
#' the strata-cum-circles to difference scores that are plotted on a line
#' segment on the lower-left corner of the graphic; the average difference,
#' which corresponds to the average treatment effect (ATE) for the overall
#' treatment effect, is plotted as a heavy (dark blue) dashed line parallel to
#' the identity diagonal. Rug plots are shown on the upper and right margins of
#' the graphic, for the \code{X} and \code{Y} marginal distributions. A 95\%
#' confidence interval for the overall effect is plotted to the left of the
#' distribution of the stratum difference scores, centered on the ATE. Trimmed
#' means can replace the conventional mean for both the ATE and the marginal
#' distributions (however, the confidence interval calculations are likely to
#' become less trustworthy as larger values of the trim argument are used).
#'
#' @param response Either a numeric vector containing the response of interest
#' in a propensity score analysis, or a three column array containing response,
#' treatment and strata.
#' @param treatment Binary variable of same length as \code{response};
#' generally 0 for 'control,' 1 for 'treatment'.  A character vector with two
#' labels or factor with two levels are also accepted.
#' @param strata Generally integer variable; a vector of same length as
#' \code{response} indicating the derived strata from estimated propensity
#' scores. Generally 5 or 6 strata used, but function is effective for more
#' strata. In the case when strata are defined via unique propensity scores (as
#' from a tree), user may wish to define strata using \code{factor}.
#' @param summary Logical (default \code{FALSE}).  If \code{TRUE} then
#' \code{response} must have rows corresponding to number of strata; the first
#' two columns should contain treatment and control group sizes for each
#' stratum, and the pair of columns should contain the appropriate summary
#' statistics for each statum.  For example, the four summary columns might
#' have been generated by the \code{strata.summary} output of \code{loess.psa}.
#' @param statistic A scalar summary of the center of the response
#' distribution. Seen next item below.  Default = "mean".  Note that to
#' generate this statistic the full vector of responses must have been input,
#' not summaries.
#' @param trim Allows for a trimmed mean as outcome measure, where trim is from
#' 0 to .5 (.5 implying median).
#' @param revc Logical; if \code{TRUE} then X and Y axes are interchanged in
#' plot.
#' @param confint Logical; if \code{TRUE} adds an approximate 95\% confidence
#' interval for the mean.  The interval may not be realistic it the \code{trim}
#' argument exceeds zero.
#' @param sw Numerical argument (default = 0.4); extends axes on lower ends,
#' effectively moving circles to lower left.
#' @param ne Numerical argument (default = 0.5); extends axes on upper ends,
#' effectively moving circles to upper right.
#' @param inc Numerical argument (default = 0.35); controls circle sizes, but
#' relative circle sizes are controlled via \code{pw}. In general one wants
#' circle areas to appear subjectively to be sized in accordance with strata
#' sizes.
#' @param pw numerical argument (default = 0.4); controls relative circle
#' sizes. \code{pw} denotes power or exponent for radius of circle.
#' @param lab Logical (default TRUE); labels circles with stratum labels.
#' @param labcex numerical argument (default = 1); controls the size of the
#' circle labels.
#' @param xlab Label for horizontal axis, by default taken from
#' \code{treatment}.
#' @param ylab Label for vertical axis, by default taken from \code{treatment}.
#' @param main Main label for graph.
#' @param mai margin parameters.
#' @return Generate a Propensity Assessment Plot, as well as numerical data for
#' \item{summary.strata}{An array with rows corresponding to strata and four
#' columns; these show counts for control and treatment groups, as well as
#' (possibly trimmed) mean response values for control and treatment.}
#' \item{wtd.Mn.(Name1)}{Weighted mean of response for (Name1) group.  Name
#' taken from \code{treatment}.} \item{wtd.Mn.(Name2)}{Weighted mean of
#' response for (Name2) group.  Name taken from \code{treatment}.}
#' \item{ATE}{Average Treatment Effect.} \item{se.wtd}{Weighted standard error
#' for \code{ATE}} \item{approx.t}{Ratio of the average treatement effect and a
#' standard error based on weighting of stratum variances.} \item{df}{Estimate
#' of degree of freedom; response vector length minus twice number of strata.}
#' \item{CI.95}{Approximate 95\% confidence interval for overall effect size.}
#' @author James E. Helmreich \email{James.Helmreich@@Marist.edu}
#'
#' Robert M. Pruzek \email{RMPruzek@@yahoo.com}
#' @seealso \code{\link{loess.psa}}
#' @keywords hplot
#' @examples
#'
#' ##Random data with effect size 0
#' response <- rnorm(1000)
#' treatment <- sample(c(0,1), 1000, replace = TRUE)
#' strata <- sample(1:6, 1000, replace = TRUE)
#' circ.psa(response, treatment, strata)
#'
#' ##Random data with effect size -.2
#' response <- c(rnorm(500, 0, 12), rnorm(500, 6, 12))
#' treatment <- c(rep(0, 500), rep(1,500))
#' strata <- sample(1:5, 1000, replace = TRUE)
#' aaa <- cbind(response, treatment, strata)
#' circ.psa(aaa)
#'
#' ##Random data with effect size -2
#' response <- c(rt(100,3) * 2 + 20, rt(100,12) * 2 + 18)
#' treatment <- rep(c("A","B"), each = 100)
#' strata <- sample(c("X","Y","Z","U","V"), 200, replace = TRUE)
#' circ.psa(response, treatment, strata)
#'
#' ##Tree derived strata
#' library(rpart)
#' data(lindner)
#' attach(lindner)
#' lindner.rpart <- rpart(abcix ~ stent + height + female + diabetic +
#'      acutemi + ejecfrac + ves1proc, data = lindner, method = "class")
#' lindner.tree<-factor(lindner.rpart$where, labels = 1:6)
#' circ.psa(log(cardbill), abcix, lindner.tree)
#'
#' ##Loess derived strata
#' lindner.ps <- glm(abcix ~ stent + height + female +
#'       diabetic + acutemi + ejecfrac + ves1proc,
#'       data = lindner, family = binomial)
#' ps<-lindner.ps$fitted
#' lindner.loess<-loess.psa(log(cardbill), abcix, ps)
#' circ.psa(lindner.loess$summary.strata[, 1:4], summary = TRUE,
#'       inc = .1, labcex = .7)
#' @export circ.psa
circ.psa <- function(response,
                     treatment = NULL,
                     strata = NULL,
                     summary = FALSE,
                     statistic = "mean",
                     trim = 0,
                     revc = FALSE,
                     confint = TRUE,
                     sw = 0.4,
                     ne = .5,
                     inc = 0.25,
                     pw = 0.4,
                     lab = TRUE,
                     labcex = 1,
                     xlab = NULL,
                     ylab = NULL,
                     main = NULL,
                     mai = c(1, 1.7, 1, 1.7)) {
   # Given 'treatment' (0=Control, 1=Treatment), function calculates 'statistic'
   # for response within strata and treatment levels. Within strata, 'statistic' is
   # plotted as circles centered at (X,Y) = (C-stat,T-stat). The radii of the circles correspond to
   # the sizes of the strata. The X and Y scores are used to generate
   # differences X - Y (default) or as Y - X (if revc = T; see below).
   # The main 45 degree diagonal line (black) corresponds to X = Y, a reference diagonal.
   # Mean of weighted differences corresponds to heavy (blue) dashed line, also with
   # slope = 1. The marginal distribution of the weighted differences is
   # plotted after making (parallel) projections to line segment at the lower
   # part left of the figure.  Marginal distributions of Y & X are given as rug
   # plots, top and right side; the weighted means of these marginal distributions
   # are shown as vertical and horizontal lines. Note that the mean D corresponds
   # to the intersection of means for X and Y.

   # 'statistic' is any univariate function of the response data that returns a single
   #       value, eg "mean", "median" or
   #       "tr.mean" where user has predefined the latter as (say):
   #       tr.mean<-function(x){mean(x,trim=.1)}.  Default is 'mean'
   # 'revc = TRUE' reverses X,Y axes;
   # 'sw' extends axes on lower ends, effectively moving circles to lower left, or southwest.
   # 'ne' extends ases on upper ends, effectively moving circles to upper right, or northeast.
   #       Making both sw and ne smaller moves circles farther apart, both larger moves them closer together.
   # 'inc' controls circle sizes, but RELATIVE circle sizes are controlled via 'pw'. In general one wants
   #       circle areas to appear SUBJECTIVELY to be sized in accordance w/ strata sizes.
   # 'pw'  controls relative circle sizes.
   # 'lab' labels circles with strata number.
   # 'labcex' controls the size of the axes labels.

   #Setting margins, forcing square plot to aid interpretations (saving current settings; restore settings at end).
   op <- par(no.readonly = TRUE)
   on.exit(par(op))
   par(mai = mai, pty = "s")

   #If "response" has three columns, treat as r, t, s.
   if (dim(as.data.frame(response))[2] == 3) {
      treatment   <- response[, 2]
      strata      <- response[, 3]
      response    <- response[, 1]
   }

   tr.mean <- function(x) {
      mean(x, trim = trim)
   }
   if (!trim == 0) {
      statistic <- tr.mean
   }

   if (!summary) {
      n <- length(response)
      nstrat <- dim(table(strata))
      ct.means <- tapply(response, list(strata, treatment), statistic)
      ncontrol <- as.data.frame(table(strata, treatment))[1:nstrat, 3]
      ntreat <-
         as.data.frame(table(strata, treatment))[(nstrat + 1):(2 * nstrat), 3]
      summary.strata <- cbind(ncontrol, ntreat, ct.means)
   } else{
      summary.strata <- response
      ct.means <- response[, 3:4]
      n <- sum(response[, 1:2])
      nstrat <- length(response[, 1])
   }
   wts <- rowSums(summary.strata[, 1:2])

   #Defining variables (reversed if called);
   #difference values for each x,y combination; number of strata
   x <- ct.means[, 1]
   y <- ct.means[, 2]
   if (revc) {
      x <- ct.means[, 2]
      y <- ct.means[, 1]
   }
   d <- y - x

   #Upper and lower bound of plot;
   #Constants 'sw' and 'ne' may need modification to assure a pleasing appearance
   xr <- range(x)
   yr <- range(y)
   min.xy <- min(xr[1], yr[1])
   max.xy <- max(xr[2], yr[2])
   lwb <- min.xy - 1.2 * ne * (max.xy - min.xy)
   upb <- max.xy + .5 * sw * (max.xy - min.xy)

   #Weighted mean difference; standard deviation of strata estimates
   diff.wtd <- sum(d * wts) / n

   #Calculating the weighted st.dev using Conniffe's definition
   #Note: 7/30/8: really calculating the standard error here, though I've called it st.dev.
   if (!summary) {
      o <- order(treatment)
      ord.strata <- strata[o]
      nc <- table(treatment)[1]
      nt <- table(treatment)[2]
      ord.response <- response[o]

      var.0 <- tapply(ord.response[1:nc], ord.strata[1:nc], var)
      ni.0 <- table(ord.strata[1:nc])
      frac.0 <- var.0 / ncontrol

      ncp1 <- nc + 1
      ncpnt <- nc + nt
      var.1 <- tapply(ord.response[ncp1:ncpnt], ord.strata[ncp1:ncpnt], var)
      ni.1 <- table(ord.strata[ncp1:ncpnt])
      frac.1 <- var.1 / ntreat

      se.wtd <- ((sum(frac.0) + sum(frac.1)) ^ .5) / nstrat

      strat.labels <- sort(unique(strata))
   }

   #If data are summarized, can't calculate se.wtd.
   if (summary) {
      se.wtd <- NULL
      strat.labels <- rownames(response)
   }


   #Finding radii and plotting circles
   wtt <- wts / max(wts)
   dfrng <- diff(c(lwb, upb))
   radii <- (abs(dfrng / 20)) * wtt
   symbols(
      x,
      y,
      circles = radii ^ pw,
      inches = inc,
      xlim = c(lwb, upb),
      ylim =
         c(lwb, upb),
      cex = 0.86,
      xlab = "",
      ylab = "",
      main = main,
      lwd = 1.5
   )
   if (lab) {
      for (i in 1:nstrat) {
         legend(
            x[i],
            y[i],
            strat.labels[i],
            bty = "n",
            xjust = .71,
            yjust = .48,
            cex = labcex
         )
      }
   }

   #Y=X line and parallel line representing weighted mean difference
   abline(0, 1, lwd = 2)
   abline(diff.wtd,
          1,
          lwd = 3,
          col = 4,
          lty = 3)

   #Vertical and horizontal lines through X Y means
   wtss <- wts / n
   mnx <- sum(x * wtss)
   mny <- sum(y * wtss)
   mnxy <- (mnx + mny) / 2
   abline(h = mny,
          v = mnx,
          lty = 3,
          col = 2)

   #Dashed lines from circles to perpendicular cross line in lower left corner
   kp <- (3 * lwb + upb) / 4
   ex <- .008 * (upb - lwb)
   for (i in 1:nstrat) {
      segments(kp - d[i] / 2 + ex,
               kp + d[i] / 2 + ex,
               x[i],
               y[i],
               lty = 2,
               lwd = .8)
      points(
         kp - d[i] / 2 + ex,
         kp + d[i] / 2 + ex,
         pch = 3,
         lwd = 1.4,
         col = 4,
         cex = 1.7
      )
   }

   #Perpendicular cross segment: location fixed to lower quarter of plot
   ext <- .025 * (upb - lwb)
   segments((lwb + upb) / 2 + ext, lwb - ext, lwb - ext, (lwb + upb) / 2 + ext, lwd = 2)

   #Rug plots of centers of circles, ie strata means
   rug(x, side = 3)
   rug(y, side = 4)

   #Labels for axes
   dimnamez <- NULL
   dimnamezz <- unlist(dimnames(ct.means)[2])
   dimnamez <- unlist(dimnames(ct.means)[2])
   if (revc)
      dimnamez <- dimnamez[c(2, 1)]
   if (is.null(xlab)) {
      title(xlab = unlist(dimnamez)[1])
   } else{
      title(xlab = xlab)
   }
   if (is.null(ylab)) {
      title(ylab = unlist(dimnamez)[2])
   } else{
      title(ylab = ylab)
   }

   Cname <- unlist(dimnamez)[1]
   Tname <- unlist(dimnamez)[2]

   C.wtd <- mnx
   T.wtd <- mny

   #if(!revc){diff.wtd<-(-1*diff.wtd)}

   approx.t <- diff.wtd / se.wtd
   df <- n - 2 * nstrat

   if (!summary) {
      colnames(summary.strata) <-
         c(
            paste("n.", dimnamezz[1], sep = ""),
            paste("n.", dimnamezz[2], sep = ""),
            paste("means.", dimnamezz[1], sep = ""),
            paste("means.", dimnamezz[2], sep = "")
         )
   }

   #Note: changed out name of diff.wtd to ATE, did not change internal name diff.wtd.

   out <-
      list(
         summary.strata,
         C.wtd,
         T.wtd,
         diff.wtd,
         se.wtd,
         approx.t = approx.t,
         df = df
      )
   names(out) <-
      c(
         "summary.strata",
         paste("wtd.Mn.", Cname, sep = ""),
         paste("wtd.Mn.", Tname, sep = ""),
         "ATE",
         "se.wtd",
         "approx.t",
         "df"
      )

   #Putting the CI below the cross plot
   if (confint) {
      ci.diff <- -diff.wtd
      #if(revc==FALSE)ci.diff<- -ci.diff
      ci <- c(ci.diff - qt(.975, df) * se.wtd,
              ci.diff + qt(.975, df) * se.wtd)
      ci.out <- -ci[c(2, 1)]
      xl <- .75 * lwb + .25 * upb - ext + ci[1] / 2
      yl <- .75 * lwb + .25 * upb - ext - ci[1] / 2
      xu <- .75 * lwb + .25 * upb - ext + ci[2] / 2
      yu <- .75 * lwb + .25 * upb - ext - ci[2] / 2
      segments(xl, yl, xu, yu, lwd = 5, col = "green3")
      segments(
         xl - .05 * ext + .7 * ext / 2,
         yl + .05 * ext + .7 * ext / 2,
         xl - .05 * ext - .7 * ext / 2,
         yl + .05 * ext - .7 * ext / 2,
         lwd = 2,
         col = "green3"
      )
      segments(
         xu + .05 * ext + .7 * ext / 2,
         yu - .05 * ext + .7 * ext / 2,
         xu + .05 * ext - .7 * ext / 2,
         yu - .05 * ext - .7 * ext / 2,
         lwd = 2,
         col = "green3"
      )
      out <-
         list(summary.strata,
              C.wtd,
              T.wtd,
              diff.wtd,
              se.wtd,
              approx.t,
              df,
              ci.out)
      names(out) <-
         c(
            "summary.strata",
            paste("wtd.Mn.", Cname, sep = "", collapse = ""),
            paste("wtd.Mn.", Tname, sep = ""),
            "ATE",
            "se.wtd",
            "approx.t",
            "df",
            "CI.95"
         )

   }


   return(out)
}