# R/ATEcluster.R In experiment: R Package for Designing and Analyzing Randomized Experiments

#### Documented in ATEcluster

#' Estimation of the Average Treatment Effects in Cluster-Randomized
#' Experiments
#'
#' This function estimates various average treatment effect in
#' cluster-randomized experiments without using pre-treatment covariates. The
#' treatment variable is assumed to be binary. Currently, only the matched-pair
#' design is allowed. The details of the methods for this design are given in
#' Imai, King, and Nall (2007).
#'
#'
#' @param Y The outcome variable of interest.
#' @param Z The (randomized) cluster-level treatment variable. This variable
#' should be binary. Two units in the same cluster should have the same value.
#' @param grp A variable indicating clusters of units. Two units in the same
#' cluster should have the same value.
#' @param data A data frame containing the relevant variables.
#' @param match A variable indicating matched-pairs of clusters. Two units in
#' the same matched-pair of clusters should have the same value. The default is
#' \code{NULL} (i.e., no matching).
#' @param weights A variable indicating the population cluster sizes, which
#' will be used to construct weights for each pair of clusters. Two units in
#' the same cluster should have the same value. The default is \code{NULL}, in
#' which case sample cluster sizes will be used for constructing weights.
#' @param fpc A logical variable indicating whether or not finite population
#' correction should be used for estimating the lower bound of CACE variance.
#' This is relevant only when \code{weights} are specified.
#' @return A list of class \code{ATEcluster} which contains the following
#' items: \item{call}{ The matched call.  } \item{n}{ The total number of
#' units.  } \item{n1}{ The total number of units in the treatment group.  }
#' \item{n0}{ The total number of units in the control group.  } \item{Y}{ The
#' outcome variable.  } \item{Y1bar}{ The cluster-specific (unweighted) average
#' value of the observed outcome for the treatment group.  } \item{Y0bar}{ The
#' cluster-specific (unweighted) average value of the observed outcome for the
#' treatment group.  } \item{Y1var}{ The cluster-specific sample variance of
#' the observed outcome for the treatment group.  } \item{Y0var}{ The
#' cluster-specific sample variance of the observed outcome for the control
#' group.  } \item{Z}{ The treatment variable.  } \item{grp}{ The
#' cluster-indicator variable.  } \item{match}{ The matched-pair indicator
#' variable.  } \item{weights}{ The weight variable in its original form.  }
#' \item{est}{ The estimated average treatment effect based on the arithmetic
#' mean weights.  } \item{var}{ The estimated variance of the average treatment
#' effect estimator based on the arithmetic mean weights. This uses the
#' variance formula provided in Imai, King, and Nall (2007).  } \item{var.lb}{
#' The estimated sharp lower bound of the cluster average treatment effect
#' estimator using the arithmetic mean weights.  } \item{est.dk}{ The estimated
#' average treatment effect based on the harmonic mean weights.  }
#' \item{var.dk}{ The estimated variance of the average treatment effect
#' estimator based on the harmonic mean weights. This uses the variance formula
#' provided in Donner and Klar (1993).  } \item{dkvar}{ The estimated variance
#' of the average treatment effect estimator based on the harmonic mean
#' weights. This uses the variance formula provided in Imai, King, and Nall
#' (2007).  } \item{eff}{ The estimated relative efficiency of the matched-pair
#' design over the completely randomized design (the ratio of two estimated
#' variances).  } \item{m}{ The number of pairs in the matched-pair design.  }
#' \item{N1}{ The population cluster sizes for the treatment group.  }
#' \item{N0}{ The population cluster sizes for the control group.  } \item{w1}{
#' Cluster-specific weights for the treatment group.  } \item{w0}{
#' Cluster-specific weights for the control group.  } \item{w}{ Pair-specific
#' normalized arithmetic mean weights. These weights sum up to the total number
#' of units in the sample, i.e., \code{n}.  } \item{w.dk}{ Pair-specific
#' normalized harmonic mean weights. These weights sum up to the total number
#' of units in the sample, i.e., \code{n}.  } \item{diff}{ Within-pair
#' differences if the matched-pair design is analyzed. This equals the
#' difference between \code{Y1bar} and \code{Y0bar}.  }
#' @author Kosuke Imai, Department of Politics, Princeton University
#' \email{[email protected]@Princeton.Edu}, \url{http://imai.princeton.edu};
#' @references Donner, A. and N. Klar (1993). \dQuote{Confidence interval
#' construction for effect measures arising from cluster randomized trials.}
#' Journal of Clinical Epidemiology. Vol. 46, No. 2, pp. 123-131.
#'
#' Imai, Kosuke, Gary King, and Clayton Nall (2007). \dQuote{The Essential Role
#' of Pair Matching in Cluster-Randomized Experiments, with Application to the
#' Mexican Universal Health Insurance Evaluation}, Technical Report. Department
#' of Politics, Princeton University.
#' @keywords design
#' @export ATEcluster
ATEcluster <- function(Y, Z, grp, data = parent.frame(),
match = NULL, weights = NULL, fpc = TRUE) {

call <- match.call()
Y <- eval(call$Y, envir = data) Z <- eval(call$Z, envir = data)
grp <- eval(call$grp, envir = data) match <- eval(call$match, envir = data)
weights <- eval(call$weights, envir = data) n <- length(Y) res <- list(call = call, n = n, Y = Y, Z = Z, grp = grp, match = match, weights = weights) if (is.null(match)) stop("This option is not yet available.") else { res$m <- m <- length(unique(match))
res$Y1bar <- Y1bar <- tapply(Y[Z==1], match[Z==1], mean) res$Y0bar <- Y0bar <- tapply(Y[Z==0], match[Z==0], mean)
res$diff <- diff <- Y1bar-Y0bar res$n1 <- n1 <- tapply(rep(1, sum(Z==1)), match[Z==1], sum)
res$n0 <- n0 <- tapply(rep(1, sum(Z==0)), match[Z==0], sum) } if (is.null(weights)) { ## variance for PATE1 (sampling of clusters) N1 <- w1 <- n1 N0 <- w0 <- n0 } else { ## variance for PATE2 (double sampling) w1 <- N1 <- tapply(weights[Z==1], match[Z==1], mean) w0 <- N0 <- tapply(weights[Z==0], match[Z==0], mean) } w <- w1 + w0 w <- n*w/sum(w) ## estimates ATE.est <- weighted.mean(diff, w) ATE.var <- m*sum((w*diff-n*ATE.est/m)^2)/((m-1)*(n^2)) ## donner&klar methods: w.dk <- w1*w0/(w1 + w0) w.dk <- n*w.dk/sum(w.dk) ATEdk.est <- weighted.mean(diff, w.dk) ATEdk.var <- sum(w.dk^2)*sum(w.dk*(diff-ATEdk.est)^2)/(n^3) ATE.dkvar <- sum(w^2)*sum(w*(diff-ATE.est)^2)/(n^3) ## lower bound for CATE variance if (!is.null(weights)) { Y1var <- tapply(Y[Z==1], match[Z==1], var)/n1 Y0var <- tapply(Y[Z==0], match[Z==0], var)/n0 if (fpc) { Y1var <- (1-n1/N1)*Y1var Y0var <- (1-n0/N0)*Y0var if ((sum(n0 > N0)+sum(n1 > N1))>0) stop("population size is smaller than sample size") } res$Y1var <- Y1var
res$Y0var <- Y0var res$var.lb <- sum((w/n)^2*(Y1var+Y0var))
}
## unbiased estimation
##Y1sum <- tapply(Y[Z==1], match[Z==1], sum)
##Y0sum <- tapply(Y[Z==0], match[Z==0], sum)
##ATE.estU <- 2*(sum(Y1sum)-sum(Y0sum))/n
##ATE.varU <- 4*m*var(Y1sum-Y0sum)/(n^2)

## return the resutls
res$est <- ATE.est res$est.dk <- ATEdk.est
res$var <- ATE.var res$dkvar <- ATE.dkvar
res$var.dk <- ATEdk.var res$eff <- 1/(1-2*cov(w*Y1bar, w*Y0bar)/(var(w*Y1bar)+var(w*Y0bar)))
res$w <- w res$w.dk <- w.dk
if (!is.null(match)) {
res$w1 <- w1 res$w0 <- w0
res$N0 <- N0 res$N1 <- N1
}
class(res) <- "ATEcluster"
return(res)
}


## Try the experiment package in your browser

Any scripts or data that you put into this service are public.

experiment documentation built on April 30, 2018, 5:04 p.m.