###
### Regression-based method for the ITT effects and the complier average direct effect/spillover effect
###
###
#' Regression-based method for the ITT effects and the complier average direct effect/spillover effect
#'
#'
#' This function computes the point estimates and variance estimates of the direct effect and spillover effect for ITT and CADE/CASE
#'
#'
#' For the details of the method implemented by this function, see the
#' references.
#'
#' @param data A data frame containing the relevant variables. The names for the variables should be: ``Z'' for the treatment assignment, ``D'' for the actual received treatment, ``Y'' for the outcome, ``A'' for the treatment assignment mechanism and ``id'' for the cluster ID. The variable for the cluster id should be a factor.
#' @param assign.prob A double between 0 and 1 specifying the assignment probability to either assignment mechanism.
#' @param ci.level A double between 0 and 1 specifying the confidence interval level to be output.
#' @return A list of class \code{CADEparamreg} which contains the following items:
#' \item{ITT.DE}{ Estimate of direct effect under ITT regresion. }\item{ITT.SE}{ Estimate of spillover effect under ITT regresion. }
#' \item{ITT.DE.CI}{ Confidence itnerval of direct effect under ITT regresion. }
#' \item{ITT.SE.CI}{ Confidence itnerval of spillover effect under ITT regresion. }
#' \item{IV.DE}{ Estimate of direct effect under IV regresion. }\item{IV.SE}{ Estimate of spillover effect under IV regresion. }
#' \item{IV.DE.CI}{ Confidence interval of direct effect under IV regresion. }
#' \item{IV.SE.CI}{ Confidence interval of spillover effect under IV regresion. }
#' \item{IV.DE.CI}{ Confidence interval of direct effect under IV regresion. }
#' \item{ITT.tstat}{ t-stats from ITT regression. }\item{IV.tstat}{ t-stats from IV regression. }
#' \item{ITT.pvals}{ p-values from ITT regression. }\item{IV.pvals}{ p-values from IV regression. }
#'
#' data(india)
#' india$id <- factor(india$id)
#' CADEreg(india, ci.level = 0.90)
#'
#'
#' @author Kosuke Imai, Department of Statistics, Harvard University
#' \email{imai@harvard.edu}, \url{https://imai.fas.harvard.edu/};
#' Zhichao Jiang, School of Public Health and Health Sciences, University of Massachusetts Amherst
#' \email{zhichaojiang@umass.edu};
#' Karissa Huang, Department of Statistics, Harvard College
#' \email{krhuang@college.harvard.edu}
#' @references Kosuke Imai, Zhichao Jiang and Anup Malani (2018).
#' \dQuote{Causal Inference with Interference and Noncompliance in the Two-Stage Randomized Experiments}, \emph{Technical Report}. Department of Politics, Princeton
#' University.
#' @keywords two-stage randomized experiments
#' @importFrom stats qnorm
#' @importFrom stats lm
#' @importFrom stats pt
#'
#' @export CADEparamreg
CADEparamreg<-function(data, assign.prob, ci.level=0.95){
data <- data
## validate ci.level
if(ci.level>1){stop('Please specify the confidence interval as a decimal between 0 and 1.')}
## transform the data into list
if(!is.factor(data$id)){stop('The cluster_id should be a factor variable.')}
cluster.id<-unique(data$id)
n.cluster<-length(cluster.id)
for (i in 1:n.cluster){
if (length(unique(data$A[data$id==cluster.id[i]]))!=1){
stop( paste0('The assignment mechanism in cluster ',i,' should be the same.'))
}
}
A <- tapply(data$A, data$id, mean)
data<-data[!is.na(data$Y),]
n <- tapply(data$Z, data$id, length)
Z_sum <- tapply(data$Z, data$id, sum)
id.remove<- cluster.id[n-Z_sum<=1 | Z_sum<=1]
data<-data[!(data$id %in% id.remove),]
### model-based analysis
## ITT effects
data.ITT<-data
data.ITT$A[data.ITT$A==1]<-assign.prob
data.ITT$A[data.ITT$A==0]<-1-assign.prob
lmITT<-lm(Y~Z+A+Z:A, data=data.ITT)
var.lmITT<-sandwich::vcovHC(lmITT, type = "HC2", cluster = "id", adjust = T)
matrix1<-array(0,dim=c(4,4))
matrix1[1,]<-c(0,1,0,1)
matrix1[2,2]<-1
matrix1[3,c(3,4)]<-1
matrix1[4,3]<-1
effect<-as.numeric(matrix1%*%lmITT$coefficients)
var.effect<-matrix1%*%var.lmITT%*%t(matrix1)
sd.effect<-sqrt(diag(var.effect))
## iv estimate
data.iv<-data
data.iv$propD<-data.iv$D
### create a new variable for the proportion of D
cluster.id<-unique(data.iv$id)
n.cluster<-length(cluster.id)
for (i in 1:n.cluster){
data.iv$propD[data.iv$id==cluster.id[i]]<- mean(data.iv$D[data.iv$id==cluster.id[i]])
}
lmiv<- AER::ivreg(Y~D+propD+D:propD | Z+A+Z:A,data=data.iv)
var.lmiv<-sandwich::vcovHC(lmiv, type = "HC2", cluster = "id", adjust = T)
effect.iv<-as.numeric(matrix1%*%lmiv$coefficients)
var.effect.iv<-matrix1%*%var.lmiv%*%t(matrix1)
sd.effect.iv<-sqrt(diag(var.effect.iv))
## results
ci.tail<-(1-ci.level)/2
qnorm<-qnorm(ci.level+ci.tail)
ITT.effect<-round(effect)
ITT.LCI<-round(effect-qnorm*sd.effect)
ITT.RCI<-round(effect+qnorm*sd.effect)
IV.effect<-round(effect.iv)
IV.LCI<-round(effect.iv-qnorm*sd.effect.iv)
IV.RCI<-ITT.RCI<-round(effect+qnorm*sd.effect)
t.stat.ITT<-summary(lmITT)$coefficients[,3]
p.vals.ITT<-2*pt(t.stat.ITT, sum(n)-2, lower.tail = F)
t.stat.iv<-summary(lmiv)$coefficients[,3]
p.vals.iv<-2*pt(t.stat.iv, sum(n)-2, lower.tail = F)
output <- list(ITT.DE=ITT.effect[c(1,2)], ITT.SE=ITT.effect[c(3,4)], ITT.DE.CI=list(c(ITT.LCI[1], ITT.RCI[1]), c(ITT.LCI[2], ITT.RCI[2])),
ITT.SE.CI=list(c(ITT.LCI[3], ITT.RCI[3]), c(ITT.LCI[4], ITT.RCI[4])),
IV.DE=IV.effect[c(1, 2)], IV.SE=IV.effect[c(3, 4)], IV.DE.CI=list(c(IV.LCI[1], IV.RCI[1]), c(IV.LCI[2], IV.RCI[2])),
IV.SE.CI=list(c(IV.LCI[3], IV.RCI[3]), c(IV.LCI[4], IV.RCI[4])),
ITT.tstat=t.stat.ITT, ITT.pvals=p.vals.ITT, IV.tstat=t.stat.iv, IV.pvals=p.vals.iv)
class(output) <- "parametric"
return(output)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.