R/plac.dist.R

Defines functions plac.dist

Documented in plac.dist

#' @title Plot the distribution of placebo samples for synthetic control 
#'     analysis with multiple treated units.
#' @description Takes the output object of \code{\link{multiple.synth}} creates a 
#'     distribution of placebo average treatment effects, to test the 
#'     significance of the observed ATE. Does so by sampling k placebos 
#'     (where k = the number of treated units) nboots times, and calculating 
#'     the average treatment effect of the k placebos each time.
#' @param multiple.synth multiple.synth}{An object returned by the function 
#'    \code{\link{multiple.synth}}
#' @param nboots Number of bootstrapped samples of placebos to take.
#' Default is \code{500}. It should be higher for more reliable inference.
#' @return \describe{
#' \item{p}{The plot.}
#' \item{att.t}{The observed average treatment effect.}
#' \item{df}{Dataframe where each row is the ATT for one bootstrapped placebo 
#'    sample, used to build the distribution plot.}
#' \item{p.value}{Proportion of bootstrapped placebo samples ATTs which are
#' more extreme than the observed average treatment effect. Equivalent to a 
#' p-value in a two-tailed test.}
#' }
#' @examples 
#' \dontrun{
#' ## Using the toy data from Synth:
#' library(Synth)
#' data(synth.data)
#' set.seed(42)
#' ## Run the function similar to the dataprep() setup:
#' multi <- multiple.synth(foo = synth.data,
#'                        predictors = c("X1", "X2", "X3"),
#'                        predictors.op = "mean",
#'                        dependent = "Y",
#'                        unit.variable = "unit.num",
#'                        time.variable = "year",
#'                        treatment.time = 1990,
#'                        special.predictors = list(
#'                          list("Y", 1991, "mean"),
#'                          list("Y", 1985, "mean"),
#'                          list("Y", 1980, "mean")
#'                        ),
#'                        treated.units = c(2,7),
#'                        control.units = c(29, 13, 17, 32),
#'                        time.predictors.prior = c(1984:1989),
#'                        time.optimize.ssr = c(1984:1990),
#'                        unit.names.variable = "name",
#'                        time.plot = 1984:1996, gen.placebos = TRUE, Sigf.ipop = 2,
#'                        strategy = 'multicore' )
#' 
#' ## Plot with the average path of the treated units and the average of their
#' ## respective synthetic controls:
#' 
#' multi$p
#' 
#' ## Bootstrap the placebo units to get a distribution of placebo average
#' ## treatment effects, and plot the distribution with a vertical line 
#' ## indicating the actual ATT:
#' 
#' att.test <- plac.dist(multi)
#' att.test$p
#' }
#' 
#' @export

plac.dist<-function(multiple.synth,
                    nboots = 500){
  
  if(!is.numeric(nboots) || nboots < 10){
    stop("`nboots` is not numeric or is less than 10.\nPlease use a number greater than 10 in order to make valid inferences")
  }
  
  if(!is_tdf_multi(multiple.synth)){
    stop("Please pass a valid `tdf_multi` object the tdf argument.\nThese are generated by the `multiple.synth` function.")
  }
  
  year <- atts <- Time <- NULL
  
  post.treat.t<-subset(multiple.synth$b, 
                       Time > multiple.synth$treatment.time)
  
  att.t<-sqrt(mean((post.treat.t$Treated - post.treat.t$Control)^2))
  
  df.cont<-multiple.synth$df.plac[multiple.synth$df.plac$year > multiple.synth$treatment.time,]
  
  storage.matrix<-matrix(NA, ncol=1, nrow=nboots)
  
  for(i in 1:nboots){
    cs<-sample(c(1:length(multiple.synth$control.units)),
               length(multiple.synth$treated.units), 
               replace =F)
    
    df.cont.temp<-df.cont[,c(c(cs),c(cs+length(multiple.synth$control.units)))]
    
    storage.vector<-matrix(NA,ncol=1,nrow=length(cs))
    
    for(j in 1:length(storage.vector)){
      
      m<-sqrt(mean((df.cont.temp[,j]-df.cont.temp[,j+length(cs)])^2))
      
      storage.vector[j,1]<-m
    }
    storage.matrix[i,1]<-mean(storage.vector)
  }
  storage.matrix<-data.frame(storage.matrix)
  
  colnames(storage.matrix)<-'atts'
  
  p<-ggplot2::ggplot(data=storage.matrix,
                     ggplot2::aes(x = atts))+
    ggplot2::geom_histogram()+
  	ggplot2::ylab(NULL) + 
  	ggplot2::xlab('Distribution of placebo ATTs') +
    ggplot2::geom_vline(xintercept=att.t,size=1, linetype = 'dotted')+
  	ggplot2::scale_x_continuous(breaks = c(pretty(storage.matrix$atts), att.t), 
  															labels = c(pretty(storage.matrix$atts), 'ATT'))+
  	ggplot2::theme(panel.grid.major = ggplot2::element_line(colour = 'gray80'),
  								 panel.grid.minor=ggplot2::element_line(colour='gray90'),
  								 panel.background = ggplot2::element_blank(),
  								 axis.line.x = ggplot2::element_line(colour = 'black'),
  								 axis.line.y = ggplot2::element_line(colour = 'black'),
  								 axis.text.y=ggplot2::element_text(colour='black'),
  								 axis.text.x=ggplot2::element_text(colour='black'),
  								 legend.position='bottom',
  								 legend.key=ggplot2::element_blank()
  	)
  
  p.value = sum(abs(storage.matrix$atts) >= abs(att.t))/nboots
  
    out<-list(p = p, 
              att.t = att.t, 
              df = storage.matrix,
    					p.value = p.value)
  
    return(out)
}

#' @rdname plac.dist
#' @export
plac_dist <- plac.dist

Try the SCtools package in your browser

Any scripts or data that you put into this service are public.

SCtools documentation built on June 9, 2022, 5:06 p.m.