csabounds: Bounds on Distributional Treatment Effect Parameters

Documented in cia.bounds compute.csa.bounds compute.DoTTBounds csa.bounds ggCSABounds l.inner u.inner wd.l wd.l.inner wd.u wd.u.inner

## QoTT

#' @title F.Y1
#'
#' @description calculate F(y|ytmin1), the conditional distribution
#'  of treated potential outcomes conditional on ytmin1.  This function
#'  is typically computed internally in the \code{csabounds} package but
#'  is provided here for convenience.
#'
#' @inheritParams csa.bounds
#' @param data the data.frame.  It should contain colums called "Y1t" and
#'  "Y0tmin1" which correspond to treated potential outcomes in the third
#'  period and untreated potential outcomes in the second period
#' @param retF whether or not to return the distribution function itself;
#'  the default is \code{TRUE}.  If false, it returns a \code{distreg::DR} object
#' @param yvals Sequence of values to compute distributions over
#'  (currently not used as these are determined internally)
#' @param tvals Sequence of values to compute conditional distribution over
#'  (currently not used as these are determined internally)
#'
#' @return distribution F(y|ytmin1)
#'
#' @export
F.Y1 <- function(firststep=c("dr","qr","ll"), xformla, data, yvals, tvals, h=NULL, retF=TRUE) {

    firststep <- firststep[1]
    formla <- BMisc::toformula("Y1t", c("Y0tmin1" , BMisc::rhs.vars(xformla)))
    if (firststep=="dr") {
        dr <- distreg::distreg(formla, data=data, yvals=data$Y1t)
    } else if (firststep=="ll") {
        dr <- distreg::lldistreg(formla, xformla=xformla, data=data, yvals=data$Y1t,
                                       tvals=data$Y0tmin1, h=h)
    } else {
        dr <- quantreg::rq(formla, data=data, tau=seq(.01,.99,.01))
    }

    if (retF) {
        F.y1 <- distreg::Fycondx(dr, data$Y1t, xdf=model.frame(formla, data=data))
        F.y1 ## list with n_1 distribution functions
    } else {
        return(list(dr=dr, yvals=data$Y1t))
    }
}

#' @title F.Y0
#'
#' @description compute F(y|ytmin1) where F is the conditional
#'  distribution of untreated potential outcomes for the treated group
#'  conditional on ytmin1.  This is computed under the copula
#'  stability assumption.  This function
#'  is typically computed internally in the \code{csabounds} package but
#'  is provided here for convenience.  
#'
#' @inheritParams F.Y1
#' @param retZ whether or not to return the distribution of the transformed
#'  random variables due to the copula stability assumption.  This is mainly
#'  used in the numerical bootstrap procedure and, therefore, the default is
#'  \code{FALSE}.
#' @return distribution F(y|ytmin1)
#'
#' @export
F.Y0 <- function(firststep=c("dr","qr","ll"), xformla, data, yvals, tvals, h=NULL, retF=TRUE, retZ=FALSE) {

    ## "adjust" the outcomes under copula stability assumption
    ## here, some things are hard-coded, in particular, using first step quantile regression...
    n <- nrow(data)
    firststep <- firststep[1]
    qformla <- BMisc::toformula("y0t", BMisc::rhs.vars(xformla))
    QR0t <- quantreg::rq(qformla, tau=seq(.01,.99,.01), data=data) ## tau hard-coded...
    QR0tQ <- predict(QR0t, newdata=data, type="Qhat", stepfun=TRUE)
    qformla <- BMisc::toformula("Y0tmin1", BMisc::rhs.vars(xformla))
    QR0tmin1 <- quantreg::rq(qformla, tau=seq(.01,.99,.01), data=data)
    QR0tmin1F <- predict(QR0tmin1, newdata=data, type="Fhat", stepfun=TRUE)
    ## "adjusted" outcome in period t
    Zt <- sapply(1:n, function (i) QR0tQ[[i]](QR0tmin1F[[i]](data$Y0tmin1[i])))
    QR0tmin1Q <- predict(QR0tmin1, newdata=data, type="Qhat", stepfun=TRUE)
    QR0tmin2 <- quantreg::rq(qformla, tau=seq(.01,.99,.01), data=data)
    QR0tmin2F <- predict(QR0tmin2, newdata=data, type="Fhat", stepfun=TRUE)
    ## "adjusted" outcome in period tmin1
    Ztmin1 <- sapply(1:n, function(i) QR0tmin1Q[[i]](QR0tmin2F[[i]](data$Y0tmin2[i])))

    dd <- data.frame(Zt=Zt,Ztmin1=Ztmin1)
    dd <- cbind.data.frame(dd, data)
    formla <- BMisc::toformula("Zt", c("Ztmin1", BMisc::rhs.vars(xformla)))
    if (firststep=="dr") {
        dr <- distreg::distreg(formla, data=dd, yvals=Zt)
    } else if (firststep=="ll") {
        dr <- distreg::lldistreg(formla, data=dd, yvals=y.seq, tvals=dd$Ztmin1)
    } else {
        dr <- quantreg::rq(formla, tau=2)
    }

    if (retF & retZ) {
        F.y0 <- distreg::Fycondx(dr, Zt, xdf=model.frame(formla, data=dd))
        return(list(F.y0=F.y0, xdf=model.frame(formla, data=dd)))
    } else if (retF) {  ## default behavior; everything else is a bit hack way of getting things needed for numerical bootstrap
        F.y0 <- distreg::Fycondx(dr, Zt, xdf=model.frame(formla, data=dd))
        return(F.y0)
    } else {
        return(list(dr=dr,yvals=Zt))
    }
}




#' @title l.inner
#'
#' @description Obtains the lower bound on the Distribution of the
#'  Treatment Effect for the Treated (DoTT), DoTT(delt)
#'  under the Copula Stability Assumption.  These are generic in the
#'  sense that it is going to work without the CSA or with or without covariates.
#'  You just need to provide a vector of distributions -- if there are no
#'  covariates, all of these distributions are going to be the same
#'  and there is not going to be any tightening of the bounds.  If there are
#'  covariates or otherwise conditional distributions (e.g. coming from CSA),
#'  then same computational approach will work, but it will also lead to
#'  (potentially) tighter bounds.
#'
#' @param delt the value to obtain the DoTT for
#' @param y.seq possible values of y
#' @param F.y0 list of distributions of counterfactual untreated outcomes for the treated group
#'  (length of list should be n_1)
#' @param F.y1 list of distributions of treated outcomes for the treated group (length of list should be n_1)
#'
#' @return the value of the lower bound of DoTT(
#'
#' @export
l.inner <- function(delt, y.seq, F.y1, F.y0) {
    n <- length(F.y1)
    condqott <- sapply(1:n, function(i) {
        inner.y <- sapply(y.seq, function(y) {
            max(F.y1[[i]](y) - F.y0[[i]](y-delt), 0)
        })
        max(inner.y) ## this is sup step
    })
    mean(condqott) ## return the average
}

#' @title u.inner
#'
#' @description Obtains the upper bound on the Distribution of the
#'  Treatment Effect for the Treated (DoTT), DoTT(delt)
#'  under the Copula Stability Assumption
#'
#' @inheritParams l.inner
#'
#' @return the value of the upper bound of DoTT(delt)
#' @export
u.inner <- function(delt, y.seq, F.y1, F.y0) {
    n <- length(F.y1)
    condqott <- sapply(1:n, function(i) {
        inner.y <- sapply(y.seq, function(y) {
            1+min(F.y1[[i]](y) - F.y0[[i]](y-delt), 0)
        })
        min(inner.y)
    })
    mean(condqott)
}

#' @title wd.l.inner
#'
#' @description Williamson-Downs bounds inner
#'
#' @inheritParams l.inner
#'
#' @keywords internal
#' @export
wd.l.inner <- function(y, delt, Y1t, Y0tqteobj) {
    max(ecdf(Y1t)(y) - Y0tqteobj$F.treated.t.cf(y-delt),0)
}

#' @title wd.l
#'
#' @description Williamson-Downs lower bound.
#'
#' @inheritParams l.inner
#'
#' @keywords internal
#' @export
wd.l <- function(delt, y.seq, Y1t, ddid) {
    max(vapply(y.seq, wd.l.inner, 1.0, delt, Y1t, ddid))
}

#' @title wd.u.inner
#'
#' @description Williamson-Downs bounds inner
#'
#' @inheritParams l.inner
#'
#' @keywords internal
#' @export
wd.u.inner <- function(y, delt, Y1t, ddid) {
    1 + min(ecdf(Y1t)(y) - ddid$F.treated.t.cf(y-delt),0)
}


#' @title wd.u
#'
#' @description Williamson-Downs upper bound
#'
#' @inheritParams l.inner
#'
#' @keywords internal
#' @export
wd.u <- function(delt, y.seq, Y1t, ddid) {
    min(vapply(y.seq, wd.u.inner, 1.0, delt, Y1t, ddid))
}



setupTreatedData <- function(data, Y0tqteobj, formla, xformla, idname, tname, t, tmin1, tmin2) {
    form  <- BMisc::toformula(BMisc::lhs.vars(formla),c(BMisc::rhs.vars(formla),BMisc::rhs.vars(xformla)))
    ##form <- as.formula(formla)
    dta <- model.frame(terms(form,data=data),data=data)
    colnames(dta) <- c("y","treat",colnames(dta)[-c(1,2)]) ## TODO: update when including covariates
    dta <- cbind.data.frame(dta,data[,c(idname,tname)])

    ## only need treated group for most computations here
    data1 <- subset(dta, treat==1)
    dta1 <- subset(dta, treat==1)

    ## shortened versions of each term
    Y1t <- dta1[dta1[,tname]==t,]$y
    Y0tmin1 <- dta1[dta1[,tname]==tmin1,]$y
    Y0tmin2 <- dta1[dta1[,tname]==tmin2,]$y

    ## this takes covariates from teh first time period
    dta1 <- dta1[dta1[,tname]==tmin2,]
    data1 <- data1[data1[,tname]==tmin2,]
    dta1$Y1t <- Y1t
    dta1$Y0tmin1 <- Y0tmin1
    dta1$Y0tmin2 <- Y0tmin2

    y0t <- Y0tqteobj$y0t
    dta1$y0t <- y0t

    ## if (is.null(xformla)) {
    ##     xformla <- ~1
    ## }

    ## dta1 <- cbind.data.frame(dta1, model.frame(xformla, data=data1))
    dta1
}

#' @title compute.csa.bounds
#'
#' @description Compute bounds on the distribution and quantile of the
#'  treatment effect as given in Callaway (2017) under the copula
#'  stability assumption and when a first step estimator of the counterfactual
#'  distribution of untreated potential outcomes for the treated group is
#'  available.
#'
#' @inheritParams F.Y0
#' @inheritParams F.Y1
#' @param formla outcomevar ~ treatmentvar
#' @param data a panel data frame
#' @param t the 3rd period
#' @param tmin1 the 2nd period
#' @param tmin2 the 1st period
#' @param tname the name of the column containing periods
#' @param idname the name of the column containing ids
#' @param delt.seq the possible values to compute bounds on the distribution
#'  of the treatment effect for
#' @param y.seq the possible values for y to take
#' @param Y0tqteobj a previously computed first step estimator of the
#'  distribution of counterfactual outcomes for the treated group
#' @param F.y0 (optional) pre-computed distribution of counterfactual untreated outcomes for the treated group
#' @param F.y1 (optional) pre-computed distribution of treated outcomes for the treated group
#' @param cl (optional) number of multi-cores to use
#'
#' @import stats
#' @importFrom pbapply pblapply
#'
#' @return csaboundsobj
#'
#' @export
compute.csa.bounds <- function(formla, t, tmin1, tmin2, tname, idname,
                       data, delt.seq, y.seq, Y0tqteobj, F.y0=NULL, F.y1=NULL,
                       h=NULL,
                       xformla=~1,
                       firststep=c("dr","qr","ll"), cl=1) {


    firststep <- firststep[1]
    ## setup data
    dta1 <- setupTreatedData(data, Y0tqteobj, formla, xformla, idname, tname, t, tmin1, tmin2)

    n <- nrow(dta1)


    ## ytmin1.seq <- Y0tmin1
    ## ##ytmin1.seq <- unique(Y0tmin1)  ## if really continuous, this line
    ## ## doesn't do anything, if not it will add some extra computational
    ## ## time, but should still work, if not actually continuous, it will
    ## ## break the method when using rank method, that is why I am commenting
    ## ytmin1.seq <- ytmin1.seq[order(ytmin1.seq)]
    ##seq(min(Y1t),max(Y1t), length.out=100)

    ## calculate conditional distributions

    ##print("Step 1 of 4: Calculating conditional distribution of treated potential outcomes...")

    if (is.null(F.y1)) {
        ##F.y1 <- F.Y1(firststep, xformla, dta1, yvals=dta1$Y1t, tvals=dta1$Y0tmin1, h=h)
        F.y1 <- F.Y1(firststep, xformla, dta1, yvals=y.seq, tvals=dta1$Y0tmin1, h=h)
    }

    ##print("Step 2 of 4: Calculating conditional distribution of untreated potential outcomes...")

    if (is.null(F.y0)) {
        ##F.y0 <- F.Y0(firststep, xformla, dta1, yvals=dta1$Y1t, tvals=dta1$Y0tmin1, h=h)
        F.y0 <- F.Y0(firststep, xformla, dta1, yvals=y.seq, tvals=dta1$Y0tmin1, h=h)
    }

    out <- compute.DoTTBounds(delt.seq, y.seq, F.y1, F.y0)

    ##also compute WD Bounds
    ## compute Williamson-Downs bounds (these are fast, so can compute
    ## them whether or not we report the results)
    wd.l.vec <- vapply(delt.seq, wd.l, 1.0, y.seq, dta1$Y1t, Y0tqteobj)
    wd.u.vec <- vapply(delt.seq, wd.u, 1.0, y.seq, dta1$Y1t, Y0tqteobj)

    F.wd.l <- BMisc::makeDist(delt.seq, wd.l.vec)
    F.wd.u <- BMisc::makeDist(delt.seq, wd.u.vec)

    out$F.wd.l <- F.wd.l
    out$F.wd.u <- F.wd.u

    out
}



#' @title compute.DoTTBounds
#'
#' @description function that does the heavy lifting for computing bounds on the DoTT (and hence
#'  on the QoTT) when conditional distributions of Y1t and Y0t are available
#'
#' @inheritParams compute.csa.bounds
#'
#' @return List with bounds coming from conditional distributions and Williamson-Downs bounds
#' @export
compute.DoTTBounds <- function(delt.seq, y.seq, F.y1, F.y0) {
    ## compute lower bound;
    ##print("Step 3 of 4: Calculating lower bound")

    l.vec <- sapply(delt.seq, l.inner, y.seq, F.y1, F.y0)
    u.vec <- sapply(delt.seq, u.inner, y.seq, F.y1, F.y0)

    ## turn lower and upper bounds into distributions
    F.l <- BMisc::makeDist(delt.seq, l.vec)
    F.u <- BMisc::makeDist(delt.seq, u.vec)

    return(list(F.l=F.l, F.u=F.u))
}

simpleBoot <- function(data) {
    rownums <- sample(1:nrow(data), replace=TRUE)
    data[rownums,]
}




## This is a really ambitious attempt to write this generically for any first
## step estimator; there are some very trick issues here to get this to work --
## a main one is that the first step estimator needs to be computed again for
## each bootstrap draw which is a real headache.  I am giving up on this
## for the moment and just going to write it for the cases under selection
## on observables and under CSA.  These are main cases.  Under CSA, the first
## step estimator is hard coded to be Change-in-changes; but it would be fairly
## minor modification to the code to replace that with something else...
## #' @title DoTTBounds
## #'
## #' @description Compute bounds on the distribution of the treatment effect (DoTT) when the conditional distribution of Y1t and Y0t are available
## #'
## #' @param Y0method This should be a function that can be called with data=data and other parameters
## #'  specified in ... that returns a list of length n_1 of distribution functison for Y0
## #' @param Y1method This should be a function that can be called with data=data and other parameters
## #'  specified in ... that returns a list of length n_1 of distribution functions for Y1
## #' @param simpleBoot A method to bootstrap the data that can be called with data=data and other
## #'  parameters specified in ...; the default is simpleBoot which should work for cross sectional
## #'  data but is going to fail in other cases; in the case with panel data, one can use BMisc::blockBootSample
## #'  but also user-supplied functions can be used here
## #' @inheritParams compute.DoTTBounds
## #'
## #' @return a list containing estimates and standard errors for the DoTT
## #' @export
## DoTTBounds <- function(delt.seq, y.seq, Y0method, Y1method, data, bootmethod=simpleBoot,
##                        se=FALSE, bootiters=100, cl=1, ...) {

##     stop("Not implemented yet...still some bugs...")
##     F.y0 <- Y0method(data=data, ...)
##     F.y1 <- Y1method(data=data, ...)

##     dott <- compute.DoTTBounds(delt.seq, y.seq, F.y1, F.y0)

##     if (se) {
##         n <- nrow(ata)
##         en <- n^(-1/2.1) ## update this...
##         boot.out  <- pbapply::pblapply(1:bootiters, function(b) {
##             boot.data <- bootmethod(data,...)
##             boot.F.y0 <- Y0method(data=boot.data,...)
##             boot.F.y1 <- Y1method(data=boot.data,...)

##             ## ***left off, how to get hat.F...,etc. is not clear...

##             hl.F.y0t <- lapply(1:nrow(boot.dta1), function(i) makeDist(y.seq, hat.F.y0t[[i]](y.seq) - en*sqrt(n)*(hat.F.y0t[[i]](y.seq) - boot.F.y0t[[i]](y.seq)), rearrange=TRUE, force01=TRUE))
##             hl.F.y1t <- lapply(1:nrow(boot.dta1), function(i) makeDist(y.seq, hat.F.y1t[[i]](y.seq) - en*sqrt(n)*(hat.F.y1t[[i]](y.seq) - boot.F.y0t[[i]](y.seq))))

##             csa.boot <- compute.csa.bounds(formla, t, tmin1, tmin2, tname,
##                                            idname, boot.data, delt.seq,
##                                            y.seq, boot.Y0tqteobj, F.y0=hl.F.y0t, F.y1=hl.F.y1t, h,
##                                            xformla, firststep)
##             csa.boot
##         }, cl=cl)


##         ## ***this should work...
##         ## get pointwise confidence intervals for DoTT and QoTT
##         ## CSA lower
##         boot.F.l <- t(sapply(1:bootiters, function(b) {
##             boot.out[[b]]$F.l(delt.seq)
##         }))
##         F.l.upper <- apply(boot.F.l, 2, quantile, (1-alp))
##         F.l.lower <- apply(boot.F.l, 2, quantile, alp)
##         ## CSA upper
##         boot.F.u <- t(sapply(1:bootiters, function(b) {
##             boot.out[[b]]$F.u(delt.seq)
##         }))
##         F.u.upper <- apply(boot.F.u, 2, quantile, (1-alp))
##         F.u.lower <- apply(boot.F.u, 2, quantile, alp)
##         ## WD lower
##         boot.F.wd.l <- t(sapply(1:bootiters, function(b) {
##             boot.out[[b]]$F.wd.l(delt.seq)
##         }))
##         F.wd.l.upper <- apply(boot.F.wd.l, 2, quantile, (1-alp))
##         F.wd.l.lower <- apply(boot.F.wd.l, 2, quantile, alp)
##         ## WD upper
##         boot.F.wd.u <- t(sapply(1:bootiters, function(b) {
##             boot.out[[b]]$F.wd.u(delt.seq)
##         }))
##         F.wd.u.upper <- apply(boot.F.wd.u, 2, quantile, (1-alp))
##         F.wd.u.lower <- apply(boot.F.wd.u, 2, quantile, alp)

##         ## add results to object to return
##         csa.res$F.u.upper <- F.u.upper
##         csa.res$F.u.lower <- F.u.lower
##         csa.res$F.l.upper <- F.l.upper
##         csa.res$F.l.lower <- F.l.lower
##         csa.res$F.wd.u.upper <- F.wd.u.upper
##         csa.res$F.wd.u.lower <- F.wd.u.lower
##         csa.res$F.wd.l.upper <- F.wd.l.upper
##         csa.res$F.wd.l.lower <- F.wd.l.lower
##     }


## }

#' @title csa.bounds
#'
#' @description The main function of the \code{csabounds} package.
#'  It computes bounds on the distribution of the treatment effect
#'  when panel data is available and under the Copula Stability Assumption.
#'  The function can also compute tighter bounds when other covariates are
#'  available.
#' 
#' @param formla A formula of the form: outcome ~ treatment
#' @param t the value for the third time period
#' @param tmin1 the value of the second time period
#' @param tmin2 the value of the first time period
#' @param tname the name of the variable in \code{data} that contains the
#'  time period
#' @param idname the name of the variable in \code{data} that contains the
#'  id variable
#' @param data the name of the \code{data.frame}.  It should be in "long"
#'  format rather than "wide" format.
#' @param delt.seq a vector of values to compute the distribution of the
#'  treatment effect for
#' @param y.seq a vectof of values to compute first-step distributions over
#'  (this is currently not used as it is computed internally)
#' @param Y0tmethod the name of a function to estimate the distribution
#'  of Y0t in a first step; for example \code{qte::panel.qtet},
#'  \code{qte::ddid2}, or \code{qte::CiC}
#' @param h optional bandwidth when using local linear regression
#' @param firststep whether to use distribution regression ("dr"),
#'  quantile regression ("qr"), or local linear distribution regression ("ll")
#'  for the first step estimation of condtional distributions
#' @param xformla a formula for which covariates to use
#' @param se whether or not to compute standard errors (if \code{TRUE},
#'  they are computed using the bootstrap
#' @param bootiters if computing standard errors using the bootstrap, how
#'  many bootstrap iterations to use
#' @param cl if computing standard errors using the bootstrap, how many
#'  cores to use in parallel computation (default is 1)
#' @param alp significance level for confidence intervals
#' @param ... whatever extra arguments need to be passed to Y0tmethod
#'
#' @export
csa.bounds <- function(formla,
                       t,
                       tmin1,
                       tmin2,
                       tname,
                       idname,
                       data,
                       delt.seq,
                       y.seq=NULL,
                       Y0tmethod,
                       h=NULL,
                       xformla=~1,
                       firststep=c("dr","qr","ll"),
                       se=FALSE,
                       bootiters=100,
                       cl=1,
                       alp=.05,...) {

    ## this is not robust at all, probably only works with condcic method,
    ## but you can see how to make it work in more complicated cases...
    Y0tqteobj <- Y0tmethod(formla=formla, xformla=xformla,
                           t=t, tmin1=tmin1, tname=tname,
                           idname=idname, data=data, se=FALSE,...)
    dta1 <- setupTreatedData(data, Y0tqteobj, formla, xformla, idname, tname, t, tmin1, tmin2)
    if (is.null(y.seq)) y.seq <- sort(unique(c(dta1$Y1t,Y0tqteobj$y0t)))
    F.y1 <- F.Y1(firststep, xformla, dta1, yvals=y.seq, tvals=dta1$Y0tmin1, h=h)
    F.y0 <- F.Y0(firststep, xformla, dta1, yvals=y.seq, tvals=dta1$Y0tmin1, h=h)
    csa.res <- compute.csa.bounds(formla, t, tmin1, tmin2, tname,
                                  idname, data, delt.seq, y.seq, Y0tqteobj, F.y0=F.y0, F.y1=F.y1,
                                  h=h, xformla=xformla, firststep=firststep)


    if (se) {
        cat("boostrapping standard errors...\n")
        ########################################################
        ##
        ## empirical bootstrap
        ##
        ########################################################

        ## boot.out  <- pbapply::pblapply(1:bootiters, function(b) {
        ##     boot.data <- blockBootSample(data, idname)
        ##     boot.Y0tqteobj <- Y0tmethod(formla=formla, xformla=xformla,
        ##                        t=t, tmin1=tmin1, tname=tname,
        ##                        idname=idname, data=boot.data, se=FALSE,...)
        ##     boot.dta1 <- setupTreatedData(boot.data, boot.Y0tqteobj, formla, xformla, idname, tname, t, tmin1, tmin2)
        ##     boot.F.y0t <- F.Y0(firststep, xformla, boot.dta1, yvals=dta1$Y1t, tvals=dta1$Y0tmin1, h=h)
        ##     boot.F.y1t <- F.Y1(firststep, xformla, boot.dta1, yvals=dta1$Y1t, tvals=dta1$Y0tmin1, h=h)
        ##     csa.boot <- compute.csa.bounds(formla, t, tmin1, tmin2, tname,
        ##                                    idname, boot.data, delt.seq,
        ##                                    y.seq, boot.Y0tqteobj, F.y0=boot.F.y0t, F.y1=boot.F.y1t, h,
        ##                                    xformla, firststep)
        ##     csa.boot
        ## }, cl=cl)


        #########################################################
        ##
        ## numerical bootstrap (hong and li)
        ##
        #########################################################

        ##do some work out of loop...
        n <- nrow(dta1)
        en <- n^(-1/2.5) ## update this...
        dr1 <- F.Y1(firststep, xformla, dta1, yvals=y.seq, tvals=dta1$Y0tmin1, h=h, retF=FALSE)
        dr0 <- F.Y0(firststep, xformla, dta1, yvals=y.seq, tvals=dta1$Y0tmin1, h=h, retF=FALSE)
        boot.out  <- pbapply::pblapply(1:bootiters, function(b) {
            boot.data <- BMisc::blockBootSample(data, idname)
            ##boot.data <- data
            boot.Y0tqteobj <- Y0tmethod(formla=formla, xformla=xformla,
                                        t=t, tmin1=tmin1, tname=tname,
                                        idname=idname, data=boot.data, se=FALSE,...)
            boot.dta1 <- setupTreatedData(boot.data, boot.Y0tqteobj, formla, xformla, idname, tname, t, tmin1, tmin2)
            boot.F.y0t.inner <- F.Y0(firststep, xformla, boot.dta1, yvals=y.seq, tvals=dta1$Y0tmin1, h=h, retZ=TRUE)
            boot.F.y0t <- boot.F.y0t.inner$F.y0
            boot.F.y1t <- F.Y1(firststep, xformla, boot.dta1, yvals=y.seq, tvals=dta1$Y0tmin1, h=h)
            hat.F.y0t <- distreg::Fycondx(dr0$dr, dr0$yvals, xdf=boot.F.y0t.inner$xdf)
            hat.F.y1t <- distreg::Fycondx(dr1$dr, dr0$yvals, xdf=boot.dta1)

            hl.F.y0t <- lapply(1:nrow(boot.dta1), function(i) BMisc::makeDist(y.seq, hat.F.y0t[[i]](y.seq) - en*sqrt(n)*(hat.F.y0t[[i]](y.seq) - boot.F.y0t[[i]](y.seq)), rearrange=TRUE, force01=TRUE, method="linear"))
            hl.F.y1t <- lapply(1:nrow(boot.dta1), function(i) BMisc::makeDist(y.seq, hat.F.y1t[[i]](y.seq) - en*sqrt(n)*(hat.F.y1t[[i]](y.seq) - boot.F.y1t[[i]](y.seq)), rearrange=TRUE, force01=TRUE, method="linear"))


            ## there are slight differences due to interpolations...
            ##sapply(1:nrow(boot.dta1), function(i) all( hl.F.y0t[[i]](y.seq)== F.y0[[i]](y.seq+.5)))
            
            ## first term in hong and li
            csa.boot1 <- compute.csa.bounds(formla, t, tmin1, tmin2, tname,
                                            idname, boot.data, delt.seq,
                                            y.seq, boot.Y0tqteobj, F.y0=hl.F.y0t, F.y1=hl.F.y1t, h,
                                            xformla, firststep)

            hl <- list()
            
            hl$F.l <- (csa.boot1$F.l(delt.seq) - csa.res$F.l(delt.seq))/en
            hl$F.u <- (csa.boot1$F.u(delt.seq) - csa.res$F.u(delt.seq))/en
            hl$F.wd.l <- (csa.boot1$F.wd.l(delt.seq) - csa.res$F.wd.l(delt.seq))/en
            hl$F.wd.u <- (csa.boot1$F.wd.u(delt.seq) - csa.res$F.wd.u(delt.seq))/en
            
            hl
                
        }, cl=cl)

        
        ## get pointwise confidence intervals for DoTT and QoTT
        ## CSA lower
        boot.F.l <- t(sapply(1:bootiters, function(b) {
            boot.out[[b]]$F.l
        }))
        F.l.upper <- BMisc::makeDist(delt.seq, csa.res$F.l(delt.seq) - 
                                apply(boot.F.l, 2, quantile, alp)/sqrt(n),
                              rearrange = TRUE, force01 = TRUE)
        F.l.lower <- BMisc::makeDist(delt.seq, csa.res$F.l(delt.seq) - 
                                apply(boot.F.l, 2, quantile, (1-alp))/sqrt(n),
                              rearrange = TRUE, force01 = TRUE)
        ## CSA upper
        boot.F.u <- t(sapply(1:bootiters, function(b) {
          boot.out[[b]]$F.u
        }))
        F.u.upper <- BMisc::makeDist(delt.seq, csa.res$F.u(delt.seq) - 
                                apply(boot.F.u, 2, quantile, alp)/sqrt(n),
                              rearrange = TRUE, force01 = TRUE)
        F.u.lower <- BMisc::makeDist(delt.seq, csa.res$F.u(delt.seq) - 
                                apply(boot.F.u, 2, quantile, (1-alp))/sqrt(n),
                              rearrange = TRUE, force01 = TRUE)
        ## WD lower
        boot.F.wd.l <- t(sapply(1:bootiters, function(b) {
          boot.out[[b]]$F.wd.l
        }))
        F.wd.l.upper <- BMisc::makeDist(delt.seq, csa.res$F.wd.l(delt.seq) -
                                   apply(boot.F.wd.l, 2, quantile, alp)/sqrt(n),
                                 rearrange=TRUE, force01=TRUE)
        F.wd.l.lower <- BMisc::makeDist(delt.seq, csa.res$F.wd.l(delt.seq) - 
                                   apply(boot.F.wd.l, 2, quantile, (1-alp))/sqrt(n),
                                 rearrange=TRUE,force01=TRUE)
        ## WD upper
        boot.F.wd.u <- t(sapply(1:bootiters, function(b) {
          boot.out[[b]]$F.wd.u
        }))
        F.wd.u.upper <- BMisc::makeDist(delt.seq, csa.res$F.wd.u(delt.seq) - 
                                   apply(boot.F.wd.u, 2, quantile, alp)/sqrt(n),
                                 rearrange=TRUE, force01=TRUE)
        F.wd.u.lower <- BMisc::makeDist(delt.seq, csa.res$F.wd.u(delt.seq) - 
                                   apply(boot.F.wd.u, 2, quantile, (1-alp))/sqrt(n),
                                 rearrange=TRUE, force01=TRUE)
        
        
        ## add results to object to return
        csa.res$F.u.upper <- F.u.upper
        csa.res$F.u.lower <- F.u.lower
        csa.res$F.l.upper <- F.l.upper
        csa.res$F.l.lower <- F.l.lower
        csa.res$F.wd.u.upper <- F.wd.u.upper
        csa.res$F.wd.u.lower <- F.wd.u.lower
        csa.res$F.wd.l.upper <- F.wd.l.upper
        csa.res$F.wd.l.lower <- F.wd.l.lower
    }

    return(csa.res)


}


#' @title cia.bounds
#'
#' @description Bounds on the distribution of the treatment effect and on
#'  the quantile of the treatment effect under a conditional independence
#'  assumption (cia).  It takes in a data.frame, that should indicate whether
#'  or not an individual is treated; separates individuals into a treated
#'  and untreated group; runs distribution or quantile regression to
#'  estimate the conditional distributions; then computes bounds on the
#'  DoTT or QoTT.
#'
#' @inheritParams csa.bounds
#' @param formla y ~ d
#' @param xformla ~ x1 + x2
#' @param alp significance level for confidence intervals
#' @param link optional argument to pass to \code{distreg} for which link
#'  function to use when running distribution regressions
#' @param ... whatever extra arguments need to be passed to Y0tmethod
#'
#' @export
cia.bounds <- function(formla, xformla=~1, data, delt.seq, y.seq=NULL, 
                       firststep=c("dr","qr","ll"),
                       link="logit",
                       se=FALSE, bootiters=100,
                       cl=1,alp=.05,...) {

    ## setup data
    form <- as.formula(formla)
    dta <- model.frame(terms(form,data=data),data=data)
    colnames(dta) <- c("y","treat") ## TODO: update when including covariates
    dta <- cbind.data.frame(dta, model.frame(xformla, data=data))
    dta1 <- subset(dta, treat==1)
    dta0 <- subset(dta, treat==0)                            
    if (is.null(y.seq)) y.seq <- sort(unique(dta1$y)) ## this is going to get "on the treated" results
    
    
    ## compute conditional distributions
    dformla <- BMisc::toformula("y", BMisc::rhs.vars(xformla))
    xdf <- dta1 ## (implies we get qott, etc. rather than qote)
    if (firststep=="dr") {
        dr1 <- distreg::distreg(dformla, data=dta1, yvals=y.seq,
                                      link=link)
        dr0 <- distreg::distreg(dformla, data=dta0, yvals=y.seq,
                                      link=link)
        
    } else {
        dr1 <- quantreg::rq(dformla, data=dta1, tau=seq(.01,.99,.01))
        dr0 <- quantreg::rq(dformla, data=dta0, tau=seq(.01,.99,.01))
    }
    F.y1 <- distreg::Fycondx(dr1, y.seq, xdf=xdf)
    F.y0 <- distreg::Fycondx(dr0, y.seq, xdf=xdf)

    ## compute estimates of bounds
    cia.res <- compute.DoTTBounds(delt.seq, y.seq, F.y1, F.y0)


    if (se) {
        cat("boostrapping standard errors...\n")
        #########################################################
        ##
        ## numerical bootstrap (hong and li)
        ##
        #########################################################

        ##do some work out of loop...
        n <- nrow(dta1)
        en <- n^(-1/2.5) ## update this...
        boot.out  <- pbapply::pblapply(1:bootiters, function(b) {
            ## setup bootstrap data
            boot.data <- simpleBoot(dta)
            boot.dta1 <- subset(boot.data, treat==1)
            boot.dta0 <- subset(boot.data, treat==0)                            

            ## roughly follow same steps as before, combining with Hong and Li
            ## compute conditional distributions
            boot.xdf <- boot.dta1 ## (implies we get qott, etc. rather than qote)
            if (firststep=="dr") {
                boot.dr1 <- distreg::distreg(dformla, data=boot.dta1, yvals=y.seq, link=link)
                boot.dr0 <- distreg::distreg(dformla, data=boot.dta0, yvals=y.seq, link=link)
                
            } else {
                boot.dr1 <- quantreg::rq(dformla, data=boot.dta1, tau=seq(.01,.99,.01))
                boot.dr0 <- quantreg::rq(dformla, data=boot.dta0, tau=seq(.01,.99,.01))
            }
            boot.F.y1 <- distreg::Fycondx(boot.dr1, y.seq, xdf=boot.xdf)
            boot.F.y0 <- distreg::Fycondx(boot.dr0, y.seq, xdf=boot.xdf)

            hat.F.y0 <- distreg::Fycondx(dr0, y.seq, xdf=boot.xdf)
            hat.F.y1 <- distreg::Fycondx(dr1, y.seq, xdf=boot.xdf)

            hl.F.y0 <- lapply(1:nrow(boot.dta1), function(i) BMisc::makeDist(y.seq, hat.F.y0[[i]](y.seq) - en*sqrt(n)*(hat.F.y0[[i]](y.seq) - boot.F.y0[[i]](y.seq)), rearrange=TRUE, force01=TRUE))
            hl.F.y1 <- lapply(1:nrow(boot.dta1), function(i) BMisc::makeDist(y.seq, hat.F.y1[[i]](y.seq) - en*sqrt(n)*(hat.F.y1[[i]](y.seq) - boot.F.y1[[i]](y.seq)), rearrange=TRUE, force01=TRUE))

            cia.boot <- compute.DoTTBounds(delt.seq, y.seq, F.y1=hl.F.y1, F.y0=hl.F.y0)
            
            hl <- list()
            ## note that these are not actually distributions; just use these to calculate confidence intervals later
            ## hl$F.l <- (csa.boot1$F.l(delt.seq) - csa.boot2$F.l(delt.seq))/en
            ## hl$F.u <- (csa.boot1$F.u(delt.seq) - csa.boot2$F.u(delt.seq))/en
            ## hl$F.wd.l <- (csa.boot1$F.wd.l(delt.seq) - csa.boot2$F.wd.l(delt.seq))/en
            ## hl$F.wd.u <- (csa.boot1$F.wd.u(delt.seq) - csa.boot2$F.wd.u(delt.seq))/en
            hl$F.l <- (cia.boot$F.l(delt.seq) - cia.res$F.l(delt.seq))/en
            hl$F.u <- (cia.boot$F.u(delt.seq) - cia.res$F.u(delt.seq))/en
            
            hl
            
        }, cl=cl)
        
        boot.F.l <- t(sapply(1:bootiters, function(b) {
          boot.out[[b]]$F.l
        }))
        F.l.upper <- BMisc::makeDist(delt.seq, cia.res$F.l(delt.seq) - 
                                apply(boot.F.l, 2, quantile, alp)/sqrt(n),
                              rearrange = TRUE, force01 = TRUE)
        F.l.lower <- BMisc::makeDist(delt.seq, cia.res$F.l(delt.seq) - 
                                apply(boot.F.l, 2, quantile, (1-alp))/sqrt(n),
                              rearrange = TRUE, force01 = TRUE)
        ## CSA upper
        boot.F.u <- t(sapply(1:bootiters, function(b) {
          boot.out[[b]]$F.u
        }))
        F.u.upper <- BMisc::makeDist(delt.seq, cia.res$F.u(delt.seq) - 
                                apply(boot.F.u, 2, quantile, alp)/sqrt(n),
                              rearrange = TRUE, force01 = TRUE)
        F.u.lower <- BMisc::makeDist(delt.seq, cia.res$F.u(delt.seq) - 
                                apply(boot.F.u, 2, quantile, (1-alp))/sqrt(n),
                              rearrange = TRUE, force01 = TRUE)
        # ## WD lower
        # boot.F.wd.l <- t(sapply(1:bootiters, function(b) {
        #   boot.out[[b]]$F.wd.l
        # }))
        # F.wd.l.upper <- BMisc::makeDist(delt.seq, cia.res$F.wd.l(delt.seq) -
        #                            apply(boot.F.wd.l, 2, quantile, alp)/sqrt(n),
        #                          rearrange=TRUE, force01=TRUE)
        # F.wd.l.lower <- BMisc::makeDist(delt.seq, cia.res$F.wd.l(delt.seq) - 
        #                            apply(boot.F.wd.l, 2, quantile, (1-alp))/sqrt(n),
        #                          rearrange=TRUE,force01=TRUE)
        # ## WD upper
        # boot.F.wd.u <- t(sapply(1:bootiters, function(b) {
        #   boot.out[[b]]$F.wd.u
        # }))
        # F.wd.u.upper <- BMisc::makeDist(delt.seq, cia.res$F.wd.u(delt.seq) - 
        #                            apply(boot.F.wd.u, 2, quantile, alp)/sqrt(n),
        #                          rearrange=TRUE, force01=TRUE)
        # F.wd.u.lower <- BMisc::makeDist(delt.seq, cia.res$F.wd.u(delt.seq) - 
        #                            apply(boot.F.wd.u, 2, quantile, (1-alp))/sqrt(n),
        #                          rearrange=TRUE, force01=TRUE)
        
        ## add results to object to return
        cia.res$F.u.upper <- F.u.upper
        cia.res$F.u.lower <- F.u.lower
        cia.res$F.l.upper <- F.l.upper
        cia.res$F.l.lower <- F.l.lower
        # cia.res$F.wd.u.upper <- F.wd.u.upper
        # cia.res$F.wd.u.lower <- F.wd.u.lower
        # cia.res$F.wd.l.upper <- F.wd.l.upper
        # cia.res$F.wd.l.lower <- F.wd.l.lower
    }

    return(cia.res)

}



#' @title ggCSABounds
#'
#' @description plot bounds on the quantile of the treatment effect
#'  using ggplot2
#'
#' @param csaboundsobj an object returned from the csa.bounds method
#' @param tau vector of values between 0 and 1 to plot quantiles for
#' @param csalabel label for bounds under copula stability assumption
#' @param wdbounds boolean whether or not to also plot Williamson-Downs bounds
#' @param wdlabel label for worst case bounds
#' @param otherdist1 optional ecdf of the distribution of the treatment effect
#'  under cross sectional rank invariance
#' @param otherdist1label label for other distribution 1
#' @param otherdist2 optional ecdf of the distribution of the treatment effect
#'  under panel rank invariance
#' @param otherdist2label label for other distribution 2
#' @param plotSE whether or not to plot standard errors (default is \code{TRUE})
#' @param colors optional vector of colors to use in place of \code{ggplot}'s
#'  default scheme
#' @param legend whether or not to include a legend (default is \code{TRUE})
#'
#' @import ggplot2
#'
#' @export
ggCSABounds <- function(csaboundsobj, tau=seq(.05,.95,.05),
                        csalabel="CSA Bounds",
                        wdbounds=FALSE,
                        wdlabel="WD Bounds",
                        otherdist1=NULL,
                        otherdist1label="CS RI",
                        otherdist2=NULL,
                        otherdist2label="Panel RI", plotSE=TRUE,
                        colors=NULL, legend=TRUE) {
    tau <- seq(0.05, 0.95, .05)
    c <- csaboundsobj

    qu <- quantile(c$F.l, tau, type=1)
    ql <- quantile(c$F.u, tau, type=1)
    qwdu <- quantile(c$F.wd.l, tau, type=1)
    qwdl <- quantile(c$F.wd.u, tau, type=1)

    cmat <- data.frame(tau=tau, qu=qu, ql=ql, group=csalabel)
    cmat2 <- data.frame(tau=tau, qu=qwdu, ql=qwdl, group=wdlabel)
    if (wdbounds) {
        cmat <- rbind.data.frame(cmat, cmat2)
    }
    if (!is.null(otherdist1)) {
        cmat3 <- data.frame(tau=tau, qu=quantile(otherdist1, tau, type=1),
                            ql=ql, group=otherdist1label)
        cmat <- rbind.data.frame(cmat, cmat3)
    }
    if (!is.null(otherdist2)) {
        cmat4 <- data.frame(tau=tau, qu=quantile(otherdist2, tau, type=1),
                            ql=ql, group=otherdist2label)
        cmat <- rbind.data.frame(cmat, cmat4)
    }


    p <- ggplot(data=cmat) +
        geom_line(aes(x=tau, y=qu, color=factor(group)), size=1) +
        geom_line(aes(x=tau, y=ql, color=factor(group)), size=1) +
        scale_x_continuous(limits=c(0,1)) +
        theme_bw() +
        ylab("QoTT") + 
        theme(legend.title=element_blank())##%,
              ##legend.position="top",
    ##legend.direction="horizontal")

    if (!(is.null(colors))) {
        p <- p + scale_color_manual(values=colors)
    }

    if (!legend) {
        p <- p + theme(legend.position="none")
    }
    
    ## add confidence intervals, if they are provided
    if (plotSE) {
        if (!is.null(c$F.l.upper)) {
            dmat <- data.frame(tau=tau, uci=quantile(c$F.l.lower, probs=tau, type=1), lci=quantile(c$F.u.upper, probs=tau, type=1))
            p <- p + geom_line(data=dmat, mapping=aes(x=tau, y=uci), size=1, linetype=2)
            p <- p + geom_line(data=dmat, mapping=aes(x=tau, y=lci), size=1, linetype=2)
        }
    }

    p
}
bcallaway11/csabounds documentation built on July 27, 2022, 5:17 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
bcallaway11/csabounds
Bounds on Distributional Treatment Effect Parameters

R/qott.R
In bcallaway11/csabounds: Bounds on Distributional Treatment Effect Parameters

Defines functions ggCSABounds cia.bounds csa.bounds simpleBoot compute.DoTTBounds compute.csa.bounds setupTreatedData wd.u wd.u.inner wd.l wd.l.inner u.inner l.inner .Y0 .Y1

Documented in cia.bounds compute.csa.bounds compute.DoTTBounds csa.bounds ggCSABounds l.inner u.inner wd.l wd.l.inner wd.u wd.u.inner

R Package Documentation

Browse R Packages

We want your feedback!

bcallaway11/csabounds Bounds on Distributional Treatment Effect Parameters

R/qott.R In bcallaway11/csabounds: Bounds on Distributional Treatment Effect Parameters

Defines functions ggCSABounds cia.bounds csa.bounds simpleBoot compute.DoTTBounds compute.csa.bounds setupTreatedData wd.u wd.u.inner wd.l wd.l.inner u.inner l.inner .Y0 .Y1

Documented in cia.bounds compute.csa.bounds compute.DoTTBounds csa.bounds ggCSABounds l.inner u.inner wd.l wd.l.inner wd.u wd.u.inner

R Package Documentation

Browse R Packages

We want your feedback!

bcallaway11/csabounds
Bounds on Distributional Treatment Effect Parameters

R/qott.R
In bcallaway11/csabounds: Bounds on Distributional Treatment Effect Parameters