#' @title Two-Stage Hard Thresholding
#' @description Perform Two-Stage Hard Thresholding method, which provides the robust inference of the treatment effect in the presence of invalid instrumental variables.
#'
#' @param Y The outcome observation, a vector of length \eqn{n}.
#' @param D The treatment observation, a vector of length \eqn{n}.
#' @param Z The instrument observation of dimension \eqn{n \times p_z}.
#' @param X The covariates observation of dimension \eqn{n \times p_x}.
#' @param intercept Whether the intercept is included. (default = \code{TRUE})
#' @param method The method used to estimate the reduced form parameters. \code{"OLS"} stands for ordinary least squares, \code{"DeLasso"} stands for the debiased Lasso estimator, and \code{"Fast.DeLasso"} stands for the debiased Lasso estimator with fast algorithm. (default = \code{"OLS"})
#' @param voting The voting option used to estimate valid IVs. \code{'MP'} stands for majority and plurality voting, \code{'MaxClique'} stands for finding maximal clique in the IV voting matrix, and \code{'Conservative'} stands for conservative voting procedure. Conservative voting is used to get an initial estimator of valid IVs in the Searching-Sampling method. (default= \code{'MaxClique'}).
#' @param robust If \code{TRUE}, the method is robust to heteroskedastic errors. If \code{FALSE}, the method assumes homoskedastic errors. (default = \code{FALSE})
#' @param alpha The significance level for the confidence interval. (default = \code{0.05})
#' @param tuning.1st The tuning parameter used in the 1st stage to select relevant instruments. If \code{NULL}, it will be generated data-dependently, see Details. (default=\code{NULL})
#' @param tuning.2nd The tuning parameter used in the 2nd stage to select valid instruments. If \code{NULL}, it will be generated data-dependently, see Details. (default=\code{NULL})
#'
#' @details When \code{robust = TRUE}, the \code{method} will be input as \code{’OLS’}.
#' When \code{voting = MaxClique} and there are multiple maximum cliques, \code{betaHat},\code{beta.sdHat},\code{ci}, and \code{VHat} will be list objects
#' where each element of list corresponds to each maximum clique.
#' As for tuning parameter in the 1st stage and 2nd stage, if do not specify, for method "OLS" we adopt \eqn{\sqrt{\log n}} for both tuning parameters, and for other methods
#' we adopt \eqn{\max{(\sqrt{2.01 \log p_z}, \sqrt{\log n})}} for both tuning parameters.
#
#' @return \code{TSHT} returns an object of class "TSHT", which is a list containing the following components:
#' \item{\code{betaHat}}{The estimate of treatment effect.}
#' \item{\code{beta.sdHat}}{The estimated standard error of \code{betaHat}.}
#' \item{\code{ci}}{The 1-alpha confidence interval for \code{beta}.}
#' \item{\code{SHat}}{The set of selected relevant IVs.}
#' \item{\code{VHat}}{The set of selected relevant and valid IVs.}
#' \item{\code{voting.mat}}{The voting matrix.}
#' \item{\code{check}}{The indicator that the majority rule is satisfied.}
#' @export
#' @examples
#' data("lineardata")
#' Y <- lineardata[,"Y"]
#' D <- lineardata[,"D"]
#' Z <- as.matrix(lineardata[,c("Z.1","Z.2","Z.3","Z.4","Z.5","Z.6","Z.7","Z.8")])
#' X <- as.matrix(lineardata[,c("age","sex")])
#' TSHT.model <- TSHT(Y=Y,D=D,Z=Z,X=X)
#' summary(TSHT.model)
#'
#' @references {
#' Guo, Z., Kang, H., Tony Cai, T. and Small, D.S. (2018), Confidence intervals for causal effects with invalid instruments by using two-stage hard thresholding with voting, \emph{J. R. Stat. Soc. B}, 80: 793-815. \cr
#' }
TSHT <- function(Y,D,Z,X,intercept=TRUE, method=c("OLS","DeLasso","Fast.DeLasso"),
voting = c('MaxClique','MP','Conservative'), robust = FALSE, alpha=0.05,
tuning.1st=NULL, tuning.2nd=NULL) {
stopifnot(is.logical(robust))
method = match.arg(method)
if(method %in% c("DeLasso", "Fast.DeLasso") && robust==TRUE){
robust = FALSE
cat(sprintf("For methods %s, robust is set FALSE,
as we only consider homoscedastic noise.\n", method))
}
voting = match.arg(voting)
stopifnot(!missing(Y),(is.numeric(Y) || is.logical(Y)),is.vector(Y)||(is.matrix(Y) || is.data.frame(Y)) && ncol(Y) == 1)
stopifnot(all(!is.na(Y)))
if (is.vector(Y)) {
Y <- cbind(Y)
}
Y = as.numeric(Y)
# Check D
stopifnot(!missing(D),(is.numeric(D) || is.logical(D)),is.vector(D)||(is.matrix(D) || is.data.frame(D)) && ncol(D) == 1)
stopifnot(all(!is.na(D)))
if (is.vector(D)) {
D <- cbind(D)
}
D = as.numeric(D)
# Check Z
stopifnot(!missing(Z),(is.numeric(Z) || is.logical(Z)),(is.vector(Z) || is.matrix(Z)))
stopifnot(all(!is.na(Z)))
if (is.vector(Z)) {
Z <- cbind(Z)
}
# Check dimesions
stopifnot(nrow(Y) == nrow(D), nrow(Y) == nrow(Z))
# Check X, if present
if(!missing(X)) {
stopifnot((is.numeric(X) || is.logical(X)),(is.vector(X))||(is.matrix(X) && nrow(X) == nrow(Z)))
stopifnot(all(!is.na(X)))
if (is.vector(X)) {
X <- cbind(X)
}
W = cbind(Z,X)
} else {
W = Z
}
# centralize W
W = scale(W, center = T, scale = F)
# All the other argument
stopifnot(is.logical(intercept))
stopifnot(is.numeric(alpha),length(alpha) == 1,alpha <= 1,alpha >= 0)
stopifnot(method=='OLS' | method=='DeLasso'|method =="Fast.DeLasso")
stopifnot(voting=='MP' | voting=='MaxClique' | voting == 'Conservative')
# Derive Inputs for TSHT
n = length(Y); pz=ncol(Z)
if(method == "OLS") {
inputs = TSHT.OLS(Y,D,W,pz,intercept)
A = t(inputs$WUMat) %*% inputs$WUMat / n
} else if (method == "DeLasso") {
inputs = TSHT.SIHR(Y,D,W,pz,intercept)
A = diag(pz)
} else if (method =="Fast.DeLasso"){
inputs = TSHT.DeLasso(Y,D,W,pz,intercept)
A = diag(pz)
}
if (robust) {
ITT_Y = inputs$ITT_Y;
ITT_D = inputs$ITT_D;
WUMat = inputs$WUMat;
V.Gamma = inputs$V.Gamma
V.gamma = inputs$V.gamma
C = inputs$C
} else {
# Reduced form estimator
ITT_Y = inputs$ITT_Y;
ITT_D = inputs$ITT_D;
WUMat = inputs$WUMat;
SigmaSqD = inputs$SigmaSqD;
SigmaSqY = inputs$SigmaSqY;
SigmaYD=inputs$SigmaYD;
V.Gamma = SigmaSqY * t(WUMat)%*%WUMat / n
V.gamma = SigmaSqD * t(WUMat)%*%WUMat / n
C = SigmaYD * t(WUMat)%*%WUMat / n
}
# Estimate Valid IVs
SetHats = TSHT.VHat(n, ITT_Y, ITT_D, V.Gamma, V.gamma, C, voting, method=method, tuning.1st=tuning.1st, tuning.2nd=tuning.2nd)
VHat = SetHats$VHat; SHat = SetHats$SHat
check = T
if(typeof(VHat)=="list"){
if(length(VHat[[1]])< length(SHat)/2){
cat('Majority rule fails.','\n')
check=F
}
betaHat = beta.sdHat = ci = vector("list", length(VHat))
# name of list
names(VHat) = names(betaHat) = names(beta.sdHat) = names(ci) = paste0("MaxClique",1:length(VHat))
for(i.VHat in 1:length(VHat)){
VHat.i = VHat[[i.VHat]]
AVHat = solve(A[VHat.i,VHat.i])
betaHat.i = as.numeric((t(ITT_Y[VHat.i]) %*% AVHat %*% ITT_D[VHat.i]) / (t(ITT_D[VHat.i]) %*% AVHat %*% ITT_D[VHat.i]))
temp <- (V.Gamma -2*betaHat.i*C+betaHat.i^2*V.gamma)[VHat.i,VHat.i]
# 1-step iteration
AVHat = solve(temp)
betaHat.i = as.numeric((t(ITT_Y[VHat.i]) %*% AVHat %*% ITT_D[VHat.i]) / (t(ITT_D[VHat.i]) %*% AVHat %*% ITT_D[VHat.i]))
temp <- (V.Gamma -2*betaHat.i*C+betaHat.i^2*V.gamma)[VHat.i,VHat.i]
betaVarHat.i <- t(ITT_D[VHat.i])%*% AVHat %*%temp%*% AVHat %*%ITT_D[VHat.i] / (n*(t(ITT_D[VHat.i])%*% AVHat %*%ITT_D[VHat.i])^2)
# U-2*betaHat*UV+betaHat^2*V
ci.i = c(betaHat.i - qnorm(1-alpha/2) * sqrt(betaVarHat.i),betaHat.i + qnorm(1-alpha/2) * sqrt(betaVarHat.i))
# store
betaHat[[i.VHat]] = betaHat.i
beta.sdHat[[i.VHat]] = as.numeric(sqrt(betaVarHat.i))
ci[[i.VHat]] = ci.i
}
if(!is.null(colnames(Z))){
SHat = colnames(Z)[SHat]
VHat = lapply(VHat, FUN=function(x) colnames(Z)[x])
}
}else{
if(length(VHat)< length(SHat)/2){
cat('Majority rule fails.','\n')
check=F
}
AVHat = solve(A[VHat,VHat])
betaHat = as.numeric((t(ITT_Y[VHat]) %*% AVHat %*% ITT_D[VHat]) / (t(ITT_D[VHat]) %*% AVHat %*% ITT_D[VHat]))
temp <- (V.Gamma -2*betaHat*C+betaHat^2*V.gamma)[VHat,VHat]
# 1-step iteration
AVHat = solve(temp)
betaHat = as.numeric((t(ITT_Y[VHat]) %*% AVHat %*% ITT_D[VHat]) / (t(ITT_D[VHat]) %*% AVHat %*% ITT_D[VHat]))
temp <- (V.Gamma -2*betaHat*C+betaHat^2*V.gamma)[VHat,VHat]
betaVarHat <- t(ITT_D[VHat])%*% AVHat %*%temp%*% AVHat %*%ITT_D[VHat] / (n*(t(ITT_D[VHat])%*% AVHat %*%ITT_D[VHat])^2)
# U-2*betaHat*UV+betaHat^2*V
ci = c(betaHat - qnorm(1-alpha/2) * sqrt(betaVarHat),betaHat + qnorm(1-alpha/2) * sqrt(betaVarHat))
beta.sdHat = as.numeric(sqrt(betaVarHat))
if (!is.null(colnames(Z))) {
SHat = colnames(Z)[SHat]
VHat = colnames(Z)[VHat]
}
}
TSHTObject <- list(betaHat=betaHat,beta.sdHat=beta.sdHat,ci=ci,SHat=SHat,
VHat = VHat,voting.mat=SetHats$voting.mat,check = check,alpha=alpha)
class(TSHTObject) <- 'TSHT'
return(TSHTObject)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.