#' @title High-Dimensional Mahalanobis balancing
#' @description High-Dimensional Mahalanobis balancing is a multivariate
#' perspective of approximate covariate balancing method to estimate average
#' treatment effect in causal inference. We use function \code{hdMB} to select variable then
#' Mahalanobis balancing method will be applied to estimate ATE.
#'
#' @details
#' \code{group1},\code{group2}
#' \itemize{
#' \item To estimate \eqn{E(Y (1))} (average treatment effect for group 1),
#' you need to set \code{group1} = 1 and ignore \code{group2}. Similarly,
#' To estimate \eqn{E(Y (0))} (average treatment effect for group 0),
#' you need to set \code{group1} = 0 and ignore \code{group2}.
#'
#' \item To estimate average treatment effect on the control group \eqn{E(Y (1) | T = 0)},
#' you need to set \code{group1} = 1 and \code{group2} = 0. Similarly, To estimate
#' average treatment effect on the treated group \eqn{E(Y (0) | T = 1)},
#' you need to set \code{group1} = 0 and \code{group2} = 1.
#'
#' \item This function is feasible when there are more than two groups.
#' }
#'
#' \code{method} can be a valid string, including
#' \itemize{
#' \item "MB": We choose the weighting matrix \eqn{{W}_1=[diag(\hat{\Sigma})]^{-1}}
#' where \eqn{\hat{\Sigma}} denotes sample covariance matrix.
#' \item "MB2": We choose the weighting matrix \eqn{{W}_2={[\hat{\Sigma}]^{-1}}}
#' where \eqn{\hat{\Sigma}} denotes sample covariance matrix.
#' }
#'
#' \code{delta.space} grid of values for the tuning parameter, a vector of
#' candidate values for the degree of approximate covariate balance. The default
#' is c(1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6).
#'
#' \code{GASMD.bound} GASMD is an imbalance measure. We first standardize our covariate,
#' then \eqn{GASMD_{a,k} = (X_{1,a,k} + ... + X_{n_a,a,k}) / n_a - mean_k} where \eqn{X_{a}} denotes
#' the covariate in group a, k denotes the k-th dimension of covariate and \eqn{n_a} denotes the
#' sample size in group a. Note that GASMD is different from ASMD. In our paper, we suggest
#' using ASMD to select the variable. However, when there are more than two groups,
#' ASMD is unsuitable. Therefore, we use GASMD instead of ASMD to select the variable.
#'
#' @param x covariates.
#' @param treat treatment indicator vector.
#' @param group1 see Details.
#' @param group2 see Details, Default: NA.
#' @param outcome outcome vector.
#' @param method a string that takes values in {"MB", "MB2"}, See Details, Default: 'MB'.
#' @param delta.space a vector of candidate values for tuning parameter, See Details,
#' Default: c(1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6).
#' @param GASMD.bound An upper bound for choosing the variables, See Details, Default: 0.1.
#' @param iterations iteration times in optimization problem, Default: 1000.
#'
#' @return a hdMB object with the following attributes:
#' \itemize{
#' \item{GMIM:}{ Generalized Multivariate Imbalance Measure that defines in our paper.}
#' \item{GASMD:}{ Generalized Absolute Standardized Mean Difference, See Details.}
#' }
#'
#' @examples
#' #hdMB
#' ##generating high-dimensional data
#' set.seed(0521)
#' data <- si.data(sample.size = 200, dimension = 100)
#' dimension <- dim(data$X)[2]
#'
#' ##estimating ATE##
#'
#' ##choosing variable
#' hdMB1 <- hdMB(x = data$X, treat = data$Tr, group1 = 1, outcome = data$Y, method = "MB")
#' hdMB1$GMIM
#' threshold <- 24
#' index1 <- rank(hdMB1$GASMD) >= (dimension - threshold + 1)
#'
#' ##choosing variable
#' hdMB2 <- hdMB(x = data$X, treat = data$Tr, group1 = 0, outcome = data$Y, method = "MB")
#' hdMB2$GMIM
#' threshold <- 25
#' index2 <- rank(hdMB2$GASMD) >= (dimension - threshold + 1)
#'
#' ##an estimate of ATE
#' result1 <- MB(x = data$X[,index1], treat = data$Tr, group1 = 1, outcome = data$Y)
#' result2 <- MB(x = data$X[,index2], treat = data$Tr, group1 = 0, outcome = data$Y)
#' result1$AT - result2$AT
#'
#' ##estimating ATC##
#' hdMB3 <- hdMB(x = data$X, treat = data$Tr, group1 = 1, group2 = 0, outcome = data$Y, method = "MB")
#' hdMB3$GMIM
#' threshold <- 7
#' index3 <- rank(hdMB3$GASMD) >= (dimension - threshold + 1)
#' result3 <- MB(x = data$X[,index3], treat = data$Tr, group1 = 1, group2 = 0, outcome = data$Y)
#'
#' ##an estimate of ATC
#' result3$AT - mean(data$Y[data$Tr == 0])
#' @rdname hdMB
#' @export
hdMB <- function(x, treat , group1 , group2 = NA, outcome, method = "MB", delta.space = c(1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6), GASMD.bound = 0.1, iterations = 1000){
data.matrix <- cbind(x , treat , outcome)
dimension <- dim(x)[2]
sample.size <- dim(x)[1]
group1.number <- sum(treat == group1)
x <- scale(x)
GASMD <- rep(NA,dimension)
if(is.na(group2)){GASMD <- abs(apply(x[treat == group1,],2,mean) - apply(x,2,mean))}
else{GASMD <- abs(apply(x[treat == group1,],2,mean) - apply(x[treat == group2,],2,mean))}
if(sum(abs(GASMD) > GASMD.bound) == 0){stop("GASMD.bound is too large!")}
GMIM <- rep(0,sum(abs(GASMD) > GASMD.bound))
for(threshold in 1:sum(abs(GASMD) > GASMD.bound)){
index <- rank(GASMD) >= (dimension - threshold + 1)
GMIM[threshold] <- MB(x = x[,index], treat = treat, group1 = group1, group2 = group2, outcome = outcome, method = method, delta.space = delta.space)$GMIM / threshold
}
return(list(GMIM = GMIM, GASMD = GASMD))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.