#' Tuning for Local Balance (T4LB) Algorithm
#'
#' The T4LB algorithm optimze the balancing property of the estimated propensity score by minimizing a local balance
#' criteria, while still controlling for the global balance. In addition to the the absolute standardized difference (S/D)
#' of the covariates in the whole population, the T4LB selects the model with the minimum "mean" or "max" of the S/D in the
#' pre-specified local neighborhoods. Hence, the selected model would have the best balancing property among the candidate models,
#' and the estimated scores retain the interpretation as an appropriate propensity score, and the weights have the intepretation
#' as an inverse probability weights (IPW). T4LB can be used in propensity score estimation approaches that requires parameter
#' tuning, such as using machine learning approaches to estimate propensity scores.
#'
#' @param ps A matrix contains the estimated propensity scores from candidate models. Each column of the "ps" matrix is the
#' propensity scores estimated from each candidate model corresponding to each set of parameters.
#' @param weight A matrix contains the weight calculated from the corresponding input propensity score. Each column of
#' the "weight" matrix should correspond to each column of the "ps" matrix. If weight = NULL, the IPW of ATE
#' will be calculated using the input propensity scores.
#' @param X The covariate matrixes used for balance evaluation. It can be one or a list of covariate matrixes. If one covariate
#' matrix is provided, this matrix will be used for balance evaluation for all candidate models. If a list of
#' covariate matrixes are provided, each element of the list will be used for balance evaluation for each candidate models.
#' The length of the "X" list must equal to the number of column of the "ps"/"weight" matrix. The rows of "X" matrix
#' correspond to the subjects/participants, and the columns of the "X" matrix correspond to the covariates.
#' @param Z The binary treatment indicator vector. A vector with 2 unique numeric values in 0 = untreated and 1 = treated.
#' @param ej The matrix of the local neighborhoods, which contains two columns of positive values greater or equal to 0 and less or equal to 1.
#' The rows of ej represent the neighborhoods. The first column is the start point of the local neighborhoods.
#' The second column is the end point of the local neighborhoods.
#' @param para The matrix whose each row contains the set of parameters for each candidate model.
#' @param criteria Choose "mean" to use the mean of the absolute S/D among all covariates and neighborhoods as the criteria of local balance;
#' choose "max" to use the max of the absolute S/D among all covariates and neighborhoods as the criteria of local balance.
#' Default is "mean".
#'
#' @return A list containing the following components:
#' \itemize{
#' \item{"weight"}: The optimal weight from the selected model with the best local balance.
#' \item{"propensity.score"}: The optimal propensity score from the selected model with the best local balance.
#' \item{"parameter"}: The set of parameters of the selected model with the best local balance.
#' \item{"balance"}: The matrix containing the global and local balance of the selected propensity score model.
#' The first row contains the absolute S/D of each covariates in the whole study population,
#' which represents the global covariate balance. The rest rows contain the absolute
#' S/D of the covariates in the PS-stratified sub-population corresponding to the loal neighborhoods of "ej",
#' which represent the local covariate balance.
#' }
#'
#' @references Lee, B. K., Lessler, J. and Stuart, E. A. (2009) Improving propensity score weighting using
#' machine learning. Statistics in Medicine, 29, 337-346.
#' @examples
#' KS = Kang_Schafer_Simulation(n = 1500, seeds = 371834)
#' # Misspecified propensity score model
#' X = KS$Data[,7:10]
#' Z = KS$Data[,2]
#' # Local neighborhoods
#' ej = cbind(seq(0,0.8,0.2), seq(0.2,1,0.2))
#' print(ej)
#' # Transform X with Gaussian kernel using 10 sigma
#' x.k = Gaussian_Kernel_feature(X = X, n_sigma = 10)
#' sigma = rep(x.k$sigma_seq, each = 3)
#' r = as.vector(t(x.k$r))
#' para = cbind("sigma" = sigma, "l" = r)
#' # Fitting the kernelized logistic regression model
#' m = matrix(seq(1,30), nrow = 10, ncol = 3, byrow = T)
#' k.ps = matrix(nrow = nrow(X), ncol = nrow(para))
#' k.w = k.ps
#' for (i in 1:10) {
#' for (j in 1:3) {
#' log.fit = glm(Z~x.k$features[[i]][[j]], family = "binomial")
#' A = weight.calculate.ps(Z, log.fit$fitted.values, standardize = T)
#' k.ps[,m[i,j]] = A$ps
#' k.w[,m[i,j]] = A$weight
#' }
#' }
#' rm(A,log.fit,i,j,m)
#' best.glm = T4LB(ps = k.ps, weight = k.w, X = X, Z = Z, ej = ej, para = para)
#' str(t4lb.find)
#' best.glm$balance
#'
#' @export
T4LB = function(ps, weight, X, Z, ej, para, criteria = "mean") {
n = nrow(ps)
p = ncol(ps)
if (p != nrow(para)) {
stop("Please make sure there is one set of propensity score for each set of parameters")
}
if (is.list(X) && length(X)!= p) {
stop("The number of the covariate matrix for balance checking should be one or as many as the number of sets of parameters.")
}
# IPW for ATE will be calculated if weight is not provided. Weight will be standardized.
if (is.null(weight)) {
weight = matrix(nrow = n, ncol = p)
for (i in 1:p) {
weight[,i] = weight.calculate.ps(Z = Z, ps = ps[,i], standardize = TRUE)
}
} else if (!is.null(weight)) {
weight = weight
}
std = list()
std.summary = matrix(nrow = p, ncol = 5)
for (i in 1:p) {
if (is.matrix(X)) {
A = abs(Fitted.strata.diff(X = X, Z = Z, ps = ps[,i], weight = weight[,i], ej))
} else if (is.list(X)) {
A = abs(Fitted.strata.diff(X = X[[i]], Z = Z, ps = ps[,i], weight = weight[,i], ej))
}
std[[i]] = A
std.summary[i,] = Standardized.Diff.Stat(std = A, ej = ej, p1 = 15, p2 = 5)
}
best.I = CBPS.BestIndex(std.list = std.summary, para)
if (is.na(best.I$best.index[1]) | is.na(best.I$best.index[2])) {
print("Uncontrolledly large global covariate balance")
out = NULL
} else {
if (criteria == "mean") {
I = best.I$best.index[1]
} else if (criteria == "max") {
I = best.I$best.index[2]
}
out.ps = ps[,I]
out.w = weight[,I]
out.para = para[I,]
names(out.para) = colnames(para)
out = list(weight = out.w, propensity.score = out.ps, parameter = out.para,
balance = std[[I]])
}
return(out)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.