Nothing
#' Summarize and plot covariate balance
#'
#' Summarizes and plots covariate balance between treatment and control groups
#' before and after the distribution balancing weighting.
#'
#' Estimated weights are normalized to sum to one. Position of the legend is
#' determined internally. Set \code{absolute = TRUE} and \code{sort = TRUE} for choosing
#' a better location automatically.
#'
#' @export
#'
#' @param x an object of class “dbw”, usually, a result of a call to \code{\link{dbw}}.
#' @param addcov a one-sided formula specifying additional covariates whose
#' balance is checked. Covariates containing NAs are automatically dropped.
#' @param standardize a logical value indicating whether weighted mean
#' differences are standardized or not.
#' @param plot a logical value indicating whether a covariate balance plot is
#' displayed.
#' @param absolute a logical value indicating whether the absolute values of
#' differences in weighted means are used in the covariate balance plot.
#' @param threshold an optional numeric vector used as threshold markers in the
#' covariate balance plot.
#' @param sort a logical value indicating whether covariates in the covariate
#' balance plot are sorted by the values of differences in the weighted means
#' before the distribution balancing weighting.
#' @param ... other graphical parameters (see \code{\link[graphics]{par}}).
#'
#' @return A matrix whose rows are the covariates and columns are the
#' differences in the (un)standardized weighted mean between the treatment and
#' control groups before (\code{diff.un}) and after (\code{diff.adj}) the
#' distribution balancing weighting. The standardized weighted mean is the
#' weighted mean divided by the standard deviation of the covariate for the
#' target population (the treatment group for the average treatment effects on
#' the treated estimation, the control group for the average treatment effects
#' on the controlled estimation and the whole population for the other
#' quantity of interest). The differences in the categorical variables are not
#' standardized.
#'
#' @author Hiroto Katsumata
#'
#' @examples
#' # Simulation from Kang and Shafer (2007) and Imai and Ratkovic (2014)
#' # ATE estimation
#' # True ATE is 10
#' tau <- 10
#' set.seed(12345)
#' n <- 1000
#' X <- matrix(stats::rnorm(n * 4, mean = 0, sd = 1), nrow = n, ncol = 4)
#' prop <- 1 / (1 + exp(X[, 1] - 0.5 * X[, 2] +
#' 0.25 * X[, 3] + 0.1 * X[, 4]))
#' treat <- rbinom(n, 1, prop)
#' y <- 210 +
#' 27.4 * X[, 1] + 13.7 * X[, 2] + 13.7 * X[, 3] + 13.7 * X[, 4] +
#' tau * treat + stats::rnorm(n = n, mean = 0, sd = 1)
#' ybinom <- (y > 210) + 0
#' df0 <- data.frame(X, treat, y, ybinom)
#' colnames(df0) <- c("x1", "x2", "x3", "x4", "treat", "y", "ybinom")
#'
#' # Variables for a misspecified model
#' Xmis <- data.frame(x1mis = exp(X[, 1] / 2),
#' x2mis = X[, 2] * (1 + exp(X[, 1]))^(-1) + 10,
#' x3mis = (X[, 1] * X[, 3] / 25 + 0.6)^3,
#' x4mis = (X[, 2] + X[, 4] + 20)^2)
#'
#' # Data frame and formulas for propensity score estimation
#' df <- data.frame(df0, Xmis)
#' formula_ps_c <- stats::as.formula(treat ~ x1 + x2 + x3 + x4)
#' formula_ps_m <- stats::as.formula(treat ~ x1mis + x2mis +
#' x3mis + x4mis)
#'
#' # Formula for a misspecified outcome model
#' formula_y <- stats::as.formula(y ~ x1mis + x2mis + x3mis + x4mis)
#'
#' # Set plot margins
#' oldpar <- par(no.readonly = TRUE) # Just for adjusting plot margins
#' par(mar = c(5.1, 5.1, 4.1, 2.1)) # Just for adjusting plot margins
#'
#' # Misspecified propensity score model
#'
#' # Distribution balancing weighting without regularization
#' fitdbwm <- dbw(formula_y = formula_y, formula_ps = formula_ps_m,
#' estimand = "ATE", method = "dbw",
#' method_y = "wls", data = df, normalize = TRUE,
#' vcov = TRUE, lambda = 0, weights = NULL,
#' clevel = 0.95)
#' plot(fitdbwm, addcov = ~ x1 + x2 + x3 + x4, threshold = c(0.1, 0.2))
#'
#' # Non-absolute values
#' plot(fitdbwm, addcov = ~ x1 + x2 + x3 + x4, absolute = FALSE,
#' threshold = c(0.1, 0.2))
#'
#' # No sorting
#' plot(fitdbwm, addcov = ~ x1 + x2 + x3 + x4,
#' threshold = c(0.1, 0.2), sort = FALSE)
#'
#'
#' # Covariate balancing weighting function without regularization
#' fitcbwm <- dbw(formula_y = formula_y, formula_ps = formula_ps_m,
#' estimand = "ATE", method = "cb",
#' method_y = "wls", data = df, normalize = TRUE,
#' vcov = TRUE, lambda = 0, weights = NULL,
#' clevel = 0.95)
#' plot(fitcbwm, addcov = ~ x1 + x2 + x3 + x4, threshold = c(0.1, 0.2))
#'
#' # Entropy balancing weighting function without regularization
#' fitebwm <- dbw(formula_y = formula_y, formula_ps = formula_ps_m,
#' estimand = "ATE", method = "eb",
#' method_y = "wls", data = df, normalize = TRUE,
#' vcov = TRUE, lambda = 0, weights = NULL,
#' clevel = 0.95)
#' plot(fitebwm, addcov = ~ x1 + x2 + x3 + x4, threshold = c(0.1, 0.2))
#'
#' # Standard logistic regression
#' fitmlem <- dbw(formula_y = formula_y, formula_ps = formula_ps_m,
#' estimand = "ATE", method = "mle",
#' method_y = "wls", data = df, normalize = FALSE,
#' vcov = TRUE, lambda = 0, weights = NULL,
#' clevel = 0.95)
#' plot(fitmlem, addcov = ~ x1 + x2 + x3 + x4, threshold = c(0.1, 0.2))
#'
#' par(oldpar)
#'
#' # Display the covariate balance matrix
#' cb <- plot(fitdbwm, addcov = ~ x1 + x2 + x3 + x4, plot = FALSE)
#' cb
plot.dbw <- function (x, addcov = NULL, standardize = TRUE, plot = TRUE,
absolute = TRUE, threshold = 0, sort = TRUE, ...) {
data <- x$data
formula_ps <- stats::as.formula(x$formula_ps)
model_ps <- stats::model.frame(formula_ps, data = data)
response <- c(stats::model.extract(model_ps, "response"))
x_ps <- as.matrix(stats::model.matrix(formula_ps, model_ps))
formula_y <- stats::as.formula(x$formula_y)
model_y <- stats::model.frame(formula_y, data = data)
z <- as.matrix(stats::model.matrix(formula_y, model_y))
names_z <- setdiff(x = colnames(z), y = colnames(x_ps))
z <- z[, names_z]
cov <- as.matrix(cbind(x_ps, z))
colnames(cov) <- c(colnames(x_ps), names_z)
original_weights <- x$original_weights
est_weights <- x$est_weights
if (is.null(addcov) == 0) {
attr(data, which = "na.action") <- stats::na.pass
formula2 <- stats::as.formula(addcov)
model2 <- stats::model.frame(formula2, data = data)
cov2 <- as.matrix(stats::model.matrix(formula2, model2))
incomplete_cov2 <- which(stats::complete.cases(t(cov2)) == 0)
if (length(incomplete_cov2) > 0) {
cov2 <- cov2[, -incomplete_cov2]
warning("Additional covariates which contain NA values are dropped")
}
colnamescov <- colnames(cov)
colnamescov2 <- colnames(cov2)[-1]
cov <- as.matrix(cbind(cov, cov2[, -1]))
colnames(cov) <- c(colnamescov, colnamescov2)
}
type <- apply(cov, 2, function(cov) bincont(cov))
type <- factor(type, levels = c("continuous", "binary"))
if (x$estimand == "AO") {
diff.adj <- apply(original_weights * cov, 2, sum) / sum(original_weights) -
apply(est_weights * cov * response, 2, sum) /
sum(est_weights * response)
diff.un <- apply(original_weights * cov, 2, sum) / sum(original_weights) -
apply(original_weights * cov * response, 2, sum) /
sum(original_weights * response)
} else { # ATT, ATC, ATE, and ATEcombined
diff.adj <- apply(est_weights * cov * response, 2, sum) /
sum(est_weights * response) -
apply(est_weights * cov * (1 - response), 2, sum) /
sum(est_weights * (1 - response))
diff.un <- apply(original_weights * cov * response, 2, sum) /
sum(original_weights * response) -
apply(original_weights * cov * (1 - response), 2, sum) /
sum(original_weights * (1 - response))
}
covariates <- factor(colnames(cov), levels = colnames(cov))
res <- data.frame(covariates = covariates,
type = type,
diff.adj = diff.adj,
diff.un = diff.un)
if (standardize == TRUE) {
if (x$estimand == "ATT") {
std <-
apply(cov[response == 1, ], 2,
function(cov) weighted_sd(cov,
weights = original_weights[response == 1]))
} else if (x$estimand == "ATC") {
std <-
apply(cov[response == 0, ], 2,
function(cov) weighted_sd(cov,
weights = original_weights[response == 0]))
} else { # ATE, ATEcombined and AO
std <- apply(cov, 2, function(cov) weighted_sd(cov, weights = original_weights))
}
std[type == "binary"] <- 1
std[1] <- 1
res$diff.adj <- res$diff.adj / std
res$diff.un <- res$diff.un / std
}
rownames(res) <- NULL
if (plot == TRUE) {
plot_balance(result = res, standardize = standardize,
absolute = absolute, threshold = threshold, sort = sort)
}
invisible(res)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.