R/barplot.R

Defines functions iv.diagnosis.plot iv.diagnosis

Documented in iv.diagnosis iv.diagnosis.plot

#' Diagnostics of instrumental variable analysis
#'
#' @param Y A numeric vector of outcomes.
#' @param D A vector of endogenous variables.
#' @param Z A vector of instruments.
#' @param X A vector, matrix or data frame of (exogenous) covariates.
#'
#' @return a list or data frame
#' \describe{
#' \item{x.mean1}{Mean of X under Z = 1 (reported if Z is binary)}
#' \item{x.mean0}{Mean of X under Z = 0 (reported if Z is binary)}
#' \item{coef}{OLS coefficient of X ~ Z (reported if Z is not binary)}
#' \item{se}{Standard error of OLS coefficient (reported if Z is not binary)}
#' \item{p.val}{p-value of the independence of Z and X (Fisher's test if both are binary, logistic regression if Z is binary, linear regression if Z is continuous)}
#' \item{x.sd}{sample standard deviations of X}
#' \item{stand.diff}{Standardized difference (reported if Z is binary)}
#' \item{bias.ratio}{Bias ratio}
#' \item{bias.amplify}{Amplification of bias ratio}
#' \item{bias.ols}{Bias of OLS}
#' \item{bias.2sls}{Bias of two stage least squares)}
#' }
#'
#' @author Qingyuan Zhao
#' @references
#' \itemize{
#' \item{Baiocchi, M., Cheng, J., & Small, D. S. (2014). Instrumental variable methods for causal inference. Statistics in Medicine, 33(13), 2297-2340.}
#' \item{Jackson, J. W., & Swanson, S. A. (2015). Toward a clearer portrayal of confounding bias in instrumental variable applications. Epidemiology, 26(4), 498.}
#' \item{Zhao, Q. & Small, D. S. Graphical diagnosis of confounding bias in instrumental variable analysis. Epidemiology, forthcoming.}
#'}
#'
#' @importFrom stats fisher.test
#' @importFrom stats glm
#' @importFrom stats sd
#'
#' @export
#'
#' @examples
#' n <- 10000
#' Z <- rbinom(n, 1, 0.5)
#' X <- data.frame(matrix(rbinom(n * 5, 1, 0.5), n))
#' D <- rbinom(n, 1, plogis(Z + X[, 1] + X[, 2] + X[, 3]))
#' Y <- D + X[, 1] + X[, 2] + rnorm(n)
#' output <- iv.diagnosis(Y, D, Z, X)
#' print(output)
#' iv.diagnosis.plot(output)
#'
iv.diagnosis <- function(Y, D, Z, X) {

    if (length(dim(X)) == 2) { ## if x is a matrix/data.frame
        return(t(apply(X, 2, function(X) unlist(iv.diagnosis(Y, D, Z, X)))))
    }

    if (length(unique(X)) == 2 & length(unique(Z)) == 2) {
        p.val <- fisher.test(X, Z)$p.value
        stand.diff <- (mean(X[Z==1])-mean(X[Z==0]))/(sqrt((var(X[Z==1])+var(X[Z==0]))/2));
    } else if (length(unique(X)) == 2) {
        p.val <- summary(glm(X ~ Z, family = "binomial"))$coefficients[2, 4]
    } else {
        p.val <- summary(lm(X ~ Z))$coefficients[2, 4]
    }
    prev.diff.ratio <- (lm(X ~ Z)$coef[2])/(lm(X ~ D)$coef[2]);
    bias.ratio <- prev.diff.ratio / (lm(D ~ Z)$coef[2])
    bias.amplify <- lm(Y ~ X + D)$coef[2]
    bias.ols <- bias.amplify * (lm(X ~ D)$coef[2])
    bias.2sls <- bias.amplify * (lm(X ~ Z)$coef[2]) / (lm(D ~ Z)$coef[2])
    names(bias.ratio) <- NULL
    names(bias.amplify) <- NULL
    names(bias.ols) <- NULL
    names(bias.2sls) <- NULL
    ## stand.diff.high.treatment <- (mean(x[d==1])-mean(x[d==0]))/sqrt((var(x[d==1])+var(x[d==0]))/2);
    if (length(unique(Z)) == 2) {
        output <- list(x.mean1 = mean(X[Z == 1]),
                       x.mean0 = mean(X[Z == 0]),
                       p.val = p.val,
                       stand.diff = stand.diff,
                       x.sd = sd(X),
                       bias.ratio = bias.ratio,
                       bias.amplify = bias.amplify,
                       bias.ols = bias.ols,
                       bias.2sls = bias.2sls)
    } else {
        output <- list(coef = lm(X ~ Z)$coef[2],
                       se = summary(lm(X ~ Z))$coefficients[2, 2],
                       p.val = p.val,
                       x.sd = sd(X),
                       bias.ratio = bias.ratio,
                       bias.amplify = bias.amplify,
                       bias.ols = bias.ols,
                       bias.2sls = bias.2sls)
    }
    return(output)
}

#' @describeIn iv.diagnosis IV diagnostic plot
#'
#' @param output Output from \code{iv.diagnosis}.
#' @param order.by Order the bars by bias amplifying factor (variance of the outcome explained by each covariate), bias of the OLS estimate, or bias of the 2SLS estimate.
#' @param bias.ratio Add bias ratios (text) to the plot?
#' @param base_size size of the axis labels
#' @param text_size size of the text (bias ratios)
#'
#' @export
#' @import reshape2
#' @import ggplot2
#'
iv.diagnosis.plot<- function(output, order.by = c("bias.amplify", "ols.bias", "2sls.bias"), bias.ratio = TRUE,  base_size = 15, text_size = 5) {

    order.by <- match.arg(order.by, c("bias.amplify", "ols.bias", "2sls.bias"))

    output <- data.frame(output)
    o <- switch(order.by,
                bias.amplify = order(abs(output$bias.amplify) * output$x.sd),
                bias.ols = order(abs(output$bias.ols)),
                bias.2sls = order(abs(output$bias.2sls)))
    output$var <- factor(rownames(output), rownames(output)[o])
    rownames(output) <- NULL
    df <- data.frame(output[, c("var", "bias.ols", "bias.2sls")])
    df <- melt(df, id.vars = "var")
    colnames(df) <- c("var", "method", "bias")
    levels(df$method) <- c("ols", "2sls")
    df$method <- factor(df$method, levels = c("2sls", "ols"))

    p <- ggplot() + geom_bar(aes_string(x = "var", y = "bias", fill = "method", linetype = "method"), stat = "identity", color = "black", position=position_dodge(), data = df) + coord_flip() + theme_bw(base_size) + ylim(range(df$bias) * 1.2) + xlab("") + scale_fill_discrete(breaks = c("ols","2sls")) + scale_linetype_discrete(breaks = c("ols","2sls"))
    if (bias.ratio) {
        p <- p + geom_text(mapping = aes(y = max(df$bias) * 1.1, x = var, label = round(bias.ratio, 2)), data = data.frame(output), size = text_size)
    }
    p
}

Try the ivmodel package in your browser

Any scripts or data that you put into this service are public.

ivmodel documentation built on Nov. 17, 2017, 4:09 a.m.