R/ate.R

Defines functions summary.ate.targeted print.summary.ate.targeted ate

Documented in ate

#' Augmented Inverse Probability Weighting estimator for the Average (Causal)
#' Treatment Effect. All nuisance models are here parametric (glm). For a more
#' general approach see the \code{cate} implementation. In this implementation
#' the standard errors are correct even when the nuisance models are
#' mis-specified (the influence curve is calculated including the term coming
#' from the parametric nuisance models). The estimate is consistent if either
#' the propensity model or the outcome model / Q-model is correctly specified.
#'
#' @title AIPW (doubly-robust) estimator for Average Treatment Effect
#' @param formula formula (see details below)
#' @param data data.frame
#' @param weights optional frequency weights
#' @param offset optional offset (character or vector). can also be specified in
#'   the formula.
#' @param family Exponential family argument for outcome model
#' @param nuisance outcome regression formula (Q-model)
#' @param propensity propensity model formula
#' @param all when TRUE all standard errors are calculated (default TRUE when
#'   exposure only has two levels)
#' @param labels optional treatment labels
#' @param adjust.nuisance adjust for uncertainty due to estimation of outcome
#'   regression model parameters
#' @param adjust.propensity adjust for uncertainty due to estimation of
#'   propensity regression model parameters
#' @param ... additional arguments to lower level functions
#' @return An object of class '\code{ate.targeted}' is returned. See
#'   \code{\link{targeted-class}} for more details about this class and its
#'   generic functions.
#' @details The formula may either be specified as: response ~ treatment |
#'   nuisance-formula | propensity-formula
#'
#' For example: \code{ate(y~a | x+z+a | x*z, data=...)}
#'
#' Alternatively, as a list: \code{ate(list(y~a, ~x+z, ~x*z), data=...)}
#'
#' Or using the nuisance (and propensity argument):
#' \code{ate(y~a, nuisance=~x+z, ...)}
#' @export
#' @seealso cate
#' @author Klaus K. Holst
#' @aliases ate
#' @examples
#' m <- lava::lvm(y ~ a+x, a~x) |>
#'      lava::distribution(~y, value = lava::binomial.lvm()) |>
#'      lava::ordinal(K=4, ~a) |>
#'      transform(~a, value = factor)
#' d <- lava::sim(m, 1e3, seed=1)
#' # (a <- ate(y~a|a*x|x, data=d))
#' (a <- ate(y~a, nuisance=~a*x, propensity=~x, data = d))
#'
#' # Comparison with randomized experiment
#' m0 <- lava::cancel(m, a~x)
#' lm(y~a-1, lava::sim(m0,2e4))
#'
#' # Choosing a different contrast for the association measures
#' summary(a, contrast=c(2,4))
ate <- function(formula, # nolint
                data=parent.frame(), weights, offset,
                family=stats::gaussian(identity),
                nuisance=NULL,
                propensity=nuisance,
                all,
                labels=NULL,
                adjust.nuisance = TRUE,
                adjust.propensity = TRUE,
                ...) {
  cl <- match.call()
  if (!is.null(nuisance) && inherits(nuisance, "formula")) {
    exposure <- attr(lava::getoutcome(formula), "x")
    formula <- list(formula, nuisance, propensity)
    if (!(exposure%in%all.vars(nuisance)) && length(all.vars(nuisance))>0) {
      nuisance <- update(nuisance, as.formula(paste0("~ . +", exposure)))
      formula[[2]] <- nuisance
    }
  }
  if (is.character(family))
    family <- get(family, mode = "function", envir = parent.frame())
  if (is.function(family))
    family <- family()
  if (is.null(family$family)) {
    print(family)
    stop("'family' not recognized")
  }
  if (is.list(formula)) {
    yf <- lava::getoutcome(formula[[1]])
    exposure <- gsub("`", "", attr(yf, "x"))
    xf <- formula
    xf[[1]] <- update(xf[[1]], .~.-1)
  } else {
    if (inherits(formula, "formula")) {
      yf <- lava::getoutcome(formula, sep="|")
      xf <- attr(yf, "x")
      exposure <- attr(lava::getoutcome(xf[[1]]), "x")
      xf[[1]] <- update(as.formula(paste0(yf, deparse(xf[[1]]))), .~.-1)
    }
  }
  if (is.list(formula) || inherits(formula, "formula")) {
    xx <- lapply(xf, function(x) model.matrix(x, data=data))
    yx <- model.frame(xf[[1]], data=data)
    a <- yx[, exposure]
    y <- yx[, yf[1]]
    x1 <- xx[[2]]
    x2 <- xx[[3]]
    if (base::missing(offset))
      offset <- model.offset(model.frame(xf[[2]], data=data))
    nn <- list(yf[1], exposure, colnames(x1), colnames(x2))
    rm(yx, xx, yf)
  } else {
    stop("Expected a formula") # or a matrix (response,exposure)")
  }
  if (base::missing(weights) || length(weights) == 0) {
    weights <- rep(1, length(y))
  }
  if (inherits(weights, "formula")) weights <- all.vars(weights)
  if (is.character(weights)) weights <- as.vector(data[, weights])

  if (base::missing(offset) || length(offset)==0) offset <- rep(0, length(y))
  if (inherits(offset, "formula")) offset <- all.vars(offset)
  if (is.character(offset)) offset <- as.vector(data[, offset])
  l1 <- glm(y ~ -1+x1, weights=weights, offset=offset, family=family)
  beta <- coef(l1)
  names(beta) <- gsub("^x1", "", names(beta))
  iid.beta <- IC(l1)/length(y)
  treatments <- if (is.factor(a)) levels(a) else sort(unique(a))
  if (length(treatments)>20) stop("Unexpected large amount of treatments")
  if (base::missing(all)) all <- length(treatments)==2
  if (length(treatments)>2) {
    if (!is.factor(a)) {
      warning(
        "`", exposure,
        "` should probably be converted into a factor. ",
        "An additive model is now assumed in the outcome regression model."
      )
    }
  }
  est <- c()
  iids <- matrix(nrow=length(y), ncol=length(treatments))
  coefs <- numeric(length(treatments))
  count <- 0
  data0 <- data
  bprop <- NULL
  Vprop <- NULL
  nlabels <- labels
  for (trt in treatments) {
    count <- count+1
    a0 <- (a==trt)
    ## For simplicity we here fit a logistic regression for each treatment.
    ## TODO: we do not need to recalculate logistic regression for the last
    ## treatment.
    l2 <- glm.fit(y=a0, x=x2, weights=weights, family=binomial("logit"))
    gamma <- l2$coef
    data0[, exposure] <- factor(trt, levels=treatments)
    x1 <- model.matrix(xf[[2]], data=data0)
    val <- ace_est(
      y = cbind(y), a = cbind(a0), x1 = cbind(x1), x2 = cbind(x2),
      theta = c(beta, gamma), weights = weights,
      offset = offset, link = family$link
    )
    alpha.index <- 1
    beta.index <- seq_along(beta) + length(alpha.index)
    gamma.index <- seq_along(gamma) +
      length(alpha.index) + length(beta.index)
    U0 <- val$u
    DU <- t(val$du)
    iid.gamma <- fast_iid(a0, l2$fitted, x2, weights)/length(a0)
    if (count==1) {
      gidx <- seq_along(gamma)
      bprop <- numeric(length(gamma)*(length(treatments)-1))
      names(bprop) <- rep(names(gamma), length(treatments)-1)
      Vprop <- matrix(NA, ncol=length(bprop), nrow=length(bprop))
    }
    if (count < length(treatments) && all) {
      gpos <- gidx+length(gamma)*(count-1)
      bprop[gpos] <- gamma
      Vprop[gpos, gpos] <- crossprod(iid.gamma)
    }

    iid.alpha <- U0
    if (adjust.nuisance) {
      iid.alpha <- iid.alpha + iid.beta %*% t(DU[, beta.index, drop=FALSE])
    }
    if (adjust.propensity) {
      iid.alpha <- iid.alpha + iid.gamma %*% t(DU[, gamma.index, drop=FALSE])
    }
    iid.alpha <- iid.alpha %*% t(-Inverse(DU[, alpha.index, drop=FALSE]))
    ## iid.alpha <- (U0 + iid.beta %*% t(DU[, beta.index, drop=FALSE]) +
    ##               iid.gamma %*% t(DU[, gamma.index, drop=FALSE])) %*%
    ##   t(-Inverse(DU[, alpha.index, drop=FALSE]))

    iids[, count] <- iid.alpha
    coefs[count] <- val$alpha
    if (is.null(labels)) nlabels <- c(nlabels, paste0(exposure, "=", trt))
  }
  Vout  <- NULL
  if (all) Vout <- crossprod(iid.beta)
  outreg <- lava::estimate(coef=beta, vcov=Vout)
  propmod <- lava::estimate(coef=bprop, vcov=Vprop)
  V <- crossprod(iids)
  est <- lava::estimate(coef=coefs, vcov=V, labels=nlabels)
  est$IC <- iids * NROW(iids)
  rownames(est$IC) <- rownames(data)
  obj <- structure(list(estimate=est,
                 outcome.reg=outreg,
                 propensity.model=propmod,
                 names=unlist(nn)[1:2],
                 formula=xf,
                 call = cl,
                 npar=c(length(treatments), ncol(x1), ncol(x2)),
                 nobs=length(y),
                 opt=NULL,
                 all=all,
                 family=family),
            class=c("ate.targeted", "targeted"))
  return(obj)
}

#' @export
print.summary.ate.targeted <- function(x, ...) {
  nam <- x$names
  cat("\nAugmented Inverse Probability Weighting estimator\n")
  outreg <- x$family$family
  cat("  Response ", nam[[1]], " (Outcome model: ", outreg, "):\n", sep="")
  cat("\t", paste(nam[[1]], x$formula[[2]]), "\n")
  cat("  Exposure ", nam[[2]],
      " (Propensity model: logistic regression):\n",
      sep = ""
      )
  cat("\t", paste(nam[[2]], x$formula[[3]]), "\n")
  cat("\n")
  if (x$all) {
    sep_which <- x$npar[[1]]
    sep_labels <- NULL
    if (x$npar[[2]]>0) {
      sep_labels <- nam[[4]]
      if (x$npar[[3]]>0)
        sep_which <- c(sep_which, x$npar[[1]]+x$npar[[2]])
    }
    if (x$npar[[3]]>0)
      sep_labels <- c(sep_labels, nam[[5]])
    print(x$estimate, unique.names=FALSE,
          sep.which=sep_which,
          sep.labels=sep_labels, ...)
  } else {
    print(x$estimate, unique.names=FALSE, ...)
  }
  if (length(x$contrast)>1) {
    cc <- rownames(x$estimate$coefmat) # nolint
    with(x, cat("\nAverage Treatment Effect (constrast: '",
                cc[contrast[1]], "' - '",
                cc[contrast[2]], "'):\n\n",
                sep = ""
                ))
    if (!is.null(x$asso)) print(x$asso)
  }
  cat("\n")
}


#' @export
summary.ate.targeted <- function(object, contrast=c(2:1), ...) {
  nn <- lapply(object[c("estimate", "outcome.reg", "propensity.model")],
               function(x) length(coef(x)))
  if (object$all) {
    cc <- rbind(object$estimate$coefmat,
                object$outcome.reg$coefmat,
                object$propensity.model$coefmat)
  } else {
    cc <- object$estimate$coefmat
  }
  cc <- lava::estimate(
                coef = cc[, 1],
                vcov = diag(cc[, 2]^2, ncol = nrow(cc)), labels = rownames(cc)
              )
  if (length(contrast)>2)
    warning("Only the first two elements of 'contrast' are used")
  if (object$npar[1]<2) contrast <- 1
  asso <- NULL
  if (length(contrast) >= 2) {
    if (object$family$family == "binomial") {
      asso <- estimate(object$estimate,
                       function(x) {
                         return(c(
                           x[contrast[1]] / x[contrast[2]],
                           lava::OR(x[contrast]),
                           x[contrast[1]] - x[contrast[2]]
                         ))
                       },
                       labels = c("RR", "OR", "RD")
                       )
    } else {
      asso <- estimate(object$estimate,
                       function(x) x[contrast[1]] - x[contrast[2]],
                       labels = c("ATE")
                       )
    }
  }
  obj <- structure(list(estimate=cc, npar=nn, type=object$type, asso=asso,
                 family=object$family,
                 names=c(object$names, "",
                         "Outcome model:", "Propensity model:"),
                 all=object$all,
                 formula=gsub("~", "~ ",
                              unlist(lapply(object$formula, deparse))),
                 contrast=contrast),
            class="summary.ate.targeted")
  return(obj)
}

Try the targeted package in your browser

Any scripts or data that you put into this service are public.

targeted documentation built on Jan. 12, 2026, 9:08 a.m.