R/combinedListDirect.R

Defines functions print.summary.comblist summary.comblist print.comblist combinedListDirect

Documented in combinedListDirect

#' Combined List Estimator
#' 
#' This function implements the combined list estimator described in Aronow,
#' Coppock, Crawford, and Green (2015): Combining List Experiment and Direct
#' Question Estimates of Sensitive Behavior Prevalence
#' 
#' @param formula an object of class "formula" (or one that can be coerced to
#' that class): a symbolic description of the model to be fitted. Should be of
#' the form Y ~ T + X1 + X2, where Y is the list response, T is the treatment
#' indicator, and X1, X2, etc are pretreatment covariates. It is recommended
#' that T be a numeric variable whose values are 0 for subjects in control and
#' 1 for subjects in treatment.
#' @param data an optional data frame, list or environment (or object coercible
#' by as.data.frame to a data frame) containing the variables in the model. If
#' not found in data, the variables are taken from environment(formula),
#' typically the environment from which combined.list is called. It is good
#' practice to include all variables used in the estimation (list response,
#' treatment indicator, direct response, and optional pre-treatment covariates)
#' in a dataframe rather than calling data from the global environent.
#' @param treat a character string giving the name of the treatment variable.
#' Defaults to "treat".
#' @param direct a character string giving the name of the direct response
#' variable. Defaults to "direct". The direct response variable itself must
#' only contain the values 0 and 1, where 1 refers to subjects who answered
#' "Yes" to the direct question.
#' @return a list containing conventional, direct, and combined prevalence
#' estimates with associated standard errors as well as the results of two
#' placebo tests.
#' @examples
#' 
#' # Load data from Aronow, Coppock, Crawford, and Green (2015)
#' data("combinedListExps")
#' 
#' # complete case analysis
#' combinedListExps <- na.omit(combinedListExps)
#' # Conduct estimation without covariate adjustment
#' out.1 <- combinedListDirect(list1N ~ list1treat, 
#'                             data = subset(combinedListExps, directsfirst==1), 
#'                             treat = "list1treat", direct = "direct1")
#' summary(out.1)
#' 
#' # Conduct estimation with covariate adjustment
#' out.2 <- combinedListDirect(list1N ~ list1treat + gender + 
#'                             ideology + education + race, 
#'                             data = subset(combinedListExps, directsfirst==1), 
#'                             treat = "list1treat", direct = "direct1")
#' summary(out.2)
#' 
#' @export combinedListDirect
combinedListDirect <- function(formula, data = parent.frame(), treat="treat", direct="direct"){

  comblist.call <- match.call()

  # set up data frame, with support for standard and modified responses
  mf <- match.call(expand.dots = FALSE)

  # make all other call elements null in mf <- NULL in next line
  # you can remove all the options I have, but make sure to make NULL any option that remains like below
  mf$treat <- mf$direct <- NULL
  mf[[1]] <- as.name("model.frame")
  mf$na.action <- 'na.pass'
  mf <- eval.parent(mf)

  # define design, response data frames
  x.all <- model.matrix.default(attr(mf, "terms"), mf)
  #x.all <- x.all[,-1]
  x.all <- x.all[,setdiff(colnames(x.all), treat)]
  x.all <- as.matrix(x.all)
  y.all <- model.response(mf)
  t.all <- data[,paste(treat)]
  d.all <- data[,paste(direct)]

  # list-wise missing deletion
  na.x <- apply(is.na(x.all), 1, sum)
  na.y <- is.na(y.all)
  na.t <- is.na(t.all)
  na.d <- is.na(d.all)

  keepers <- (na.x==0 & na.y==0 & na.t==0 & na.d==0)

  x.nona <- x.all[keepers, , drop=FALSE]
  y.nona <- y.all[keepers]
  t.nona <- t.all[keepers]
  d.nona <- d.all[keepers]

  if(length(unique(t.nona))!=2){stop("The treatment variable must only contain two unique values.")}
  if(!all(d.nona %in% c(0,1))){stop("The direct response variable must be a numeric variable whose values are 0 or 1.")}

  if(inherits(t.nona, "factor")) {

    levels(t.nona) <- tolower(levels(t.nona))

    if (length(which(levels(t.nona) == "control")) == 1) {
      t.nona <- relevel(t.nona, ref = "control")
    } else {
      warning("Note: using the first level of the treatment variable as the control condition, but it is not labeled 'control'.")
    }

    t.nona <- as.numeric(t.nona) - 1

  } else {
    if(!all(t.nona %in% c(0,1))){stop("The treatment variable must be a numeric variable whose values are 0 or 1.")}
  }

  ## so that the output data has the same dimension as x.all and y.all
  data <- data[keepers, , drop = FALSE]

  # Direct Estimate
  d.bar <- mean(d.nona)
  direct.var.est <- var(d.nona)/length(d.nona)

  # Conventional List Estimate
  conv.fit <- lm(y.nona ~ t.nona + x.nona)
  conv.est <- coef(conv.fit)[2]
  conv.resids <- residuals(conv.fit)
  conv.var.t.1 <- var(conv.resids[t.nona==1])
  conv.var.t.0 <- var(conv.resids[t.nona==0])
  conv.var.est <- conv.var.t.1/sum(t.nona) + conv.var.t.0/sum(1-t.nona)

  # Combined List Estimate
  comb.fit <- lm(y.nona ~ t.nona + x.nona, subset=d.nona==0)
  intermediate.est <- coef(comb.fit)[2]
  mu.hat <- d.bar + (1-d.bar)*(intermediate.est)

  comb.resids <- residuals(comb.fit)

  comb.var.t.1 <- var(comb.resids[t.nona[d.nona==0]==1])
  comb.var.t.0 <- var(comb.resids[t.nona[d.nona==0]==0])

  gamma.hat <- mean(t.nona)
  comb.var.est <- ((((1-mu.hat)^2)/(1-d.bar))*d.bar +
                (1-d.bar)*(comb.var.t.1/gamma.hat + comb.var.t.0/(1-gamma.hat)))/length(t.nona)

  # Placebo Test I

  placebo.I.fit <- lm(y.nona ~ t.nona + x.nona, subset=d.nona==1)
  placebo.I.est <- as.numeric(coef(placebo.I.fit)[2])
  placebo.I.resids <- residuals(placebo.I.fit)
  placebo.I.var <- (var(placebo.I.resids[t.nona[d.nona==1]==1]))/sum(t.nona[d.nona==1]==1) +
    (var(placebo.I.resids[t.nona[d.nona==1]==0]))/sum(t.nona[d.nona==1]==0)
  placebo.I.se <- as.numeric(sqrt(placebo.I.var))
  placebo.I.p <- as.numeric(2*pnorm(- abs(placebo.I.est -1)/ placebo.I.se))
  placebo.I.n <- as.numeric(sum(d.nona==1))

  placebo.I.list <- list(estimate = placebo.I.est, se = placebo.I.se,
                         p = placebo.I.p, n = placebo.I.n)

  # Placebo Test II

  placebo.II.fit <- lm(d.nona ~ t.nona + x.nona)
  placebo.II.est <- as.numeric(coef(placebo.II.fit)[2])
  placebo.II.resids <- residuals(placebo.II.fit)
  placebo.II.var <- var(placebo.II.resids[t.nona==1])/(sum(t.nona==1)) +
                    var(placebo.II.resids[t.nona==0])/(sum(t.nona==0))
  placebo.II.se <- as.numeric(sqrt(placebo.II.var))
  placebo.II.p <- as.numeric(2*pnorm(- abs(placebo.II.est)/ placebo.II.se))
  placebo.II.n <- as.numeric(length(t.nona))

  placebo.II.list <- list(estimate = placebo.II.est, se = placebo.II.se,
                         p = placebo.II.p, n = placebo.II.n)


  return.object <- list(comb.est=mu.hat, comb.se=sqrt(comb.var.est),
                        direct.est=d.bar, direct.se=sqrt(direct.var.est),
                        conv.est=conv.est, conv.se=sqrt(conv.var.est),
                        placebo.I = placebo.I.list,
                        placebo.II = placebo.II.list,
                        call = match.call(),
                        data = data, x = x.nona, y = y.nona, treat = t.nona, direct=d.nona)

  class(return.object) <- "comblist"

  return(return.object)

}

print.comblist <- function(x, ...){

  cat("\n Combined List Estimates \n\nCall: ")

  dput(x$call)

  cat("\n Prevalence estimate\n")

  tb <- as.matrix(c(x$comb.est, x$comb.se))
  rownames(tb) <- c("Estimate", "Standard Error")
  colnames(tb) <- "Prevalence"
  print(tb)

}

summary.comblist <- function(object, ...) {
  structure(object, class = c("summary.comblist", class(object)))
}

print.summary.comblist <- function(x, ...){

  cat("\n Combined List Estimates \n\nCall: ")

  dput(x$call)

  cat("\n Prevalence estimates\n")

  estimates <- c(x$comb.est, x$direct.est, x$conv.est)
  ses <- c(x$comb.se, x$direct.se, x$conv.se)

  tb <- rbind(estimates, ses)
  colnames(tb) <- c("Combined", "Direct", "Conventional")
  rownames(tb) <- c("Estimate", "Standard Error")

  print(tb)

  cat("\n Placebo Test I
       Beta is the conventional list experiment estimate among those who answer 'Yes' to the direct question.
       Ho: beta = 1
       Ha: beta != 1
      \n ")
  pItb <- t(as.matrix(unlist(x$placebo.I)))
  colnames(pItb) <- c("Estimate", "SE", "p", "n")
  rownames(pItb) <- "beta"
  print(pItb)


  cat("\n Placebo Test II
       Delta is the average effect of the receiving the treatment list on the direct question response.
       Ho: delta = 0
       Ha: delta != 0
      \n ")
  pIItb <- t(as.matrix(unlist(x$placebo.II)))
  colnames(pIItb) <- c("Estimate", "SE", "p", "n")
  rownames(pIItb) <- "delta"
  print(pIItb)
}

#' Five List Experiments with Direct Questions
#'
#' A dataset containing the five list experiments in 
#' Aronow, Coppock, Crawford, and Green (2015)
#'
#'
#' @docType data
#' @keywords datasets
#' @name combinedListExps
#' @usage data(combinedListExps)
#' @format A data frame with 1023 observations and 23 variables
NULL

Try the list package in your browser

Any scripts or data that you put into this service are public.

list documentation built on June 27, 2022, 1:06 a.m.