R/matchit.R

Defines functions matchit

Documented in matchit

matchit <- function(formula, data, method="nearest", distance="logit",
                    distance.options=list(), spatial.options=list(),
                    discard="none", reestimate=FALSE, ...) {

  # ---------------------------------------------------------------------------
  # spatial input format checks
  spatial.options$is.spatial <- check.is.spatial(data)

  if (spatial.options$is.spatial) {

    if ('ignore.spatial' %in% names(spatial.options) &&
        spatial.options$ignore.spatial == TRUE) {

      data <- data@data
      warning("Spatial attributes and options are being ignored, as
              ignore.spatial is set to TRUE.")

    }

  } else {

    # Not a spatial dataframe - check to make sure the user doesn't think
    # they're using spatial functions.
    if (('threshold' %in% names(spatial.options)) ||
        ('decay.model' %in% names(spatial.options)) ||
        ('ignore.spatial' %in% names(spatial.options) &&
        spatial.options$ignore.spatial == FALSE )) {

      warning("To use spatial options you must provide a spatial dataframe
              object. Spatial options are ignored.")
    }

  }

  # validate /  set spatial options
  if (spatial.options$is.spatial == TRUE) {

    spatial.data <- data
    data <- spatial.data@data

    if (!('decay.model' %in% names(spatial.options))) {
      # use morans as default decay model
      spatial.options$decay.model <- "spherical"
      spatial.options$decay.function <-
        paste("distance.decay.", spatial.options$decay.model, sep="")

      warning("No distance decay model provided. Using default spherical
              method.")

    } else {
      # verfy decay model provided is valid
      fn0 <- paste("distance.decay.", spatial.options$decay.model, sep="")
      if (!exists(fn0)) {
        stop(spatial.options$decay.model,
             "distance decay model not supported.")
      }
      spatial.options$decay.function <- fn0
    }

    if (!('threshold' %in% names(spatial.options))) {

      warning("No spatial threshold provided. Will use x-intercept of PSM score
            correlogram with selected distance decay model.")
      spatial.options$threshold <- "auto"

    } else {
      # Check that the user-supplied threshold make sense in terms of units
      # of measurement.
      print("Custom threshold values should be verfied by user beforehand.
            Erroneous values may cause errors or bad results.")
    }

  }
  # ---------------------------------------------------------------------------


  # data input
  mcall <- match.call()
  if (is.null(data)) {
    stop("Dataframe must be specified", call.=FALSE)
  }
  if (!is.data.frame(data)) {
    stop("Data must be a dataframe", call.=FALSE)
  }
  if (sum(is.na(data)) > 0) {
    stop("Missing values exist in the data")
  }

  # list-wise deletion
  # allvars <- all.vars(mcall)
  # varsindata <- colnames(data)[colnames(data) %in% all.vars(mcall)]
  # data <- na.omit(subset(data, select = varsindata))

  # 7/13/06: Convert character variables to factors as necessary
  ischar <- rep(0, dim(data)[2])
  for (i in 1:dim(data)[2]) {
    if (is.character(data[, i])) {
      data[, i] <- as.factor(data[, i])
    }
  }

  # check inputs
  if (!is.numeric(distance)) {
    fn1 <- paste("distance2", distance, sep="")
    if (!exists(fn1)) {
      stop(distance, "not supported.")
    }
  }
  if (is.numeric(distance)) {
    # do not think this exists yet
    # does it need to be built or the related options supported somehow?
    fn1 <- "distance2user"
    stop(distance, "not supported.")
  }

  fn2 <- paste("matchit2", method, sep="")
  if (!exists(fn2)) {
    stop(method, "not supported.")
  }

  # obtain T and X
  tryerror <- try(model.frame(formula), TRUE)
  # get formula terms
  if (distance %in% c("GAMlogit", "GAMprobit", "GAMcloglog", "GAMlog",
                      "GAMcauchit")) {
    library(mgcv)
    tt <- terms(mgcv::interpret.gam(formula)$fake.formula)
  } else {
    tt <- terms(formula)
  }

  attr(tt, "intercept") <- 0
  mf <- model.frame(tt, data)
  treat <- model.response(mf)
  X <- model.matrix(tt, data=mf)

  # ===========================================================================

  # estimate the distance measure
  if (method == "exact") {
    distance <- out1 <- discarded <- NULL
    if (!is.null(distance)) {
      warning("distance is set to `NULL' when exact matching is used.")
    }

  } else if (is.numeric(distance)){
    out1 <- NULL
    discarded <- discard(treat, distance, discard, X)

  } else {

    # fill in distance formula/data if needed
    if (is.null(distance.options$formula)) {
      distance.options$formula <- formula
    }
    if (is.null(distance.options$data)) {
      distance.options$data <- data
    }

    # run distance function
    out1 <- do.call(fn1, distance.options)

    # discard / reestimate if needed
    discarded <- discard(treat, out1$distance, discard, X)
    if (reestimate) {
      distance.options$data <- data[!discarded, ]
      distance.options$weights <- distance.options$weights[!discarded]
      tmp <- out1
      out1 <- do.call(fn1, distance.options)
      tmp$distance[!discarded] <- out1$distance
      out1$distance <- tmp$distance
    }

    distance <- out1$distance
  }

  # ===========================================================================

  # ---------------------------------------------------------------------------
  # If there is a spatial object, then distance is a list that contains
  # the spatial data frame, decay model, threshold, and PSM distances.
  # Otherwise, it's only a vector.
  if (spatial.options$is.spatial == TRUE) {

    if (spatial.options$threshold == "auto") {
      print("Calculating distance decay threshold based on PSM correlogram...")

      correlogram_data <- correlog(x=coordinates(spatial.data)[, 1],
                                   y=coordinates(spatial.data)[, 2],
                                   z=distance, increment=5, latlon=TRUE,
                                   na.rm=TRUE, resamp=50, quiet=TRUE)

      spatial.options$threshold <- as.numeric(correlogram_data$x.intercept)

      # print(correlogram_data)
      print("Plotting correlogram and x-int (spatial thresh) for PSM dist")
      plot.correlog(correlogram_data)
      print(spatial.options$threshold)

    }

    combined.options <- list(distance, spatial.data,
                            spatial.options$decay.function,
                            spatial.options$threshold)
  } else {
    combined.options <- distance
  }
  # ---------------------------------------------------------------------------

  # full mahalanobis matching
  if (fn1 == "distance2mahalanobis") {
    is.full.mahalanobis <- TRUE
  } else {
    is.full.mahalanobis <- FALSE
  }

  # run matching function
  out2 <- do.call(fn2, list(treat, X, data,
                            distance = combined.options, discarded,
                            is.full.mahalanobis = is.full.mahalanobis, ...))

  # no distance for full mahalanobis matching
  if (fn1 == "distance2mahalanobis") {
    distance[1:length(distance)] <- NA
    class(out2) <- c("matchit.mahalanobis", "matchit")
  }

  # putting all the results together
  out2$call <- mcall
  out2$model <- out1$model
  out2$formula <- formula
  out2$treat <- treat
  if (is.null(out2$X)) {
    out2$X <- X
  }
  out2$distance <- distance
  out2$discarded <- discarded

  # basic summary
  nn <- matrix(0, ncol=2, nrow=4)
  nn[1, ] <- c(sum(out2$treat == 0), sum(out2$treat == 1))
  nn[2, ] <- c(sum(out2$treat == 0 & out2$weights > 0),
               sum(out2$treat == 1 & out2$weights > 0))
  nn[3, ] <- c(sum(out2$treat == 0 & out2$weights == 0 & out2$discarded == 0),
               sum(out2$treat == 1 & out2$weights == 0 & out2$discarded == 0))
  nn[4, ] <- c(sum(out2$treat == 0 & out2$weights == 0 & out2$discarded == 1),
               sum(out2$treat == 1 & out2$weights == 0 & out2$discarded == 1))
  dimnames(nn) <- list(c("All","Matched","Unmatched","Discarded"),
                       c("Control","Treated"))
  out2$nn <- nn
  return(out2)

}
DanRunfola/SimTests documentation built on May 6, 2019, 1:23 p.m.