matchit <- function(formula, data, method="nearest", distance="logit",
distance.options=list(), spatial.options=list(),
discard="none", reestimate=FALSE, ...) {
# ---------------------------------------------------------------------------
# spatial input format checks
spatial.options$is.spatial <- check.is.spatial(data)
if (spatial.options$is.spatial) {
if ('ignore.spatial' %in% names(spatial.options) &&
spatial.options$ignore.spatial == TRUE) {
data <- data@data
warning("Spatial attributes and options are being ignored, as
ignore.spatial is set to TRUE.")
}
} else {
# Not a spatial dataframe - check to make sure the user doesn't think
# they're using spatial functions.
if (('threshold' %in% names(spatial.options)) ||
('decay.model' %in% names(spatial.options)) ||
('ignore.spatial' %in% names(spatial.options) &&
spatial.options$ignore.spatial == FALSE )) {
warning("To use spatial options you must provide a spatial dataframe
object. Spatial options are ignored.")
}
}
# validate / set spatial options
if (spatial.options$is.spatial == TRUE) {
spatial.data <- data
data <- spatial.data@data
if (!('decay.model' %in% names(spatial.options))) {
# use morans as default decay model
spatial.options$decay.model <- "spherical"
spatial.options$decay.function <-
paste("distance.decay.", spatial.options$decay.model, sep="")
warning("No distance decay model provided. Using default spherical
method.")
} else {
# verfy decay model provided is valid
fn0 <- paste("distance.decay.", spatial.options$decay.model, sep="")
if (!exists(fn0)) {
stop(spatial.options$decay.model,
"distance decay model not supported.")
}
spatial.options$decay.function <- fn0
}
if (!('threshold' %in% names(spatial.options))) {
warning("No spatial threshold provided. Will use x-intercept of PSM score
correlogram with selected distance decay model.")
spatial.options$threshold <- "auto"
} else {
# Check that the user-supplied threshold make sense in terms of units
# of measurement.
print("Custom threshold values should be verfied by user beforehand.
Erroneous values may cause errors or bad results.")
}
}
# ---------------------------------------------------------------------------
# data input
mcall <- match.call()
if (is.null(data)) {
stop("Dataframe must be specified", call.=FALSE)
}
if (!is.data.frame(data)) {
stop("Data must be a dataframe", call.=FALSE)
}
if (sum(is.na(data)) > 0) {
stop("Missing values exist in the data")
}
# list-wise deletion
# allvars <- all.vars(mcall)
# varsindata <- colnames(data)[colnames(data) %in% all.vars(mcall)]
# data <- na.omit(subset(data, select = varsindata))
# 7/13/06: Convert character variables to factors as necessary
ischar <- rep(0, dim(data)[2])
for (i in 1:dim(data)[2]) {
if (is.character(data[, i])) {
data[, i] <- as.factor(data[, i])
}
}
# check inputs
if (!is.numeric(distance)) {
fn1 <- paste("distance2", distance, sep="")
if (!exists(fn1)) {
stop(distance, "not supported.")
}
}
if (is.numeric(distance)) {
# do not think this exists yet
# does it need to be built or the related options supported somehow?
fn1 <- "distance2user"
stop(distance, "not supported.")
}
fn2 <- paste("matchit2", method, sep="")
if (!exists(fn2)) {
stop(method, "not supported.")
}
# obtain T and X
tryerror <- try(model.frame(formula), TRUE)
# get formula terms
if (distance %in% c("GAMlogit", "GAMprobit", "GAMcloglog", "GAMlog",
"GAMcauchit")) {
library(mgcv)
tt <- terms(mgcv::interpret.gam(formula)$fake.formula)
} else {
tt <- terms(formula)
}
attr(tt, "intercept") <- 0
mf <- model.frame(tt, data)
treat <- model.response(mf)
X <- model.matrix(tt, data=mf)
# ===========================================================================
# estimate the distance measure
if (method == "exact") {
distance <- out1 <- discarded <- NULL
if (!is.null(distance)) {
warning("distance is set to `NULL' when exact matching is used.")
}
} else if (is.numeric(distance)){
out1 <- NULL
discarded <- discard(treat, distance, discard, X)
} else {
# fill in distance formula/data if needed
if (is.null(distance.options$formula)) {
distance.options$formula <- formula
}
if (is.null(distance.options$data)) {
distance.options$data <- data
}
# run distance function
out1 <- do.call(fn1, distance.options)
# discard / reestimate if needed
discarded <- discard(treat, out1$distance, discard, X)
if (reestimate) {
distance.options$data <- data[!discarded, ]
distance.options$weights <- distance.options$weights[!discarded]
tmp <- out1
out1 <- do.call(fn1, distance.options)
tmp$distance[!discarded] <- out1$distance
out1$distance <- tmp$distance
}
distance <- out1$distance
}
# ===========================================================================
# ---------------------------------------------------------------------------
# If there is a spatial object, then distance is a list that contains
# the spatial data frame, decay model, threshold, and PSM distances.
# Otherwise, it's only a vector.
if (spatial.options$is.spatial == TRUE) {
if (spatial.options$threshold == "auto") {
print("Calculating distance decay threshold based on PSM correlogram...")
correlogram_data <- correlog(x=coordinates(spatial.data)[, 1],
y=coordinates(spatial.data)[, 2],
z=distance, increment=5, latlon=TRUE,
na.rm=TRUE, resamp=50, quiet=TRUE)
spatial.options$threshold <- as.numeric(correlogram_data$x.intercept)
# print(correlogram_data)
print("Plotting correlogram and x-int (spatial thresh) for PSM dist")
plot.correlog(correlogram_data)
print("Estimated Threshold (km):")
print(spatial.options$threshold)
}
combined.options <- list(distance, spatial.data,
spatial.options$decay.function,
spatial.options$threshold)
} else {
combined.options <- distance
}
# ---------------------------------------------------------------------------
# full mahalanobis matching
if (fn1 == "distance2mahalanobis") {
is.full.mahalanobis <- TRUE
} else {
is.full.mahalanobis <- FALSE
}
# run matching function
out2 <- do.call(fn2, list(treat, X, data,
distance = combined.options, discarded,
is.full.mahalanobis = is.full.mahalanobis, ...))
# no distance for full mahalanobis matching
if (fn1 == "distance2mahalanobis") {
distance[1:length(distance)] <- NA
class(out2) <- c("matchit.mahalanobis", "matchit")
}
# putting all the results together
out2$call <- mcall
out2$model <- out1$model
out2$formula <- formula
out2$treat <- treat
if (is.null(out2$X)) {
out2$X <- X
}
out2$distance <- distance
out2$discarded <- discarded
# basic summary
nn <- matrix(0, ncol=2, nrow=4)
nn[1, ] <- c(sum(out2$treat == 0), sum(out2$treat == 1))
nn[2, ] <- c(sum(out2$treat == 0 & out2$weights > 0),
sum(out2$treat == 1 & out2$weights > 0))
nn[3, ] <- c(sum(out2$treat == 0 & out2$weights == 0 & out2$discarded == 0),
sum(out2$treat == 1 & out2$weights == 0 & out2$discarded == 0))
nn[4, ] <- c(sum(out2$treat == 0 & out2$weights == 0 & out2$discarded == 1),
sum(out2$treat == 1 & out2$weights == 0 & out2$discarded == 1))
dimnames(nn) <- list(c("All","Matched","Unmatched","Discarded"),
c("Control","Treated"))
out2$nn <- nn
return(out2)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.