#' Fitting DAPSm with fixed or optimal weight
#'
#' Function that fits DAPS by choosing the 'optimal' weight, or using the weight
#' specified and returns the causal effect estimate under a specification of
#' caliper. If the weight is set to 'optimal', the optimal weight is chosen as
#' the smallest weight that satisfies standardized difference less than a cutoff.
#' For computational reasons, we do not fit DAPS on a lot of weights, but perform
#' an iterative algorithm to identify the optimal w.
#'
#' @param dataset
#' Data frame including treatment, outcome, coordinates, propensity score
#' estimates (named prop.scores) and observed confounders.
#' @param out.col
#' If outcome column name is not 'Y', out.col should be the index of the outcome
#' column.
#' @param trt.col
#' If treatment is not named 'X', trt.col should be set to the index of the
#' treatment column.
#' @param caliper
#' A caliper for the DAPS Score difference of matched pairs. Defaults to 0.1.
#' @param weight
#' Scalar between 0 and 1 or should be set to 'optimal'. Describes the percent
#' of weight to be given on PS difference. 1 - weight is given to standardized
#' distance. If set to 'optimal', the 'optimal' weight is chosen. Defaults to
#' 'optimal'.
#' @param coords.columns
#' If the columns of coordinates are not named 'Longitude' and 'Latitude',
#' coords.columns are the column indices corresponding to longitude and latitude
#' accordingly.
#' @param pairsRet
#' Whether we want to return the information on the matched pairs. Logical.
#' Defaults to FALSE.
#' @param cov.cols
#' If the weight is set to 'optimal', standardized difference of means will be
#' calculated on the columns whose indices are in cov.cols. If the weight is set
#' to a numeric value, then cov.cols can be left NULL, or it can be used if we
#' want the function to return the standardized difference of means of the columns
#' with indices in cov.cols.
#' @param cutoff
#' The cutoff of standardized difference of means under which the covariates are
#' considered balanced. Defaults to 0.1.
#' @param w_tol
#' Tolerance on the choice of the optimal weight. Only needed when weight is
#' 'optimal'. Defaults to 0.01.
#' @param coord_dist
#' Set to true when we want to use a distance function that calculates the
#' spherical distance of points instead of Euclidean. Defaults to FALSE.
#' @param distance
#' Function that takes in the distance matrix and returns the standardized
#' distance matrix. Defaults to the funcion that subtracks the minimum and
#' divides by the range.
#' @param caliper_type
#' Whether we want the caliper to be on DAPS or on the PS. caliper_type must be
#' either 'DAPS', or 'PS'.
#' @param quiet
#' Whether we want to print the choice of weight in DAPS optimal. Defauls to TRUE.
#' @param true_value
#' Numeric. If provided, an indicator of whether the CI covers the true value is
#' returned.
#' @param matching_algorithm
#' Argument with options 'optimal', or 'greedy'. The optimal choice uses the optmatch R
#' package to acquire the matches based on propensity score difference and a caliper on
#' distance. The greedy option matches treated and control units sequentially, starting
#' from the ones with the smallest propensity score difference. Defaults to 'optimal'.
#' @param remove.unmatchables Logical. Argument of the optmatch function. Defaults to
#' FALSE. If set to FALSE, the matching fails unless all treated units are matched. If
#' set to TRUE, matching might return matches only for some of the treated units.
#'
#' @return A list including: the estimate of the causal effect, and potential
#' standardized difference of means, optimal weight chosen, information on matched
#' pairs.
#'
#' @export
#' @examples
#' data('toyData')
#' toyData$prop.scores <- glm(Z ~ X1 + X2 + X3 + X4, family = binomial,
#' data = toyData)$fitted.values
#' daps1 <- DAPSest(toyData, out.col = 2, trt.col = 1, caliper = 0.5,
#' weight = 'optimal', coords.columns = c(4, 5),
#' pairsRet = TRUE, cov.cols = 6:9, cutoff = 0.1,
#' w_tol = 0.001, coord_dist = TRUE, caliper_type = 'DAPS',
#' matching_algorithm = 'greedy')
#' names(daps1)
#'
#' # Trying for a different value of the caliper
#' daps2 <- DAPSest(toyData, out.col = 2, trt.col = 1, caliper = 0.1,
#' weight = 'optimal', coords.columns = c(4, 5),
#' pairsRet = TRUE, cov.cols = 6:9, cutoff = 0.1,
#' w_tol = 0.001, coord_dist = TRUE, caliper_type = 'DAPS',
#' matching_algorithm = 'greedy')
#' names(daps2)
#' daps1$weight
#' daps2$weight
DAPSest <- function(dataset, out.col = NULL, trt.col = NULL, caliper = 0.1,
weight = 'optimal', coords.columns = NULL, pairsRet = FALSE,
cov.cols = NULL, cutoff = 0.1, w_tol = 0.01, coord_dist = FALSE,
distance = StandDist, caliper_type = c('DAPS', 'PS'),
quiet = FALSE, true_value = NULL,
matching_algorithm = c('optimal', 'greedy'),
remove.unmatchables = FALSE) {
matching_algorithm <- match.arg(matching_algorithm)
caliper_type <- match.arg(caliper_type)
r <- NULL
r$weight <- weight
# Naming outcome and treatment as 'Y', 'X'.
dataset <- as.data.frame(dataset)
dataset <- FormDataset(dataset, ignore.cols = NULL,
out.col = out.col, trt.col = trt.col)
# Fitting DAPS.
if (is.numeric(weight)) {
daps.out <- dist.ps(treated = dataset[dataset$X == 1, ],
control = dataset[dataset$X == 0, ], caliper = caliper,
weight = weight, coords.columns = coords.columns,
distance = distance, caliper_type = caliper_type,
coord_dist = coord_dist,
matching_algorithm = matching_algorithm,
remove.unmatchables = remove.unmatchables)
# If no matches were acheived, return missing values.
if (nrow(daps.out) == 0) {
warning(paste0('No matches were acheived for weight = ', weight))
r$ind_trt <- NA
r$ind_cnt <- NA
r$est <- NA
r$se <- NA
r$pairs <- matrix(NA, nrow = 0, ncol = 12)
return(r)
}
# Getting the matched pairs.
pairs.out <- daps.out$match
names(pairs.out) <- rownames(daps.out)
pairs.out <- na.omit(pairs.out)
pairs.daps <- dataset[c(as.numeric(names(pairs.out)),
as.numeric(pairs.out)), ]
if (!is.null(cov.cols)) {
diff_mat <- as.matrix(pairs.daps[, cov.cols])
stand_diff <- apply(diff_mat, 2, function(x) {
(mean(x[pairs.daps$X == 1]) - mean(x[pairs.daps$X == 0])) /
sd(x[pairs.daps$X == 1])
})
r$stand_diff <- stand_diff
}
r$ind_trt <- as.numeric(names(pairs.out))
r$ind_cnt <- as.numeric(pairs.out)
} else if (weight == 'optimal') {
daps.opt <- DAPSopt(dataset, caliper = caliper, coords.cols = coords.columns,
cov.cols = cov.cols, cutoff = cutoff, w_tol = w_tol,
distance = distance, caliper_type = caliper_type,
quiet = quiet, coord_dist = coord_dist,
matching_algorithm = matching_algorithm,
remove.unmatchables = remove.unmatchables)
pairs.daps <- daps.opt$pairs
r$weight <- daps.opt$weight
r$stand_diff <- daps.opt$stand_diff
r$ind_trt <- daps.opt$ind_trt
r$ind_cnt <- daps.opt$ind_cnt
}
lmod <- lm(Y ~ X, data = pairs.daps)
r$est <- lmod$coef[2]
r$se <- summary(lmod)$coef[2, 2]
if (!is.null(true_value)) {
r$cover <- (abs(true_value) - r$est < qnorm(0.975) * r$se)
}
if (pairsRet) {
which_cols <- c(which(names(dataset) %in% c('X', 'Y', 'prop.scores')))
which_cols <- c(which_cols, coords.columns)
if (is.numeric(weight)) {
pairs <- pairs.daps[1:(nrow(pairs.daps) / 2), which_cols]
pairs <- cbind(pairs, pairs.daps[(nrow(pairs.daps) / 2 + 1):
nrow(pairs.daps), which_cols])
names(pairs) <- paste0(rep(c('Trt.', 'Con.'),
each = length(which_cols)), names(pairs))
pairs$IDtrt <- as.numeric(names(pairs.out))
pairs$IDcon <- as.numeric(pairs.out)
r$pairs <- as.matrix(pairs)[, c(1, 3:6, 8:12, 2, 7)]
} else { # If the weight is optimal.
nmatch <- nrow(daps.opt$pairs) / 2
mtrt <- daps.opt$pairs[1:nmatch, which_cols]
mcnt <- daps.opt$pairs[(nmatch + 1) : (2 * nmatch), which_cols]
pairs <- cbind(mtrt, mcnt)
names(pairs) <- paste0(rep(c('Trt.', 'Con.'),
each = length(which_cols)), names(pairs))
pairs$IDtrt <- r$ind_trt
pairs$IDcnt <- r$ind_cnt
r$pairs <- pairs[, c(1, 3:6, 8:12, 2, 7)]
}
}
return(r)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.