################################################################################
# compute_rank.mahalanobis: calculate the mahalanobis distance between ranks
# of vectors
################################################################################
context("compute_rank.mahalanobis function")
pseudoinv <- function(X) # dumbed-down `ginv` (from MASS package)
{
tol <- sqrt(.Machine$double.eps)
if (length(dim(X)) > 2L || !is.numeric(X))
stop("'X' must be a numeric matrix")
if (!is.matrix(X))
X <- as.matrix(X)
Xsvd <- svd(X)
Positive <- Xsvd$d > max(tol * Xsvd$d[1L], 0)
if (all(Positive))
Xsvd$v %*% (1/Xsvd$d * t(Xsvd$u))
else if (!any(Positive)) stop("Nothing to psuedo-invert here, folks")
else Xsvd$v[, Positive, drop = FALSE] %*% ((1/Xsvd$d[Positive]) *
t(Xsvd$u[, Positive, drop = FALSE]))
}
# Rosenbaum's original rank mahalanobis distance code
# from Design of observational studies
# http://www-stat.wharton.upenn.edu/~rosenbap/Rdospublic.RData
# Downloaded 2013.06.25
# Subsequently touched up a tad, by @benthestatistician and
# @josherrickson
compute_smahal <- function (z, X) {
X <- as.matrix(X)
n <- dim(X)[1]
rownames(X) <- 1:n
k <- dim(X)[2]
m <- sum(z)
for (j in 1:k) X[, j] <- rank(X[, j])
cv <- cov(X)
vuntied <- var(1:n)
rat <- sqrt(vuntied/diag(cv))
cv <- diag(rat) %*% cv %*% diag(rat)
out <- matrix(NA, m, n - m)
Xc <- X[z == 0, , drop=FALSE]
Xt <- X[z == 1, , drop=FALSE]
rownames(out) <- rownames(X)[z == 1]
colnames(out) <- rownames(X)[z == 0]
icov <- pseudoinv(cv)
for (i in 1:m) out[i, ] <- mahalanobis(Xc, Xt[i, ], icov,
inverted = T)
# Rosenbaum's original version produced squared distances;
# JE 4/2022
sqrt(out)
}
test_that('compute_rank.mahal returns results similar to the Rosenbaum code', {
nr <- 10L
nc <- 5L
z <- integer(nr)
z[sample(1:nr, nr / 2L)] <- 1L
numdists <- sum(z)*sum(!z)
X <- matrix(runif(nr * nc), nr)
row.names(X) <- 1L:nr
reference_rankmahal <- compute_smahal(z, X)
expect_that(optmatch:::compute_rank_mahalanobis(NULL, X, as.logical(z)),
is_equivalent_to(reference_rankmahal))
expect_equivalent(as.matrix(match_on(z ~ X, method = "rank")), reference_rankmahal)
df <- data.frame(z = z, X)
expect_equivalent(as.matrix(match_on(z ~ ., data = df, method = "rank")), reference_rankmahal)
})
test_that("rank mahal with factor variables, #220", {
data(nuclearplants)
nuclearplants$group <- factor(sample(3:5, 32, TRUE))
form1 <- pr ~ group
# Generate a design matrix with no intercept and all dummies (no reference)
X1 <- model.matrix(form1, data = nuclearplants,
contrasts.arg = lapply(Filter(is.factor, nuclearplants),
function(x) {
contrasts(x, contrasts = FALSE)/sqrt(2)
})) [, -1]
d1a <- compute_smahal(nuclearplants$pr, X1)
d1b <- as.matrix(match_on(form1, data = nuclearplants,
method = "rank_mahalanobis"))
expect_true(all.equal(d1a, d1b, check.attributes = FALSE))
})
test_that("Fix for #128 (`compute_rank_mahalanobis` ignores index argument) holds", {
nr <- 10L
nc <- 5L
z <- integer(nr)
z[sample(1:nr, nr / 2L)] <- 1L
numdists <- sum(z)*sum(!z)
X <- matrix(runif(nr * nc), nr)
row.names(X) <- 1L:nr
reference_rankmahal <- compute_smahal(z, X)
indices <- expand.grid(rownames(reference_rankmahal), colnames(reference_rankmahal))
indices <- as.matrix(indices)
expect_equivalent(optmatch:::compute_rank_mahalanobis(indices, X, as.logical(z)),
reference_rankmahal[1L:numdists])
expect_equivalent(optmatch:::compute_rank_mahalanobis(indices[-1L, ], X, as.logical(z)),
reference_rankmahal[-1L])
omitdists <- sample(1L:numdists, 2)
expect_equivalent(optmatch:::compute_rank_mahalanobis(indices[-omitdists, ],
X, as.logical(z)),
reference_rankmahal[-omitdists])
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.