optmatch: Functions for Optimal Matching

################################################################################
# Match: methods to create distance specifications, possibliy sparse
################################################################################

context("match_on function")

test_that("Distances from glms", {

  n <- 16
  Z <- numeric(n)
  Z[sample.int(n, n/2)] <- 1
  X1 <- rnorm(n, mean = 5)
  X2 <- rnorm(n, mean = -2, sd = 2)
  B <- rep(c(0,1), n/2)

  test.glm <- glm(Z ~ X1 + X2 + B, family = binomial()) # the coefs should be zero or so

  result.glm <- match_on(test.glm)

  expect_true(validDistanceSpecification(result.glm))
  expect_equal(length(result.glm), (n/2)^2)

  # what about classes that inherit from glm?
  class(test.glm) <- c("foo", class(test.glm))

  result.foo <- match_on(test.glm)

  expect_true(validDistanceSpecification(result.foo))
  expect_equal(length(result.foo), (n/2)^2)

  # test that we can use GLMs with an attached data.frame
  df <- data.frame(Z. = Z, X1. = X1, X2. = X2, B. = B)
  expect_error(glm(Z. ~ X1. + X2. + B., family = binomial()))

  model.attach <- with(df, glm(Z. ~ X1. + X2. + B., family = binomial()))
  res.attach <- match_on(model.attach)

  expect_true(isTRUE(all.equal(res.attach, result.foo,
                               check.attributes = FALSE)))

})

test_that("Missingness in treatment", {
  set.seed(548243)
  Z <- sample(0:1, 10, TRUE)
  X <- rnorm(10)

  g <- glm(Z ~ X, family=binomial)

  expect_equal(sum(dim(match_on(g))), 10)

  Z.na <- Z; X.na <- X

  Z.na[1] <- NA
  X.na[2] <- NA

  g2 <- glm(Z ~ X.na, family=binomial)
  # Should have full recovery.
  expect_equal(sum(dim(match_on(g2))), 10)

  g3 <- glm(Z.na ~ X, family=binomial)
  # With missing treatment, drop observation.
  expect_equal(sum(dim(match_on(g3))), 9)

  g4 <- glm(Z.na ~ X.na, family=binomial)
  expect_equal(sum(dim(match_on(g4))), 9)

  d1 <- data.frame(Z,X.na)
  d2 <- data.frame(Z.na, X)
  d3 <- data.frame(Z.na, X.na)
  rm("Z","X","Z.na","X.na")

  # Passing data should have same recovery structure as before
  h1 <- glm(Z ~ X.na, family=binomial, data=d1)
  expect_equal(sum(dim(match_on(h1, data=d1))), 10)

  h2 <- glm(Z.na ~ X, family=binomial, data=d2)
  expect_equal(sum(dim(match_on(h2, data=d2))), 9)

  h3 <- glm(Z.na ~ X.na, family=binomial, data=d3)
  expect_equal(sum(dim(match_on(h3, data=d3))), 9)
})

test_that("Distances from formulas", {
  n <- 16
  Z <- rep(c(0,1), n/2)
  X1 <- rnorm(n, mean = 5)
  X2 <- rnorm(n, mean = -2, sd = 2)
  B <- as.factor(rep(c(0,1), each = n/2))

  test.data <- data.frame(Z, X1, X2, B)

  result.fmla <- match_on(Z ~ X1 + X2 + B, data = test.data)
  expect_true(validDistanceSpecification(result.fmla))

  # test pulling from the environment, like lm does
  result.envir <- match_on(Z ~ X1 + X2 + B)
  expect_equivalent(result.fmla, result.envir)

  expect_error(match_on(~ X1 + X2, data = test.data))
  expect_error(match_on(Z ~ 1, data = test.data))

  # checking diferent classes of responses
  res.one <- match_on(Z ~ X1)
  res.logical <- match_on(as.logical(Z) ~ X1)
  expect_equivalent(res.one, res.logical)

  tol <- 10^(9L - getOption("digits")) * sqrt(.Machine$double.eps)

  # euclidean distances
  # first, compute what the distances should be for the data.
  euclid <- as.matrix(dist(test.data[,-1], method = "euclidean", upper = T))
  z <- as.logical(Z)
  euclid <- euclid[z, !z]
  expect_true(all(abs(match_on(Z ~ X1 + X2 + B, method = "euclidean") - euclid) <
                      tol)) # there is some rounding error, but it is small

  # factor-related
  f0 <- as.factor(rep(1:4, each=n/4))
  f1 <- as.factor(rep(rep(1:2, each=2),n/4))
  f2 <- as.factor(rep(3:4, each=n/2))

  # Euclidean distances on a single factor should be 1 or 0
  tmp <- match_on(Z~f1, method="euclidean")
  expect_true(all(abs(tmp) < tol | abs(tmp - 1) < tol))

  euclid2 <- match_on(Z~f0, method="euclidean")
  expect_true(all(abs(euclid2) < tol | abs(euclid2 - 1) < tol))

  # with 2 orthogonal factors, distances should be 0, 1 or 2
  tmp <- match_on(Z ~ f1 + f2, method="euclidean")
  expect_true(all(abs(tmp) < tol | abs(tmp - 1) < tol | abs(tmp - sqrt(2)) < tol))

  # with a single 2 level factor, numeric version to give same distances
  expect_equal(as.matrix(match_on(Z~as.numeric(f1), method="euclidean")), as.matrix(match_on(Z~f1, method="euclidean")))

  # passing a function name for method
  #  expect_true(all(abs(match_on(Z ~ X1 + X2 + B, method = optmatch:::compute_euclidean) - euclid) < tol)) # there is some rounding error, but it is small
  # removed - no longer support user-functions in method

  # Mahalanobis distances involving factors

  expect_equal(as.matrix(match_on(Z~as.numeric(f1), method="mahalanobis")), as.matrix(match_on(Z~f1, method="mahalanobis")))

  # excluding matches combined with a formula
  stratify <- exactMatch(Z ~ B)
  res.strat <- match_on(Z ~ X1 + X2, within = stratify)
  expect_is(res.strat, "InfinitySparseMatrix")
  expect_equal(length(res.strat), 2 * (n/4)^2)

})

test_that("Issue 87: NA's in data => unmatchable, but retained, units in distances", {
  d <- data.frame(z  = c(1,1,1,0,0),
                  x1 = c(7, 9, NA, -1, 4),
                  x2 = c(1, 0, 0, 0, 1))

  rownames(d) <- c("A", "B", "C", "y", "z")
  g <- function(x) {
    tmp <- rep("OTHER", length(g))
    tmp[is.finite(x)]  <- "FINITE"
    tmp[!is.finite(x)] <- "INF"
    tmp[is.na(x)] <- "NA"
    return(tmp)
  }

  f <- function(method) {
    v <- as.matrix(match_on(z ~ x1 + x2, data = d, method = method))
    g(v)
  }

  expectedM <- c("FINITE", "FINITE", "INF", "FINITE", "FINITE", "INF")

  expect_equivalent(f("mahalanobis"), expectedM)
  expect_equivalent(f("euclid"), expectedM)
  expect_equivalent(f("rank_mahal"), expectedM)

  cal1 <- caliper(match_on(z~x1, data=d), width=1e3)
  expect_equivalent(g(as.matrix(match_on(z ~ x1 + x2, data = d,
                                         within=cal1, method = "mahalanobis"))),
                    expectedM)

  ## now with numeric method:
  zz <- d$z
  x1 <- d$x1
  names(zz) <- names(x1) <- rownames(d)

  v <- as.matrix(match_on(x1, z = zz))
  expect_equivalent(g(v), expectedM)

  ## glm should have the opposite behavior: automatically imputing
  expect_warning(v <- as.matrix(match_on(glm(z ~ x1 + x2, data = d, family = binomial))), "fitted")
  expect_equivalent(g(v), rep("FINITE", 6))
})

# while the formula method often handles mahalanobis distances, separating the tests for clarity
test_that("Mahalanobis distance calculations", {
  badData <- data.frame(Z = rep(c(0,1), 10),
                        all1 = as.factor(rep(1,20)),
                        badf1 = c(rep(1,3), rep(0,7)),
                        badf2 = c(rep(0,3), rep(1,7)))

  expect_error(match_on(Z ~ all1, data = badData), "contrasts can be applied only to factors with 2 or more levels")

  # even though the supplied data is a bad idea, it should work using the svd() decomposition
  res <- match_on(Z ~ badf1 + badf2, data = badData)
})

test_that("Distances from functions", {
  n <- 16
  Z <- c(rep(0, n/2), rep(1, n/2))
  X1 <- rep(c(1,2,3,4), each = n/4)
  B <- rep(c(0,1), n/2)
  test.data <- data.frame(Z, X1, B)

  sdiffs <- function(index, data, z) {
    abs(data[index[,1], "X1"] - data[index[,2], "X1"])
  }

  result.function <- match_on(sdiffs, z = Z, data = test.data)
  expect_equal(dim(result.function), c(8,8))

  # the result is a blocked matrix:
  # | 2 1 |
  # | 3 2 |

  expect_equal(mean(result.function), 2)

  # no treatment indicator
  expect_error(match_on(sdiffs, data = test.data))

  # no data
  expect_error(match_on(sdiffs, z = Z))

})

###### Using mad() instead of sd() for GLM distances
###
###result <- match_on(glm(pr ~ t1 + t2 + cost, data = nuclearplants, family = binomial()))
###
#### this is an odd test, but a simple way to make sure mad is running, not SD().
#### I would like a better test of the actual values, but it works
###test(mean(result$m) > 2)
###
###

test_that("Errors for numeric vectors", {
  expect_error(match_on(1:10))
})

###### Stratifying by a pipe (|) character in formulas
###
###main.fmla <- pr ~ t1 + t2
###strat.fmla <- ~ pt
###combined.fmla <- pr ~ t1 + t2 | pt
###
###result.main <- match_on(main.fmla, structure.fmla = strat.fmla, data = nuclearplants)
###result.combined <- match_on(combined.fmla, data = nuclearplants)
###
###test(identical(result.main, result.combined))
###
###### Informatively insist that one of formulas specify the treatment group
###shouldError(match_on(~t1+t2, structure.fmla=~pt, data=nuclearplants))
###test(identical(match_on(pr~t1+t2, structure.fmla=~pt, data=nuclearplants),
###               match_on(~t1+t2, structure.fmla=pr~pt, data=nuclearplants))
###     )
###### Finding "data" when it isn't given as an argument
###### Caveats:
###### * data's row.names get lost when you don't pass data as explicit argument;
###### thus testing with 'all.equal(unlist(<...>),unlist(<...>))' rather than 'identical(<...>,<...>)'.
###### * with(nuclearplants, match_on(fmla)) bombs for obscure scoping-related reasons,
###### namely that the environment of fmla is the globalenv rather than that created by 'with'.
###### This despite the facts that identical(fmla,pr ~ t1 + t2 + pt) is TRUE and that
###### with(nuclearplants, match_on(pr ~ t1 + t2 + pt)) runs fine.
###### But then with(nuclearplants, lm(fmla)) bombs too, for same reason, so don't worry be happy.
###attach(nuclearplants)
###test(all.equal(unlist(result.fmla),unlist(match_on(fmla))))
###test(all.equal(unlist(result.main),unlist(match_on(main.fmla, structure.fmla=strat.fmla))))
###test(all.equal(unlist(result.combined),unlist(match_on(combined.fmla))) )
###detach("nuclearplants")
###test(identical(fmla,pr ~ t1 + t2 + pt))
###test(all.equal(unlist(result.fmla),unlist(with(nuclearplants, match_on(pr ~ t1 + t2 + pt)))))
###test(identical(combined.fmla, pr ~ t1 + t2 | pt))
###test(all.equal(unlist(result.combined), unlist(with(nuclearplants, match_on(pr ~ t1 + t2 | pt)))))
###test(all.equal(unlist(result.fmla), unlist(with(nuclearplants[-which(names(nuclearplants)=="pt")],
###                                           match_on(update(pr ~ t1 + t2 + pt,.~.-pt + nuclearplants$pt))
###                                           )
###                                      )
###          )
###     )
###test(all.equal(unlist(result.combined), unlist(with(nuclearplants, match_on(pr ~ t1 + t2, structure.fmla=strat.fmla)))))

# test_that("Bigglm distances", {
# Moved to test.notforCRAN.R

test_that("Numeric: simple differences of scores", {
  # note: the propensity score method depends on this method as well, so if
  # those tests start failing, check here.

  scores <- rep(7, 12)
  z <- rep(c(0,1), 6)
  names(z) <- names(scores) <- letters[1:12]

  expect_true(all(match_on(scores, z = z) == 0))

  expect_true(all(match_on(z * 2, z = z) == 2))

  expect_true(all(match_on(z * -2, z = z) == 2))

  # proper errors
  expect_error(match_on(scores), "treatment")
  expect_error(match_on(scores, z = c(1,2)), "length")
  expect_error(match_on(c(1,2,3,4), z = c(0,1,0,1)), "names")

  # pass a caliper width, limits the comparisons that are going to be made.
  # the scores are going to be computed using abs diff, we can use the tools
  # developed in feasible.R

  scores <- rep(1:3, each = 4)
  names(scores) <- letters[1:12]

  # first, test the helper function scoreCaliper that generates the list of allowed comparisons.
  scres <- scoreCaliper(scores, z, 1)
  # match_on(scores, z) shows that there are 8 comparisons of size 2
  expect_equal(length(scres), 28)
  # every entry should be zero, the match_on function will compute the actual differences
  expect_equal(sum(scres), 0)
  # repeating above using non-integer values
  expect_equal(length(scoreCaliper(scores/3, z, 1/3)), 28)

  # try it as a matrix
  expect_equivalent(as.matrix(scres), matrix(c(0,0,0,0,Inf,Inf,
                                               0,0,0,0,Inf,Inf,
                                               rep(0, 6),
                                               rep(0, 6),
                                               Inf, Inf, 0,0,0,0,
                                               Inf,Inf,0,0,0,0), nrow = 6, ncol = 6))

  # repeat with match_on
  expect_equal(length(match_on(scores, z = z, caliper = 1)), 28) # 6 * 6 - 8
  expect_equal(length(match_on(scores, z = z, caliper = 1.5)), 28)

  # combine the caliper width with an within argument
  b <- rep(1:3, 4)
  ez <- exactMatch(z ~ b)

  res <- match_on(scores, z = z, caliper = 1, within = ez)
  expect_equal(length(res), 9)

  # next test includes treated and control units that are excluded entirely
  # with caliper = 1
  scores2 <- c(scores, -50, 100, 200, -100)
  z2 <- c(z, 1,0,1,0)
  names(scores2) <- names(z2) <- letters[1:(length(scores2))]
  res <- match_on(scores2, z = z2, caliper = 1)
  expect_equal(length(res), 28) # effectively same result as without the new units

  # caliper must be of length 1
  expect_error(match_on(scores2, z = z2, caliper = c(1,2)), "scalar")
})

test_that("Numeric, issues with vector names", {
  # #189

  # If z is not named, it should copy over x's names
  z <- c(0, 0, 1, 1)
  x <- c("a" = NA, "b" = 1, "c" = 2, "d" = 3)
  m1 <- match_on(x, z = z)
  expect_equal(dim(m1), c(2, 2))
  expect_equal(m1, match_on(z ~ x, method = "euclidean",
                            data = data.frame(x,z)),
               check.attributes = FALSE)

  # If z is named, and the same as x, everything should work.
  z <- c("a" = 0,  "b" = 0, "c" = 1, "d" = 1)
  x <- c("a" = NA, "b" = 1, "c" = 2, "d" = 3)
  m2 <- match_on(x, z = z)
  expect_equal(dim(m2), c(2, 2))
  expect_equal(m2, match_on(z ~ x, method = "euclidean",
                        data = data.frame(x,z)),
               check.attributes = FALSE)
  expect_identical(m1, m2)

  # if z is named same as x, but misordered, order and continue on.
  z <- c("b" = 0,  "a" = 0, "d" = 1, "c" = 1)
  x <- c("a" = NA, "a" = 1, "c" = 2, "d" =  3)
  m3 <- match_on(x, z = z)
  expect_equal(dim(m3), c(2, 2))
  expect_equal(m3, match_on(z ~ x, method = "euclidean",
                        data = data.frame(x,z)),
               check.attributes = FALSE)

  # if names of z and x are not the same, error
  z <- c("a" = 0,  "c" = 0, "b" = 1)
  x <- c("a" = NA, "b" = 1, "d" = 2)
  expect_error(match_on(x, z = z), "names")


})

test_that("matrix, ISM methods' within args",{
  scores <- rep(1:3, each = 4)
  z <- rep(c(0,1), 6)
  b <- rep(1:3, 4)
  names(z) <- names(scores) <- letters[1:12]
  ez <- exactMatch(z ~ b)
  sISM  <- match_on(scores, z=z)
  expect_s4_class(sISM, "DenseMatrix")
  expect_equivalent(match_on(sISM, within=ez), sISM+ez)
  sISM2  <- as.InfinitySparseMatrix(sISM)
  expect_equivalent(match_on(sISM2, within=ez), sISM+ez)
})

test_that("use of matrix, ISM, BISM methods w/ caliper arg", {
  scores <- rep(1:3, each = 4)
  z <- rep(c(0,1), 6)
  b <- rep(1:3, 4)
  names(z) <- names(scores) <- letters[1:12]
  sISM  <- match_on(scores, z=z)
  expect_s4_class(sISM, "DenseMatrix")
  sISM_cal  <- match_on(scores, z=z, caliper=1)
  expect_equivalent(sISM_cal, match_on(sISM, caliper=1))
  expect_equivalent(sISM_cal,
                  match_on(as.InfinitySparseMatrix(sISM), caliper=1)
                  )

  ez <- exactMatch(z ~ b)
  res <- match_on(scores, z = z, # checked in "Numeric: simple..."
                  caliper = 1, within = ez) # test above
  expect_equivalent(res, sISM_cal + ez)

  res1  <- match_on(sISM, caliper=1, within=ez)
  expect_equivalent(res, res1)
})

test_that("update() of match_on created objects", {
  Z <- rep(c(1,0), 10)
  X1 <- rep(c(1,3), 10)
  X2 <- rep(c(1,5), 10)
  B <- rep(c(1,2), each = 10)

  names(Z) <- names(X1) <- names(X2) <- names(B) <- letters[1:20]

  # first, with dense problems
  basic <- match_on(X1, z = Z)
  use.x2 <- update(basic, x = X2)
  expect_true(all(basic == 2))
  expect_true(all(use.x2 == 4))

  X1 <- X2
  expect_true(all((update(basic) - use.x2) == 0))

  # next, with sparse problems
  X1 <- rep(c(1,3), 10)
  names(X1) <- letters[1:20]

  basic <- match_on(X1, z = Z, within = exactMatch(Z ~ B))
  use.x2 <- update(basic, x = X2)
  expect_true(all(basic == 2))
  expect_true(all(use.x2 == 4))

  X1 <- X2
  expect_true(all((update(basic) - use.x2) == 0))

  # now create a distspec with addition, update should error
  X1 <- rep(c(1,3), 10)
  names(X1) <- letters[1:20]

  stratified <- match_on(X1, z = Z, within = exactMatch(Z ~ B))
  unstratified <- match_on(X1, z = Z)
  em <- exactMatch(Z ~ B)

  expect_equivalent(stratified, unstratified + em)

  expect_error(update(unstratified + em, x = X2))
})

test_that("Issue #44", {
  # problem: `within` negates proper caliper

  scores <- rep(c(1,2,2,3), each = 25)
  z <- rep(c(0,1), each = 50)

  names(scores) <- paste("A", 1:100, sep = "")

  # get the caliper only results
  res.cal <- match_on(scores, z = z, caliper = 1)
  expect_equal(max(res.cal), 1)

  # now make up a within arg
  names(z) <- names(scores)
  www  <- exactMatch(x = as.factor(rep(c(0,1), 50)),
                     treatment = z)

  res.w  <- match_on(scores, z = z, within = www)
  expect_true(max(res.w) > 1)

  # they should safely interact
  res.cal.w <- match_on(scores, z = z, within = www, caliper = 1)
  expect_equal(max(res.cal.w), 1)


})

test_that("Issue 48: caliper is a universal argument", {

  Z <- rep(c(1,0), 5)
  X <- -4:5
  names(X) <- names(Z) <- letters[1:10]

  res.num <- match_on(X, z = Z, caliper = 1)
  expect_true(all(res.num <= 1))

  res.glm <- match_on(glm(Z ~ X, family = binomial), caliper = 1)
  expect_true(all(res.glm <= 1))

  res.fmla <- match_on(Z ~ X, caliper = 1)
  expect_true(all(res.fmla <= 1))
})

# test_that("bayesglm, brglm", {
#
#   # packaages are added at top of file
#   data(nuclearplants)
#
#   by <- bayesglm(pr ~ cost, data=nuclearplants, family=binomial)
#   expect_true(all(class(by) == c("bayesglm", "glm", "lm")))
#   m1 <- match_on(by, data=nuclearplants)
#   expect_true(class(m1)[1] %in% c("InfinitySparseMatrix", "BlockedInfinitySparseMatrix", "DenseMatrix"))
#
#   br <- brglm(pr ~ cost, data=nuclearplants, family=binomial, method="glm.fit")
#   expect_true(all(class(br) == c("brglm", "glm", "lm")))
#   m2 <- match_on(br, data=nuclearplants)
#   expect_true(class(m2)[1] %in% c("InfinitySparseMatrix", "BlockedInfinitySparseMatrix", "DenseMatrix"))
# })

test_that("standardization scale from within match_on", {
  n <- 16
  Z <- numeric(n)
  Z[sample.int(n, n/2)] <- 1
  X1 <- rnorm(n, mean = 5)
  X2 <- rnorm(n, mean = -2, sd = 2)
  B <- rep(c(0,1), n/2)

  test.glm <- glm(Z ~ X1 + X2 + B, family = binomial()) # the coefs should be zero or so

  expect_silent(result.glm0 <- match_on(test.glm))
  expect_is(result.glm0, "matrix")

  expect_silent(result.glm1 <- match_on(test.glm, standardization.scale=mad))
  expect_equivalent(result.glm1, result.glm0)

  expect_silent(result.glm2 <- match_on(test.glm, standardization.scale=sd))
  expect_is(result.glm2, "matrix")

  expect_silent(result.glm4 <- match_on(test.glm, standardization.scale=1))
  expect_is(result.glm4, "matrix")
})

test_that("standardization_scale with svyglm",{
  if (requireNamespace("survey", quietly = TRUE)) {
    data(api, package = "survey")
    d <- survey::svydesign(id = ~ 1, probs = 1, data = apistrat)
    sglm <- survey::svyglm(sch.wide ~ ell + meals + mobility, design = d,
                           family=quasibinomial())
    aglm <- glm(sch.wide ~ ell + meals + mobility,data = apistrat,
                family = binomial)
    expect_equivalent(sglm$linear.predictors, aglm$linear.predictors)
    sd_sglm <- standardization_scale(sglm$linear.predictor,
                                     trtgrp=sglm$y,
                                     standardizer = svy_sd,
                                     svydesign_ = sglm$'survey.design')
    sd_aglm <- standardization_scale(aglm$linear.predictor,
                                     trtgrp=aglm$y,
                                     standardizer = stats::sd)
    expect_equivalent(sd_sglm, sd_aglm)

    d_w <- survey::svydesign(id = ~ 1, weights = ~ pw, data = apistrat)
    sglm_w <- survey::svyglm(sch.wide ~ ell + meals + mobility, design = d_w,
                             family = quasibinomial())
    mad_sglm_w <- standardization_scale(sglm_w$linear.predictor,
                                        trtgrp=sglm_w$y,
                                        standardizer = svy_mad,
                                        svydesign_ = sglm_w$'survey.design')
    expect_true(is.numeric(mad_sglm_w))
    expect_gt(mad_sglm_w, 0)
    sd_sglm_w <- standardization_scale(sglm_w$linear.predictor,
                                     trtgrp=sglm_w$y,
                                     standardizer = svy_sd,
                                     svydesign_ = sglm_w$'survey.design')
    expect_true(is.numeric(sd_sglm_w))
    expect_gt(sd_sglm_w, 0)
  }
  expect_true(TRUE) # avoiding empty test warning
})

test_that("model.frame.svyglm() & model.response (cf # 194)",{
  if (requireNamespace("survey", quietly = TRUE)) {
    data(api, package = "survey")
    dstrat<-survey::svydesign(id=~1, strata=~stype, weights=~pw,
                      data=apistrat, fpc=~fpc)
    sglm <- survey::svyglm(sch.wide~ell+meals+mobility, design=dstrat,
                   family=quasibinomial())
    mf <- optmatch:::model.frame.svyglm(sglm)
    expect_identical(model.response(mf),
                     setNames(sglm$model$'sch.wide',
                              rownames(sglm$model)
                              )
                     )
    }
})
test_that("Building exactMatch from formula with strata", {

  d <- data.frame(x = rnorm(8),
                  t = rep(0:1, 4),
                  z = rep(0:1, each=4))

  em <- exactMatch(t ~ z, data=d)

  nw <- makeWithinFromStrata(t ~ strata(z), d)

  expect_true(is(nw, "list"))
  expect_true(length(nw) == 2)
  expect_true(all(names(nw) == c("x", "within")))

  expect_true(is(nw$x, "formula"))
  expect_true(is(nw$within, "BlockedInfinitySparseMatrix"))
  expect_true(all(unlist(subdim(nw$within)) == 2))

  expect_equivalent(em, nw$within)
  expect_equivalent(t ~ 1, nw$x)

  nw1 <- makeWithinFromStrata(!!t ~ strata(z), d)
  expect_equivalent(nw1$within, nw$within)

  nw2 <- makeWithinFromStrata(t ~ x + strata(z), d)

  expect_equivalent(em, nw2$within)
  expect_equivalent(t ~ x, nw2$x)


})


test_that("Using strata instead of within arguments", {
  data(nuclearplants)

  m1 <- match_on(pr ~ cost, within=exactMatch(pr ~ pt, data=nuclearplants),
                  data=nuclearplants)
  m2 <- match_on(pr ~ cost + strata(pt), data=nuclearplants)
  m2b <- match_on(pr ~ cost, data=nuclearplants)

  expect_true(is(m1, "BlockedInfinitySparseMatrix"))

  expect_true(is(m1, "BlockedInfinitySparseMatrix"))

  expect_true(is(m2, "BlockedInfinitySparseMatrix"))
  expect_true(all.equal(m1, m2, check.attributes=FALSE))
  expect_true(!isTRUE(all.equal(m2, m2b, check.attributes=FALSE)))

  # handling more complicated strata calls
  m3 <- match_on(pr ~ cost, within=exactMatch(pr ~ pt + ct + ne,
                                               data=nuclearplants),
                  data=nuclearplants)
  m4 <- match_on(pr ~ cost + strata(pt) + strata(ct, ne), data=nuclearplants)

  expect_true(all.equal(m3, m4, check.attributes=FALSE))

  e1 <- exactMatch(pr ~ ne, data=nuclearplants)
  e2 <- exactMatch(pr ~ ne + ct, data=nuclearplants)

  m5 <- match_on(pr ~ cost, within=e2, data=nuclearplants)
  m6 <- match_on(pr ~ cost + strata(ct), within=e1, data=nuclearplants)
  m7 <- match_on(pr ~ cost + strata(ct, ne), data=nuclearplants)

  expect_true(all.equal(m5, m6, check.attributes=FALSE))
  expect_true(all.equal(m5, m7, check.attributes=FALSE))

})

test_that("strata in GLMs", {

  data(nuclearplants)

  m1 <- match_on(glm(pr ~ t1 + ne, data=nuclearplants, family=binomial),
                 within=exactMatch(pr ~ ne, data=nuclearplants),
                 data=nuclearplants)
  m2 <- match_on(glm(pr ~ t1 + strata(ne), data=nuclearplants, family=binomial),
                 data=nuclearplants)

  expect_true(is(m1, "BlockedInfinitySparseMatrix"))
  expect_true(is(m2, "BlockedInfinitySparseMatrix"))
  expect_true(all.equal(m1, m2, check.attributes=FALSE))

  m3 <- match_on(glm(pr ~ t1 + ne + interaction(ct,pt), data=nuclearplants,
                     family=binomial),
                 within=exactMatch(pr ~ ne + ct*pt, data=nuclearplants),
                 data=nuclearplants)
  m4 <- match_on(glm(pr ~ t1 + strata(ne) + strata(ct, pt),
                     data=nuclearplants, family=binomial), data=nuclearplants)

  expect_true(all.equal(m3, m4, check.attributes=FALSE))

  e1 <- exactMatch(pr ~ ne, data=nuclearplants)
  e2 <- exactMatch(pr ~ ne + ct, data=nuclearplants)

  m5 <- match_on(glm(pr ~ cost + ne + ct, data=nuclearplants, family=binomial),
                 within=e2, data=nuclearplants)
  m6 <- match_on(glm(pr ~ cost + ne + strata(ct), data=nuclearplants,
                     family=binomial),
                 within=e1, data=nuclearplants)
  m7 <- match_on(glm(pr ~ cost + strata(ct) + strata(ne), data=nuclearplants,
                     family=binomial),
                 data=nuclearplants)

  expect_true(all.equal(m5, m6, check.attributes=FALSE))
  expect_true(all.equal(m5, m7, check.attributes=FALSE))


  # strata(a,b) is equivalent to interaction(a,b)
  m8 <- match_on(glm(pr ~ cost + strata(ct,ne), data=nuclearplants,
                     family=binomial),
                 data=nuclearplants)
  suppressWarnings(
    m9 <- match_on(glm(pr ~ cost + interaction(ne, ct) + strata(ct),
                       data=nuclearplants, family=binomial),
                   within=e1, data=nuclearplants)
  )
  m10 <- match_on(glm(pr ~ cost + interaction(ne,ct), data=nuclearplants,
                      family=binomial),
                  within=e2, data=nuclearplants)
  m10b <- match_on(glm(pr ~ cost + ne*ct, data=nuclearplants, family=binomial),
                  within=e2, data=nuclearplants)
  # m9 is a bit weight because of the double inclusion of ct, and is an unlikely
  # way for users to enter code, but the extra ct is of course ignored.

  expect_true(all.equal(m8, m9, check.attributes=FALSE))
  expect_true(all.equal(m8, m10, check.attributes=FALSE))

  # Fixed effects should be added when exactMatching
  m11 <- match_on(glm(pr ~ cost, data=nuclearplants, family=binomial),
                  within=e2, data=nuclearplants)

  expect_false(isTRUE(all.equal(m8, m11, check.attributes=FALSE)))


})

test_that("Subsetting an ISM by passing a new data object to match_on", {

  data <- data.frame(z = rep(c(0,1), 13), x = rnorm(26))
  rownames(data) <- letters

  x <- match_on(z ~ x, data = data)
  expect_equal(dim(x), c(13, 13))

  d2 <- data.frame(w = 10:15)
  rownames(d2) <- letters[10:15]

  y <- match_on(x, data = d2)
  expect_equal(dim(y), c(3, 3))

  y2 <- match_on(optmatch:::as.InfinitySparseMatrix(x), data = d2)
  expect_equal(dim(y2), c(3, 3))
})

test_that("#114 informative error if caliper in formula", {

  data(nuclearplants)

  m <- match_on(pr ~ cost, data = nuclearplants)
  expect_error(match_on(pr ~ t1 + caliper(m), data = nuclearplants),
               "be applied via")
  expect_error(match_on(pr ~ t1 + caliper(m), data = nuclearplants),
               "within\\ =\\ caliper\\(m\\)")
  expect_error(match_on(pr ~ t1 + caliper(m) + caliper(n), data = nuclearplants),
               "within\\ =\\ caliper\\(m\\)\\ \\+\\ caliper\\(n\\)")

  em <- exactMatch(pr ~ pt, data = nuclearplants)
  expect_error(match_on(pr ~ t1 + caliper(m) + caliper(n),
                        data = nuclearplants, within = em),
               "within\\ =\\ em\\ \\+\\ caliper\\(m\\)\\ \\+\\ caliper\\(n\\)")

})

test_that("variables named strata or caliper are allowed", {

  data <- data.frame(z = rep(0:1, each = 10),
                     x = rnorm(20),
                     strata = rep(1:4, times = 5),
                     caliper = rnorm(20))
  # Try some valid albeit confusing inputs, along with some oddly formed input
  expect_silent(match_on(z ~ x + strata, data = data))
  expect_silent(match_on(z ~ x + caliper, data = data))
  expect_silent(match_on(z ~ x + strata + strata(strata), data = data))
  expect_silent(match_on(z ~ x + caliper + strata(strata), data = data))

  data(nuclearplants)
  m1 <- match_on(pr ~ t1 + t2 + strata(pt), data = nuclearplants)

  names(nuclearplants)[11] <- "strata"

  m2 <- match_on(pr ~ t1 + t2 + strata(strata), data = nuclearplants)
  expect_identical(as.matrix(m1), as.matrix(m2))

})

test_that("dot and strata in formula", {
  data <- data.frame(z = rep(0:1, each = 5),
                     x = rnorm(10),
                     s = rep(0:1, times = 5))

  expect_error(m <- match_on(z ~ . - s + strata(s), data = data),
               "Cannot use . expansion", fixed = TRUE)

})

test_that("#123: Supporting NA's in treatment, match_on.formula", {
  data <- data.frame(z = rep(0:1, each = 5),
                     x = rnorm(10))
  m <- match_on(z ~ x, data = data)
  expect_equal(dim(m), c(5, 5))
  expect_equal(rownames(m), rownames(data)[!is.na(data$z) & data$z == 1])
  expect_equal(colnames(m), rownames(data)[!is.na(data$z) & data$z == 0])

  # Now add an NA

  data$z[1] <- NA
  m <- match_on(z ~ x, data = data)
  expect_equal(dim(m), c(5, 4))
  expect_equal(rownames(m), rownames(data)[!is.na(data$z) & data$z == 1])
  expect_equal(colnames(m), rownames(data)[!is.na(data$z) & data$z == 0])

  data$z[c(2,5,6,7)] <- NA
  m <- match_on(z ~ x, data = data)
  expect_equal(dim(m), c(3, 2))
  expect_equal(rownames(m), rownames(data)[!is.na(data$z) & data$z == 1])
  expect_equal(colnames(m), rownames(data)[!is.na(data$z) & data$z == 0])


})

test_that("#123: Supporting NA's in treatment, match_on.numeric", {
  z <- rep(0:1, each = 5)
  x <- rnorm(10)
  names(z) <- names(x) <- 1:10
  m <- match_on(x, z = z)
  expect_equal(dim(m), c(5, 5))
  expect_equal(colnames(m), names(z[1:5])[!is.na(z[1:5])])
  expect_equal(rownames(m), names(z[6:10])[!is.na(z[6:10])])

  data <- data.frame(z, x)
  m2 <- match_on(x, z = z, data = data)
  expect_equivalent(m, m2)

  # Now add an NA

  z[1] <- NA
  m <- match_on(x, z = z)
  expect_equal(dim(m), c(5, 4))
  expect_equal(colnames(m), names(z[1:5])[!is.na(z[1:5])])
  expect_equal(rownames(m), names(z[6:10])[!is.na(z[6:10])])

  data <- data.frame(z, x)
  m2 <- match_on(x, z = z, data = data)
  expect_equivalent(m, m2)

  z[c(2,5,6,7)] <- NA
  m <- match_on(x, z = z)
  expect_equal(dim(m), c(3, 2))
  expect_equal(colnames(m), names(z[1:5])[!is.na(z[1:5])])
  expect_equal(rownames(m), names(z[6:10])[!is.na(z[6:10])])

  data <- data.frame(z, x)
  m2 <- match_on(x, z = z, data = data)
  expect_equivalent(m, m2)
})

test_that("#123: Supporting NA's in treatment, match_on.function", {

  data <- data.frame(z = rep(0:1, each = 5),
                     x = rnorm(10))

  sdiffs <- function(index, data, z) {
    abs(data[index[,1], "x"] - data[index[,2], "x"])
  }

  m <- match_on(sdiffs, z = data$z, data = data)
  expect_equal(dim(m), c(5, 5))
  expect_equal(colnames(m), rownames(data)[1:5][!is.na(data$z[1:5])])
  expect_equal(rownames(m), rownames(data)[6:10][!is.na(data$z[6:10])])

  data$z[1] <- NA

  m <- match_on(sdiffs, z = data$z, data = data)
  expect_equal(dim(m), c(5, 4))
  expect_equal(colnames(m), rownames(data)[1:5][!is.na(data$z[1:5])])
  expect_equal(rownames(m), rownames(data)[6:10][!is.na(data$z[6:10])])

  data$z[c(2,5,6,7)] <- NA

  m <- match_on(sdiffs, z = data$z, data = data)
  expect_equal(dim(m), c(3, 2))
  expect_equal(colnames(m), rownames(data)[1:5][!is.na(data$z[1:5])])
  expect_equal(rownames(m), rownames(data)[6:10][!is.na(data$z[6:10])])

})

test_that("#123: Supporting NA's in treatment, match_on.glm/bigglm", {

  data <- data.frame(z = rep(0:1, each = 5),
                     x = rnorm(10))

  mod <- glm(z ~ x, data = data, family = binomial)

  m <- match_on(mod)
  expect_equal(dim(m), c(5, 5))
  expect_equal(colnames(m), rownames(data)[1:5][!is.na(data$z[1:5])])
  expect_equal(rownames(m), rownames(data)[6:10][!is.na(data$z[6:10])])

  m2 <- match_on(mod, data = data)
  expect_equivalent(m, m2)

  data$z[1] <- NA

  mod <- glm(z ~ x, data = data, family = binomial)

  m <- match_on(mod)
  expect_equal(dim(m), c(5, 4))
  expect_equal(colnames(m), rownames(data)[1:5][!is.na(data$z[1:5])])
  expect_equal(rownames(m), rownames(data)[6:10][!is.na(data$z[6:10])])

  m2 <- match_on(mod, data = data)
  expect_equivalent(m, m2)

  data$z[c(2,5,6,7)] <- NA

  mod <- glm(z ~ x, data = data, family = binomial)

  m <- match_on(mod)
  expect_equal(dim(m), c(3, 2))
  expect_equal(colnames(m), rownames(data)[1:5][!is.na(data$z[1:5])])
  expect_equal(rownames(m), rownames(data)[6:10][!is.na(data$z[6:10])])

  m2 <- match_on(mod, data = data)
  expect_equivalent(m, m2)
})

test_that("147: within=caliper with NA's", {
  data(nuclearplants)
  #testing with missing data
  nuclearplants$cost[2] <- NA
  m.missing <- match_on(pr ~ cost, within=exactMatch(pr ~ pt, data=nuclearplants),
                        data=nuclearplants)
  expect_true(is(m.missing, "BlockedInfinitySparseMatrix"))
  m.drop <- match_on(pr ~ cost, within=exactMatch(pr ~ pt, data=nuclearplants[-2, ]),
                     data=nuclearplants[-2,])
  expect_true(all.equal(m.drop@.Data, m.missing@.Data))

  np <- nuclearplants[1:6,]
  np$cost[2] <- NA
  c <- caliper(match_on(pr ~ cost, data = np, method = "euclidean"), width = 100)
  m.1 <- match_on(pr ~ cap, within = c, data = np)

  m <- match_on(pr ~ cap, data = np)
  m.2 <- m + c
  expect_true(all.equal(dim(m.1),dim(m.2)))
  expect_true(is(m.1, "InfinitySparseMatrix"))
  expect_true(is(m.2, "InfinitySparseMatrix"))
  expect_true(all.equal(sort(m.2@.Data), sort(m.1@.Data)))
})

test_that("Exclude argument for match_on with caliper arg", {
    set.seed(10303920)
    n <- 20
    x <- rnorm(n, sd = 4)
    names(x) <- letters[1:20]
    z <- c(rep(0, 10), rep(1, 10))
    mint <- names(which.min(x[z == 1]))
    maxc <- names(which.min(x[z == 0]))
    cal <- sd(x) / 5

    m <- match_on(x, z = z, caliper = cal, exclude = c(mint, maxc))

    mm <- as.matrix(m)
    expect_true(sum(is.finite(mm)) < 100)
    expect_equal(sum(is.finite(mm[mint, ])), 10)
    expect_equal(sum(is.finite(mm[, maxc])), 10)

    ## exclude only treatment
    mt <- match_on(x, z = z, caliper = cal, exclude = mint)
    mm <- as.matrix(mt)
    expect_true(sum(is.finite(mm)) < 100)
    expect_equal(sum(is.finite(mm[mint, ])), 10)
    expect_true(sum(is.finite(mm[, maxc])) < 10)

    ## exclude only control
    mc <- match_on(x, z = z, caliper = cal, exclude = maxc)
    mm <- as.matrix(mc)
    expect_true(sum(is.finite(mm)) < 100)
    expect_equal(sum(is.finite(mm[, maxc])), 10)
    expect_true(sum(is.finite(mm[mint, ])) < 10)

    ## repeating with other versions of match_on
    df <- data.frame(z = z, x = x)
    mce <- match_on(z ~ x, data = df, caliper = cal, exclude = maxc, method = "euclidean")
    mme <- as.matrix(mce)
    expect_true(sum(is.finite(mme)) < 100)
    expect_equal(sum(is.finite(mme[, maxc])), 10)
    expect_true(sum(is.finite(mme[mint, ])) < 10)

    mg <- match_on(glm(z ~ x, data = df, family = binomial),
                    caliper = cal, exclude = c(mint, maxc))
    mm <- as.matrix(mg)
    expect_true(sum(is.finite(mm)) < 100)
    expect_equal(sum(is.finite(mm[mint, ])), 10)
    expect_equal(sum(is.finite(mm[, maxc])), 10)

    exmat <- matrix(1:9, nrow = 3, dimnames = list(c('a', 'b', 'c'), c('x', 'y', 'z')))
    mm <- as.matrix(match_on(exmat, caliper = 4, exclude = 'z'))
    expect_equivalent(mm[, 'y'], c(4, Inf, Inf))
    expect_equivalent(mm[, 'z'], 7:9)

    mm <- as.matrix(match_on(as.InfinitySparseMatrix(exmat), caliper = 4, exclude = 'z'))
    expect_equivalent(mm[, 'y'], c(4, Inf, Inf))
    expect_equivalent(mm[, 'z'], 7:9)
})


test_that("No longer support user-defined distances in match_on.formula", {
  data(nuclearplants)
  expect_warning(match_on(pr ~ cost, data = nuclearplants, method = optmatch:::compute_euclidean), "not supported")
})
markmfredrickson/optmatch documentation built on June 15, 2025, 1:58 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
markmfredrickson/optmatch
Functions for Optimal Matching

tests/testthat/test.match_on.R
In markmfredrickson/optmatch: Functions for Optimal Matching

R Package Documentation

Browse R Packages

We want your feedback!

markmfredrickson/optmatch Functions for Optimal Matching

tests/testthat/test.match_on.R In markmfredrickson/optmatch: Functions for Optimal Matching

R Package Documentation

Browse R Packages

We want your feedback!

markmfredrickson/optmatch
Functions for Optimal Matching

tests/testthat/test.match_on.R
In markmfredrickson/optmatch: Functions for Optimal Matching