tests/testthat/test-subsample_functions.R

test_that("subsample construction works with multi-valued D", {
  # Simulate small dataset
  nobs <- 500
  X <- cbind(1, matrix(rnorm(nobs*39), nobs, 39))
  D_tld <-  X %*% runif(40) + rnorm(nobs)
  fun <- stepfun(quantile(D_tld, probs = c(0.25, 0.5, 0.75)), c(1, 2, 3, 4))
  D <- fun(D_tld)
  y <- D + X %*% runif(40) + rnorm(nobs)

  # Compute crossfit indices w/o splitting by D
  crossfit_indices_1 <- get_crossfit_indices(cluster_variable = 1:500,
                                           sample_folds = 5, cv_folds = 3,
                                           D = NULL)

  # Compute crossfit indices w/ splitting by D
  crossfit_indices_2 <- get_crossfit_indices(cluster_variable = 1:500,
                                           sample_folds = 5, cv_folds = 3,
                                           D = D)

  # Check that subsamples and cv subsamples are of matching size
  expect_equal(nobs - length(crossfit_indices_1$subsamples[[1]]),
               length(unlist(crossfit_indices_1$cv_subsamples_list[[1]])))
  expect_equal(nobs - length(crossfit_indices_2$subsamples[[1]]),
               length(unlist(crossfit_indices_2$cv_subsamples_list[[1]])))
  expect_equal(sum(D==1) - length(crossfit_indices_2$subsamples_byD[[1]][[1]]),
               length(unlist(crossfit_indices_2$cv_subsamples_byD[[1]][[1]])))
})#TEST_THAT

test_that("subsample construction works with multi-valued D and dependence", {
  # Simulate small dataset
  n_cluster <- 200
  nobs <- 500
  X <- cbind(1, matrix(rnorm(n_cluster*39), n_cluster, 39))
  D_tld <-  X %*% runif(40) + rnorm(n_cluster)
  fun <- stepfun(quantile(D_tld, probs = c(0.25, 0.5, 0.75)), c(1, 2, 3, 4))
  D <- fun(D_tld)
  cluster_variable <- sample(1:n_cluster, nobs, replace = TRUE)
  D <- D[cluster_variable]
  X <- X[cluster_variable, ]
  y <- D + X %*% runif(40) + rnorm(nobs)

  # Compute crossfit indices w/o splitting by D
  crossfit_indices_1 <- get_crossfit_indices(cluster_variable =
                                               cluster_variable,
                                             sample_folds = 5, cv_folds = 3,
                                             D = NULL)
  expect_identical(
    setdiff(unique(cluster_variable[crossfit_indices_1$subsamples[[1]]]),
          unique(cluster_variable[crossfit_indices_1$subsamples[[2]]])),
    unique(cluster_variable[crossfit_indices_1$subsamples[[1]]]))

  # Compute crossfit indices w/ splitting by D
  crossfit_indices_2 <- get_crossfit_indices(cluster_variable =
                                               cluster_variable,
                                             sample_folds = 5, cv_folds = 3,
                                             D = D)

  # Check that cluster variables are unique across folds
  expect_identical(
  setdiff(cluster_variable[D==1][crossfit_indices_2$subsamples_byD[[1]][[1]]],
          cluster_variable[D==1][crossfit_indices_2$subsamples_byD[[1]][[2]]]),
  unique(cluster_variable[D==1][crossfit_indices_2$subsamples_byD[[1]][[1]]]))
  expect_identical(
    setdiff(cluster_variable[D==2][crossfit_indices_2$subsamples_byD[[2]][[1]]],
            cluster_variable[D==2][crossfit_indices_2$subsamples_byD[[2]][[2]]]),
    unique(cluster_variable[D==2][crossfit_indices_2$subsamples_byD[[2]][[1]]]))
  expect_identical(
    setdiff(unique(cluster_variable[crossfit_indices_2$subsamples[[1]]]),
            unique(cluster_variable[crossfit_indices_2$subsamples[[2]]])),
    unique(cluster_variable[crossfit_indices_2$subsamples[[1]]]))

  # Check that subsamples and cv subsamples are of matching size
  expect_equal(nobs - length(crossfit_indices_1$subsamples[[1]]),
               length(unlist(crossfit_indices_1$cv_subsamples_list[[1]])))
  expect_equal(nobs - length(crossfit_indices_2$subsamples[[1]]),
               length(unlist(crossfit_indices_2$cv_subsamples_list[[1]])))
  expect_equal(sum(D==1) - length(crossfit_indices_2$subsamples_byD[[1]][[1]]),
               length(unlist(crossfit_indices_2$cv_subsamples_byD[[1]][[1]])))
})#TEST_THAT

test_that("auxiliary_X construction works with multi-valued D", {
  # Simulate small dataset
  nobs <- 500
  X <- cbind(1, matrix(rnorm(nobs*39), nobs, 39))
  D_tld <-  X %*% runif(40) + rnorm(nobs)
  fun <- stepfun(quantile(D_tld, probs = c(0.25, 0.5, 0.75)), c(1, 2, 3, 4))
  D <- fun(D_tld)
  y <- D + X %*% runif(40) + rnorm(nobs)

  # Compute crossfit indices w/ splitting by D
  crossfit_indices <- get_crossfit_indices(cluster_variable = 1:500,
                                             sample_folds = 5, cv_folds = 3,
                                             D = D)

  # Get auxiliary_indx
  auxiliary_indx <- get_auxiliary_indx(crossfit_indices$subsamples_byD, D)

  # Get auxiliary_X for first sample_folds
  auxiliary_X_d <- get_auxiliary_X(auxiliary_indx[[1]], X)

  # Check that auxiliary X is of correct size
  expect_equal(length(unlist(auxiliary_indx[[1]])),
               nobs - length(crossfit_indices$subsamples[[1]]) -
                 length(crossfit_indices$subsamples_byD[[1]][[1]]))
  expect_equal(dim(auxiliary_X_d[[1]])[1], length(auxiliary_indx[[1]][[1]]))
})#TEST_THAT

Try the ddml package in your browser

Any scripts or data that you put into this service are public.

ddml documentation built on Oct. 3, 2024, 1:13 a.m.