test-preprocess.R
In triplediff: Triple-Difference Estimators

# Testing error handling in run_preprocess_2periods() function
library(data.table)

test_that("Testing error handling in run_preprocess_2periods() function", {

  # generating dataset without errors
  two_periods_no_errors_df = generate_test_panel()

  # ------------------------------
  # Performing tests
  # ------------------------------

  # Introducing discrepancy in the dataset
  y_not_numeric = copy(two_periods_no_errors_df)
  y_not_numeric$outcome = as.character(y_not_numeric$outcome) # Introducing error: converting numeric to character

  # Introducing missing values in the outcome
  missing_values_outcome_df = copy(two_periods_no_errors_df)
  missing_values_outcome_df[1:2, "outcome"] = NA # Introducing missing values in the outcome variable

  # Introducing missing values in the treatment
  missing_values_treat_df = copy(two_periods_no_errors_df)
  missing_values_treat_df[1:2, "treat"] = NA # Introducing missing values in the treatment variable

  # Dataset only with 1 treated unit (inference is no feasible)
  one_treated_unit_df = copy(two_periods_no_errors_df)
  one_treated_unit_df$treat = ifelse(one_treated_unit_df$treat == 1, 0, 0)
  one_treated_unit_df[1:2, "treat"] = 1

  # partition is no unique by id
  partition_not_unique_df = copy(two_periods_no_errors_df)
  partition_not_unique_df$partition[1] <- ifelse(partition_not_unique_df$partition[1] == 1, 0, 1)

  # treatment variables "dname" is not unique by id
  treat_not_unique_df = copy(two_periods_no_errors_df)
  treat_not_unique_df$treat[1] <- ifelse(treat_not_unique_df$treat[1] == 1, 0, 1)

  # covariates are varying over time
  covariates_invariant_df = copy(two_periods_no_errors_df)
  covariates_invariant_df$x1 = rnorm(nrow(covariates_invariant_df))

  # more than 2 time periods
  more_than_two_periods_df = generate_test_panel(time = 3)

  # more than 2 groups
  more_than_two_groups_df = copy(two_periods_no_errors_df)
  more_than_two_groups_df$treat[1] = 2

  # partition is not numeric
  partition_not_numeric_df = copy(two_periods_no_errors_df)
  partition_not_numeric_df$partition = as.character(partition_not_numeric_df$partition)

  # partition is not binary
  partition_not_binary_df = copy(two_periods_no_errors_df)
  partition_not_binary_df$partition = rnorm(nrow(partition_not_binary_df))

  # Providing weights columns and one of them is null
  weights_null_df = copy(two_periods_no_errors_df)
  weights_null_df$weights = rep(1, nrow(weights_null_df))
  weights_null_df[weights_null_df$id == 1]$weights = NA

  # ------------------------------
  # Warnings
  # ------------------------------

  # Test 2: Warning for est_method
  expect_warning(ddd(yname = "outcome", tname = "year", idname = "id", gname = "treat",
                     pname = "partition", xformla = ~x1 + x2,
                     data = two_periods_no_errors_df, control_group = NULL, base_period = NULL, est_method = "whatever",
                     weightsname = NULL, boot = FALSE, nboot = NULL,
                     inffunc = FALSE, skip_data_checks = FALSE))

  # Test 3: Warning for missing values in outcome variable "yname"
  expect_warning(ddd(yname = "outcome", tname = "year", idname = "id", gname = "treat",
                     pname = "partition", xformla = ~x1 + x2,
                     data = missing_values_outcome_df, control_group = NULL, base_period = NULL, est_method = "dr",
                     weightsname = NULL, boot = FALSE, nboot = NULL,
                     inffunc = FALSE, skip_data_checks = FALSE))


  # ------------------------------
  # Errors
  # ------------------------------
  # Test 4: handling of non-numeric "yname" column
  expect_error(ddd(yname = "outcome", tname = "year", idname = "id", gname = "treat",
                   pname = "partition", xformla = ~x1 + x2,
                   data = y_not_numeric, control_group = NULL, base_period = NULL, est_method = "dr",
                   weightsname = NULL, boot = FALSE, nboot = NULL,
                   inffunc = FALSE, skip_data_checks = FALSE))

  # Test 5: error for missing values in treatment variable "gname"
  expect_error(ddd(yname = "outcome", tname = "year", idname = "id", gname = "treat",
                   pname = "partition", xformla = ~x1 + x2,
                   data = missing_values_treat_df, control_group = "nevertreated", base_period = NULL, est_method = "dr",
                   weightsname = NULL, boot = FALSE, nboot = NULL,
                   inffunc = FALSE, skip_data_checks = FALSE))

  # Test 6: error for small groups for inference (e.g. only one treated unit)
  expect_error(ddd(yname = "outcome", tname = "year", idname = "id", gname = "treat",
                   pname = "partition", xformla = ~x1 + x2,
                   data = one_treated_unit_df, control_group = NULL, base_period = NULL, est_method = "dr",
                   weightsname = NULL, boot = FALSE,  nboot = NULL,
                   inffunc = TRUE, skip_data_checks = FALSE))

  # Test 7: error when "yname" is not in data
  expect_error(ddd(yname = "whatever", tname = "year", idname = "id", gname = "treat",
                   pname = "partition", xformla = ~x1 + x2,
                   data = two_periods_no_errors_df, control_group = NULL, base_period = NULL, est_method = "dr",
                   weightsname = NULL, boot = FALSE, nboot = NULL,
                   inffunc = FALSE, skip_data_checks = FALSE))

  # Test 8: error when "tname" is not in data
  expect_error(ddd(yname = "outcome", tname = "whatever", idname = "id", gname = "treat",
                   pname = "partition", xformla = ~x1 + x2,
                   data = two_periods_no_errors_df, control_group = NULL, base_period = NULL, est_method = "dr",
                   weightsname = NULL, boot = FALSE, nboot = NULL,
                   inffunc = FALSE, skip_data_checks = FALSE))

  # Test 9: error when "dname" is not in data
  expect_error(ddd(yname = "outcome", tname = "year", idname = "id", gname = "whatever",
                   pname = "partition", xformla = ~x1 + x2,
                   data = two_periods_no_errors_df, control_group = NULL, base_period = NULL, est_method = "dr",
                   weightsname = NULL, boot = FALSE, nboot = NULL,
                   inffunc = FALSE, skip_data_checks = FALSE))

  # Test 10: error when "partition" is not in data
  expect_error(ddd(yname = "outcome", tname = "year", idname = "id", gname = "treat",
                   pname = "whatever", xformla = ~x1 + x2,
                   data = two_periods_no_errors_df, control_group = NULL, base_period = NULL, est_method = "dr",
                   weightsname = NULL, boot = FALSE, nboot = NULL,
                   inffunc = FALSE, skip_data_checks = FALSE))

  # Test 11: error when "idname" is not in data
  expect_error(ddd(yname = "outcome", tname = "year", idname = "whatever", gname = "treat",
                   pname = "whatever", xformla = ~x1 + x2,
                   data = two_periods_no_errors_df, control_group = NULL, base_period = NULL, est_method = "dr",
                   weightsname = NULL, boot = FALSE, nboot = NULL,
                   inffunc = FALSE, skip_data_checks = FALSE))

  # Test 12: error when "xformla" is not a formula
  expect_error(ddd(yname = "outcome", tname = "year", idname = "id", gname = "treat",
                   pname = "partition", xformla = "x1 - x2",
                   data = two_periods_no_errors_df, control_group = NULL, base_period = NULL, est_method = "dr",
                   weightsname = NULL, boot = FALSE, nboot = NULL,
                   inffunc = FALSE, skip_data_checks = FALSE))

  # Test 13: partition variable "partition" is not unique by id
  expect_error(ddd(yname = "outcome", tname = "year", idname = "id", gname = "treat",
                   pname = "partition", xformla = ~x1 + x2,
                   data = partition_not_unique_df, control_group = NULL, base_period = NULL, est_method = "dr",
                   weightsname = NULL, boot = FALSE, nboot = NULL,
                   inffunc = FALSE, skip_data_checks = FALSE))

  # Test 14: treatment variable "dname" is not unique by id
  expect_error(ddd(yname = "outcome", tname = "year", idname = "id", gname = "treat",
                   pname = "partition", xformla = ~x1 + x2,
                   data = treat_not_unique_df, control_group = NULL, base_period = NULL, est_method = "dr",
                   weightsname = NULL, boot = FALSE, nboot = NULL,
                   inffunc = FALSE, skip_data_checks = FALSE))

  # Test 15: error when covariates are not invariant
  expect_error(ddd(yname = "outcome", tname = "year", idname = "id", gname = "treat",
                   pname = "partition", xformla = ~x1 + x2,
                   data = covariates.not.invariant.df, control_group = NULL, base_period = NULL, est_method = "dr",
                   weightsname = NULL, boot = FALSE, nboot = NULL,
                   inffunc = FALSE, skip_data_checks = FALSE))

  # Test 16: More that 2 time periods
  expect_error(ddd(yname = "outcome", tname = "year", idname = "id", gname = "treat",
                   pname = "partition", xformla = ~x1 + x2,
                   data = more_than_two_periods_df, control_group = "nevertreated", base_period = NULL, est_method = "dr",
                   weightsname = NULL, boot = FALSE, nboot = NULL,
                   inffunc = FALSE, skip_data_checks = FALSE))

  # Test 17: Partition is not numeric
  expect_error(ddd(yname = "outcome", tname = "year", idname = "id", gname = "treat",
                   pname = "partition", xformla = ~x1 + x2,
                   data = partition_not_numeric_df, control_group = NULL, base_period = NULL, est_method = "dr",
                   weightsname = NULL, boot = FALSE, nboot = NULL,
                   inffunc = FALSE, skip_data_checks = FALSE))

  # Test 18: Partition is not numeric
  expect_error(ddd(yname = "outcome", tname = "year", idname = "id", gname = "treat",
                   pname = "partition", xformla = ~x1 + x2,
                   data = partition_not_numeric_df, control_group = NULL, base_period = NULL, est_method = "dr",
                   weightsname = NULL, boot = FALSE, nboot = NULL,
                   inffunc = FALSE, skip_data_checks = FALSE))

  # Test 19: Weights columns contains Null values
  expect_error(ddd(yname = "outcome", tname = "year", idname = "id", gname = "treat",
                   pname = "partition", xformla = ~x1 + x2,
                   data = weights_null_df, control_group = NULL, base_period = NULL, est_method = "dr",
                   weightsname = "weights", boot = FALSE, nboot = NULL,
                   inffunc = FALSE, skip_data_checks = FALSE))

})