tests/testthat/test-imputation.R

# Skip on CRAN as this test takes some time.
testthat::skip_on_cran()
testthat::skip_on_ci()

outcome_type <- "survival"
n_numeric_features <- 1
imputation_method <- "lasso"

# Generic test -----------------------------------------------------------------
for (n_numeric_features in c(4, 3, 2, 1, 0)) {
  data <- familiar:::test_create_synthetic_series_data(
    outcome_type = outcome_type,
    n_numeric = n_numeric_features
  )

  for (imputation_method in familiar:::.get_available_imputation_methods()) {
    testthat::test_that(
      paste0(
        "Imputation is correctly performed using the ",
        imputation_method, " method and ",
        n_numeric_features, " numeric features."),
      {
        # Make a copy of the data.
        data_copy <- data.table::copy(data)

        # Create a list of featureInfo objects.
        feature_info_list <- familiar:::.get_feature_info_data(
          data = data_copy@data,
          file_paths = NULL,
          project_id = character(),
          outcome_type = outcome_type
        )[[1]]

        # Create imputation skeleton.
        feature_info_list <- familiar:::create_imputation_parameter_skeleton(
          feature_info_list = feature_info_list,
          imputation_method = imputation_method)

        # Add imputer parameters.
        feature_info_list <- familiar:::add_imputation_info(
          feature_info_list = feature_info_list,
          data = data_copy)

        # Assume that the data is pre-processed.
        data_copy@preprocessing_level <- "batch_normalisation"

        # Attempt to impute the data.
        data_copy <- familiar:::impute_features(
          data = data_copy,
          feature_info_list = feature_info_list)

        # Check that the data is not altered.
        testthat::expect_equal(data.table::fsetequal(data_copy@data, data@data), TRUE)

        # Iterate over features.
        for (feature in familiar:::get_feature_columns(data_copy)) {
          # Check that imputer parameters were set.
          testthat::expect_equal(
            familiar:::feature_info_complete(feature_info_list[[feature]]@imputation_parameters),
            TRUE)

          # Check that all feature data are valid.
          testthat::expect_equal(
            all(familiar:::is_valid_data(data_copy@data[[feature]])),
            TRUE)
        }
      }
    )
  }
}



# Rare categories test ---------------------------------------------------------
for (n_numeric_features in c(3, 2, 1, 0)) {
  data <- familiar:::test_create_synthetic_series_data(
    outcome_type = outcome_type,
    n_numeric = n_numeric_features
  )

  for (imputation_method in familiar:::.get_available_imputation_methods()) {
    testthat::test_that(
      paste0(
        "Imputation is correctly performed using the ",
        imputation_method, " method and ",
        n_numeric_features, " numeric features."),
      {
        # Make a copy of the data.
        data_copy <- data.table::copy(data)

        # Create a list of featureInfo objects.
        feature_info_list <- familiar:::.get_feature_info_data(
          data = data_copy@data,
          file_paths = NULL,
          project_id = character(),
          outcome_type = outcome_type
        )[[1]]

        # Create imputation skeleton.
        feature_info_list <- familiar:::create_imputation_parameter_skeleton(
          feature_info_list = feature_info_list,
          imputation_method = imputation_method)

        # At this point replace rare levels in the data.
        data_copy <- familiar:::test_data_drop_rare_feature_levels(data = data_copy)

        # Add imputer parameters.
        feature_info_list <- familiar:::add_imputation_info(
          feature_info_list = feature_info_list,
          data = data_copy)

        # Assume that the data is pre-processed.
        data_copy@preprocessing_level <- "batch_normalisation"

        # Attempt to impute the data.
        data_copy <- familiar:::impute_features(
          data = data_copy,
          feature_info_list = feature_info_list)

        # Iterate over features.
        for (feature in familiar:::get_feature_columns(data_copy)) {
          # Check that imputer parameters were set.
          testthat::expect_equal(
            familiar:::feature_info_complete(feature_info_list[[feature]]@imputation_parameters),
            TRUE)

          # Check that all feature data are valid.
          testthat::expect_equal(
            all(familiar:::is_valid_data(data_copy@data[[feature]])),
            TRUE)
        }
      }
    )
  }
}




# NA-instance test -------------------------------------------------------------
for (n_numeric_features in c(4, 3, 2, 1, 0)) {
  data <- familiar:::test_create_synthetic_series_na_data(
    outcome_type = outcome_type,
    n_numeric = n_numeric_features)

  for (imputation_method in familiar:::.get_available_imputation_methods()) {
    testthat::test_that(
      paste0(
        "Imputation is correctly performed using the ",
        imputation_method, " method and ",
        n_numeric_features, " numeric features for a dataset with some NA instances"),
      {
        # Make a copy of the data.
        data_copy <- data.table::copy(data)

        # Create a list of featureInfo objects.
        feature_info_list <- familiar:::.get_feature_info_data(
          data = data_copy@data,
          file_paths = NULL,
          project_id = character(),
          outcome_type = outcome_type
        )[[1]]

        # Create imputation skeleton.
        feature_info_list <- familiar:::create_imputation_parameter_skeleton(
          feature_info_list = feature_info_list,
          imputation_method = imputation_method)

        # Add imputer parameters.
        feature_info_list <- familiar:::add_imputation_info(
          feature_info_list = feature_info_list,
          data = data_copy)

        # Assume that the data is pre-processed.
        data_copy@preprocessing_level <- "batch_normalisation"

        # Attempt to impute the data.
        data_copy <- familiar:::impute_features(
          data = data_copy,
          feature_info_list = feature_info_list)

        # Iterate over features.
        for (feature in familiar:::get_feature_columns(data_copy)) {
          # Check that imputer parameters were set.
          testthat::expect_equal(
            familiar:::feature_info_complete(feature_info_list[[feature]]@imputation_parameters),
            TRUE)

          # Check that all feature data are valid.
          testthat::expect_equal(
            all(familiar:::is_valid_data(data_copy@data[[feature]])),
            TRUE)
        }
      }
    )
  }
}



# NA random-value test ---------------------------------------------------------
for (n_numeric_features in c(4, 3, 2, 1, 0)) {
  data <- familiar:::test_create_synthetic_series_random_na_data(
    outcome_type = outcome_type,
    n_numeric = n_numeric_features
  )

  for (imputation_method in familiar:::.get_available_imputation_methods()) {
    testthat::test_that(
      paste0(
        "Imputation is correctly performed using the ",
        imputation_method, " method and ",
        n_numeric_features, " numeric features for a dataset with some NA datapoints."),
      {
        # Make a copy of the data.
        data_copy <- data.table::copy(data)

        # Create a list of featureInfo objects.
        feature_info_list <- familiar:::.get_feature_info_data(
          data = data_copy@data,
          file_paths = NULL,
          project_id = character(),
          outcome_type = outcome_type
        )[[1]]

        # Create imputation skeleton.
        feature_info_list <- familiar:::create_imputation_parameter_skeleton(
          feature_info_list = feature_info_list,
          imputation_method = imputation_method)

        # Add imputer parameters.
        feature_info_list <- familiar:::add_imputation_info(
          feature_info_list = feature_info_list,
          data = data_copy)

        # Assume that the data is pre-processed.
        data_copy@preprocessing_level <- "batch_normalisation"

        # Attempt to impute the data.
        data_copy <- familiar:::impute_features(
          data = data_copy,
          feature_info_list = feature_info_list)

        # Iterate over features.
        for (feature in familiar:::get_feature_columns(data_copy)) {
          # Check that imputer parameters were set.
          testthat::expect_equal(
            familiar:::feature_info_complete(feature_info_list[[feature]]@imputation_parameters),
            TRUE)

          # Check that all feature data are valid.
          testthat::expect_equal(
            all(familiar:::is_valid_data(data_copy@data[[feature]])),
            TRUE)
        }
      }
    )
  }
}



# One feature NA test ----------------------------------------------------------
for (n_numeric_features in c(4, 3, 2, 1, 0)) {
  data <- familiar:::test_create_synthetic_series_one_feature_all_na_data(
    outcome_type = outcome_type,
    n_numeric = n_numeric_features)

  for (imputation_method in familiar:::.get_available_imputation_methods()) {
    testthat::test_that(
      paste0(
        "Imputation is correctly performed using the ",
        imputation_method, " method and ",
        n_numeric_features, " numeric features for a dataset with one feature entirely NA."),
      {
        # Make a copy of the data.
        data_copy <- data.table::copy(data)

        # Create a list of featureInfo objects.
        feature_info_list <- familiar:::.get_feature_info_data(
          data = data_copy@data,
          file_paths = NULL,
          project_id = character(),
          outcome_type = outcome_type
        )[[1]]

        # Create imputation skeleton.
        feature_info_list <- familiar:::create_imputation_parameter_skeleton(
          feature_info_list = feature_info_list,
          imputation_method = imputation_method)

        # Add imputer parameters.
        feature_info_list <- familiar:::add_imputation_info(
          feature_info_list = feature_info_list,
          data = data_copy)

        # Assume that the data is pre-processed.
        data_copy@preprocessing_level <- "batch_normalisation"

        # Attempt to impute the data.
        data_copy <- familiar:::impute_features(
          data = data_copy,
          feature_info_list = feature_info_list)

        # Iterate over features.
        for (feature in familiar:::get_feature_columns(data_copy)) {
          # Check that imputer parameters were set.
          testthat::expect_equal(
            familiar:::feature_info_complete(
              feature_info_list[[feature]]@imputation_parameters),
            TRUE)
          
          if (feature == "feature_2") {
            # Feature 2 is completely missing data.
            testthat::expect_equal(
              any(familiar:::is_valid_data(data_copy@data[[feature]])),
              FALSE)

            # Check that a None-class imputer is trained.
            testthat::expect_s4_class(
              feature_info_list[[feature]]@imputation_parameters,
              "featureInfoParametersImputationNone")
            
          } else {
            # Check that all feature data are valid.
            testthat::expect_equal(
              all(familiar:::is_valid_data(data_copy@data[[feature]])),
              TRUE)
          }
        }
      }
    )
  }
}



# Invariant feature test -------------------------------------------------------
for (n_numeric_features in c(4, 3, 2, 1, 0)) {
  data <- familiar:::test_create_synthetic_series_invariant_feature_data(
    outcome_type = outcome_type,
    n_numeric = n_numeric_features
  )

  for (imputation_method in familiar:::.get_available_imputation_methods()) {
    testthat::test_that(
      paste0(
        "Imputation is correctly performed using the ",
        imputation_method, " method and ",
        n_numeric_features, " numeric features for a dataset with invariant features."),
      {
        # Make a copy of the data.
        data_copy <- data.table::copy(data)

        # Create a list of featureInfo objects.
        feature_info_list <- familiar:::.get_feature_info_data(
          data = data_copy@data,
          file_paths = NULL,
          project_id = character(),
          outcome_type = outcome_type
        )[[1]]

        # Create imputation skeleton.
        feature_info_list <- familiar:::create_imputation_parameter_skeleton(
          feature_info_list = feature_info_list,
          imputation_method = imputation_method)

        # Add imputer parameters.
        feature_info_list <- familiar:::add_imputation_info(
          feature_info_list = feature_info_list,
          data = data_copy)

        # Assume that the data is pre-processed.
        data_copy@preprocessing_level <- "batch_normalisation"

        # Attempt to impute the data.
        data_copy <- familiar:::impute_features(
          data = data_copy,
          feature_info_list = feature_info_list)

        # Iterate over features.
        for (feature in familiar:::get_feature_columns(data_copy)) {
          # Check that imputer parameters were set.
          testthat::expect_equal(
            familiar:::feature_info_complete(feature_info_list[[feature]]@imputation_parameters),
            TRUE)

          # Check that all feature data are valid.
          testthat::expect_equal(
            all(familiar:::is_valid_data(data_copy@data[[feature]])),
            TRUE)
        }
      }
    )
  }
}



# One feature invariant test ---------------------------------------------------
for (n_numeric_features in c(4, 3, 2, 1, 0)) {
  data <- familiar:::test_create_synthetic_series_one_feature_invariant_data(
    outcome_type = outcome_type,
    n_numeric = n_numeric_features)

  for (imputation_method in familiar:::.get_available_imputation_methods()) {
    testthat::test_that(
      paste0(
        "Imputation is correctly performed using the ",
        imputation_method, " method and ",
        n_numeric_features, " numeric features for a dataset with one invariant feature."),
      {
        # Make a copy of the data.
        data_copy <- data.table::copy(data)

        # Create a list of featureInfo objects.
        feature_info_list <- familiar:::.get_feature_info_data(
          data = data_copy@data,
          file_paths = NULL,
          project_id = character(),
          outcome_type = outcome_type
        )[[1]]

        # Create imputation skeleton.
        feature_info_list <- familiar:::create_imputation_parameter_skeleton(
          feature_info_list = feature_info_list,
          imputation_method = imputation_method)

        # Add imputer parameters.
        feature_info_list <- familiar:::add_imputation_info(
          feature_info_list = feature_info_list,
          data = data_copy)

        # Assume that the data is pre-processed.
        data_copy@preprocessing_level <- "batch_normalisation"

        # Attempt to impute the data.
        data_copy <- familiar:::impute_features(
          data = data_copy,
          feature_info_list = feature_info_list)

        # Iterate over features.
        for (feature in familiar:::get_feature_columns(data_copy)) {
          # Check that imputer parameters were set.
          testthat::expect_equal(
            familiar:::feature_info_complete(feature_info_list[[feature]]@imputation_parameters),
            TRUE)

          # Check that all feature data are valid.
          testthat::expect_equal(
            all(familiar:::is_valid_data(data_copy@data[[feature]])),
            TRUE)
        }
      }
    )
  }
}


# One-sample test --------------------------------------------------------------
for (n_numeric_features in c(4, 3, 2, 1, 0)) {
  data <- familiar:::test_create_synthetic_series_one_sample_data(
    outcome_type = outcome_type,
    n_numeric = n_numeric_features)

  for (imputation_method in familiar:::.get_available_imputation_methods()) {
    testthat::test_that(
      paste0(
        "Imputation is correctly performed using the ",
        imputation_method, " method and ",
        n_numeric_features, " numeric features for a dataset with invariant features."),
      {
        # Make a copy of the data.
        data_copy <- data.table::copy(data)

        # Create a list of featureInfo objects.
        feature_info_list <- familiar:::.get_feature_info_data(
          data = data_copy@data,
          file_paths = NULL,
          project_id = character(),
          outcome_type = outcome_type
        )[[1]]

        # Create imputation skeleton.
        feature_info_list <- familiar:::create_imputation_parameter_skeleton(
          feature_info_list = feature_info_list,
          imputation_method = imputation_method)

        # Add imputer parameters.
        feature_info_list <- familiar:::add_imputation_info(
          feature_info_list = feature_info_list,
          data = data_copy)

        # Assume that the data is pre-processed.
        data_copy@preprocessing_level <- "batch_normalisation"

        # Attempt to impute the data.
        data_copy <- familiar:::impute_features(
          data = data_copy,
          feature_info_list = feature_info_list)

        # Iterate over features.
        for (feature in familiar:::get_feature_columns(data_copy)) {
          # Check that imputer parameters were set.
          testthat::expect_equal(
            familiar:::feature_info_complete(feature_info_list[[feature]]@imputation_parameters),
            TRUE)

          # Check that all feature data are valid.
          testthat::expect_equal(
            all(familiar:::is_valid_data(data_copy@data[[feature]])),
            TRUE)
        }
      }
    )
  }
}


# One-feature test -------------------------------------------------------------
for (n_numeric_features in c(1, 0)) {
  data <- familiar:::test_create_synthetic_series_random_na_data(
    outcome_type = outcome_type,
    n_numeric = n_numeric_features)

  # Select only the first feature.
  data@data <- data@data[, mget(
    c(familiar:::get_non_feature_columns(data), "feature_1"))]

  for (imputation_method in familiar:::.get_available_imputation_methods()) {
    testthat::test_that(
      paste0(
        "Imputation is correctly performed using the ",
        imputation_method, " method and ",
        n_numeric_features, " numeric features for a dataset with a single ",
        "feature and some NA datapoints."),
      {
        # Make a copy of the data.
        data_copy <- data.table::copy(data)

        # Create a list of featureInfo objects.
        feature_info_list <- familiar:::.get_feature_info_data(
          data = data_copy@data,
          file_paths = NULL,
          project_id = character(),
          outcome_type = outcome_type
        )[[1]]

        # Create imputation skeleton.
        feature_info_list <- familiar:::create_imputation_parameter_skeleton(
          feature_info_list = feature_info_list,
          imputation_method = imputation_method)

        # Add imputer parameters.
        feature_info_list <- familiar:::add_imputation_info(
          feature_info_list = feature_info_list,
          data = data_copy)

        # Assume that the data is pre-processed.
        data_copy@preprocessing_level <- "batch_normalisation"

        # Attempt to impute the data.
        data_copy <- familiar:::impute_features(
          data = data_copy,
          feature_info_list = feature_info_list)

        # Iterate over features.
        for (feature in familiar:::get_feature_columns(data_copy)) {
          # Check that imputer parameters were set.
          testthat::expect_equal(
            familiar:::feature_info_complete(feature_info_list[[feature]]@imputation_parameters),
            TRUE)

          # Check that all feature data are valid.
          testthat::expect_equal(
            all(familiar:::is_valid_data(data_copy@data[[feature]])),
            TRUE)
        }
      }
    )
  }
}


# Collection test --------------------------------------------------------------
for (n_numeric_features in c(4, 3, 2, 1, 0)) {
  data <- familiar:::test_create_synthetic_series_random_na_data(
    outcome_type = outcome_type,
    n_numeric = n_numeric_features)

  for (imputation_method in familiar:::.get_available_imputation_methods()) {
    testthat::test_that(
      paste0(
        "Imputation is correctly performed using the ",
        imputation_method, " method and ",
        n_numeric_features, " numeric features for a dataset with some NA ",
        "datapoints and a collection of imputers."),
      {
        # Make a copy of the data.
        data_copy <- data.table::copy(data)

        # Create a list of featureInfo objects.
        feature_info_list <- familiar:::.get_feature_info_data(
          data = data_copy@data,
          file_paths = NULL,
          project_id = character(),
          outcome_type = outcome_type
        )[[1]]

        # Create imputation skeleton.
        feature_info_list_1 <- familiar:::create_imputation_parameter_skeleton(
          feature_info_list = feature_info_list,
          imputation_method = imputation_method)

        # Add imputer parameters.
        feature_info_list_1 <- familiar:::add_imputation_info(
          feature_info_list = feature_info_list_1,
          data = data_copy)

        # Create imputation skeleton.
        feature_info_list_2 <- familiar:::create_imputation_parameter_skeleton(
          feature_info_list = feature_info_list,
          imputation_method = imputation_method)

        # Add imputer parameters.
        feature_info_list_2 <- familiar:::add_imputation_info(
          feature_info_list = feature_info_list_2,
          data = data_copy)

        # Create imputation skeleton.
        feature_info_list_3 <- familiar:::create_imputation_parameter_skeleton(
          feature_info_list = feature_info_list,
          imputation_method = imputation_method)

        # Add imputer parameters.
        feature_info_list_3 <- familiar:::add_imputation_info(
          feature_info_list = feature_info_list_3,
          data = data_copy)

        # Collect features.
        for (feature in familiar:::get_feature_columns(data_copy)) {
          feature_info_list[[feature]]@imputation_parameters <- familiar:::..collect_and_aggregate_imputation_info(
            feature_info_list = list(
              feature_info_list_1[[feature]],
              feature_info_list_2[[feature]],
              feature_info_list_3[[feature]]),
            feature_type = feature_info_list[[feature]]@feature_type,
            feature_name = feature_info_list[[feature]]@name
          )$parameters
        }

        # Assume that the data is pre-processed.
        data_copy@preprocessing_level <- "batch_normalisation"

        # Attempt to impute the data.
        data_copy <- familiar:::impute_features(
          data = data_copy,
          feature_info_list = feature_info_list)

        # Iterate over features.
        for (feature in familiar:::get_feature_columns(data_copy)) {
          # Check that imputer parameters were set.
          testthat::expect_equal(
            familiar:::feature_info_complete(feature_info_list[[feature]]@imputation_parameters),
            TRUE)

          # Check that all feature data are valid.
          testthat::expect_equal(
            all(familiar:::is_valid_data(data_copy@data[[feature]])),
            TRUE)
        }
      }
    )
  }
}

Try the familiar package in your browser

Any scripts or data that you put into this service are public.

familiar documentation built on Sept. 30, 2024, 9:18 a.m.