tests/testthat/test_example_functions.R

library(cvms)
context("example functions")

## preprocess_functions()

test_that("the expected function definitions are returned in preprocess_functions()", {
  testthat::skip("Fails in check - IMPROVE THESE TESTS")
  expect_equal(
    head(capture.output(preprocess_functions("standardize")), 22),
    c(
      "function(train_data, test_data, formula, hyperparameters){",
      "",
      "      # Get centering and scaling parameters from the train_data",
      "      preprocess_params <- caret::preProcess(train_data,",
      "                                             method = c(\"scale\", \"center\"))",
      "",
      "      # Apply standardization to all numeric variables in",
      "      # train_data and test_data",
      "      train_data <- stats::predict(preprocess_params, train_data)",
      "      test_data <- stats::predict(preprocess_params, test_data)",
      "",
      "      # Extract parameters and add to tibble",
      "      tidy_parameters <- tibble::tibble(\"Measure\" = c(\"Mean\", \"SD\")) %>%",
      "        dplyr::bind_cols(",
      "          dplyr::bind_rows(preprocess_params$mean,",
      "                           preprocess_params$std)",
      "          )",
      "",
      "      list(\"train\" = train_data,",
      "           \"test\" = test_data,",
      "           \"parameters\" = tidy_parameters)",
      "    }"
    )
  )

  expect_equal(
    head(capture.output(preprocess_functions("normalize")), 22),
    c(
      "function(train_data, test_data, formula, hyperparameters){",
      "",
      "        # Get normalization parameters from the train_data",
      "        preprocess_params <- caret::preProcess(train_data,",
      "                                               method = c(\"range\"),",
      "                                               rangeBounds = c(0,1))",
      "",
      "        # Apply normalization to all numeric variables in",
      "        # train_data and test_data",
      "        train_data <- stats::predict(preprocess_params, train_data)",
      "        test_data <- stats::predict(preprocess_params, test_data)",
      "",
      "        # Extract parameters and add to tibble",
      "        tidy_parameters <- tibble::tibble(\"Measure\" = c(\"Min\", \"Max\")) %>%",
      "          dplyr::bind_cols(",
      "            dplyr::as_tibble(preprocess_params$ranges)",
      "          )",
      "",
      "        list(\"train\" = train_data,",
      "             \"test\" = test_data,",
      "             \"parameters\" = tidy_parameters)", "      }"
    )
  )
})

test_that("the expected output is returned from preprocess_functions() functions", {

  # Standardize

  # Load data and fold it
  xpectr::set_test_seed(1)
  partitions <- groupdata2::partition(participant.scores,
    p = 0.75,
    cat_col = "diagnosis",
    id_col = "participant",
    list_out = FALSE
  ) %>%
    dplyr::mutate(
      diagnosis = as.numeric(diagnosis),
      notAVar = 1
    ) %>%
    dplyr::ungroup()
  train_set <- partitions[partitions[[".partitions"]] == 1, ]
  test_set <- partitions[partitions[[".partitions"]] == 2, ]

  # Standardize with preprocess_fn
  standardize_fn <- preprocess_functions("standardize")
  standardized_by_fn <- standardize_fn(
    train_data = train_set, test_data = test_set,
    # notAVar is just to avoid changing the unit tests below
    # when diagnosis wouldn't be standardized
    formula = as.formula("notAVar ~ ."),
    hyperparameters = NULL
  )

  # Standardize manually
  train_mean_age <- mean(train_set$age)
  train_sd_age <- sd(train_set$age)
  train_mean_score <- mean(train_set$score)
  train_sd_score <- sd(train_set$score)
  train_mean_session <- mean(train_set$session)
  train_sd_session <- sd(train_set$session)
  train_mean_diagnosis <- mean(train_set$diagnosis)
  train_sd_diagnosis <- sd(train_set$diagnosis)

  standardized_train <- train_set
  standardized_test <- test_set
  standardized_train[["age"]] <- (standardized_train[["age"]] - train_mean_age) / train_sd_age
  standardized_test[["age"]] <- (standardized_test[["age"]] - train_mean_age) / train_sd_age
  standardized_train[["score"]] <- (standardized_train[["score"]] - train_mean_score) / train_sd_score
  standardized_test[["score"]] <- (standardized_test[["score"]] - train_mean_score) / train_sd_score
  standardized_train[["session"]] <- (standardized_train[["session"]] - train_mean_session) / train_sd_session
  standardized_test[["session"]] <- (standardized_test[["session"]] - train_mean_session) / train_sd_session
  standardized_train[["diagnosis"]] <- (standardized_train[["diagnosis"]] - train_mean_diagnosis) / train_sd_diagnosis
  standardized_test[["diagnosis"]] <- (standardized_test[["diagnosis"]] - train_mean_diagnosis) / train_sd_diagnosis

  # Test
  expect_equal(standardized_by_fn[["train"]], standardized_train)
  expect_equal(standardized_by_fn[["test"]], standardized_test)
  expect_equal(
    standardized_by_fn[["parameters"]]$Measure,
    c("Mean", "SD")
  )

  expect_equal(standardized_by_fn[["parameters"]]$age,
    c(train_mean_age, train_sd_age),
    tolerance = 1e-4
  )
  expect_equal(standardized_by_fn[["parameters"]]$age,
    c(29.2857142857143, 7.93185260290972),
    tolerance = 1e-4
  )

  expect_equal(standardized_by_fn[["parameters"]]$diagnosis,
    c(0.571428571428571, 0.50709255283711),
    tolerance = 1e-4
  )
  expect_equal(standardized_by_fn[["parameters"]]$diagnosis,
    c(train_mean_diagnosis, train_sd_diagnosis),
    tolerance = 1e-4
  )

  expect_equal(standardized_by_fn[["parameters"]]$score,
    c(40.2857142857143, 19.4220051929322),
    tolerance = 1e-4
  )
  expect_equal(standardized_by_fn[["parameters"]]$score,
    c(train_mean_score, train_sd_score),
    tolerance = 1e-4
  )

  expect_equal(standardized_by_fn[["parameters"]]$session,
    c(2, 0.836660026534076),
    tolerance = 1e-4
  )
  expect_equal(standardized_by_fn[["parameters"]]$session,
    c(train_mean_session, train_sd_session),
    tolerance = 1e-4
  )

  # Normalize

  # Normalize with preprocess_fn
  normalize_fn <- preprocess_functions("range")
  normalized_by_fn <- normalize_fn(
    train_data = train_set, test_data = test_set,
    formula = as.formula("notAVar ~ ."),
    hyperparameters = NULL
  )

  # Normalize manually
  train_min_age <- min(train_set[["age"]])
  train_max_age <- max(train_set[["age"]])
  train_min_diagnosis <- min(train_set[["diagnosis"]])
  train_max_diagnosis <- max(train_set[["diagnosis"]])
  train_min_score <- min(train_set[["score"]])
  train_max_score <- max(train_set[["score"]])
  train_min_session <- min(train_set[["session"]])
  train_max_session <- max(train_set[["session"]])

  minMaxScaler <- function(x, min__, max__) {
    (x - min__) / (max__ - min__)
  }

  normalized_train_set <- train_set
  normalized_test_set <- test_set
  normalized_train_set[["age"]] <- minMaxScaler(
    normalized_train_set[["age"]],
    train_min_age, train_max_age
  )
  normalized_train_set[["diagnosis"]] <- minMaxScaler(
    normalized_train_set[["diagnosis"]],
    train_min_diagnosis, train_max_diagnosis
  )
  normalized_train_set[["score"]] <- minMaxScaler(
    normalized_train_set[["score"]],
    train_min_score, train_max_score
  )
  normalized_train_set[["session"]] <- minMaxScaler(
    normalized_train_set[["session"]],
    train_min_session, train_max_session
  )
  normalized_test_set[["age"]] <- minMaxScaler(
    normalized_test_set[["age"]],
    train_min_age, train_max_age
  )
  normalized_test_set[["diagnosis"]] <- minMaxScaler(
    normalized_test_set[["diagnosis"]],
    train_min_diagnosis, train_max_diagnosis
  )
  normalized_test_set[["score"]] <- minMaxScaler(
    normalized_test_set[["score"]],
    train_min_score, train_max_score
  )
  normalized_test_set[["session"]] <- minMaxScaler(
    normalized_test_set[["session"]],
    train_min_session, train_max_session
  )

  # Test
  expect_identical(normalized_by_fn[["train"]], normalized_train_set)
  expect_identical(normalized_by_fn[["test"]], normalized_test_set)
  expect_equal(
    normalized_by_fn[["parameters"]]$Measure,
    c("Min", "Max")
  )
  expect_equal(normalized_by_fn[["parameters"]]$age,
    c(20, 43),
    tolerance = 1e-4
  )
  expect_equal(normalized_by_fn[["parameters"]]$diagnosis,
    c(0, 1),
    tolerance = 1e-4
  )
  expect_equal(normalized_by_fn[["parameters"]]$score,
    c(10, 81),
    tolerance = 1e-4
  )
  expect_equal(normalized_by_fn[["parameters"]]$session,
    c(1, 3),
    tolerance = 1e-4
  )
})

# TODO test the other example function functions

Try the cvms package in your browser

Any scripts or data that you put into this service are public.

cvms documentation built on Sept. 11, 2024, 6:22 p.m.