tests/testthat/test-nmr-data-analysis.R

## Prepare demo dataset
prepare_dataset <- function() {
  # 12 artificial samples created based on the 3 demo samples
  MeOH_plasma_extraction_dir <- system.file("dataset-demo", package = "AlpsNMR")
  MeOH_plasma_extraction_xlsx <- file.path(MeOH_plasma_extraction_dir, "dummy_metadata.xlsx")
  exp_subj_id <- readxl::read_excel(MeOH_plasma_extraction_xlsx, sheet = 1)
  subj_id_age <- readxl::read_excel(MeOH_plasma_extraction_xlsx, sheet = 2)
  zip_files <- fs::dir_ls(MeOH_plasma_extraction_dir, glob = "*.zip")
  
  dataset <- nmr_read_samples(sample_names = zip_files)
  dataset <- nmr_meta_add(dataset, metadata = exp_subj_id, by = "NMRExperiment")
  dataset <- nmr_meta_add(dataset, metadata = subj_id_age, by = "SubjectID")
  ppm_res <- nmr_ppm_resolution(dataset)[[1]]
  dataset <- nmr_interpolate_1D(dataset, axis = c(min = 3.7, max = 4.5, by = 2.3E-4))
  dataset <- nmr_baseline_removal(dataset, lambda = 6, p = 0.01)
  dataset <- nmr_normalize(dataset, method = "area")
  metadata <- nmr_meta_get(dataset, groups = "external")
  metadata$Group <- c("A", "B", "B")
  dataset <- nmr_meta_add(dataset, metadata[,c("NMRExperiment", "Group")])
  # Artificially create a larger dataset
  larger_metadata <- rbind(metadata, metadata, metadata, metadata, metadata)
  
  larger_metadata$NMRExperiment <- as.character(
    seq(from = 10, by = 10, length.out = nrow(larger_metadata))
  )
  dataset <- new_nmr_dataset_1D(
    ppm_axis = dataset$axis,
    data_1r = rbind(dataset$data_1r, dataset$data_1r, dataset$data_1r, dataset$data_1r, dataset$data_1r),
    metadata = list(external = larger_metadata)
  )
  dataset
}

## Dataset can be used

test_that("nmr_data_analysis works", {
  dataset <- prepare_dataset()
  methodology <- plsda_auroc_vip_method(ncomp = 2)
  out <- nmr_data_analysis(
    dataset,
    y_column = "Group",
    identity_column = NULL,
    external_val = list(iterations = 1, test_size = 0.25),
    internal_val = list(iterations = 2, test_size = 0.25),
    data_analysis_method = methodology
  )
  expect_false(is.null(out))
})

test_that("random subsampling works", {
  subject_id <- rep(c("Alice", "Bob", "Charlie", "Diana"), times = 2)
  replicate <- rep(c(1,2), each = 4)
  set.seed(2563432L)
  sample_idx <- 1:8
  num_iterations <- 2L
  out <- random_subsampling(sample_idx, iterations = num_iterations, test_size = 0.25,
                            keep_together = subject_id)
  expect_equal(length(out), num_iterations)
  expect_equal(length(out[[1]][["training"]]), 6L)
  expect_equal(length(out[[1]][["test"]]), 2L)
  # Subjects kept together in the split, no subject in train is present in test:
  expect_equal(
    length(
      intersect(
        subject_id[out[[1]][["test"]]],
        subject_id[out[[1]][["training"]]]
      )
    ),
    0L
  )
})

test_that("split_double_cv works", {
  nsamples <- 16L
  subject_id <- rep(c("Alice", "Bob", "Charlie", "Diana"), times = 4)
  replicate <- rep(c(1,2), each = 8)
  metadata <- data.frame(
    NMRExperiment = as.character(seq(from = 10, by = 10, length.out = nsamples)),
    SubjectID = subject_id,
    Replicate = replicate,
    stringsAsFactors = FALSE
  )
  dataset <- new_nmr_dataset_1D(
    ppm_axis = 1:10,
    data_1r = matrix(sample(1:200, 10*nsamples), ncol = 10, nrow = nsamples),
    metadata = list(external = metadata)
  )
  
  external_val_niter <- 2L
  internal_val_niter <- 4L
  external_test_size <- 0.25
  internal_test_size <- 0.34
  out <- AlpsNMR:::split_double_cv(
    dataset = dataset,
    keep_together = "SubjectID",
    external_val = list(iterations = external_val_niter, test_size = external_test_size),
    internal_val = list(iterations = internal_val_niter, test_size = internal_test_size)
  )
  
  expect_equal(names(out), c("outer", "inner"))
  expect_equal(length(out[["outer"]]), external_val_niter)
  expect_equal(length(out[["inner"]]), external_val_niter*internal_val_niter)
  expected_samples_in_external_test <- floor(nsamples*external_test_size)
  expected_samples_in_train <- nsamples - expected_samples_in_external_test
  expected_samples_in_train_internal_test <- floor(expected_samples_in_train*internal_test_size)
  expected_samples_in_train_internal_train <- expected_samples_in_train - expected_samples_in_train_internal_test
  
  expect_equal(length(out$inner$`1_1`$inner_train_idx),
               expected_samples_in_train_internal_train)
})

Try the AlpsNMR package in your browser

Any scripts or data that you put into this service are public.

AlpsNMR documentation built on April 1, 2021, 6:02 p.m.