tests/testthat/test-merge_samples.R

context("test-merge_samples")

library(dplyr)
library(optimall)

# Make multiwave_object
test <- new_multiwave(phases = 2, waves = c(1, 3))

set.seed <- 345
iris <- data.frame(
  id = c(1:60),
  Species = rep(c("A", "B", "C"), times = 20),
  Sepal.Length = rnorm(60, 3, 0.7)
)
iris$Sepal.Width <- iris$Sepal.Length + rnorm(60, 0, 0.5)

get_data(test, phase = 1, slot = "data") <-
  dplyr::select(iris, -Sepal.Width)

get_data(test, phase = 2, slot = "metadata") <- list(
  strata = "Species",
  design_strata = "strata",
  id = "id",
  n_allocated = "n_to_sample"
)


get_data(test, phase = 2, wave = 1, slot = "design") <-
  data.frame(
    strata = unique(iris$Species),
    n_to_sample = c(5, 5, 5)
  )

set.seed(123)
test <- apply_multiwave(test,
  phase = 2,
  wave = 1, "sample_strata"
) # get samples

samples <- get_data(test, phase = 2, wave = 1, slot = "samples")

get_data(test, phase = 2, wave = 1, slot = "sampled_data") <-
  dplyr::select(iris, id, Sepal.Width)[samples, ]

# Testing Wave 1
test <- merge_samples(test,
  phase = 2, wave = 1, id = "id",
  sampled_ind = "already_sampled_ind"
)


test_that("merge_samples merges data with sampled data
          when column doesn't already exist", {
  expect_equal(
    dim(test@phases$phase2@waves$wave1@data),
    c(60, 5)
  )
  expect_equal(length(
    test@phases$phase2@waves$wave1@data$`already_sampled_ind`[
      test@phases$phase2@waves$wave1@data$`already_sampled_ind` == 1
    ]
  ), 15)
  expect_equal(length(
    test@phases$phase2@waves$wave1@data$`already_sampled_ind`[
      !is.na(test@phases$phase2@waves$wave1@data$Sepal.Width)
    ]
  ), 15)
})

# Testing Wave 2
samples2 <- sample(c(1:60)[-as.numeric(samples)], 15)
# No duplicates
get_data(test, phase = 2, wave = 2, slot = "samples") <-
  as.character(samples2) # No duplicates
get_data(test, phase = 2, wave = 2, slot = "sampled_data") <-
  dplyr::select(iris, id, Sepal.Width)[samples2, ]
test1 <- merge_samples(test,
  phase = 2, wave = 2, id = "id",
  sampled_ind = "already_sampled_ind"
)

test_that("merge_samples merges data with sampled data
          when column already exists", {
  expect_equal(
    dim(test1@phases$phase2@waves$wave1@data),
    c(60, 5)
  )
  expect_equal(length(
    test1@phases$phase2@waves$wave2@data$`already_sampled_ind`[
      test1@phases$phase2@waves$wave2@data$`already_sampled_ind` == 1
    ]
  ), 30)
  expect_equal(length(
    test1@phases$phase2@waves$wave2@data$`already_sampled_ind`[
      !is.na(test1@phases$phase2@waves$wave2@data$Sepal.Width)
    ]
  ), 30)
})

test_that("warning if id already has value for what has been sampled", {
  temp <- test1
  get_data(temp, phase = 2, wave = 2, slot = "sampled_data") <-
    dplyr::select(iris, id, Sepal.Width)[samples2 + as.numeric(samples[1]), ]
  # Make dup
  get_data(temp, phase = 2, wave = 2, slot = "samples") <-
    c(as.character(samples2), samples[1]) # Make dup
  expect_warning(
    merge_samples(temp,
      phase = 2, wave = 2, id = "id",
      sampled_ind = "already_sampled_ind"
    ),
    "have non-NA values already"
  )
})

test_that("warning if new ids are in sampled data", {
  temp <- test1
  get_data(temp, phase = 2, wave = 2, slot = "sampled_data") <-
    rbind(
      dplyr::select(iris, id, Sepal.Width)[samples2, ],
      c(61, 3.5)
    )
  # Make dup
  get_data(temp, phase = 2, wave = 2, slot = "samples") <-
    c(as.character(samples2), "61") # Make dup
  expect_warning(
    temp <- merge_samples(temp,
      phase = 2, wave = 2, id = "id",
      sampled_ind = "already_sampled_ind"
    ),
    "sampled_data is introducing new ids"
  )
  # temp <- merge_samples(temp, phase = 2, wave = 2, id = "id",
  # sampled_ind = "already_sampled_ind")
  expect_equal(nrow(get_data(temp, 2, 2, "data")), 61) # one extra row
})

test_that("warning if one of wave's 'samples' slots is empty", {
  temp <- test1
  get_data(temp, phase = 2, wave = 2, slot = "sampled_data") <-
    dplyr::select(iris, id, Sepal.Width)[samples2, ]
  # Make dup
  get_data(temp, phase = 2, wave = 2, slot = "samples") <-
    character(0) # forget to specify samples
  expect_warning(
    temp <- merge_samples(temp,
      phase = 2, wave = 2, id = "id",
      sampled_ind = "already_sampled_ind"
    ),
    "slots of previous waves in this phase are"
  )
})

test_that("arguments can be specified in metadata", {
  temp <- test1
  get_data(test1, phase = NA, wave = NA, "metadata") <- list(
    id = "id",
    sampled_ind = "already_sampled_ind"
  )
  temp <- merge_samples(temp, phase = 2, wave = 2)
  expect_equal(dim(get_data(temp, 2, 2, "data")), c(60, 5))
  expect_equal(
    "already_sampled_ind" %in% names(
      get_data(temp, phase = 2, wave = 2, slot = "data")
    ),
    TRUE
  )
  temp <- test1
  get_data(temp, phase = 2, wave = NA, "metadata") <- list(
    id = "id",
    sampled_ind = "already_samples_ind"
  )
  temp <- merge_samples(temp, phase = 2, wave = 2)
  expect_equal(dim(get_data(temp, 2, 2, "data")), c(60, 5))
  temp <- test1
  get_data(temp, phase = 2, wave = 2, "metadata") <- list(
    id = "id",
    sampled_ind = "already_sampled_ind"
  )
  temp <- merge_samples(temp, phase = 2, wave = 2)
  expect_equal(dim(get_data(temp, 2, 2, "data")), c(60, 5))
})

test_that("Errors as necessary", {
  temp <- test1
  get_data(temp, phase = 2, wave = 2, slot = "sampled_data") <-
    dplyr::select(iris, id, Sepal.Width)[samples2, ]
  # Make dup
  get_data(temp, phase = 2, wave = 2, slot = "samples") <-
    character(0) # forget to specify samples
  expect_error(
    merge_samples(temp,
      phase = 0, wave = 2, id = "id",
      sampled_ind = "already_sampled_ind"
    ),
    "must be a numeric value specifying a valid phase"
  )
  expect_error(
    merge_samples(temp,
      phase = 2, wave = 5, id = "id",
      sampled_ind = "already_sampled_ind"
    ),
    "must be a numeric value specifying a valid wave"
  )
  expect_error(
    merge_samples(temp,
      phase = 2, wave = 2, id = "id",
      sampled_ind = 3
    ),
    "must be a character value"
  )
  expect_error(
    merge_samples(temp,
      phase = 2, wave = 2, id = "wrong",
      sampled_ind = 3
    ),
    "must be a character value"
  )
})

Try the optimall package in your browser

Any scripts or data that you put into this service are public.

optimall documentation built on Sept. 8, 2023, 6:07 p.m.