tests/testthat/test-merge_samples.R

context("test-merge_samples")

library(dplyr)
library(optimall)

# Make multiwave_object
test <- multiwave(phases = 2, waves = c(1, 3))

set.seed <- 345
iris <- data.frame(
  id = c(1:60),
  Species = rep(c("A", "B", "C"), times = 20),
  Sepal.Length = rnorm(60, 3, 0.7)
)
iris$Sepal.Width <- iris$Sepal.Length + rnorm(60, 0, 0.5)

set_mw(test, phase = 1, slot = "data") <-
  dplyr::select(iris, -Sepal.Width)

set_mw(test, phase = 2, slot = "metadata") <- list(
  strata = "Species",
  design_strata = "strata",
  id = "id",
  n_allocated = "n_to_sample"
)


set_mw(test, phase = 2, wave = 1, slot = "design") <-
  data.frame(
    strata = unique(iris$Species),
    n_to_sample = c(5, 5, 5),
    probs = c(.25, .25, .25)
  )

set.seed(0123)
test <- apply_multiwave(test,
  phase = 2,
  wave = 1, "sample_strata", probs = "probs"
) # get samples

samples <- get_mw(test, phase = 2, wave = 1, slot = "samples")$ids

set_mw(test, phase = 2, wave = 1, slot = "sampled_data") <-
  dplyr::select(iris, id, Sepal.Width)[samples, ]

# Testing Wave 1
test <- merge_samples(test,
  phase = 2, wave = 1, id = "id", include_probs = TRUE
)


test_that("merge_samples merges data with sampled data
          when column doesn't already exist", {
  expect_equal(
    dim(test@phases$phase2@waves$wave1@data),
    c(60, 7)
  )
  expect_equal(length(
    test@phases$phase2@waves$wave1@data$`sampled_phase2`[
      test@phases$phase2@waves$wave1@data$`sampled_phase2` == 1
    ]
  ), 15)
  expect_equal(length(
    test@phases$phase2@waves$wave1@data$`sampled_phase2`[
      !is.na(test@phases$phase2@waves$wave1@data$Sepal.Width)
    ]
  ), 15)
})

# Testing Wave 2
samples2 <- sample(c(1:60)[-as.numeric(samples)], 15)
# No duplicates
set_mw(test, phase = 2, wave = 2, slot = "samples") <-
  list(ids = samples2) # No duplicates
set_mw(test, phase = 2, wave = 2, slot = "sampled_data") <-
  dplyr::select(iris, id, Sepal.Width)[samples2, ]
test1 <- merge_samples(test,
  phase = 2, wave = 2, id = "id"
)

test_that("merge_samples merges data with sampled data
          when column already exists", {
  expect_equal(
    dim(test1@phases$phase2@waves$wave1@data),
    c(60, 7)
  )
  expect_equal(length(
    test1@phases$phase2@waves$wave2@data$`sampled_phase2`[
      test1@phases$phase2@waves$wave2@data$`sampled_phase2` == 1
    ]
  ), 30)
  expect_equal(length(
    test1@phases$phase2@waves$wave2@data$`sampled_phase2`[
      !is.na(test1@phases$phase2@waves$wave2@data$Sepal.Width)
    ]
  ), 30)
})

test_that("warning if id already has value for what has been sampled", {
  temp <- test1
  set_mw(temp, phase = 2, wave = 2, slot = "sampled_data") <-
    dplyr::select(iris, id, Sepal.Width)[samples2 + as.numeric(samples[1]), ]
  # Make dup
  set_mw(temp, phase = 2, wave = 2, slot = "samples") <-
    list(ids = c(samples2, samples[1])) # Make dup
  expect_warning(
    merge_samples(temp,
      phase = 2, wave = 2, id = "id"
    ),
    "have non-NA values already"
  )
})

test_that("warning if new ids are in sampled data", {
  temp <- test1
  set_mw(temp, phase = 2, wave = 2, slot = "sampled_data") <-
    rbind(
      dplyr::select(iris, id, Sepal.Width)[samples2, ],
      c(61, 3.5)
    )
  # Make dup
  set_mw(temp, phase = 2, wave = 2, slot = "samples") <-
    list(ids = c(samples2, "61")) # Make dup
  expect_warning(
    temp <- merge_samples(temp,
      phase = 2, wave = 2, id = "id"
    ),
    "sampled_data is introducing new ids"
  )
  # temp <- merge_samples(temp, phase = 2, wave = 2, id = "id")
  expect_equal(nrow(get_mw(temp, 2, 2, "data")), 61) # one extra row
})

test_that("error if one of wave's 'samples' slots is empty, but
          design_data still has sampled", {
  temp <- test1
  set_mw(temp, phase = 2, wave = 2, slot = "sampled_data") <-
    dplyr::select(iris, id, Sepal.Width)[samples2, ]
  # Make dup
  set_mw(temp, phase = 2, wave = 2, slot = "samples") <-
    list() # forget to specify samples
  expect_warning(
    temp <- merge_samples(temp,
      phase = 2, wave = 2, id = "id"),
    "slots of waves in this phase are"
  )
  set_mw(temp, phase = 2, wave = 2, slot = "samples") <-
    list(ids = c(3,4,5)) # specify wrong samples
  expect_warning(
    temp <- merge_samples(temp,
                          phase = 2, wave = 2, id = "id"),
    "they do not match"
  )
})

test_that("arguments can be specified in metadata", {
  temp <- test1
  set_mw(temp, phase = NA, wave = NA, "metadata") <- list(
    id = "id", include_probs = FALSE
    )
  set_mw(temp, phase = 2, wave = 2, slot = "samples") <-
    list(ids = samples2)
  temp <- merge_samples(temp, phase = 2, wave = 2)
  expect_equal(dim(get_mw(temp, 2, 2, "data")), c(60, 8))
  expect_equal(length(dplyr::filter(get_mw(temp, 2, 2, "data"),
                                    !is.na(sampling_prob))$id),
               15)
  expect_equal(length(dplyr::filter(get_mw(temp, 2, 2, "data"),
                                    sampled_phase2 == 1)$id),
                      30)
  expect_equal(length(dplyr::filter(get_mw(temp, 2, 2, "data"),
                                    sampled_wave2.1 == 1)$id) +
                 length(dplyr::filter(get_mw(temp, 2, 2, "data"),
                                      sampled_wave2.2 == 1)$id),
               30)

  expect_equivalent(get_mw(temp, 2, 2, "data")$sampled_phase2,
                    get_mw(temp, 2, 2, "data")$sampled_wave2.1 +
                    get_mw(temp, 2, 2, "data")$sampled_wave2.2)
  temp <- test1
  set_mw(temp, phase = 2, wave = 2, "metadata") <- list(
    id = "id",
    phase_sample_ind = "already_sampled_phase2_ind"
  )
  temp <- merge_samples(temp, phase = 2, wave = 2)
  expect_equal(dim(get_mw(temp, 2, 2, "data")), c(60, 9))
  temp <- test1
  set_mw(temp, phase = 2, wave = NA, "metadata") <- list(
    id = "id",
    phase_sample_ind = "already_sampled2_phase2_ind"
  )
  temp <- merge_samples(temp, phase = 2, wave = 2)
  expect_equal(dim(get_mw(temp, 2, 2, "data")), c(60, 9))
  expect_true("already_sampled2_phase2_ind2" %in% names(get_mw(temp,
                                                            2, 2, "data")))
})

test_that("Errors as necessary", {
  temp <- test1
  set_mw(temp, phase = 2, wave = 2, slot = "sampled_data") <-
    dplyr::select(iris, id, Sepal.Width)[samples2, ]
  # Make dup
  set_mw(temp, phase = 2, wave = 2, slot = "samples") <-
    list(ids = NULL) # forget to specify samples
  expect_error(
    merge_samples(temp,
      phase = 0, wave = 2, id = "id"),
    "must be a numeric value specifying a valid phase"
  )
  expect_error(
    merge_samples(temp,
      phase = 2, wave = 5, id = "id"),
    "must be a numeric value specifying a valid wave"
  )
  expect_error(
    merge_samples(temp,
      phase = 2, wave = 2, id = "id",
      phase_sample_ind = 3
    ),
    "must be a character value"
  )
  expect_error(
    merge_samples(temp,
                  phase = 2, wave = 2, id = "id",
                  wave_sample_ind = 3
    ),
    "must be FALSE or a character value"
  )
  expect_error(
    merge_samples(temp,
      phase = 2, wave = 2, id = "wrong",
      phase_sample_ind = 3
    ),
    "must be a character value"
  )
  expect_error(
    merge_samples(temp,
                  phase = 2, wave = 2, id = "id",
                  include_probs = 3
    ),
    "must be TRUE, FALSE"
  )
})

test_that("New wave and phase column names work", {
  temp <- test1
  set_mw(temp, phase = 2, wave = 2, slot = "sampled_data") <-
    dplyr::select(iris, id, Sepal.Width)[samples2, ]

  temp <- merge_samples(temp,
                  phase = 2, wave = 2, id = "id",
                  wave_sample_ind = FALSE, include_probs = FALSE)
  expect_false("sampled_wave2" %in% names(get_mw(temp, phase = 2, wave = 2)))
  temp <- merge_samples(temp,
                        phase = 2, wave = 2, id = "id",
                        wave_sample_ind = "mytestwave",
                        phase_sample_ind = "mytestphase"
                        )
  expect_equal(sort(dplyr::filter(get_mw(temp, phase = 2, wave = 2),
                             mytestphase2 == 1)$id),
               sort(c(
                 get_mw(temp, phase = 2, wave = 1, slot = "samples")$id,
                 get_mw(temp, phase = 2, wave = 2, slot = "samples")$id
               )))
  expect_equivalent(sort(dplyr::filter(get_mw(temp, phase = 2, wave = 2),
                                          mytestwave2.2 == 1)$id),
               sort(get_mw(temp, phase = 2, wave = 2, slot = "samples")$ids))

})

# Set up include_probs
temp <- test1
temp <- apply_multiwave(temp, phase = 2, wave = 2,
                        fun = "allocate_wave", id = "id",
                        y = "Sepal.Width",
                        already_sampled = "sampled_phase2",
                        allocation_method = "WrightII",
                        nsample = 15)
set_mw(temp, phase = 2, wave = 2, slot = "design") <-
  get_mw(temp, phase = 2, wave = 2, slot = "design") %>%
  dplyr::mutate(probs = n_to_sample/(npop-nsample_prior))
temp <- apply_multiwave(temp, phase = 2, wave = 2,
                        fun = "sample_strata", id = "id",
                        already_sampled = "sampled_phase2",
                        probs = "probs")
set_mw(temp, phase = 2, wave = 2, slot = "sampled_data") <-
  dplyr::select(iris, id, Sepal.Width)[get_mw(temp, 2, 2, "samples")$ids, ]

test_that("include_probs works", {
  temp <- merge_samples(temp,
                        phase = 2, wave = 2, id = "id",
                        include_probs = TRUE)
  expect_true("sampled_wave2.2" %in% names(get_mw(temp, phase = 2, wave = 2)))
  expect_true("sampling_prob" %in% names(get_mw(temp, phase = 2, wave = 2)))
  new_data <- dplyr::filter(get_mw(temp, phase = 2, wave = 2),
                            sampled_wave2.2 == 1)
  if(
    length(unique(new_data$sampling_prob)) ==
    length(unique(get_mw(temp, phase = 2, wave = 2, "design")$Species))){
  expect_equal(as.numeric(table(new_data$Species)), get_mw(temp, phase = 2,
                                              wave = 2, "design")$n_to_sample)
  expect_equal(sort(as.numeric(table(new_data$sampling_prob))),
               sort(get_mw(temp, phase = 2, wave = 2, "design")$n_to_sample))
  }
  expect_equal(sort(new_data$sampling_prob),
               sort(get_mw(temp, phase = 2, wave = 2, "samples")$probs))
})

test_that("include_probs arguments can be specified in metadata", {
  temp <- merge_samples(temp,
                        phase = 2, wave = 2, id = "id",
                        include_probs = TRUE)
  set_mw(temp, phase = NA, wave = NA, "metadata") <- list(
    include_probs = FALSE
  )
  temp <- merge_samples(temp,
                        phase = 2, wave = 2, id = "id")
  expect_equal(nrow(dplyr::filter(get_mw(temp, 2, 2), !is.na(sampling_prob))),
               15)
  set_mw(temp, phase = 2, wave = NA, "metadata") <- list(
    include_probs = TRUE
  )

  temp <- merge_samples(temp,
                        phase = 2, wave = 2, id = "id")
  expect_equal(nrow(dplyr::filter(get_mw(temp, 2, 2), !is.na(sampling_prob))), 30)

  set_mw(temp, phase = 2, wave = 2, "metadata") <- list(
    include_probs = TRUE
  )

  temp <- merge_samples(temp,
                        phase = 2, wave = 2, id = "id")
  expect_equal(nrow(dplyr::filter(get_mw(temp, 2, 2), !is.na(sampling_prob))), 30)
})

test_that("include_probs correctly overwrites sampling_prob if necessary", {
  set_mw(temp, phase = 2, wave = 1) <-
    get_mw(temp, phase = 2, wave = 1) %>%
    dplyr::mutate(sampling_prob = 0.0001)
  temp <- merge_samples(temp,
                        phase = 2, wave = 2, id = "id",
                        include_probs = TRUE)
  expect_equal(nrow(dplyr::filter(get_mw(temp, 2, 2), !is.na(sampling_prob))),
               60)
  expect_equal(nrow(dplyr::filter(get_mw(temp, 2, 2), sampling_prob == 0.0001)),
               45)
})

test_that("include_probs warns if no probs in samples slot", {

  temp <- apply_multiwave(temp, phase = 2, wave = 2,
                          fun = "sample_strata", id = "id", strata = "Species",
                          already_sampled = "sampled_phase2",
                          probs = NULL)
  set_mw(temp, phase = 2, wave = 2, slot = "sampled_data") <-
    dplyr::select(iris, id, Sepal.Width)[get_mw(temp, 2, 2, "samples")$ids, ]
  expect_warning(merge_samples(temp,
                        phase = 2, wave = 2, id = "id",
                        include_probs = TRUE), "include_probs is TRUE")

})

Try the optimall package in your browser

Any scripts or data that you put into this service are public.

optimall documentation built on June 22, 2024, 9:34 a.m.