tests/testthat/test_extraction.R

context("Extraction")

test_that("query codes having downloaded surveys", {
  testthat::skip_on_cran()
  skip_if_no_auth()

  cli <- new_rand_client()

  # create availbale surveys
  survs <- cli$available_datasets()

  # check rds only for one survey
  downloads <- cli$get_datasets(
    dataset_filenames = "AOBR62DT.ZIP", download_option = "r"
  )
  downloads <- cli$get_datasets(
    dataset_filenames = "AOBR62SV.ZIP", download_option = "r",
    all_lower = FALSE, reformat = TRUE
  )

  ## QUESTIONS AND VARIABLE TESTS

  # create questions
  quest <- cli$survey_questions(
    dataset_filenames = "AOBR62DT.ZIP",
    search_terms = c("fever", "malaria", "test")
  )

  # check the regeex option
  quest <- cli$survey_questions(
    dataset_filenames = "AOBR62DT.ZIP", regex = c("fever|test")
  )

  # cehck for message on both
  quest <- cli$survey_questions(
    dataset_filenames = "AOBR62DT.ZIP",
    regex = c("fever|test"),
    search_terms = c("fever|test")
  )

  # check the essetial terms option
  quest <- cli$survey_questions(
    dataset_filenames = "AOBR62DT.ZIP",
    search_terms = c("fever|test"),
    essential_terms = "malaria"
  )


  # check the same with an uppercase survey, one variable that is na essential
  quest <- cli$survey_variables(
    dataset_filenames = "AOBR62SV.ZIP", variables = "hml32",
    essential_variables = c("hml35", "h32n"),
    reformat = TRUE
  )

  # check variable
  quest <- cli$survey_questions(
    dataset_filenames = "AOBR62DT.ZIP",
    search_terms = c("fever|test")
  )


  ####

  # extract the data
  extract <- cli$extract(quest, add_geo = T)

  # extract the qeustions
  extract_neat <- rdhs:::extract_codes_to_descriptions(extract, quest)

  ## and repeat for sruveys that have no geo

  # check rds only for one survey
  downloads <- cli$get_datasets(
    dataset_filenames = "ZWHR31SV.ZIP",
    download_option = "r"
  )
  r <- readRDS(downloads$ZWHR31SV)
  r1 <- data_and_labels(r)
  r2 <- data_and_labels(downloads$ZWHR31SV)
  expect_identical(r1, r2)
  expect_error(data_and_labels("twaddle"))

  # create questions for a regex and non
  quest <- cli$survey_questions(
    dataset_filenames = "ZWHR31SV.ZIP",
    regex = c("Has refrigerator", "fever")
  )
  quest <- cli$survey_questions(
    dataset_filenames = "ZWHR31SV.ZIP",
    search_terms = c("Has refrigerator")
  )

  # extract the data
  extract <- cli$extract(quest, add_geo = T)
  expect_identical(extract$ZWHR31SV$LATNUM[1], NA)

  # and repreat for reformatted ones
  quest <- cli$survey_questions(
    dataset_filenames = "AOBR62DT.ZIP",
    search_terms = "malaria", reformat = TRUE
  )
  extract <- cli$extract(quest, add_geo = T)
  unlink(cli$get_root())
})

test_that("surveyId in extract", {
  skip_if_no_auth()
  testthat::skip_on_cran()
  cli <- new_rand_client()

  dat <- cli$survey_variables(
    dataset_filenames = "ZWHR31SV.ZIP",
    variables = "hv024"
  )
  expect_identical(names(dat)[5], "survey_id")

  extract <- cli$extract(dat)
  unlink(cli$get_root())
})



test_that("rbind_labelled", {
  testthat::skip_on_cran()
  skip_if_no_auth()

  # create auth through whichever route is valid for the environment
  cli <- new_rand_client()

  # get some datasets
  d <- cli$get_datasets(c("AOBR62FL.ZIP", "BJBR41FL.ZIP"))

  quest <- cli$survey_variables(c("AOBR62FL.ZIP", "BJBR41FL.ZIP"),
    variables = c("v024", "v130")
  )

  extract <- cli$extract(quest, add_geo = TRUE)

  expect_warning(rbind_labelled(extract))

  dat <- rbind_labelled(extract, labels = list(
    "v024" = "concatenate",
    "v130" = "concatenate"
  ))

  dat <- rbind_labelled(extract,
    labels = list("v024" = "concatenate"),
    warn = FALSE
  )

  # now do it for just one variable
  quest <- cli$survey_variables(c("AOBR62FL.ZIP", "BJBR41FL.ZIP"),
    variables = c("v024")
  )
  extract <- cli$extract(quest, add_geo = FALSE)
  dat <- rbind_labelled(extract, labels = list("v024" = "concatenate"))
  expect_warning(rbind_labelled(extract))


  # now let's force it to look out for factors and reformats
  quest <- cli$survey_variables(c("AOBR62FL.ZIP", "BJBR41FL.ZIP"),
    variables = c("v024", "v130"), reformat = TRUE
  )
  extract <- cli$extract(quest, add_geo = FALSE)
  dat <- rbind_labelled(extract, labels = list("v024" = "concatenate"))

  # now for factors
  extract$AOBR62FL$v024 <- as.factor(extract$AOBR62FL$v024)
  extract$BJBR41FL$v024 <- as.factor(extract$BJBR41FL$v024)
  dat <- rbind_labelled(extract, labels = list("v024" = "concatenate"))

  expect_equal(
    as.numeric(sort(table(dat$v024))),
    as.numeric(sort(c(
      table(extract$AOBR62FL$v024),
      table(extract$BJBR41FL$v024)
    )))
  )
  unlink(cli$get_root())
})

test_that("as_factor.labelled", {

  df1 <- data.frame(
    area = haven::labelled(c(1L, 2L, 3L), c("reg 1"=1,"reg 2"=2,"reg 3"=3)),
    climate = haven::labelled(c(0L, 1L, 1L), c("cold"=0,"hot"=1))
  )

  # manually change it to the old style
  class(df1$area) <- "labelled"
  class(df1$climate) <- "labelled"

  # rdhs as_factor.labelled check
  expect_true(class(as_factor.labelled(df1$area)) == "factor")

})

test_that("add_geo issue for Kenya", {
  skip_if_no_auth()
  testthat::skip_on_cran()
  cli <- new_rand_client()

  dat <- cli$survey_variables(
    dataset_filenames = "KEBR72FL.ZIP",
    variables = "v024"
  )

  extract <- cli$extract(dat, add_geo = TRUE)
  expect_true(length(unique(extract$KEBR72FL$DHSREGNA))>1)
  unlink(cli$get_root())
})
OJWatson/rdhs documentation built on April 4, 2024, 10:46 a.m.