# tests/testthat/test-recodeMissings.R

# Main fixture: one column per missing-value layout exercised by the tests
# below. Columns built with declared() carry their missing codes as
# `na_values` / `na_range`; column B carries a haven tagged NA instead.
x <- data.frame(
  # single declared missing value, present in the labels
  A = declared(
    c(1:5, -92),
    na_values = -92,
    labels = c(Good = 1, Bad = 5, NR = -92)
  ),
  # haven-style variable with one tagged NA
  B = labelled(
    c(1:5, haven::tagged_na("a")),
    labels = c(DK = haven::tagged_na("a"))
  ),
  # two declared missing values
  C = declared(
    c(1, -91, 3:5, -92),
    na_values = c(-91, -92),
    labels = c(DK = -91, NR = -92)
  ),
  # same data as C, missing codes declared as a range
  D = declared(
    c(1, -91, 3:5, -92),
    na_range = c(-99, -91),
    labels = c(DK = -91, NR = -92)
  ),
  # character variable: the numeric code -92 is coerced to "-92" by c()
  E = declared(
    c(letters[1:5], "-92"),
    na_values = -92,
    labels = c(Good = "a", Bad = "e", NR = "-92")
  ),
  # range with a value to the right side not in the dictionary
  F = declared(
    c(1, -95, 3:5, -94),
    na_range = c(-95, -93),
    labels = c(DK = -91, NR = -92)
  ),
  # range with both values not in the dictionary
  G = declared(
    c(1, -91, 3:5, -92),
    na_range = c(-99, -90),
    labels = c(DK = -91, NR = -92)
  )
)

# Copy of `x` in which the information about declared missing values is
# erased: `na_values` for A and C, `na_range` for D, E, F and G.
# NOTE(review): E is created with `na_values = -92`, not with `na_range`,
# so clearing its range presumably leaves -92 declared -- confirm that this
# is intended.
x2 <- x
for (column in c("A", "C")) {
  declared::missing_values(x2[[column]]) <- NULL
}
for (column in c("D", "E", "F", "G")) {
  declared::missing_range(x2[[column]]) <- NULL
}

# Recoded versions of the fixtures, shared by the tests below.
xspss <- recodeMissings(x, to = "SPSS")
xstata <- recodeMissings(x, to = "Stata")

# "SAS" is actually the same thing as "Stata"; the input here is `x2`,
# the copy with the declared missing information erased.
xsas <- recodeMissings(x2, to = "SAS")
# drop the "dictionary" attribute so that `xsas` can be compared with `x2`
attr(xsas, "dictionary") <- NULL

# Character-only fixture: labelled values, no missing codes declared.
xchar <- data.frame(
  A = declared(
    letters[seq_len(5)],
    labels = c(Good = "a", Bad = "e")
  )
)

# 860 distinct missing codes (-950 to -91), each with its own label.
# The tests below expect recoding this fixture to Stata to fail.
manymissings <- seq(-950, -91)
xmany <- data.frame(
  A = declared(
    c(1:5, manymissings),
    na_range = c(-950, -91),
    labels = c(
      Good = 1,
      Bad = 5,
      setNames(manymissings, paste0("NR", manymissings))
    )
  ),
  # same number of rows as A: (length(manymissings) + 4) random valid
  # values followed by one tagged NA
  B = labelled(
    c(
      sample(1:5, length(manymissings) + 4, replace = TRUE),
      haven::tagged_na("a")
    ),
    labels = c(DK = haven::tagged_na("a"))
  )
)

test_that("recodeMissings() works", {
  # inputs expected to raise an error
  expect_error(recodeMissings(1:5))
  expect_error(recodeMissings(data.frame(A = 1:5)))

  # the `xmany` fixture cannot be recoded to Stata, neither column A alone
  # nor the whole data frame
  expect_error(recodeMissings(subset(xmany, select = "A"), to = "Stata"))
  expect_error(recodeMissings(xmany, to = "Stata"))

  # a character variable without declared missing values comes back unchanged
  expect_equal(recodeMissings(xchar, to = "SAS"), xchar)
  expect_equal(recodeMissings(xchar, to = "SPSS"), xchar)

  # with the "dictionary" attribute removed, `xsas` equals its input
  expect_equal(xsas, x2)

  # sixth observation: NA in B after SPSS recoding, tag "b" in A after Stata
  spss_b6 <- xspss$B[6]
  expect_true(is.na(spss_b6))
  stata_a6_tag <- haven::na_tag(xstata$A[6])
  expect_equal(stata_a6_tag, "b")
})

test_that("variables get declared NAs if they are mixed SPSS / Stata types", {
  # after recoding to SPSS no tagged NA should be left anywhere in the data
  has_tagged <- admisc::anyTagged(xspss)
  expect_false(has_tagged)
})

test_that("variables get tagged NAs if they are mixed SPSS / Stata types", {
  # Every variable except E (which is character) should contain tagged NAs.
  # E is excluded by name rather than by position, and vapply() guarantees
  # one logical value per column (sapply() is not type-stable).
  checked <- setdiff(names(xstata), "E")
  tagged <- vapply(xstata[checked], admisc::anyTagged, logical(1))
  expect_true(all(tagged))
})

test_that("variables get tagged NAs when they are all SPSS-type", {
  # Keep only the numeric declared() variables: drop B (built with
  # labelled() and a tagged NA) and E (character). Columns are dropped by
  # name so the test does not depend on the column order of `x`.
  kept <- setdiff(names(x), c("B", "E"))
  onlyspss <- recodeMissings(x[, kept], to = "Stata")
  # vapply() enforces exactly one logical per column, unlike sapply()
  tagged <- vapply(onlyspss, admisc::anyTagged, logical(1))
  expect_true(all(tagged))
})

test_that("a dictionary is produced from the missing codes in the data", {
  # with return_dictionary = TRUE the result is expected to be the
  # dictionary (a data frame)
  dictionary <- recodeMissings(x, to = "Stata", return_dictionary = TRUE)
  # expect_s3_class() reports the actual class on failure, unlike
  # expect_true(is.data.frame(...)) which only reports "not TRUE"
  expect_s3_class(dictionary, "data.frame")
})

test_that("missing values in the data, not present in a dictionary, trigger error", {
  # the only code in this dictionary (999) is not among the missing codes
  # used in `x`, so the codes that are in the data have no entry
  dictionary <- data.frame(label = "DK", code = 999, new = -91)
  expect_error(recodeMissings(x, dictionary = dictionary))
})

test_that("range of values with infinite bounds are recalculated", {
  # `dfm` is not created in this file; see test-convert.R for its structure
  recoded_default <- recodeMissings(dfm)

  # the original range is open to the right ...
  expect_equal(declared::missing_range(dfm$plusinf), c(91, Inf))
  # ... and becomes open to the left after the default recoding
  expect_equal(
    declared::missing_range(recoded_default$plusinf),
    c(-Inf, -92)
  )

  # with start = 999 the recoded range is open to the right again
  recoded_from_999 <- recodeMissings(dfm, start = 999)
  expect_equal(
    declared::missing_range(recoded_from_999$plusinf),
    c(1000, Inf)
  )
})

test_that("recoding works for both declared and haven", {
  # recode with `to_declared = FALSE` (`to` left at its default) and compare
  # only the dimensions with the SPSS recoding
  haven_recoded <- recodeMissings(x, to_declared = FALSE)
  expect_equal(dim(xspss), dim(haven_recoded))
})

test_that("missing ranges are reconstructed if not present in the dictionary", {
  range_before <- declared::missing_range(x$F)
  range_after <- declared::missing_range(xspss$F)

  # F: the recoded range has new bounds but the same width as the original
  expect_equal(range_after, c(-94, -92))
  expect_equal(diff(range_before), diff(range_after))

  # G: neither bound of the original range is in the dictionary
  expect_equal(declared::missing_range(xspss$G), c(-94, -91))
})

# Try the DDIwR package in your browser

# Any scripts or data that you put into this service are public.

# DDIwR documentation built on Oct. 1, 2023, 5:06 p.m.