tests/testthat/test-Impute.R

test_that("Impute.numeric", {

  # vector with missing value
  x <- 1:10
  x[3] <- NA
  # impute
  xi <- Impute(x, f_i = "i_median")

  expect_equal(xi[3], median(x, na.rm = T))
  expect_length(xi, length(x))
  expect_equal(xi[-3], x[-3])

  # check for mis-specified function (first arg is not called x)
  f_test <- function(y){y}
  expect_error(
    Impute(x, f_i = "f_test")
  )

})

# Check data frame method: imputing using mean and median, by row and col

test_that("Impute.data.frame", {

  # a df of random numbers
  X <- as.data.frame(matrix(runif(50), 10, 5))

  # introduce NAs (2 in 3 of 5 cols)
  X[sample(1:10, 2), 1] <- NA
  X[sample(1:10, 2), 3] <- NA
  X[sample(1:10, 2), 5] <- NA

  # impute X manually using col mean
  Xi1 <- X
  Xi1[is.na(Xi1[1]), 1] <- mean(Xi1[[1]], na.rm = T)
  Xi1[is.na(Xi1[3]), 3] <- mean(Xi1[[3]], na.rm = T)
  Xi1[is.na(Xi1[5]), 5] <- mean(Xi1[[5]], na.rm = T)

  # do the same in COINr
  Xic <- Impute(X, f_i = "i_mean")

  # CHECK
  expect_equal(Xi1, Xic)

  # same for rows
  Xi2 <- X

  # manual imputation of rows using median
  for(irow in 1:nrow(Xi2)){
    xrow <- as.numeric(Xi2[irow, ])
    xrow[is.na(xrow)] <- median(xrow, na.rm = T)
    Xi2[irow, ] <- xrow
  }

  # COINr
  Xic2 <- Impute(X, f_i = "i_median", impute_by = "row", normalise_first = FALSE)

  # CHECK
  expect_equal(Xi2, Xic2)

})


test_that("impute.coin", {

  # build coin
  coin <- build_example_coin(up_to = "new_coin")

  # impute raw data set using population groups
  coin <- Impute(coin, dset = "Raw", f_i = "i_mean_grp", use_group = "Pop_group", write_to = "test1")

  # get data set
  df1 <- get_dset(coin, dset = "test1", also_get = "none")

  # impute the same but semi-manually
  # first get raw data
  dfraw <- get_dset(coin, "Raw", also_get = "Pop_group")
  # only numeric cols
  dfraw_ <- dfraw[names(dfraw) %nin% c("uCode", "Pop_group")]

  # impute using same approach but we get the ingredients manually and use data.frame method
  df2 <- Impute(dfraw_, f_i = "i_mean_grp", f_i_para = list(f = dfraw$Pop_group))

  expect_equal(df1, df2)

  # test group and by-row imputation
  coin <- Impute(coin, dset = "Raw", impute_by = "row", f_i = "i_mean", group_level = 2)

  # this should have imputed using a grouped row median, normalising first. let's see
  xr <- get_data(coin, dset = "Raw", iCodes = "Physical", Level = 1, uCodes = "BGD")
  xn <- get_data(coin, dset = "Imputed", iCodes = "Physical", Level = 1, uCodes = "BGD")
  expect_mapequal(xn[names(xn) != "ConSpeed"], xr[names(xr) != "ConSpeed"])

  # have to normalise this df
  X <- get_data(coin, dset = "Raw", iCodes = "Physical", Level = 1)
  Xn <- lapply(X, function(x){
    if(is.numeric(x)){
      (x - min(x, na.rm = TRUE))/(max(x, na.rm = TRUE) - min(x, na.rm = TRUE))
    } else {
      x
    }
  })
  Xn <- as.data.frame(Xn)

  # the still-normalised imputed vector
  xn_unsc <- Xn[Xn$uCode == "BGD", ]
  # est conspeed
  xref <- rowMeans(xn_unsc[-1], na.rm = TRUE)
  xref_sc <- xref*(max(X$ConSpeed, na.rm = TRUE) - min(X$ConSpeed, na.rm = TRUE)) + min(X$ConSpeed, na.rm = TRUE)

  # FINALLY the test
  expect_equal(xn$ConSpeed, as.numeric(xref_sc))

  # also test no imputation
  coin <- Impute(coin, dset = "Raw", f_i = "i_mean_grp", use_group = "Pop_group", write_to = "test1", disable = TRUE)
  expect_identical(coin$Data$Raw, coin$Data$test1)


})

test_that("impute.purse", {

  purse <- build_example_purse(up_to = "new_coin", quietly = TRUE)

  pursei <- Impute(purse, dset = "Raw")

  # compare
  coin1 <- Impute(purse$coin[[1]], dset = "Raw")
  expect_equal(pursei$coin[[1]], coin1)

  # manually set up panel imputation NA
  purse$coin$`2022`$Data$Raw$Flights[1] <- NA

  pursei <- Impute(purse, dset = "Raw", f_i = "impute_panel")

  # we should see that the 2022 value is equal to the 2021 value now
  x21 <- pursei$coin$`2021`$Data$Imputed$Flights[1]
  x22 <- pursei$coin$`2022`$Data$Imputed$Flights[1]

  expect_equal(x22, x21)

})

test_that("group_mean", {

  # data plus groupings
  x <- runif(20)
  f <- sample(c("a","b"), 20, replace = TRUE)

  # make some missing data
  x[c(1, 5, 10)] <- NA

  # impute
  xi <- i_mean_grp(x, f)

  # check
  expect_equal(xi[1], mean(x[f==f[1]], na.rm = TRUE))
  expect_equal(xi[5], mean(x[f==f[5]], na.rm = TRUE))
  expect_equal(xi[10], mean(x[f==f[10]], na.rm = TRUE))

  # now check case with missing groups
  f[c(2,5)] <- NA
  # impute
  xi <- i_mean_grp(x, f)

  # check - I exclude the NA group rows completely
  x2 <- x[-c(2,5)]
  f2 <- f[-c(2,5)]

  # expect the first NA value to be equal to the mean of the group in the vector excluding NAs
  expect_equal(xi[1], mean(x2[f2==f2[1]], na.rm = TRUE))
  # expect second NA to be equal to NA still, because has NA group
  expect_equal(xi[5], as.numeric(NA))

})

Try the COINr package in your browser

Any scripts or data that you put into this service are public.

COINr documentation built on Oct. 9, 2023, 5:07 p.m.