test-qsu.R
In collapse: Advanced and Fast Data Transformation

context("qsu")



# rm(list = ls())

bmean <- base::mean
bsd <- stats::sd
bsum <- base::sum

bstats <- function(x) {
  if(!is.numeric(x)) return(c(N = bsum(!is.na(x)), Mean = NA_real_, SD = NA_real_, Min = NA_real_, Max = NA_real_))
  c(N = bsum(!is.na(x)), Mean = bmean(x, na.rm = TRUE), SD = bsd(x, na.rm = TRUE), `names<-`(range(x, na.rm = TRUE), c("Min", "Max")))
}
base_qsu <- function(x, g = NULL) {
  if(is.atomic(x) && !is.matrix(x)) return(`oldClass<-`(bstats(x), c("qsu", "table")))
  if(is.null(g)) {
    r <- t(dapply(x, bstats, return = "matrix"))
    return(`oldClass<-`(r, c("qsu", "matrix", "table")))
  }
  r <- simplify2array(BY(x, g, bstats, return = "list", expand.wide = TRUE))
  return(`oldClass<-`(r, c("qsu", "array", "table")))
}

wldNA <- na_insert(wlddev)
xNA <- na_insert(rnorm(100))
ones <- rep(1, fnrow(wlddev))

for(i in 1:2) {
  if(i == 1L) qsu <- function(x, ...) collapse::qsu(x, ..., stable.algo = FALSE)
  if(i == 2L) qsu <- collapse::qsu

test_that("qsu works properly for simple cases (including unit groups and weights)", {

  expect_equal(qsu(1:10), base_qsu(1:10))
  expect_equal(qsu(10:1), base_qsu(10:1))
  expect_equal(qsu(xNA), base_qsu(xNA))
  expect_equal(qsu(wlddev), base_qsu(wlddev))
  expect_equal(qsu(wldNA), base_qsu(wldNA))
  expect_equal(qsu(GGDC10S), base_qsu(GGDC10S))

  expect_equal(qsu(1:10, w = rep(1, 10))[-2L], base_qsu(1:10))
  expect_equal(qsu(10:1, w = rep(1, 10))[-2L], base_qsu(10:1))
  expect_equal(qsu(xNA, w = rep(1, 100))[-2L], base_qsu(xNA))
  expect_equal(qsu(wlddev, w = ones)[,-2L], base_qsu(wlddev))
  expect_equal(qsu(wldNA, w = ones)[,-2L], base_qsu(wldNA))
  expect_equal(qsu(GGDC10S, w = rep(1, fnrow(GGDC10S)))[,-2L], base_qsu(GGDC10S))

  expect_equal(unattrib(qsu(1:10, g = rep(1, 10))), unattrib(base_qsu(1:10)))
  expect_equal(unattrib(qsu(10:1, g = rep(1, 10))), unattrib(base_qsu(10:1)))
  expect_equal(unattrib(qsu(xNA, g = rep(1, 100))), unattrib(base_qsu(xNA)))
  expect_equal(unattrib(qsu(wlddev, by = ones)), unattrib(t(base_qsu(wlddev)))) # This should be an array... or oriented the other way around...

  expect_equal(unattrib(qsu(1:10, g = rep(1, 10), w = rep(1, 10)))[-2L], unattrib(base_qsu(1:10)))
  expect_equal(unattrib(qsu(10:1, g = rep(1, 10), w = rep(1, 10)))[-2L], unattrib(base_qsu(10:1)))
  expect_equal(unattrib(qsu(xNA, g = rep(1, 100), w = rep(1, 100)))[-2L], unattrib(base_qsu(xNA)))
  expect_equal(qsu(wldNA, w = ones)[,-2L], base_qsu(wldNA))
  expect_equal(qsu(GGDC10S, w = rep(1, fnrow(GGDC10S)))[,-2L], base_qsu(GGDC10S))
  expect_equal(t(unclass(qsu(wldNA, w = ones, by = ones)))[,-2L], unclass(base_qsu(wldNA)))
  expect_equal(t(unclass(qsu(GGDC10S, w = rep(1, fnrow(GGDC10S)), by = rep(1, fnrow(GGDC10S)))))[,-2L], unclass(base_qsu(GGDC10S)))

})

}
rm(qsu)

test_that("qsu works properly for simple cases with higher-order statistics (including unit groups and weights)", {

  expect_equal(qsu(1:10, higher = TRUE)[1:5], base_qsu(1:10))
  expect_equal(qsu(10:1, higher = TRUE)[1:5], base_qsu(10:1))
  expect_equal(qsu(xNA, higher = TRUE)[1:5], base_qsu(xNA))
  expect_equal(qsu(wlddev, higher = TRUE)[,1:5], base_qsu(wlddev))
  expect_equal(qsu(wldNA, higher = TRUE)[,1:5], base_qsu(wldNA))
  expect_equal(qsu(GGDC10S, higher = TRUE)[,1:5], base_qsu(GGDC10S))

  expect_equal(qsu(1:10, w = rep(1, 10), higher = TRUE)[c(1L, 3:6)], base_qsu(1:10))
  expect_equal(qsu(10:1, w = rep(1, 10), higher = TRUE)[c(1L, 3:6)], base_qsu(10:1))
  expect_equal(qsu(xNA, w = rep(1, 100), higher = TRUE)[c(1L, 3:6)], base_qsu(xNA))
  expect_equal(qsu(wlddev, w = ones, higher = TRUE)[,c(1L, 3:6)], base_qsu(wlddev))
  expect_equal(qsu(wldNA, w = ones, higher = TRUE)[,c(1L, 3:6)], base_qsu(wldNA))
  expect_equal(qsu(GGDC10S, w = rep(1, fnrow(GGDC10S)), higher = TRUE)[,c(1L, 3:6)], base_qsu(GGDC10S))

  expect_equal(unattrib(qsu(1:10, g = rep(1, 10), higher = TRUE)[1:5]), unattrib(base_qsu(1:10)))
  expect_equal(unattrib(qsu(10:1, g = rep(1, 10), higher = TRUE)[1:5]), unattrib(base_qsu(10:1)))
  expect_equal(unattrib(qsu(xNA, g = rep(1, 100), higher = TRUE)[1:5]), unattrib(base_qsu(xNA)))
  expect_equal(unattrib(qsu(wlddev, by = ones, higher = TRUE)[1:5, ]), unattrib(t(base_qsu(wlddev)))) # This should be an array... or oriented the other way around...

  expect_equal(unattrib(qsu(1:10, g = rep(1, 10), w = rep(1, 10), higher = TRUE)[c(1L, 3:6)]), unattrib(base_qsu(1:10)))
  expect_equal(unattrib(qsu(10:1, g = rep(1, 10), w = rep(1, 10), higher = TRUE)[c(1L, 3:6)]), unattrib(base_qsu(10:1)))
  expect_equal(unattrib(qsu(xNA, g = rep(1, 100), w = rep(1, 100), higher = TRUE)[c(1L, 3:6)]), unattrib(base_qsu(xNA)))
  expect_equal(qsu(wldNA, w = ones, higher = TRUE)[,c(1L, 3:6)], base_qsu(wldNA))
  expect_equal(qsu(GGDC10S, w = rep(1, fnrow(GGDC10S)), higher = TRUE)[,c(1L, 3:6)], base_qsu(GGDC10S))
  expect_equal(t(unclass(qsu(wldNA, w = ones, by = ones, higher = TRUE)[c(1L, 3:6),])), unclass(base_qsu(wldNA)))
  expect_equal(t(unclass(qsu(GGDC10S, w = rep(1, fnrow(GGDC10S)), by = rep(1, fnrow(GGDC10S)), higher = TRUE)))[,c(1L, 3:6)], unclass(base_qsu(GGDC10S)))

})


wtd.sd <- function(x, w) sqrt(bsum(w * (x - weighted.mean(x, w))^2)/bsum(w))
wtd.skewness <- function(x, w) (bsum(w * (x - weighted.mean(x, w))^3)/bsum(w))/wtd.sd(x, w)^3
wtd.kurtosis <- function(x, w) ((bsum(w * (x - weighted.mean(x, w))^4)/bsum(w))/wtd.sd(x, w)^4)

base_w_qsu <- function(x, w) {
  if(!is.numeric(x)) return(c(N = bsum(!is.na(x)), Mean = NA_real_, SD = NA_real_, Min = NA_real_, Max = NA_real_, Skew = NA_real_, Kurt = NA_real_))
  cc <- complete.cases(x, w)
  if(!all(cc)) {
    x <- x[cc]
    w <- w[cc]
  }
  res <- c(N = length(x), Mean = weighted.mean(x, w), SD = fsd(x, w = w),
    `names<-`(range(x, na.rm = TRUE), c("Min", "Max")), Skew = wtd.skewness(x, w), Kurt = wtd.kurtosis(x, w))
  class(res) <- c("qsu", "table")
  res
}

test_that("Proper performance of weighted statsistics", {
  x <- mtcars$mpg
  w <- ceiling(mtcars$wt*10)
  wx <- rep(x, w)
  expect_equal(base_w_qsu(x, w)[-1L], qsu(wx, higher = TRUE)[-1L])
  expect_equal(qsu(wx)[-1L], qsu(x, w = w)[-(1:2)])
  expect_equal(qsu(wx, higher = TRUE)[-1L], qsu(x, w = w, higher = TRUE)[-(1:2)])
  expect_equal(drop(qsu(wx, g = rep(1L, length(wx)), higher = TRUE))[-1L], drop(qsu(x, g = rep(1L, length(x)), w = w, higher = TRUE))[-(1:2)])
})

g <- GRP(wlddev, ~ income)
p <- GRP(wlddev, ~ iso3c)

for(i in 1:2) {
  if(i == 1L) qsu <- function(x, ...) collapse::qsu(x, ..., stable.algo = FALSE)
  if(i == 2L) qsu <- collapse::qsu

test_that("qsu works properly for grouped and panel data computations", {

  # Grouped Statistics
  expect_equal(qsu(wldNA, g), base_qsu(wldNA, g))
  expect_equal(qsu(GGDC10S, GGDC10S$Variable), base_qsu(GGDC10S, GGDC10S$Variable))
  # Grouped and Weighted Statistics
  expect_equal(qsu(wldNA, g, w = ones)[,-2L,], base_qsu(wldNA, g))
  expect_equal(qsu(GGDC10S, GGDC10S$Variable, w = rep(1, fnrow(GGDC10S)))[,-2L,], base_qsu(GGDC10S, GGDC10S$Variable))
  # Panel Data Statistics
  ps <- qsu(wldNA, pid = p, cols = is.numeric)
  expect_equal(unattrib(t(ps["Overall",,])), unattrib(base_qsu(nv(wldNA))))
  expect_equal(unattrib(t(ps["Between",,])), unattrib(base_qsu(fmean(nv(wldNA), p))))
  expect_equal(unattrib(t(ps["Within", -1,])), unattrib(base_qsu(fwithin(nv(wldNA), p, mean = "overall.mean"))[, -1]))
  # Weighted Panel Data Statistics
  ps <- qsu(wldNA, pid = p, w = ones, cols = is.numeric)[,-2L,]
  expect_equal(unattrib(t(ps["Overall",,])), unattrib(base_qsu(nv(wldNA))))
  expect_equal(unattrib(t(ps["Between",-1,])), unattrib(base_qsu(fbetween(nv(wldNA), p))[,-1]))
  expect_equal(unattrib(t(ps["Within", -1,])), unattrib(base_qsu(fwithin(nv(wldNA), p, mean = "overall.mean"))[, -1]))
  # Grouped Panel Data Statistics
  ps <- qsu(wldNA, by = g, pid = p, cols = is.numeric)
  expect_equal(unattrib(ps[,,"Overall",]), unattrib(base_qsu(nv(wldNA), g)))
  expect_equal(unattrib(ps[,-1,"Between",]), unattrib(base_qsu(fbetween(nv(wldNA), p), g)[,-1,]))
  expect_equal(unattrib(ps[,-1,"Within",]), unattrib(base_qsu(fwithin(nv(wldNA), p, mean = "overall.mean"), g)[,-1,]))
  # Grouped and Weighted Panel Data Statistics
  ps <- qsu(wldNA, by = g, pid = p, w = ones, cols = is.numeric)[,-2L,,]
  expect_equal(unattrib(ps[,,"Overall",]), unattrib(base_qsu(nv(wldNA), g)))
  expect_equal(unattrib(ps[,-1,"Between",]), unattrib(base_qsu(fbetween(nv(wldNA), p), g)[,-1,]))
  expect_equal(unattrib(ps[,-1,"Within",]), unattrib(base_qsu(fwithin(nv(wldNA), p, mean = "overall.mean"), g)[,-1,]))

})

}
rm(qsu)

test_that("qsu works properly for grouped and panel data computations with higher-order statistics", {

  # Grouped Statistics
  expect_equal(qsu(wldNA, g, higher = TRUE)[,1:5,], base_qsu(wldNA, g))
  expect_equal(qsu(GGDC10S, GGDC10S$Variable, higher = TRUE)[,1:5,], base_qsu(GGDC10S, GGDC10S$Variable))
  # Grouped and Weighted Statistics
  expect_equal(qsu(wldNA, g, w = ones, higher = TRUE)[,c(1L, 3:6),], base_qsu(wldNA, g))
  expect_equal(qsu(GGDC10S, GGDC10S$Variable, w = rep(1, fnrow(GGDC10S)), higher = TRUE)[,c(1L, 3:6),], base_qsu(GGDC10S, GGDC10S$Variable))
  # Panel Data Statistics
  ps <- qsu(wldNA, pid = p, cols = is.numeric, higher = TRUE)[,1:5,]
  expect_equal(unattrib(t(ps["Overall",,])), unattrib(base_qsu(nv(wldNA))))
  expect_equal(unattrib(t(ps["Between",,])), unattrib(base_qsu(fmean(nv(wldNA), p))))
  expect_equal(unattrib(t(ps["Within", -1,])), unattrib(base_qsu(fwithin(nv(wldNA), p, mean = "overall.mean"))[, -1]))
  # Weighted Panel Data Statistics
  ps <- qsu(wldNA, pid = p, w = ones, cols = is.numeric, higher = TRUE)[,c(1L, 3:6),]
  expect_equal(unattrib(t(ps["Overall",,])), unattrib(base_qsu(nv(wldNA))))
  # TODO: Figure out why this test fails !!!!!!
  # expect_equal(unattrib(t(ps["Between",-1,])), unattrib(base_qsu(fbetween(nv(wldNA), p))[,-1]))
  expect_equal(unattrib(t(ps["Within", -1,])), unattrib(base_qsu(fwithin(nv(wldNA), p, mean = "overall.mean"))[, -1]))
  # Grouped Panel Data Statistics
  ps <- qsu(wldNA, by = g, pid = p, cols = is.numeric, higher = TRUE)[,1:5,,]
  expect_equal(unattrib(ps[,,"Overall",]), unattrib(base_qsu(nv(wldNA), g)))
  expect_equal(unattrib(ps[,-1,"Between",]), unattrib(base_qsu(fbetween(nv(wldNA), p), g)[,-1,]))
  expect_equal(unattrib(ps[,-1,"Within",]), unattrib(base_qsu(fwithin(nv(wldNA), p, mean = "overall.mean"), g)[,-1,]))
  # Grouped and Weighted Panel Data Statistics
  ps <- qsu(wldNA, by = g, pid = p, w = ones, cols = is.numeric, higher = TRUE)[,c(1L, 3:6),,]
  expect_equal(unattrib(ps[,,"Overall",]), unattrib(base_qsu(nv(wldNA), g)))
  expect_equal(unattrib(ps[,-1,"Between",]), unattrib(base_qsu(fbetween(nv(wldNA), p), g)[,-1,]))
  expect_equal(unattrib(ps[,-1,"Within",]), unattrib(base_qsu(fwithin(nv(wldNA), p, mean = "overall.mean"), g)[,-1,]))

})

# Make more tests!! See also collapse general TODO !
test_that("qsu gives errors for wrong input", {
  expect_error(qsu(wlddev$year, 2:4))
  expect_error(qsu(wlddev$year, pid = 2:4))
  expect_error(qsu(wlddev, 2:4))
  expect_error(qsu(wlddev, pid = 2:4))
  expect_error(qsu(wlddev$year, letters))
  expect_error(qsu(wlddev$year, pid = letters))
  expect_error(qsu(wlddev, letters))
  expect_error(qsu(wlddev, pid = letters))

  expect_error(qsu(wlddev, ~ iso3c + bla))
  expect_error(qsu(wlddev, pid = ~ iso3c + bla))

  expect_visible(qsu(wlddev, PCGDP ~ region + income))
  expect_visible(qsu(wlddev, pid = PCGDP ~ region + income))

  expect_equal(qsu(wlddev, PCGDP ~ region + income, ~ iso3c), qsu(wlddev, ~ region + income, pid = PCGDP ~ iso3c))

  expect_error(qsu(wlddev, cols = 9:14))
  expect_error(qsu(wlddev, cols = c("PCGDP","bla")))
})