tests/testthat/test_misc.R

# Part of the rstanarm package for estimating model parameters
# Copyright (C) 2015, 2016, 2017 Trustees of Columbia University
# 
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

suppressPackageStartupMessages(library(rstanarm))
SEED <- 12345
set.seed(SEED)
ITER <- 10L
CHAINS <- 2L
REFRESH <- 0

if (!exists("example_model")) {
  example_model <- run_example_model()
}

context("helper functions")

test_that("nlist works", {
  nlist <- rstanarm:::nlist
  a <- 1
  b <- 2
  c <- 3
  val <- list(nlist(a, b, c), 
              nlist(a, b, c = "tornado"), 
              nlist(a = -1, b = -2, c))
  ans <- list(list(a = a, b = b, c = c), 
              list(a = a, b = b, c = "tornado"), 
              list(a = -1, b = -2, c = c))
  expect_identical(val, ans)
})

test_that("family checking works", {
  fams <- rstanarm:::nlist("binomial", "gaussian", "poisson", gamma = "Gamma", 
                           ig = "inverse.gaussian", nb = "neg_binomial_2")
  for (j in seq_along(fams)) {
    is.f <- getFromNamespace(paste0("is.", names(fams)[j]), "rstanarm")
    f <- get(fams[[j]])()$family
    expect_true(is.f(f))
    expect_false(is.f("not a family"))
  }
})

test_that("%ORifNULL% works", {
  `%ORifNULL%` <- rstanarm:::`%ORifNULL%`
  a <- list(NULL, NA, NaN, 1, "a", FALSE)
  b <- 1
  ans <- c(b, a[-1])
  for (j in seq_along(a)) {
    expect_identical(a[[j]] %ORifNULL% b, ans[[j]])
  }
})

test_that("%ORifINF% works", {
  `%ORifINF%` <- rstanarm:::`%ORifINF%`
  a <- list(Inf, -Inf, 1, "a", FALSE)
  b <- 0
  ans <- c(b, a[-1])
  for (j in seq_along(a)) {
    expect_identical(a[[j]] %ORifINF% b, ans[[j]])
  }
})

test_that("maybe_broadcast works", {
  maybe_broadcast <- rstanarm:::maybe_broadcast
  n <- 5
  x <- list(numeric(0), NULL, 1, c(1,1))
  ans <- list(rep(0,n), rep(0,n), rep(1,n), c(1,1))
  for (j in seq_along(ans)) {
    expect_equal(maybe_broadcast(x[[j]], n), ans[[j]])  
  }
})

test_that("set_prior_scale works", {
  set_prior_scale <- rstanarm:::set_prior_scale
  expect_error(set_prior_scale("a", "b", "c"))
  expect_error(set_prior_scale(1, 1, 1))
  expect_equal(set_prior_scale(NULL, 1, "a"), 1)
  expect_equal(set_prior_scale(NULL, 1, "probit"), dnorm(0) / dlogis(0))
  expect_equal(set_prior_scale(2, 1, "a"), 2)
  expect_equal(set_prior_scale(2, 1, "probit"), 2 * dnorm(0) / dlogis(0))
})

test_that("validate_parameter_value works", {
  validate_parameter_value <- rstanarm:::validate_parameter_value
  expect_error(validate_parameter_value(-1), "should be positive")
  expect_error(validate_parameter_value(0), "should be positive")
  expect_error(validate_parameter_value("a"), "should be NULL or numeric")
  expect_error(validate_parameter_value(NA), "should be NULL or numeric")
  expect_true(validate_parameter_value(NULL))
  expect_true(validate_parameter_value(.01))
  expect_true(validate_parameter_value(.Machine$double.xmax))
})

test_that("validate_R2_location works", {
  validate_R2_location <- rstanarm:::validate_R2_location
  expect_error(
    validate_R2_location(-1, what = "mode"), 
    "location must be in (0,1]", 
    fixed = TRUE
  )
  expect_error(
    validate_R2_location(.5, what = "log"), 
    "location must be negative", 
    fixed = TRUE
  )
  expect_error(
    validate_R2_location(0, what = "mean"), 
    "location must be in (0,1)", 
    fixed = TRUE
  )
  expect_error(
    validate_R2_location(c(0.5, 0.25), what = "mode"), 
    "only accepts a single value for 'location'", 
    fixed = TRUE
  )
})

test_that("validate_weights works", {
  validate_weights <- rstanarm:::validate_weights
  ff <- function(weights) validate_weights(weights)
  expect_equal(ff(), double(0))
  expect_equal(ff(x <- rexp(10)), x)
  expect_equal(validate_weights(NULL), double(0))
  expect_equal(validate_weights(1:10), 1:10)
  expect_error(validate_weights(LETTERS), regexp = "numeric")
  expect_error(validate_weights(c(-1,2,3)), regexp = "negative", ignore.case = TRUE)
  expect_error(stan_glm(mpg ~ wt, data = mtcars, weights = rep(-1, nrow(mtcars))), 
               regexp = "negative", ignore.case = TRUE)
  
  capture.output(fit <- stan_glm(mpg ~ wt, data = mtcars, algorithm = "optimizing", seed = SEED,
                                 weights = rexp(nrow(mtcars)), refresh = 0))
  expect_stanreg(fit)
})

test_that("validate_offset works", {
  validate_offset <- rstanarm:::validate_offset
  expect_equal(validate_offset(NULL), double(0))
  expect_equal(validate_offset(rep(1, 10), rnorm(10)), rep(1, 10))
  expect_error(validate_offset(rep(1, 10), rnorm(5)))
  expect_error(validate_offset(rep(1, 5), rnorm(10)), 
               regexp = "number of offsets", ignore.case = TRUE)
  
  SW(fito <- stan_glm(mpg ~ wt, data = mtcars, algorithm = "optimizing", seed = SEED))
  SW(fito2 <- update(fito, offset = rep(5, nrow(mtcars))))
  expect_equal(coef(fito)[1], 5 + coef(fito2)[1], tol = 0.2)
})

test_that("validate_family works", {
  validate_family <- rstanarm:::validate_family
  expect_equal(validate_family("gaussian"), gaussian())
  expect_equal(validate_family(gaussian), gaussian())
  expect_equal(validate_family(gaussian()), gaussian())
  expect_equal(validate_family(gaussian(link = "log")), gaussian(link = "log"))
  expect_equal(validate_family(binomial(link = "probit")), binomial(link = "probit"))
  expect_equal(validate_family(neg_binomial_2()), neg_binomial_2())
  expect_error(validate_family("not a family"))
  expect_error(validate_family(rnorm(10)), "must be a family")
  expect_error(stan_glm(mpg ~ wt, data = mtcars, family = "not a family"))
})

test_that("validate_glm_formula works", {
  validate_glm_formula <- rstanarm:::validate_glm_formula
  expect_silent(validate_glm_formula(mpg ~ wt + cyl))
  expect_error(validate_glm_formula(mpg ~ wt + (1|cyl)), "not allowed")
  expect_error(validate_glm_formula(mpg ~ (1|cyl/gear)), "not allowed")
})

test_that("validate_data works", {
  validate_data <- rstanarm:::validate_data
  expect_error(validate_data(list(1)), 
               "'data' must be a data frame")
  expect_warning(d <- validate_data(if_missing = 3), 
                 "Omitting the 'data' argument is not recommended")
  expect_equal(d, 3)
})

test_that("array1D_check works", {
  array1D_check <- rstanarm:::array1D_check
  y1 <- rnorm(10)
  expect_equal(array1D_check(y1), y1)
  names(y1) <- rep_len(letters, length(y1))
  expect_equal(array1D_check(y1), y1)
  expect_identical(array1D_check(as.array(y1)), y1)
  
  y2 <- cbind(1:10, 11:20)
  expect_equal(array1D_check(y2), y2)
})

test_that("fac2bin works", {
  fac2bin <- rstanarm:::fac2bin
  y <- gl(2, 2, 20, labels = c("lo", "hi"))
  expect_identical(fac2bin(y), rep_len(c(0L, 0L, 1L, 1L), 20))
  y <- gl(2, 8, labels = c("Control", "Treat"))
  expect_identical(fac2bin(y), rep(c(0L, 1L), each = 8))
  expect_identical(fac2bin(factor(c(1,2))), c(0L, 1L))
  expect_error(fac2bin(rnorm(10)))
  expect_error(fac2bin(factor(c(1,2,3))))
  expect_error(fac2bin(factor(mtcars$cyl, labels = c("lo", "mid", "hi"))))
})

test_that("check_constant_vars works", {
  check_constant_vars <- rstanarm:::check_constant_vars
  mf <- model.frame(glm(mpg ~ ., data = mtcars))
  mf2 <- mf
  mf2$wt <- 2
  expect_equal(check_constant_vars(mf), mf)
  expect_error(check_constant_vars(mf2), "wt")
  mf2$gear <- 3
  expect_error(check_constant_vars(mf2), "wt, gear")
  expect_error(stan_glm(mpg ~ ., data = mf2), "wt, gear")

  
  SW(fit1 <- stan_glm(mpg ~ ., data = mf, algorithm = "optimizing", seed = SEED, refresh = 0))
  SW(fit2 <- stan_glm(mpg ~ ., data = mf, weights = rep(2, nrow(mf)), seed = SEED,
                     offset = rep(1, nrow(mf)), algorithm = "optimizing", refresh = 0))
  expect_stanreg(fit1)
  expect_stanreg(fit2)
  
  esoph2 <- esoph
  esoph2$agegp[1:nrow(esoph2)] <- "75+"
  expect_error(stan_polr(tobgp ~ agegp, data = esoph2, iter = 10,
                         prior = R2(0.2, "mean"), init_r = 0.1, seed = SEED, 
                         refresh = 0), 
               regexp = "agegp")
})

test_that("linear_predictor methods work", {
  linpred_vec <- rstanarm:::linear_predictor.default
  linpred_mat <- rstanarm:::linear_predictor.matrix
  x <- cbind(1, 1:4)
  bmat <- matrix(c(-0.5, 0, 0.5, 1), nrow = 2, ncol = 2)
  bvec <- bmat[1, ]
  vec_ans <- seq(0, 1.5, 0.5)
  mat_ans <- rbind(vec_ans, 1:4)
  offset <- rep(2, nrow(x))
  expect_equivalent(linpred_vec(bvec, x), vec_ans)
  expect_equivalent(linpred_vec(bvec, x, offset = NULL), vec_ans)
  expect_equivalent(linpred_vec(bvec, x, offset), vec_ans + offset)
  expect_equivalent(linpred_mat(bmat, x), mat_ans)
  expect_equivalent(linpred_mat(bmat, x, offset = NULL), mat_ans)
  expect_equivalent(linpred_mat(bmat, x, offset), mat_ans + offset)
})

# fits to use in multiple calls to test_that below
SW({
  fit <- stan_glm(mpg ~ wt, data = mtcars, iter = ITER, 
                     chains = CHAINS, seed = SEED, refresh = 0)
  fit2 <- stan_glmer(mpg ~ wt + (1|cyl), data = mtcars, 
                        iter = ITER, chains = CHAINS, seed = SEED, refresh = 0)
  fito <- stan_glm(mpg ~ wt, data = mtcars, algorithm = "optimizing", seed = SEED)
  fitvb <- update(fito, algorithm = "meanfield", seed = SEED)
  fitvb2 <- update(fitvb, algorithm = "fullrank", seed = SEED)
})

test_that("validate_stanreg_object works", {
  validate_stanreg_object <- rstanarm:::validate_stanreg_object
  expect_silent(validate_stanreg_object(fit))
  expect_silent(validate_stanreg_object(fit2))
  expect_silent(validate_stanreg_object(fito))
  expect_silent(validate_stanreg_object(fitvb))
  expect_error(validate_stanreg_object(fit$stanfit), 
               "not a stanreg object")
})

test_that("used.sampling, used.optimizing, and used.variational work", {
  used.sampling <- rstanarm:::used.sampling
  used.optimizing <- rstanarm:::used.optimizing
  used.variational <- rstanarm:::used.variational
  expect_true(used.sampling(fit))
  expect_true(used.sampling(fit2))
  expect_false(used.optimizing(fit))
  expect_false(used.optimizing(fit2))
  expect_false(used.variational(fit))
  expect_false(used.variational(fit2))
  
  expect_true(used.optimizing(fito))
  expect_false(used.sampling(fito))
  expect_false(used.variational(fito))
  
  expect_true(used.variational(fitvb))
  expect_true(used.variational(fitvb2))
  expect_false(used.sampling(fitvb))
  expect_false(used.sampling(fitvb2))
  expect_false(used.optimizing(fitvb))
  expect_false(used.optimizing(fitvb2))
  
  # should return error if passed anything but a stanreg object
  expect_error(used.sampling(fit$stanfit))
  expect_error(used.variational(fitvb$stanfit))
  expect_error(used.optimizing(fito$stanfit))
})

test_that("is.mer works", {
  is.mer <- rstanarm:::is.mer
  bad1 <- bad2 <- example_model
  bad1$glmod <- NULL
  class(bad2) <- "stanreg"
  
  expect_true(is.mer(example_model))
  expect_true(is.mer(fit2))
  expect_false(is.mer(fit))
  expect_false(is.mer(fito))
  expect_false(is.mer(fitvb))
  expect_false(is.mer(fitvb2))
  expect_error(is.mer(bad1), regexp = "Bug found")
  expect_error(is.mer(bad2), regexp = "Bug found")
})

test_that("get_x, get_y, get_z work", {
  x_ans <- cbind("(Intercept)" = 1, wt = mtcars$wt)
  y_ans <- mtcars$mpg
  expect_equivalent(get_x(fit), x_ans)
  expect_equivalent(get_y(fit), y_ans)
  expect_error(get_z(fit), "no applicable method")
  
  z_ans2 <- model.matrix(mpg ~ -1 + factor(cyl), data = mtcars)
  expect_equivalent(get_x(fit2), x_ans)
  expect_equivalent(get_y(fit2), y_ans)
  expect_equivalent(as.matrix(get_z(fit2)), z_ans2)
  
  SW(
    fit3 <- stan_glmer(mpg ~ wt + (1 + wt|cyl), data = mtcars, refresh = 0,
                       iter = 10, chains = 1, refresh = 5, seed = SEED)
  )
  z_ans3 <- mat.or.vec(nr = nrow(mtcars), nc = 6)
  z_ans3[, c(1, 3, 5)] <- model.matrix(mpg ~ 0 + factor(cyl), data = mtcars)
  z_ans3[, c(2, 4, 6)] <- model.matrix(mpg ~ 0 + wt:factor(cyl), data = mtcars)
  expect_equivalent(get_x(fit3), x_ans)
  expect_equivalent(get_y(fit3), y_ans)
  expect_equivalent(as.matrix(get_z(fit3)), z_ans3)
})

test_that("set_sampling_args works", {
  set_sampling_args <- rstanarm:::set_sampling_args
  
  # user specifies stepsize and also overrides default max_treedepth
  control1 <- list(max_treedepth = 10, stepsize = 0.01)
  # user specifies control but doesn't override max_treedepth
  control2 <- list(stepsize = 0.01)
  # no user 'control' argument 
  no_control <- list()
  
  # normal prior --> adapt_delta = 0.95
  val1 <- set_sampling_args(fit, prior = normal(),
                            user_dots = list(control = control1, iter = 100),  
                            user_adapt_delta = NULL)
  # use fit2 instead of fit to check that it doesn't matter which fit object is used
  val1b <- set_sampling_args(fit2,
                             prior = normal(),
                             user_dots = list(control = control1, iter = 100),  
                             user_adapt_delta = NULL)
  # normal prior --> adapt_delta = 0.95, but user override to 0.9
  val2 <- set_sampling_args(fit, prior = normal(), 
                            user_dots = list(control = control1),  
                            user_adapt_delta = 0.9)
  # cauchy/t_1 prior --> adapt_delta = 0.95
  val3 <- set_sampling_args(fit, prior = student_t(1), 
                            user_dots = list(control = control1),  
                            user_adapt_delta = NULL)
  # cauchy/t_1 prior --> adapt_delta = 0.95, but user override to 0.8
  val4 <- set_sampling_args(fit, prior = cauchy(),
                            user_dots = list(control = control2),  
                            user_adapt_delta = 0.8)
  # hs prior --> adapt_delta = 0.99
  val5 <- set_sampling_args(fit, prior = hs(), 
                            user_dots = no_control,
                            user_adapt_delta = NULL)
  val6 <- set_sampling_args(fit, prior = hs_plus(), 
                            user_dots = no_control,
                            user_adapt_delta = NULL)
  expect_equal(val1$control, c(control1, adapt_delta = 0.95))
  expect_equal(val1$iter, 100)
  expect_equal(val1$control, val1b$control)
  expect_equal(val2$control, c(control1, adapt_delta = 0.9))
  expect_equal(val3$control, c(control1, adapt_delta = 0.95))
  expect_equal(val4$control, c(control2, adapt_delta = 0.8, max_treedepth = 15))
  expect_equal(val5$control, list(adapt_delta = 0.99, max_treedepth = 15))
  expect_equal(val6$control, list(adapt_delta = 0.99, max_treedepth = 15))
})

test_that("linkinv methods work", {
  linkinv.stanreg <- rstanarm:::linkinv.stanreg
  linkinv.character <- rstanarm:::linkinv.character
  linkinv.family <- rstanarm:::linkinv.family
  
  expect_identical(linkinv.family(gaussian()), gaussian()$linkinv)
  expect_identical(linkinv.family(neg_binomial_2()), neg_binomial_2()$linkinv)
  expect_identical(linkinv.family(binomial(link = "probit")), 
                   binomial(link = "probit")$linkinv)
  
  SW(
    fit_polr <- stan_polr(tobgp ~ agegp, data = esoph, method = "loglog",
                          prior = R2(0.2, "mean"), init_r = 0.1, 
                          chains = CHAINS, iter = ITER, seed = SEED, 
                          refresh = 0)
  )
  expect_identical(linkinv.stanreg(fit_polr), rstanarm:::pgumbel)
  expect_identical(linkinv.character(fit_polr$family), rstanarm:::pgumbel)
  expect_identical(linkinv.stanreg(example_model), binomial()$linkinv)
  expect_identical(linkinv.stanreg(fit), gaussian()$linkinv)
  expect_error(rstanarm:::polr_linkinv(example_model), 
               regexp = "should be a stanreg object created by stan_polr")
})

test_that("collect_pars and grep_for_pars work", {
  fit <- example_model
  collect_pars <- rstanarm:::collect_pars
  grep_for_pars <- rstanarm:::grep_for_pars
  
  all_period <- paste0("period", 2:4)
  all_varying <- rstanarm:::b_names(rownames(fit$stan_summary), value = TRUE)
  
  expect_identical(grep_for_pars(fit, "period"), all_period)
  expect_identical(grep_for_pars(fit, c("period", "size")), c(all_period, "size"))
  expect_identical(grep_for_pars(fit, "period|size"), c("size", all_period))
  expect_identical(grep_for_pars(fit, "(2|3)$"), all_period[1:2])
  expect_identical(grep_for_pars(fit, "b\\["), all_varying)
  expect_identical(grep_for_pars(fit, "herd"), c(all_varying, "Sigma[herd:(Intercept),(Intercept)]"))
  expect_identical(grep_for_pars(fit, "Intercept"),
                   c("(Intercept)", all_varying, "Sigma[herd:(Intercept),(Intercept)]"))
  expect_identical(grep_for_pars(fit, "herd:[3,5]"), all_varying[c(3,5)])
  expect_identical(grep_for_pars(fit, "herd:[3-5]"), all_varying[3:5])
  expect_error(grep_for_pars(fit, "NOT A PARAMETER"), regexp = "No matches")
  expect_error(grep_for_pars(fit, "b["))
  
  expect_identical(collect_pars(fit, regex_pars = "period"), all_period)
  expect_identical(collect_pars(fit, pars = "size", regex_pars = "period"), 
                   c("size", all_period))
  expect_identical(collect_pars(fit, pars = c("(Intercept)", "size")), 
                   c("(Intercept)", "size"))
  expect_identical(collect_pars(fit, pars = "period2", regex_pars = "herd:[[1]]"), 
                   c("period2", all_varying[1]))
  expect_identical(collect_pars(fit, pars = "size", regex_pars = "size"), "size")
  expect_identical(collect_pars(fit, regex_pars = c("period", "herd")), 
                   c(all_period, all_varying, "Sigma[herd:(Intercept),(Intercept)]"))
})

test_that("posterior_sample_size works", {
  pss <- rstanarm:::posterior_sample_size
  expect_equal(pss(example_model), 1000)
  expect_equal(pss(fit), nrow(as.matrix(fit)))
  expect_equal(pss(fit2), ITER * CHAINS / 2)
  expect_equal(pss(fitvb), 1000)
  expect_equal(pss(fitvb2), 1000)
  expect_equal(pss(fito), nrow(as.matrix(fito)))
  
  SW(fit3 <- stan_glm(mpg ~ wt, data = mtcars, iter = 20, chains = 1, thin = 2, refresh = 0))
  expect_equal(pss(fit3), nrow(as.matrix(fit3)))
})

test_that("last_dimnames works", {
  a <- array(rnorm(300), dim = c(10, 3, 10), 
             dimnames = list(A = NULL, B = NULL, C = letters[1:10]))
  last_dimnames <- rstanarm:::last_dimnames
  expect_identical(last_dimnames(a), letters[1:10])
  
  m <- a[1,,, drop=TRUE]
  expect_identical(last_dimnames(m), letters[1:10])
  expect_identical(last_dimnames(m), colnames(m))
  
  d <- as.data.frame(m)
  expect_identical(last_dimnames(d), last_dimnames(m))
  
  expect_null(last_dimnames(m[1,]))
})

test_that("validate_newdata works", {
  fit <- example_model
  newd <- fit$data
  validate_newdata <- rstanarm:::validate_newdata
  
  expect_error(validate_newdata(fit, newdata = 1:10), "must be a data frame")
  expect_null(validate_newdata(fit, newdata = NULL))
  expect_equal(newd, validate_newdata(fit, newdata = newd))
  
  # doesn't complain about NAs in unused variables
  newd2 <- newd
  newd2$banana <- NA
  expect_silent(validate_newdata(fit, newdata = newd2))
  expect_equal(validate_newdata(fit, newdata = newd2), newd2)
  
  newd$period[3] <- NA
  expect_error(validate_newdata(fit, newdata = newd), "NAs are not allowed")
})
stan-dev/rstanarm documentation built on April 15, 2024, 11:11 p.m.