Nothing
# Part of the rstanarm package for estimating model parameters
# Copyright (C) 2015, 2016, 2017 Trustees of Columbia University
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
context("stan_glm")
suppressPackageStartupMessages(library(rstanarm))
SEED <- 12345
set.seed(SEED)
CHAINS <- 2
ITER <- 40 # small iter for speed but large enough for psis
REFRESH <- 0
SW(
fit_gaus <- stan_glm(mpg ~ wt, data = mtcars,
chains = CHAINS, iter = ITER,
seed = SEED, refresh = 0)
)
dat <- data.frame(ldose = rep(0:5, 2),
sex = factor(rep(c("M", "F"), c(6, 6))))
numdead <- c(1, 4, 9, 13, 18, 20, 0, 2, 6, 10, 12, 16)
SF <- cbind(numdead, numalive = 20-numdead)
SW(
fit_binom <- stan_glm(SF ~ sex*ldose, data = dat, family = binomial,
chains = CHAINS, iter = ITER, seed = SEED,
refresh = 0)
)
dead <- rbinom(length(numdead), 1, prob = 0.5)
SW(fit_binom2 <- update(fit_binom, formula = factor(dead) ~ .))
d.AD <- data.frame(treatment = gl(3,3), outcome = gl(3,1,9),
counts = c(18,17,15,20,10,20,25,13,12))
SW(fit_pois <- stan_glm(counts ~ outcome + treatment, data = d.AD,
family = poisson, chains = CHAINS, iter = 10 * ITER,
seed = SEED, refresh = 0))
SW(fit_negbin <- update(fit_pois, family = neg_binomial_2))
clotting <- data.frame(log_u = log(c(5,10,15,20,30,40,60,80,100)),
lot1 = c(118,58,42,35,27,25,21,19,18),
lot2 = c(69,35,26,21,18,16,13,12,12))
SW(fit_gamma <- stan_glm(lot1 ~ log_u, data = clotting, family = Gamma,
chains = CHAINS, iter = ITER, seed = SEED,
refresh = 0))
SW(fit_igaus <- update(fit_gamma, family = inverse.gaussian))
test_that("loo/waic for stan_glm works", {
ll_fun <- rstanarm:::ll_fun
# gaussian
expect_equivalent_loo(fit_gaus)
expect_identical(ll_fun(fit_gaus), rstanarm:::.ll_gaussian_i)
# binomial
expect_equivalent_loo(fit_binom)
expect_equivalent_loo(fit_binom2)
expect_identical(ll_fun(fit_binom), rstanarm:::.ll_binomial_i)
expect_identical(ll_fun(fit_binom2), rstanarm:::.ll_binomial_i)
# poisson
expect_equivalent_loo(fit_pois)
expect_identical(ll_fun(fit_pois), rstanarm:::.ll_poisson_i)
# negative binomial
expect_equivalent_loo(fit_negbin)
expect_identical(ll_fun(fit_negbin), rstanarm:::.ll_neg_binomial_2_i)
# gamma
expect_equivalent_loo(fit_gamma)
expect_identical(ll_fun(fit_gamma), rstanarm:::.ll_Gamma_i)
# inverse gaussian
expect_equivalent_loo(fit_igaus)
expect_identical(ll_fun(fit_igaus), rstanarm:::.ll_inverse.gaussian_i)
})
test_that("stan_glm throws appropriate errors, warnings, and messages", {
counts <- c(18,17,15,20,10,20,25,13,12)
outcome <- gl(3,1,9)
treatment <- gl(3,3)
dat <- data.frame(counts, outcome, treatment)
f <- as.formula(counts ~ outcome + treatment)
# error: glmer syntax
expect_error(stan_glm(counts ~ treatment + (1|outcome), data = dat),
regexp = "model formula not allowed")
# error: empty model
expect_error(stan_glm(counts ~ 0, data = dat),
regexp = "No intercept or predictors specified")
# error: stan_glm.nb with family argument
expect_error(stan_glm.nb(f, data = dat, family = "neg_binomial_2"),
regexp = "'family' should not be specified.")
# error: prior and prior_intercept not lists
expect_error(stan_glm(f, data = dat, family = "poisson", prior = normal),
regexp = "should be a named list")
expect_error(stan_glm(f, data = dat, family = "poisson", prior_intercept = normal),
regexp = "should be a named list")
# error: QR only with more than 1 predictor
expect_error(stan_glm(counts ~ 1, data = dat, family = "poisson", QR = TRUE),
regexp = "'QR' can only be specified when there are multiple predictors")
# error: QR and sparse
expect_error(stan_glm(f, data = dat, family = "poisson", QR = TRUE, sparse = TRUE),
regexp = "'QR' and 'sparse' cannot both be TRUE")
# require intercept for certain family and link combinations
expect_error(stan_glm(counts ~ -1 + outcome + treatment, data = dat,
family = poisson(link="identity"), seed = SEED),
regexp = "model must have an intercept")
expect_error(stan_glm(I(counts > 20) ~ -1 + outcome + treatment, data = dat,
family = binomial(link="log"), seed = SEED),
regexp = "model must have an intercept")
# support of outcome variable
expect_error(stan_glm(cbind(1:10, runif(10)) ~ 1, data = dat, family = "binomial"),
"outcome values must be counts")
expect_error(stan_glm(c(1,2,1,2) ~ 1, data = dat, family = "binomial"),
"outcome values must be 0 or 1")
expect_error(stan_glm((-1):3 ~ 1, data = dat, family = "poisson"),
"outcome values must be counts")
expect_error(stan_glm.nb(runif(3) ~ 1, data = dat),
"outcome values must be counts")
expect_error(stan_glm(0:3 ~ 1, data = dat, family = "Gamma"),
"outcome values must be positive")
expect_error(stan_glm(runif(3, -2, -1) ~ 1, data = dat, family = "inverse.gaussian"),
"outcome values must be positive")
expect_error(stan_glm(cbind(1:10, 1:10) ~ 1, data = dat, family = "gaussian"),
"should not have multiple columns")
# prior_aux can't be NULL if prior_PD is TRUE
expect_error(stan_glm(mpg ~ wt, data = mtcars, prior_aux = NULL, prior_PD = TRUE),
"'prior_aux' cannot be NULL if 'prior_PD' is TRUE")
})
test_that("gaussian returns expected result for trees example", {
# example using trees dataset
links <- c("identity", "log", "inverse")
for (i in 1:length(links)) {
if (links[i] == "inverse") next # unreliable
fit <- stan_glm(Volume ~ log(Girth) + log(Height), data = trees,
family = gaussian(link = links[i]), algorithm = "optimizing",
prior = NULL, prior_intercept = NULL, refresh = 0,
QR = TRUE, tol_rel_grad = 1e-16, seed = SEED)
expect_stanreg(fit)
ans <- glm(Volume ~ log(Girth) + log(Height),data = trees,
family = gaussian(link = links[i]))
expect_equal(coef(fit), coef(ans), tol = 0.021)
}
expect_error(update(fit, prior = dnorm),
regexp = "should be a named list")
expect_error(update(fit, prior_intercept = dnorm),
regexp = "should be a named list")
expect_error(update(fit, prior = R2(0.5)),
regexp = "should be one of")
expect_error(update(fit, prior_intercept = R2(0.5)),
regexp = "should be one of")
})
links <- c("log", "identity", "sqrt")
test_that("stan_glm returns expected result for glm poisson example", {
# example from help("glm")
for (i in 1:length(links)) {
SW(fit <- stan_glm(counts ~ outcome + treatment, data = d.AD,
family = poisson(links[i]), refresh = 0,
prior = NULL, prior_intercept = NULL, QR = TRUE,
algorithm = "optimizing", tol_rel_grad = 1e-16, seed = SEED))
expect_stanreg(fit)
ans <- glm(counts ~ outcome + treatment, data = d.AD,
family = poisson(links[i]), start = coef(fit))
if (links[i] == "log") expect_equal(coef(fit), coef(ans), tol = 0.03)
# if (links[i] == "identity") expect_equal(coef(fit)[-1], coef(ans)[-1], tol = 0.03)
if (links[i] == "sqrt") { # this is weird
if (coef(ans)[1] > 0)
expect_equal(coef(fit)[-1], coef(ans)[-1], tol = 0.1)
else
expect_equal(-coef(fit)[-1], coef(ans)[-1], tol = 0.04)
}
}
})
test_that("stan_glm returns something for glm negative binomial example", {
skip_if_not_installed("MASS")
for (i in 1:length(links)) {
SW(fit1 <- stan_glm(Days ~ Sex/(Age + Eth*Lrn), data = MASS::quine,
family = neg_binomial_2(links[i]),
seed = SEED, chains = 1, iter = 100,
QR = TRUE, refresh = 0))
SW(fit2 <- stan_glm.nb(Days ~ Sex/(Age + Eth*Lrn), data = MASS::quine,
link = links[i],
seed = SEED, chains = 1, iter = 100,
QR = TRUE, refresh = 0))
expect_stanreg(fit1)
expect_stanreg(fit2)
expect_equal(as.matrix(fit1), as.matrix(fit2))
}
# testing results against MASS::glm.nb is unreliable
})
test_that("stan_glm returns expected result for cars example", {
fit <- stan_glm(log(dist) ~ log(speed), data = cars, sparse = TRUE,
family = gaussian(link = "identity"), seed = SEED,
prior = NULL, prior_intercept = NULL, refresh = 0,
tol_rel_obj = .Machine$double.eps, algorithm = "optimizing")
expect_stanreg(fit)
ans <- glm(log(dist) ~ log(speed), data = cars, family = gaussian(link = "identity"))
expect_equal(coef(fit), coef(ans), tol = 0.1)
})
test_that("stan_glm returns expected result with no intercept for mtcars example", {
f <- as.formula(mpg ~ -1 + wt + cyl + disp + am + carb)
fit <- stan_glm(f, data = mtcars, refresh = 0,
prior = NULL, prior_intercept = NULL,
tol_rel_obj = .Machine$double.eps, algorithm = "optimizing",
seed = SEED, sparse = TRUE)
expect_stanreg(fit)
ans <- glm(f, data = mtcars, family = gaussian(link = "identity"))
expect_equal(coef(fit), coef(ans), tol = 0.04)
})
links <- c("logit", "probit", "cauchit", "log", "cloglog")
test_that("stan_glm returns expected result for bernoulli", {
# bernoulli example
sd1 <- 1; sd2 <- 0.5; corr_12 <- -0.4
Sigma <- matrix(c(sd1^2, rep(prod(corr_12, sd1, sd2), 2), sd2^2), 2, 2)
x <- t(t(chol(Sigma)) %*% matrix(rnorm(50), 2, 250))
b <- c(2, 1) / 10
for (i in 1:length(links)) {
fam <- binomial(links[i])
theta <- fam$linkinv(-1 + x %*% b)
y <- rbinom(length(theta), size = 1, prob = theta)
dat <- data.frame(y, x)
SW(
fit <- stan_glm(y ~ x, data = dat, family = fam, seed = SEED, QR = TRUE,
prior = NULL, prior_intercept = NULL, refresh = 0,
tol_rel_obj = .Machine$double.eps, algorithm = "optimizing")
)
expect_stanreg(fit)
val <- coef(fit)
if (links[i] != "log") {
ans <- coef(glm(y ~ x, family = fam, etastart = theta))
expect_equal(val, ans, 0.09, info = links[i])
}
# else expect_equal(val[-1], ans[-1], 0.06, info = links[i])
}
})
test_that("stan_glm returns expected result for binomial example", {
# example using simulated data
N <- 200
trials <- rpois(N, lambda = 30)
trials <<- trials
X <- cbind(1, matrix(rnorm(N * 3, sd = 0.5), N, 3))
for (i in 1:length(links)) {
fam <- binomial(links[i])
if (i == 4) {
b <- c(0, 0.5, 0.1, -1.0)
eta <- X %*% b
b[1] <- -max(eta) - 0.05
}
else b <- c(0, 0.5, 0.1, -1.0)
yes <- rbinom(N, size = trials, prob = fam$linkinv(X %*% b))
y <- cbind(yes, trials - yes)
dat <- data.frame(yes, trials, x1 = X[,2], x2 = X[,3], x3 = X[,4])
SW(
fit <- stan_glm(cbind(yes, trials - yes) ~ x1 + x2 + x3, data = dat,
family = fam, seed = SEED, QR = TRUE,
prior = NULL, prior_intercept = NULL, refresh = 0,
tol_rel_obj = .Machine$double.eps, algorithm = "optimizing")
)
expect_stanreg(fit)
val <- coef(fit)
ans <- coef(glm(y ~ x1 + x2 + x3, data = dat, family = fam, start = b))
if (links[i] != "log") expect_equal(val, ans, 0.02, info = links[i])
# else expect_equal(val[-1], ans[-1], 0.02, info = links[i]) # unstable
prop <- yes / trials
dat$prop <- prop
SW(
fit2 <- stan_glm(prop ~ x1 + x2 + x3, data = dat, weights = trials, family = fam,
seed = SEED, refresh = 0, prior = NULL, prior_intercept = NULL,
tol_rel_obj = .Machine$double.eps, algorithm = "optimizing")
)
expect_stanreg(fit2)
val2 <- coef(fit2)
if (links[i] != "log") expect_equal(val2, ans, 0.02, info = links[i])
else expect_equal(val2[-1], ans[-1], 0.02, info = links[i])
}
})
test_that("model with hs prior doesn't error", {
SW(fit <- stan_glm(mpg ~ ., data = mtcars, prior = hs(4, 2, .5),
seed = SEED, algorithm = "meanfield", QR = TRUE, refresh = 0))
expect_output(print(prior_summary(fit)), "~ hs(df = ", fixed = TRUE)
})
test_that("model with hs_plus prior doesn't error", {
# this works except on 32bit Windows
skip_on_os("windows")
SW(fit <- stan_glm(mpg ~ ., data = mtcars, prior = hs_plus(4, 1, 2, .5),
seed = SEED, algorithm = "meanfield", QR = TRUE))
expect_output(print(prior_summary(fit)), "~ hs_plus(df1 = ", fixed = TRUE)
})
test_that("model with laplace prior doesn't error", {
SW(fit <- stan_glm(mpg ~ ., data = mtcars, prior = laplace(),
seed = SEED, algorithm = "meanfield", refresh = 0))
expect_output(print(prior_summary(fit)),
"~ laplace(", fixed = TRUE)
})
test_that("model with lasso prior doesn't error", {
SW(fit <- stan_glm(mpg ~ ., data = mtcars, prior = lasso(),
seed = SEED, algorithm = "meanfield", refresh = 0))
expect_output(print(prior_summary(fit)),
"~ lasso(", fixed = TRUE)
})
test_that("model with product_normal prior doesn't error", {
SW(fit <- stan_glm(mpg ~ ., data = mtcars,
prior = product_normal(df = 3, scale = 0.5),
seed = SEED, algorithm = "meanfield", refresh = 0))
expect_output(print(prior_summary(fit)), "~ product_normal(df = ", fixed = TRUE)
})
test_that("prior_aux argument is detected properly", {
SW(fit <- stan_glm(mpg ~ wt, data = mtcars, iter = 10, chains = 1, seed = SEED,
refresh = 0, prior_aux = exponential(5),
prior = normal(autoscale=FALSE),
prior_intercept = normal(autoscale=FALSE)))
expect_identical(
fit$prior.info$prior_aux,
list(dist = "exponential",
location = NULL, scale = NULL,
adjusted_scale = NULL, #1/5 * sd(mtcars$mpg),
df = NULL, rate = 5,
aux_name = "sigma")
)
expect_output(print(prior_summary(fit)),
"~ exponential(rate = ", fixed = TRUE)
})
test_that("prior_aux can be NULL", {
SW(fit <- stan_glm(mpg ~ wt, data = mtcars, iter = 10, chains = 1, seed = SEED,
refresh = 0, prior_aux = NULL))
expect_output(print(prior_summary(fit)),
"~ flat", fixed = TRUE)
})
test_that("autoscale works (insofar as it's reported by prior_summary)", {
SW(fit <- stan_glm(mpg ~ wt, data = mtcars, iter = 5,
prior = normal(autoscale=FALSE),
prior_intercept = normal(autoscale=FALSE),
prior_aux = cauchy(autoscale=FALSE)))
out <- capture.output(print(prior_summary(fit)))
expect_false(any(grepl("adjusted", out)))
SW(fit2 <- update(fit, prior = normal(autoscale=TRUE)))
out <- capture.output(print(prior_summary(fit2)))
expect_true(any(grepl("Adjusted", out)))
})
test_that("prior_options is deprecated", {
expect_warning(
ops <- prior_options(scaled = FALSE, prior_scale_for_dispersion = 3),
"deprecated and will be removed"
)
expect_warning(
capture.output(fit <- stan_glm(mpg ~ wt, data = mtcars, iter = 5, prior_ops = ops)),
"Setting prior scale for aux to value specified in 'prior_options'"
)
expect_output(
print(prior_summary(fit)),
"~ exponential(rate = 0.33)",
fixed = TRUE
)
})
test_that("empty interaction levels dropped", {
x1 <- gl(3, 5, 100)
x2 <- gl(4, 6, 100)
x1[x2 == 1] <- 1
x1[x2 == 2] <- 1
y <- rnorm(100)
expect_warning(stan_glm(y ~ x1*x2, chains = 1, iter = 20, refresh = 0),
regexp = "Dropped empty interaction levels")
})
test_that("posterior_predict compatible with glms", {
check_for_pp_errors(fit_gaus)
expect_linpred_equal(fit_gaus)
mtcars2 <- mtcars
mtcars2$offs <- runif(nrow(mtcars))
SW(fit2 <- stan_glm(mpg ~ wt + offset(offs), data = mtcars2,
prior_intercept = NULL, prior = NULL, prior_aux = NULL,
iter = ITER, chains = CHAINS, seed = SEED, refresh = 0))
expect_warning(posterior_predict(fit2, newdata = mtcars2[1:5, ]),
"offset")
check_for_pp_errors(fit_gaus, data = mtcars2, offset = mtcars2$offs)
check_for_pp_errors(fit2, data = mtcars2, offset = mtcars2$offs)
expect_linpred_equal(fit_gaus)
# expect_linpred_equal(fit2)
check_for_pp_errors(fit_pois)
check_for_pp_errors(fit_negbin)
expect_linpred_equal(fit_pois)
expect_linpred_equal(fit_negbin)
check_for_pp_errors(fit_gamma)
check_for_pp_errors(fit_igaus)
expect_linpred_equal(fit_gamma)
expect_linpred_equal(fit_igaus)
})
test_that("contrasts attribute isn't dropped", {
contrasts <- list(wool = "contr.sum", tension = "contr.sum")
SW(fit <- stan_glm(breaks ~ wool * tension, data = warpbreaks,
contrasts = contrasts,
chains = 1, refresh = 0))
expect_equal(fit$contrasts, contrasts)
})
test_that("QR recommended if VB and at least 2 predictors", {
expect_message(
SW(stan_glm(mpg ~ wt + cyl, data = mtcars, algorithm = "meanfield", refresh = 0)),
"Setting 'QR' to TRUE can often be helpful when using one of the variational inference algorithms"
)
# no message if QR already specified
expect_message(
SW(stan_glm(mpg ~ wt + cyl, data = mtcars, algorithm = "meanfield", refresh = 0, QR = TRUE)),
NA
)
# no message if only 1 predictor
expect_message(
SW(stan_glm(mpg ~ wt, data = mtcars, algorithm = "meanfield", refresh = 0)),
NA
)
})
test_that("QR errors if only 1 predictor", {
expect_error(
stan_glm(mpg ~ wt, data = mtcars, QR = TRUE),
"can only be specified when there are multiple predictors"
)
})
test_that("returns something with collinear predictors", {
N <- 100
y <- rnorm(N)
z <- sample(c(0,1), N, replace=TRUE)
x1 <- rnorm(N)
x2 <- 2*x1
fit_1 <- stan_glm(
y ~ z * (x1 + x2),
data = data.frame(y, z, x1, x2),
prior = normal(location = 0, scale = 0.1),
prior_intercept = normal(location = 0, scale = 0.1),
chains = CHAINS, iter = ITER, refresh = REFRESH
)
expect_stanreg(fit_1)
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.