Nothing
## Simulation infrastructure -----------------------------------------------------------------------
## data generating process (dgp)
dgp <- function(nid = 100L, nround = 5L,
coef = c(0, 0.85, 0.5, 0.7), rho = 0.5, xrho = 0.5,
dist = "gaussian", type = "copula", link = NULL, ...)
{
## match distribution and type of correlation
dist <- match.arg(dist, c("gaussian", "poisson", "zeroinfl", "hurdle", "betareg", "binomial(logit)", "zerotrunc"))
type <- match.arg(type, c("copula", "copula-ar1", "ranef"))
## sample size
n <- nid * nround
## experimental design variables
d <- data.frame(
id = rep(1L:nid, each = nround),
round = rep(1L:nround, nid)
)
## subject covariates plus random effect
make_x <- function(corr) {
rnorm(nid, mean = 0, sd = sqrt(xrho))[d$id] +
rnorm(n, mean = 0, sd = sqrt(1 - xrho))
}
d$x1 <- make_x(corr)
d$x2 <- rnorm(nid, mean = 0, sd = 1)[d$id]
d$x3 <- rnorm(n, mean = 0, sd = 1)
d$ranef <- if(type == "ranef") rnorm(nid, mean = 0, sd = sqrt(rho/(1 - rho)))[d$id] else 0
## draw from a normal copula
if(type == "copula") {
nc <- copula::normalCopula(rho, dim = nround)
rcopula <- function(n) {
rval <- copula::rCopula(n/nround, nc)
as.vector(t(rval))
}
}
## draw from a normal copula with AR(1) structure
if(type == "copula-ar1") {
nc <- copula::normalCopula(rho, dim = nround, dispstr = "ar1")
rcopula <- function(n) {
rval <- copula::rCopula(n/nround, nc)
as.vector(t(rval))
}
}
## response distribution and link function
if(is.character(dist)) {
switch(dist,
"gaussian" = {
dist <- if(type == "ranef") {
function(n, mu, ...) rnorm(n, mean = mu, ...)
} else {
function(n, mu, ...) qnorm(rcopula(n), mean = mu, ...)
}
if(is.null(link)) link <- "identity"
},
"poisson" = {
dist <- if(type == "ranef") {
function(n, mu, ...) rpois(n, lambda = mu)
} else {
function(n, mu, ...) qpois(rcopula(n), lambda = mu)
}
if(is.null(link)) link <- "log"
},
"zeroinfl" = {
dist <- if(type == "ranef") {
function(n, mu, ...) countreg::rzipois(n, lambda = mu, pi = 0.3)
} else {
function(n, mu, ...) countreg::qzipois(rcopula(n), lambda = mu, pi = 0.3)
}
if(is.null(link)) link <- "log"
},
"hurdle" = {
dist <- if(type == "ranef") {
function(n, mu, ...) countreg::rhpois(n, lambda = mu, pi = 0.7)
} else {
function(n, mu, ...) countreg::qhpois(rcopula(n), lambda = mu, pi = 0.7)
}
if(is.null(link)) link <- "log"
},
"betareg" = {
dist <- if(type == "ranef") {
function(n, mu, ...) rbeta(n, shape1 = mu * 10, shape2 = (1 - mu) * 10)
} else {
function(n, mu, ...) qbeta(rcopula(n), shape1 = mu * 10, shape2 = (1 - mu) * 10)
}
if(is.null(link)) link <- "logit"
},
"binomial(logit)" = {
dist <- if(type == "ranef") {
function(n, mu, ...) rbinom(n, size = 1, prob = mu)
} else {
function(n, mu, ...) qbinom(rcopula(n), size = 1, prob = mu)
}
if(is.null(link)) link <- "logit"
},
"zerotrunc" = {
dist <- if(type == "ranef") {
function(n, mu, ...) countreg::rztpois(n, lambda = mu)
} else {
function(n, mu, ...) countreg::qztpois(rcopula(n), lambda = mu)
}
if(is.null(link)) link <- "log"
}
)}
if(is.null(link)) link <- "identity"
if(is.character(link)) link <- make.link(link)
if(inherits(link, "link-glm")) link <- link$linkinv
## compute linear predictor, expectation mu, and response
d$eta <- coef[1L] + coef[2L] * d$x1 + coef[3L] * d$x2 + coef[4L] * d$x3 + d$ranef
d$mu <- link(d$eta)
d$response <- dist(n, d$mu, ...)
## categorical variables
d <- transform(d,
id = factor(id),
round = factor(round)
)
## store coefficients for future reference
names(coef) <- c("(Intercept)", "x1", "x2", "x3")
attr(d, "coef") <- coef
return(d)
}
## model fitting and covariances
fit <- function(data,
formula = response ~ x1 + x2 + x3,
dist = c("gaussian", "poisson", "zeroinfl", "hurdle", "betareg", "binomial(logit)", "zerotrunc"),
vcov = c("standard", "basic", "HC1", "HC2", "HC3", "CL-0", "CL-1", "CL-2", "CL-3", "fixed", "random", "gee", "PL", "PC", "BS"),
level = 0.95)
{
## assure formula to be in current environment
environment(formula) <- environment(response ~ x1 + x2 + x3)
## response distributions and vcov types
dist <- match.arg(dist)
## pooled model
m <- switch(dist,
"gaussian" = lm(formula, data = data),
"poisson" = glm(formula, data = data, family = poisson),
"zeroinfl" = countreg::zeroinfl(formula, data = data),
"hurdle" = countreg::hurdle(formula, data = data),
"betareg" = betareg::betareg(formula, data = data, phi = FALSE),
"binomial(logit)" = glm(formula, data = data, family = binomial),
"zerotrunc" = countreg::zerotrunc(formula, data = data, dist = "poisson")
)
## fixed effects
if("fixed" %in% vcov) {
formula_fe <- update(formula, . ~ . + id)
m_fe <- switch(dist,
"gaussian" = lm(formula_fe, data = data),
"poisson" = glm(formula_fe, data = data, family = poisson),
"zeroinfl" = countreg::zeroinfl(formula_fe, data = data),
"hurdle" = countreg::hurdle(formula_fe, data = data),
"betareg" = betareg::betareg(formula_fe, data = data, phi = FALSE),
"binomial(logit)" = glm(formula_fe, data, family = binomial),
"zerotrunc" = countreg::zerotrunc(formula_fe, data = data, dist = "poisson")
)
} else {
m_fe <- NULL
}
## random effects
if("random" %in% vcov) {
formula_re <- update(formula, . ~ . + (1 | id))
m_re <- switch(dist,
"gaussian" = lme4::lmer(formula_re, data = data, REML = FALSE),
"poisson" = lme4::glmer(formula_re, data = data, family = poisson),
"zeroinfl" = NULL,
"hurdle" = NULL,
"betareg" = NULL,
"binomial(logit)" = lme4::glmer(formula_re, data = data, family = binomial),
"zerotrunc" = NULL
)
} else {
m_re <- NULL
}
## GEE
if("gee" %in% vcov) {
m_gee <- switch(dist,
"gaussian" = geepack::geeglm(formula, data = data, id = id, corstr = "exchangeable", family = gaussian),
"poisson" = geepack::geeglm(formula, data = data, id = id, corstr = "exchangeable", family = poisson),
"zeroinfl" = NULL,
"hurdle" = NULL,
"betareg" = NULL,
"binomial(logit)" = geepack::geeglm(formula, data = data, id = id, corstr = "exchangeable", family = binomial("logit")),
"zerotrunc" = NULL
)
} else {
m_gee <- NULL
}
## return value: collect coefficients and standard errors
rval <- data.frame(coef = numeric(0), se = numeric(0), par = character(0),
vcov = character(0), stringsAsFactors = FALSE)
if("standard" %in% vcov) {
rval <- rbind(rval, data.frame(
coef = coef(m), se = sqrt(diag(vcov(m))), par = names(coef(m)),
vcov = "standard", stringsAsFactors = FALSE))
}
if("basic" %in% vcov) {
rval <- rbind(rval, data.frame(
coef = coef(m), se = sqrt(diag(sandwich(m))), par = names(coef(m)),
vcov = "basic", stringsAsFactors = FALSE))
}
if("HC1" %in% vcov) {
rval <- rbind(rval, data.frame(
coef = coef(m), se = sqrt(diag(vcovHC(m, type = "HC1"))), par = names(coef(m)),
vcov = "HC1", stringsAsFactors = FALSE))
}
if("HC2" %in% vcov) {
rval <- rbind(rval, data.frame(
coef = coef(m), se = sqrt(diag(vcovHC(m, type = "HC2"))), par = names(coef(m)),
vcov = "HC2", stringsAsFactors = FALSE))
}
if("HC3" %in% vcov) {
rval <- rbind(rval, data.frame(
coef = coef(m), se = sqrt(diag(vcovHC(m, type = "HC3"))), par = names(coef(m)),
vcov = "HC3", stringsAsFactors = FALSE))
}
if("CL-0" %in% vcov) {
rval <- rbind(rval, data.frame(
coef = coef(m), se = sqrt(diag(vcovCL(m, cluster = data$id, type = "HC0"))), par = names(coef(m)),
vcov = "CL-0", stringsAsFactors = FALSE))
}
if("CL-1" %in% vcov) {
rval <- rbind(rval, data.frame(
coef = coef(m), se = sqrt(diag(vcovCL(m, cluster = data$id, type = "HC1"))), par = names(coef(m)),
vcov = "CL-1", stringsAsFactors = FALSE))
}
if("CL-2" %in% vcov & dist != "zeroinfl") {
rval <- rbind(rval, data.frame(
coef = coef(m), se = sqrt(diag(vcovCL(m, cluster = data$id, type = "HC2"))), par = names(coef(m)),
vcov = "CL-2", stringsAsFactors = FALSE))
}
if("CL-3" %in% vcov & dist != "zeroinfl") {
rval <- rbind(rval, data.frame(
coef = coef(m), se = sqrt(diag(vcovCL(m, cluster = data$id, type = "HC3"))), par = names(coef(m)),
vcov = "CL-3", stringsAsFactors = FALSE))
}
if("fixed" %in% vcov) {
k <- length(coef(m))
rval <- rbind(rval, data.frame(
coef = coef(m_fe)[1L:k], se = sqrt(diag(vcov(m_fe)))[1L:k], par = names(coef(m_fe))[1L:k],
vcov = "fixed", stringsAsFactors = FALSE))
}
if("random" %in% vcov) {
rval <- rbind(rval, data.frame(
coef = fixef(m_re), se = sqrt(diag(vcov(m_re))), par = names(fixef(m_re)),
vcov = "random", stringsAsFactors = FALSE))
}
if("gee" %in% vcov) {
rval <- rbind(rval, data.frame(
coef = coef(m_gee), se = sqrt(diag(m_gee$geese$vbeta)), par = names(coef(m_gee)),
vcov = "gee", stringsAsFactors = FALSE))
}
if("PL" %in% vcov) {
rval <- rbind(rval, data.frame(
coef = coef(m), se = sqrt(diag(vcovPL(m, cluster = data$id, lag = "NW1987", adjust = FALSE))), par = names(coef(m)),
vcov = "PL", stringsAsFactors = FALSE))
}
if("PC" %in% vcov) {
rval <- rbind(rval, data.frame(
coef = coef(m), se = sqrt(diag(vcovPC(m, cluster = data$id, order.by = data$round))), par = names(coef(m)),
vcov = "PC", stringsAsFactors = FALSE))
}
if("BS" %in% vcov) {
rval <- rbind(rval, data.frame(
coef = coef(m), se = sqrt(diag(vcovBS(m, cluster = data$id))), par = names(coef(m)),
vcov = "BS", stringsAsFactors = FALSE))
}
## reorder columns
rownames(rval) <- NULL
rval <- rval[, c(4, 3, 1, 2)]
## FIXME
rval$par <- gsub("count_", "", rval$par, fixed = TRUE)
## further outcomes
cr <- qnorm((1 - level)/2, lower.tail = FALSE)
cf <- attr(data, "coef")[rval$par]
rval$bias <- rval$coef - cf
rval$mad <- abs(rval$coef - cf)
rval$power <- as.numeric(abs(rval$coef/rval$se) > cr)
rval$coverage <- as.numeric(abs(cf - rval$coef)/rval$se < cr)
return(rval)
}
## loop over simulation scenarios
sim <- function(nrep = 1000, nid = 100L, nround = 5L,
dist = "gaussian", rho = 0.5, xrho = 0.5,
coef = c(0, 0.85, 0.5, 0.7), formula = response ~ x1 + x2 + x3,
vcov = c("standard", "basic", "HC1", "HC2", "HC3", "CL-0", "CL-1", "CL-2", "CL-3", "fixed", "random", "gee", "PL", "PC", "BS"),
...,
cores = NULL)
{
## parallelization support
applyfun <- if(is.null(cores)) {
lapply
} else {
function(X, FUN, ...) parallel::mclapply(X, FUN, ..., mc.cores = cores)
}
## all factorial combinations of experimental conditions
par <- expand.grid(nid = nid, nround = nround, dist = dist, rho = rho, xrho = xrho,
stringsAsFactors = FALSE)
## conduct all simulations
rval <- lapply(1L:nrow(par), function(i) {
rvali <- applyfun(1L:nrep, function(j) {
d <- dgp(nid = par$nid[i], nround = par$nround[i], dist = par$dist[i],
rho = par$rho[i], xrho = par$xrho[i], coef = coef, ...)
ff <- formula
try(fit(d, formula = ff, dist = par$dist[i], vcov = vcov))
})
rvali <- rvali[sapply(rvali, class) == "data.frame"]
rvali[[1L]][, -(1L:2L)] <- Reduce("+", lapply(rvali, "[", , -(1:2)))/length(rvali)
rvali <- rvali[[1L]]
rvali$nid <- par$nid[i]
rvali$nround <- par$nround[i]
rvali$dist <- par$dist[i]
rvali$rho <- par$rho[i]
rvali$xrho <- par$xrho[i]
return(rvali)
})
rval <- do.call("rbind", rval)
## turn all experimental condition variables into factors
rval$dist <- factor(rval$dist)
rval$vcov <- factor(rval$vcov)
rval$par <- factor(rval$par)
rval$nid <- factor(rval$nid)
rval$nround <- factor(rval$nround)
rval$rho <- factor(rval$rho)
rval$xrho <- factor(rval$xrho)
return(rval)
}
## Bootstrap for InstInnovation hurdle model -------------------------------------------------------
library("sandwich")
library("pscl")
data("InstInnovation", package = "sandwich")
h_innov <- hurdle(
cites ~ institutions + log(capital/employment) + log(sales),
data = InstInnovation, dist = "negbin")
suppressWarnings(RNGversion("3.5.0"))
set.seed(0)
vc_innov <- list(
"standard" = vcov(h_innov),
"basic" = sandwich(h_innov),
"CL-1" = vcovCL(h_innov, cluster = InstInnovation$company),
"boot" = vcovBS(h_innov, cluster = InstInnovation$company)
)
## Simulation study --------------------------------------------------------------------------------
library("copula")
library("lme4")
library("geepack")
library("countreg")
library("betareg")
RNGkind(kind = "L'Ecuyer-CMRG")
set.seed(1)
s01 <- sim(nrep = 10000, nid = 100, nround = 5,
dist = "gaussian", rho = seq(0, 0.9, by = 0.1), xrho = 0.25,
coef = c(0, 0.85, 0.5, 0.7), formula = response ~ x1 + x2 + x3,
vcov = c("standard", "basic", "CL-0", "random", "gee", "PC", "PL", "BS"),
type = "copula", cores = 16)
set.seed(2)
s02 <- sim(nrep = 10000, nid = 100, nround = 5,
dist = c("gaussian", "binomial(logit)", "poisson"),
rho = seq(0, 0.9, by = 0.1), xrho = 0.25,
coef = c(0, 0.85, 0, 0), formula = response ~ x1,
vcov = c("standard", "basic", "CL-0", "random", "gee", "PC", "PL", "BS"),
type = "copula", cores = 16)
set.seed(3)
s03 <- sim(nrep = 10000, nid = 100, nround = 5,
dist = c("zerotrunc", "zeroinfl", "betareg"),
rho = seq(0, 0.9, by = 0.1), xrho = 0.25,
coef = c(0, 0.85, 0, 0), formula = response ~ x1,
vcov = c("standard", "basic", "CL-0"), ## BS separately below (s33)
type = "copula", cores = 16)
set.seed(4)
s04 <- sim(nrep = 10000, nid = c(10, seq(50, 250, by = 50)), nround = 5,
dist = c("gaussian","poisson", "binomial(logit)"),
rho = 0.25, xrho = 0.25,
coef = c(0, 0.85, 0, 0), formula = response ~ x1,
vcov = c("CL-0", "CL-1", "CL-2", "CL-3", "BS"),
type = "copula", cores = 16)
set.seed(6)
s06 <- sim(nrep = 10000, nround = c(5, 10, 20, 50), nid = 100,
dist = "gaussian", rho = 0.25, xrho = 0.25,
coef = c(0, 0.85, 0.5, 0.7), formula = response ~ x1 + x2 + x3,
vcov = c("CL-0", "PC", "PL"),
type = "copula", cores = 16)
set.seed(7)
s07 <- sim(nrep = 10000, nround = c(5, 10, 20, 50), nid = 100,
dist = "gaussian", rho = 0.25, xrho = 0.25,
coef = c(0, 0.85, 0.5, 0.7), formula = response ~ x1 + x2 + x3,
vcov = c("CL-0", "PC", "PL"),
type = "copula-ar1", cores = 16)
set.seed(8)
s08 <- sim(nrep = 10000, nround = c(5, 10, 20, 50), nid = 100,
dist = c("binomial(logit)", "poisson"), rho = 0.25, xrho = 0.25,
coef = c(0, 0.85, 0.5, 0.7), formula = response ~ x1 + x2 + x3,
vcov = c("CL-0", "PC", "PL"),
type = "copula-ar1", cores = 16)
set.seed(33)
s33 <- sim(nrep = 10000, nid = 100, nround = 5,
dist = c("zerotrunc", "zeroinfl", "betareg"),
rho = seq(0, 0.9, by = 0.1), xrho = 0.25,
coef = c(0, 0.85, 0, 0), formula = response ~ x1,
vcov = "BS",
type = "copula", cores = 16)
s06$copula <- factor(rep.int("copula", nrow(s06)), levels = c("copula-ar1", "copula"), labels = c("AR(1)", "Exchangeable"))
s07$copula <- factor(rep.int("copula-ar1", nrow(s07)), levels = c("copula-ar1", "copula"), labels = c("AR(1)", "Exchangeable"))
s0607 <- rbind(s06, s07)
s03$vcov <- as.character(s03$vcov)
s33$vcov <- as.character(s33$vcov)
s33 <- rbind(s03, s33)
s33$vcov <- factor(s33$vcov)
save(s01, s02, s03, s04, s06, s07, s0607, s08, vc_innov, s33, file = "sim-CL.rda")
## -------------------------------------------------------------------------------------------------
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.