knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
Ben bolker, one of the main creators of the glmmTMB
R package, wrote this:
https://ms.mcmaster.ca/~bolker/emdbook/book.pdf
I also think you should own Advanced Statistics with Application in R by Demidenko
Links:
Illustrate how the Negative Binomial distribution
Conduct a simulation study with a unique set of conditions and compute the Type I error and Power. Remember to use the save()
and load()
functions. Use the R code provided below.
library(SPR) library(MASS) N = 1e5 #this should be divisible by however many groups you use! number.groups <- 2 number.timepoints <- 1 set.seed(2012021) dat <- data.frame( 'USUBJID' = rep(paste0('Subject_', formatC(1:N, width = 4, flag = '0')), length.out= N*number.timepoints), 'Group' = rep(paste0('Group_', 1:number.groups), length.out = N*number.timepoints), #'Y_nb' = rep(NA, N*number.timepoints), #'Y_pois' = rep(NA, N*number.timepoints), #'Bio' = rep(rnorm(N, mean = 0, sd = 1), number.timepoints), 'Time' = rep(paste0('Time_', 1:number.timepoints), each = N), stringsAsFactors=F) # Design Matrix X <- model.matrix( ~ Group , data = dat) Beta <- matrix(0, nrow = ncol(X), dimnames=list(colnames(X), 'param')) Beta[] <- c(0.2, 1) # Parameters: mu <- exp(X %*% Beta) dat$mu <- as.vector(mu) theta <- 1 # dispersion parameter dat$theta <- as.vector(theta) upsilon <- mu + (mu^2)/theta dat$upsilon <- as.vector(upsilon) prob <- theta/(theta + mu) dat$Y_nb <- rnbinom(n = N, size = theta, mu = mu) hist.nb <- hist(dat$Y_nb, plot = F) hist.nb$counts <- 100*hist.nb$counts/N plot(hist.nb, ylim = c(0, 100), ylab = 'Percentage', col = 'grey', main = 'Negative Binomial') dat$Y_pois <- rpois(n = N, lambda = mu) hist.pois <- hist(dat$Y_pois, plot = F) hist.pois$counts <- 100*hist.pois$counts/N plot(hist.pois, ylim = c(0, 100), ylab = 'Percentage', col = 'grey', main = 'Poisson') # Poisson: # Mean aggregate(Y_pois ~ Group, FUN = mean, data = dat) aggregate(mu ~ Group, FUN = mean, data = dat) # Variance aggregate(Y_pois ~ Group, FUN = var, data = dat) # Negative Binomial: # Mean: aggregate(Y_nb ~ Group, FUN = mean, data = dat) aggregate(mu ~ Group, FUN = mean, data = dat) # Variance: aggregate(Y_nb ~ Group, FUN = var, data = dat) aggregate(upsilon ~ Group, FUN = mean, data = dat) # Fit Models: mod.pois <- glm(Y_pois ~ Group, data = dat, family = 'poisson') summary(mod.pois) mod.nb <- MASS::glm.nb(Y_nb ~ Group, data = dat) summary(mod.nb) # Fit Models using glmmTMB R package: # Poisson mp <- glmmTMB(Y_pois ~ Group, family=poisson, data=dat) summary(mp) # NB mnp <- glmmTMB(Y_nb ~ Group, family=nbinom2, data=dat) summary(mnp)
This R code helps to illustrate the relationship between the two distributions.
library(SPR) library(MASS) library(ggplot2) library(gridExtra) N = 1e5 #this should be divisible by however many groups you use! number.groups <- 2 number.timepoints <- 1 set.seed(4202021) dat <- data.frame( 'USUBJID' = rep(paste0('Subject_', formatC(1:N, width = 4, flag = '0')), length.out= N*number.timepoints), 'Group' = rep(paste0('Group_', 1:number.groups), length.out = N*number.timepoints), #'Y_nb' = rep(NA, N*number.timepoints), #'Y_pois' = rep(NA, N*number.timepoints), #'Bio' = rep(rnorm(N, mean = 0, sd = 1), number.timepoints), 'Time' = rep(paste0('Time_', 1:number.timepoints), each = N), stringsAsFactors=F) # Design Matrix X <- model.matrix( ~ Group , data = dat) Beta <- matrix(0, nrow = ncol(X), dimnames=list(colnames(X), 'param')) Beta[] <- c(0.2, 1) # Parameters: mu <- exp(X %*% Beta) dat$mu <- as.vector(mu) for (theta in c(1000, 100, 10, 1, 0.1)) { #theta <- 1000# dispersion parameter dat$theta <- as.vector(theta) upsilon <- mu + (mu^2)/theta dat$upsilon <- as.vector(upsilon) prob <- theta/(theta + mu) # Generate Data: dat$Y_nb <- rnbinom(n = N, size = theta, mu = mu) dat$Y_pois <- rpois(n = N, lambda = mu) p1 <- ggplot2::ggplot(dat, aes(x = Y_nb)) + geom_bar(aes(y = 100*(..count..)/sum(..count..))) + labs(subtitle = paste0('Theta = ', theta), title='Negative Binomial', x ="Value", y = "Percent") + ylim(c(0, 25)) + theme_minimal() + theme(plot.title = element_text(hjust = 0.5), plot.subtitle = element_text(hjust = 0.5)) p2 <- ggplot2::ggplot(dat, aes(x = Y_pois)) + geom_bar(aes(y = 100*(..count..)/sum(..count..))) + labs(subtitle = paste0('Theta = ', theta), title='Poisson', x ="Value", y = "Percent") + ylim(c(0, 50)) + theme_minimal() + theme(plot.title = element_text(hjust = 0.5), plot.subtitle = element_text(hjust = 0.5)) gridExtra::grid.arrange(p1, p2, ncol = 2) # Poisson: # Mean aggregate(Y_pois ~ Group, FUN = mean, data = dat) aggregate(mu ~ Group, FUN = mean, data = dat) # Variance aggregate(Y_pois ~ Group, FUN = var, data = dat) # Negative Binomial: # Mean: aggregate(Y_nb ~ Group, FUN = mean, data = dat) aggregate(mu ~ Group, FUN = mean, data = dat) # Variance: aggregate(Y_nb ~ Group, FUN = var, data = dat) aggregate(upsilon ~ Group, FUN = mean, data = dat) }
Custom estimation code helps to understand how the model is estimated.
# Custom written estimation routine: Neg_Binom_loglike <- function(vP, dat, Y){ # the 'par' (parameters to estimate) are only passed as a vector # re-create what you need from that vector Beta <- vP[c('b0', 'b1')] Beta <- matrix(Beta, ncol = 1) size <- vP['theta'] X <- model.matrix( ~ Group , data = dat) Y <- dat[ , Y] # Parameters: mu <- exp(X %*% Beta) prob <- size/(size + mu) loglike <- lgamma(Y + size) - lgamma(size) - lgamma(Y + 1) + size*log(prob) + (Y)*log(1-prob) loglike <- -1*loglike loglike <- sum(loglike) return(loglike) } # optimize Beta; theta # Gen param vP <- c('b0' = 0.2, 'b1' = 1, 'theta' = 1) out <- optim(par = vP, fn = Neg_Binom_loglike, method = 'BFGS', dat = dat, Y = 'Y_nb') # Compare Generating Parameter to R package estimate and the custom code estimate cbind('Gen Param' = c(as.vector(Beta), theta), 'Custom' = out$par, 'R package' = c(coef(mod.nb), mod.nb$theta)) # Custom written estimation routine: Poisson_loglike <- function(vP, dat, Y){ # the 'par' (parameters to estimate) are only passed as a vector # re-create what you need from that vector Beta <- vP[c('b0', 'b1')] Beta <- matrix(Beta, ncol = 1) X <- model.matrix( ~ Group , data = dat) Y <- dat[ , Y] # Parameters: mu <- exp(X %*% Beta) loglike <- Y * log(mu) - mu - lgamma(Y + 1) loglike <- -1*loglike loglike <- sum(loglike) return(loglike) } # optimize vP <- c('b0' = 0, 'b1' = 0.5) start <- Sys.time() out <- optim(par = vP, fn = Poisson_loglike, method = 'BFGS', dat = dat, Y = 'Y_pois') end <- Sys.time() end - start # Compare Generating Parameter to R package estimate and the custom code estimate cbind('**Gen Param**' = c(as.vector(Beta)), '**Custom**' = out$par, '**R package**' = c(coef(mod.pois)))
Conduct a simulation study with a unique set of conditions and compute the Type I error and Power. Remember to use the save()
and load()
functions. Use the R code provided below.
library(MASS) N = 50 #this should be divisible by however many groups you use! number.groups <- 2 number.timepoints <- 1 set.seed(2012021) dat <- data.frame( 'USUBJID' = rep(paste0('Subject_', formatC(1:N, width = 4, flag = '0')), length.out= N*number.timepoints), 'Group' = rep(paste0('Group_', 1:number.groups), length.out = N*number.timepoints), 'Time' = rep(paste0('Time_', 1:number.timepoints), each = N), stringsAsFactors=F) # Design Matrix X <- model.matrix( ~ Group , data = dat) Beta <- matrix(0, nrow = ncol(X), dimnames=list(colnames(X), 'param')) #Beta[] <- c(0.2, 0) # Type I error Beta[] <- c(0.2, 1) # Power # Parameters: mu <- exp(X %*% Beta) dat$mu <- as.vector(mu) theta <- 0.5 # dispersion parameter dat$theta <- as.vector(theta) upsilon <- mu + (mu^2)/theta dat$upsilon <- as.vector(upsilon) ####### # Simulation out <- vector() for(repl in 1:1000){ # Generate Data: dat$Y_nb <- rnbinom(n = N, size = theta, mu = mu) #dat$Y_pois <- rpois(n = N, lambda = mu) # Fit Models -both Poisson and NB to data that is NB mod.pois <- glm(Y_nb ~ Group, data = dat, family = 'poisson') mod.nb <- MASS::glm.nb(Y_nb ~ Group, data = dat) out <- rbind(out, c( summary(mod.pois)$coef['GroupGroup_2', 'Pr(>|z|)'], summary(mod.nb)$coef['GroupGroup_2', 'Pr(>|z|)'] )) cat(paste0('Replication: ', repl, '\n')) } colMeans(out < 0.05)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.