ciTools-glm-vignette.R
In ciTools: Confidence or Prediction Intervals, Quantiles, and Probabilities for Statistical Models

## ----message=FALSE------------------------------------------------------------
library(dplyr)
library(ggplot2)
library(ciTools)
library(MASS)
library(arm)
set.seed(20171102)

## -----------------------------------------------------------------------------
x <- rnorm(100, mean = 5)
y <- rbinom(n = 100, size = 1, prob = invlogit(-20 + 4*x))
df <- data.frame(x = x, y = y)
fit <- glm(y ~ x, family = binomial)

## -----------------------------------------------------------------------------
df1 <- add_ci(df, fit, names = c("lwr", "upr"), alpha = 0.1) %>%
    mutate(type = "parametric")

df2 <- add_ci(df, fit, type = "boot", names = c("lwr", "upr"), alpha = 0.1, nSims = 500) %>%
    mutate(type = "bootstrap")

df <- bind_rows(df1, df2)

## ----fig.width = 6, fig.heither = 4, fig.align = "center"---------------------
ggplot(df, aes(x = x, y = y)) +
    ggtitle("Logistic Regression", subtitle = "Model fit (black line) and confidence intervals (gray)") +
    geom_jitter(height = 0.01) +
    geom_ribbon(aes(ymin = lwr, ymax = upr), alpha = 0.4) +
    geom_line(aes(x =x , y = pred), size = 2) +
    facet_grid(~type)

## ----echo = F-----------------------------------------------------------------
df3 <- filter(df, type == "parametric")

df4 <- filter(df, type == "bootstrap") %>%
    rename(lboot = lwr, uboot = upr) %>%
    bind_cols(df3)

## ----fig.width = 6, fig.heither = 4, fig.align = "center", echo = F-----------
ggplot(df4, aes(x = x, y = y)) +
  ggtitle("Logistic Regression") +
  geom_jitter(height = 0.01) +
  geom_ribbon(aes(ymin = lboot, ymax = uboot), alpha = 0.4, fill = "red") +
  geom_ribbon(aes(ymin = lwr, ymax = upr), alpha = 0.4, fill = "royalblue") +
  geom_line(aes(x = x , y = pred...3), size = 2)

## -----------------------------------------------------------------------------
x <- rnorm(100, mean = 0)
y <- rpois(n = 100, lambda = exp(1.5 + 0.5*x))
df <- data.frame(x = x, y = y)
fit <- glm(y ~ x , family = poisson(link = "log"))

## -----------------------------------------------------------------------------
df_ints <- df %>%
  add_ci(fit, names = c("lcb", "ucb"), alpha = 0.1) %>%
  add_pi(fit, names = c("lpb", "upb"), alpha = 0.1, nSims = 20000) %>%
  head()

## ----fig.width = 6, fig.heither = 4, fig.align = "center"---------------------
df_ints %>%
    ggplot(aes(x = x, y = y)) +
    geom_point(size = 2) +
    ggtitle("Poisson Regression", subtitle = "Model fit (black line), with prediction intervals (gray), confidence intervals (dark gray)") +
    geom_line(aes(x = x, y = pred), size = 1.2) +
    geom_ribbon(aes(ymin = lcb, ymax = ucb), alpha = 0.4) +
    geom_ribbon(aes(ymin = lpb, ymax = upb), alpha = 0.2)

## -----------------------------------------------------------------------------
df %>%
  add_probs(fit, q = 10) %>%
  add_quantile(fit, p = 0.4) %>%
  head()

## -----------------------------------------------------------------------------
x <- runif(n = 100, min = 0, max = 2)
mu <- exp(1 + x)
y <- rnegbin(n = 100, mu = mu, theta = mu/(5 - 1))

## -----------------------------------------------------------------------------
df <- data.frame(x = x, y = y)
fit <- glm(y ~ x, family = quasipoisson(link = "log"))
summary(fit)$dispersion

## -----------------------------------------------------------------------------
df_ints <- add_ci(df, fit, names = c("lcb", "ucb"), alpha = 0.05) %>%
    add_pi(fit, names = c("lpb", "upb"), alpha = 0.1, nSims = 20000)

## ----fig.width = 6, fig.heither = 4, fig.align="center"-----------------------
ggplot(df_ints, aes(x = x, y = y)) +
    ggtitle("Quasipoisson Regression", subtitle = "Model fit (black line), with Prediction intervals (gray), Confidence intervals (dark gray)") +
    geom_point(size = 2) +
    geom_line(aes(x = x, y = pred), size = 1.2) +
    geom_ribbon(aes(ymin = lcb, ymax = ucb), alpha = 0.4) +
    geom_ribbon(aes(ymin = lpb, ymax = upb), alpha = 0.2)

## ----message=FALSE------------------------------------------------------------
pois <- read.csv("pois_pi_results.csv") %>%
  dplyr::select(-total_time) %>%
  rename(nominal_level = level) %>%
  dplyr::select(sample_size, everything())
knitr::kable(pois)

## ----fig.width = 6, fig.heither = 4, fig.align="center", echo = F-------------
ggplot(pois, aes(x=sample_size, y=cov_prob)) +
    geom_point(size = 2) +
    geom_line(size = 1.5) +
    geom_errorbar(aes(ymin=cov_prob - 2*cov_prob_se, ymax = cov_prob + 2*cov_prob_se), width=.1) +
    ylim(0, 1) +
    xlab("Sample Size (log scale)") +
    ylab("Coverage Probs +/- 2 SEs") +
    ggtitle("Poisson Regression -- P.I. Simulation", subtitle = "Coverage Probabilities on [0,1] scale") +
    scale_x_log10(breaks = c(20, 30, 50, 100, 250, 500, 1000, 2000)) +
    geom_hline(aes(yintercept = 0.95), colour="#BB0000", linetype="dashed", size = 1)

## ----fig.width = 6, fig.heither = 4, fig.align="center", echo = F-------------
ggplot(pois, aes(x=sample_size, y=cov_prob)) +
    geom_point(size = 2) +
    geom_line(size = 1.5) +
    geom_errorbar(aes(ymin=cov_prob - 2*cov_prob_se, ymax = cov_prob + 2*cov_prob_se), width=.1) +
    xlab("Sample Size (log scale)") +
    ylab("Coverage Probs +/- 2 SEs") +
    ggtitle("Poisson Regression -- P.I. Simulation", subtitle = "Coverage Probabilities (zoom in)") +
    scale_x_log10(breaks = c(20, 30, 50, 100, 250, 500, 1000, 2000)) +
    geom_hline(aes(yintercept = 0.95), colour="#BB0000", linetype="dashed", size = 1)

## ----fig.width = 6, fig.heither = 4, fig.align="center", echo = F-------------
ggplot(pois, aes(x=sample_size, y=int_width)) +
    geom_errorbar(aes(ymin=int_width - 2*int_width_se, ymax = int_width + 2*int_width_se), width=.1) +
    scale_x_log10(breaks = c(20, 30, 50, 100, 250, 500, 1000, 2000)) +
    xlab("Sample Size (log scale)") +
    ylab("Interval Widths +/- 2 SEs") +
    ggtitle("Poisson Regression -- P.I. Simulation", subtitle = "Interval Widths") +
    geom_line(size = 1.5) +
    geom_point(size = 1.5)

## ----message=FALSE------------------------------------------------------------
neg_bin <- read.csv("negbin_pi_results.csv") %>%
  dplyr::select(-total_time) %>%
  rename(nominal_level = level) %>%
  dplyr::select(sample_size, everything())
knitr::kable(neg_bin)

## ----fig.width = 6, fig.heither = 4, fig.align="center", echo = F-------------
ggplot(neg_bin, aes(x=sample_size, y=cov_prob)) +
    geom_point(size = 2) +
    geom_line(size = 1.5) +
    geom_errorbar(aes(ymin=cov_prob - 2*cov_prob_se, ymax = cov_prob + 2*cov_prob_se), width=.1) +
    ylim(0, 1) +
    xlab("Sample Size (log scale)") +
    ylab("Coverage Probs +/- 2 SEs") +
    ggtitle("Negative Binomial Regression -- P.I. Simulation", subtitle = "Coverage Probabilities on [0,1] scale") +
    scale_x_log10(breaks = c(20, 30, 50, 100, 150, 200, 250, 500, 1000, 2000)) +
    geom_hline(aes(yintercept = 0.95), colour="#BB0000", linetype="dashed", size = 1)

## ----fig.width = 6, fig.heither = 4, fig.align="center", echo = F-------------
ggplot(neg_bin, aes(x=sample_size, y=cov_prob)) +
    geom_point(size = 2) +
    geom_line(size = 1.5) +
    geom_errorbar(aes(ymin=cov_prob - 2*cov_prob_se, ymax = cov_prob + 2*cov_prob_se), width=.1) +
    xlab("Sample Size (log scale)") +
    ylab("Coverage Probs +/- 2 SEs") +
    ggtitle("Negative Binomial Regression -- P.I. Simulation", subtitle = "Coverage Probabilities (zoom in)") +
    scale_x_log10(breaks = c(20, 30, 50, 100, 150, 200, 250, 500, 1000, 2000)) +
    geom_hline(aes(yintercept = 0.95), colour="#BB0000", linetype="dashed", size = 1)

## ----fig.width = 6, fig.heither = 4, fig.align="center", echo = F-------------
ggplot(neg_bin, aes(x=sample_size, y=int_width)) +
    geom_errorbar(aes(ymin=int_width - 2*int_width_se, ymax = int_width + 2*int_width_se), width=.1) +
    scale_x_log10(breaks = c(20, 30, 50, 100, 150, 200, 250, 500, 1000, 2000)) +
    xlab("Sample Size (log scale)") +
    ylab("Interval Widths +/- 2 SEs") +
    ggtitle("Negative Binomial Regression -- P.I. Simulation", subtitle = "Interval Widths") +
    geom_line(size = 1.5) +
    geom_point(size = 1.5)

## ----message=FALSE------------------------------------------------------------
gam <- read.csv("gamma_pi_results.csv") %>%
  dplyr::select(-total_time) %>%
  rename(nominal_level = level) %>%
  dplyr::select(sample_size, everything())

knitr::kable(gam)

## ----fig.width = 6, fig.heither = 4, fig.align="center", echo = F-------------
ggplot(gam, aes(x=sample_size, y=cov_prob)) +
    geom_point(size = 2) +
    geom_line(size = 1.5) +
    geom_errorbar(aes(ymin=cov_prob - 2*cov_prob_se, ymax = cov_prob + 2*cov_prob_se), width=.1) +
    ylim(0, 1) +
    xlab("Sample Size (log scale)") +
    ylab("Coverage Probs +/- 2 SEs") +
    ggtitle("Gamma Regression -- P.I. Simulation", subtitle = "Coverage Probabilities on [0,1] scale") +
    scale_x_log10(breaks = c(100, 250, 500, 1000, 2000)) +
    geom_hline(aes(yintercept = 0.95), colour="#BB0000", linetype="dashed", size = 1)

## ----fig.width = 6, fig.heither = 4, fig.align="center", echo = F-------------
ggplot(gam, aes(x=sample_size, y=cov_prob)) +
    geom_point(size = 2) +
    geom_line(size = 1.5) +
    geom_errorbar(aes(ymin=cov_prob - 2*cov_prob_se, ymax = cov_prob + 2*cov_prob_se), width=.1) +
    xlab("Sample Size (log scale)") +
    ylab("Coverage Probs +/- 2 SEs") +
    ggtitle("Gamma Regression -- P.I. Simulation", subtitle = "Coverage Probabilities (zoom in)") +
    scale_x_log10(breaks = c(100, 250, 500, 1000, 2000)) +
    geom_hline(aes(yintercept = 0.95), colour="#BB0000", linetype="dashed", size = 1)

## ----fig.width = 6, fig.heither = 4, fig.align="center", echo = F-------------
ggplot(gam, aes(x=sample_size, y=int_width)) +
    geom_errorbar(aes(ymin=int_width - 2*int_width_se, ymax = int_width + 2*int_width_se), width=.1) +
    scale_x_log10(breaks = c(100, 250, 500, 1000, 2000)) +
    xlab("Sample Size (log scale)") +
    ylab("Interval Widths +/- 2 SEs") +
    ggtitle("Gamma Regression -- P.I. Simulation", subtitle = "Interval Widths") +
    geom_line(size = 1.5) +
    geom_point(size = 1.5)

## ----message=FALSE------------------------------------------------------------
norm_log <- read.csv("gaussian_pi_loglink_results.csv") %>%
  dplyr::select(-total_time) %>%
  rename(nominal_level = level) %>%
  dplyr::select(sample_size, everything())
knitr::kable(norm_log)

## ----fig.width = 6, fig.heither = 4, fig.align="center", echo = F-------------
ggplot(norm_log, aes(x=sample_size, y=cov_prob)) +
    geom_point(size = 2) +
    geom_line(size = 1.5) +
    geom_errorbar(aes(ymin=cov_prob - 2*cov_prob_se, ymax = cov_prob + 2*cov_prob_se), width=.1) +
    ylim(0, 1) +
    xlab("Sample Size (log scale)") +
    ylab("Coverage Probs +/- 2 SEs") +
    ggtitle("Gaussian Regression -- P.I. Simulation", subtitle = "Coverage Probabilities on [0,1] scale") +
    scale_x_log10(breaks = c(20, 30, 50, 100, 250, 500, 1000, 2000)) +
    geom_hline(aes(yintercept = 0.95), colour="#BB0000", linetype="dashed", size = 1)

## ----fig.width = 6, fig.heither = 4, fig.align="center", echo = F-------------
ggplot(norm_log, aes(x=sample_size, y=cov_prob)) +
    geom_point(size = 2) +
    geom_line(size = 1.5) +
    geom_errorbar(aes(ymin=cov_prob - 2*cov_prob_se, ymax = cov_prob + 2*cov_prob_se), width=.1) +
    xlab("Sample Size (log scale)") +
    ylab("Coverage Probs +/- 2 SEs") +
    ggtitle("Gaussian Regression -- P.I. Simulation", subtitle = "Coverage Probabilities (zoom in)") +
    scale_x_log10(breaks = c(20, 30, 50, 100, 250, 500, 1000, 2000)) +
    geom_hline(aes(yintercept = 0.95), colour="#BB0000", linetype="dashed", size = 1)

## ----fig.width = 6, fig.heither = 4, fig.align="center", echo = F-------------
ggplot(norm_log, aes(x=sample_size, y=int_width)) +
    geom_errorbar(aes(ymin=int_width - 2*int_width_se, ymax = int_width + 2*int_width_se), width=.1) +
    scale_x_log10(breaks = c(20, 30, 50, 100, 250, 500, 1000, 2000)) +
    xlab("Sample Size (log scale)") +
    ylab("Interval Widths +/- 2 SEs") +
    ggtitle("Gaussian Regression -- P.I. Simulation", subtitle = "Interval Widths") +
    geom_line(size = 1.5) +
    geom_point(size = 1.5)

## -----------------------------------------------------------------------------
sessionInfo()

Any scripts or data that you put into this service are public.

ciTools documentation built on Jan. 13, 2021, 7 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

ciTools
Confidence or Prediction Intervals, Quantiles, and Probabilities for Statistical Models

inst/doc/ciTools-glm-vignette.R
In ciTools: Confidence or Prediction Intervals, Quantiles, and Probabilities for Statistical Models

Try the ciTools package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

ciTools Confidence or Prediction Intervals, Quantiles, and Probabilities for Statistical Models

inst/doc/ciTools-glm-vignette.R In ciTools: Confidence or Prediction Intervals, Quantiles, and Probabilities for Statistical Models

Try the ciTools package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

ciTools
Confidence or Prediction Intervals, Quantiles, and Probabilities for Statistical Models

inst/doc/ciTools-glm-vignette.R
In ciTools: Confidence or Prediction Intervals, Quantiles, and Probabilities for Statistical Models