ciTools_survreg_vignette.R
In ciTools: Confidence or Prediction Intervals, Quantiles, and Probabilities for Statistical Models

## ---- message = FALSE---------------------------------------------------------
library(dplyr)
library(ggplot2)
library(knitr)
library(ciTools)
library(here)
set.seed(20180925)

## ----include = FALSE, cache = TRUE--------------------------------------------
library(SPREDA)
car <- rep(c("suv", "sedan"), 25)
temp <- seq(40,100, length.out = 50)
time <- exp(0.33 + 0.2 * ifelse(car == "sedan", 0, 1) + 0.08 * temp + rsev(50))
time <- ifelse(time < 2000, time, 2000)
failure <- ifelse(time < 2000, 1, 0)
dat <- data.frame(temp = temp,
                  car = car,
                  time = time,
                  failure = failure)

## ----cache = TRUE-------------------------------------------------------------
kable(head(dat))

## ----cache = TRUE, fig.width = 5, fig.height= 5, warning = FALSE--------------
ggplot(dat, aes(x = temp, y = time)) +
    geom_point(aes(color = factor(failure)))+
    ggtitle("Censored obs. in red") +
    theme_bw()

## ----cache = TRUE-------------------------------------------------------------
(fit <- survreg(Surv(time, failure) ~ temp + car, data = dat)) ## weibull dist is default

## ----cache = TRUE, fig.width = 8, fig.height = 5, warning = FALSE-------------
with_ints <- ciTools::add_ci(dat,fit, names = c("lcb", "ucb")) %>%
    ciTools::add_pi(fit, names = c("lpb", "upb"))

## ----cache = TRUE, fig.width = 8, fig.height = 5, warning = FALSE-------------
kable(head(with_ints))

## ----cache = TRUE, fig.width = 8, fig.height = 5, warning = FALSE-------------
ggplot(with_ints, aes(x = temp, y = time)) +
    geom_point(aes(color = car)) +
    facet_wrap(~car)+
    theme_bw() +
    ggtitle("Model fit with 95% CIs and PIs",
            "solid line = mean, dotted line = median") +
    geom_line(aes(y = mean_pred), linetype = 1) +
    geom_line(aes(y = median_pred), linetype = 2) +
    geom_ribbon(aes(ymin = lcb, ymax = ucb), alpha = 0.5) +
    geom_ribbon(aes(ymin = lpb, ymax = upb), alpha = 0.1)

## ----cache = TRUE, fig.width = 8, fig.height = 5, warning = FALSE-------------
probs <- ciTools::add_probs(dat, fit, q = 500,
                            name = c("prob", "lcb", "ucb"),
                            comparison = ">")

## ----cache = TRUE, fig.width = 8, fig.height = 5, warning = FALSE-------------
ggplot(probs, aes(x = temp, y = prob)) +
    ggtitle("Estimated prob. of avg. spring lasting longer than 500 hrs.") +
    ylim(c(0,1)) +
    facet_wrap(~car)+
    theme_bw() +
    geom_line(aes(y = prob)) +
    geom_ribbon(aes(ymin = lcb, ymax = ucb), alpha = 0.5)

## ----cache = TRUE, fig.width = 8, fig.height = 5, warning = FALSE-------------
quants <- ciTools::add_quantile(dat, fit, p = 0.90,
                                name = c("quant", "lcb", "ucb"))

## ----cache = TRUE, fig.width = 8, fig.height = 5, warning = FALSE-------------
ggplot(quants, aes(x = temp, y = time)) +
    geom_point(aes(color = car)) +
    ggtitle("Estimated 90th percentile of condtional failure distribution, with CI") +
    facet_wrap(~car)+
    theme_bw() +
    geom_line(aes(y = quant)) +
    geom_ribbon(aes(ymin = lcb, ymax = ucb), alpha = 0.5)

## ----include=FALSE, cache= TRUE-----------------------------------------------
path <- "./survreg_data"
exp0 <- read.csv(paste0(path,"/","exponential_sim_expect_ci_censoredp00.csv"))
exp1 <- read.csv(paste0(path,"/","exponential_sim_expect_ci_censoredp03.csv"))
exp2 <- read.csv(paste0(path,"/","exponential_sim_expect_ci_censoredp05.csv"))
exp_dat <- rbind(exp0, exp1, exp2) %>%
    mutate(distr = "Exponential")
loglog0 <- read.csv(paste0(path,"/",
                           "loglogistic_sim_expect_ci_scale025_censoredp00.csv"))
loglog1 <- read.csv(paste0(path,"/",
                           "loglogistic_sim_expect_ci_scale025_censoredp03.csv"))
loglog2 <- read.csv(paste0(path,"/",
                           "loglogistic_sim_expect_ci_scale025_censoredp05.csv"))
loglog_dat <- rbind(loglog0, loglog1, loglog2) %>%
    mutate(distr = "Loglogistic") %>%
    dplyr::select(-c(scale))
lognorm0 <- read.csv(paste0(path,"/","lognormal_sim_expect_ci_scale2_censoredp00.csv"))
lognorm1 <- read.csv(paste0(path,"/","lognormal_sim_expect_ci_scale2_censoredp03.csv"))
lognorm2 <- read.csv(paste0(path,"/","lognormal_sim_expect_ci_scale2_censoredp05.csv"))
lognorm_dat <- rbind(lognorm0, lognorm1, lognorm2) %>%
    mutate(distr = "Lognormal") %>%
    dplyr::select(-c(scale))
weibull0 <- read.csv(paste0(path,"/","weibull_sim_expect_ci_scale2_censoredp00.csv"))
weibull1 <- read.csv(paste0(path,"/","weibull_sim_expect_ci_scale2_censoredp03.csv"))
weibull2 <- read.csv(paste0(path,"/","weibull_sim_expect_ci_scale2_censoredp05.csv"))
weibull_dat <- rbind(weibull0, weibull1, weibull2) %>%
    mutate(distr = "Weibull") %>%
    dplyr::select(-c(scale))
ci_dat <- rbind(exp_dat, loglog_dat, lognorm_dat, weibull_dat)

## ----fig.width=8, fig.height=8, echo = FALSE, cache= TRUE---------------------
ggplot(filter(ci_dat), aes(x=sample_size, y=cov_prob)) +
    geom_point(size = 2) +
    geom_line(size = 1.5) +
    geom_errorbar(aes(ymin=cov_prob - 2*cov_prob_se,
                      ymax = cov_prob + 2*cov_prob_se), width=.1) +
    xlab("Sample Size (log scale)") +
    ylab("Coverage Probs +/- 2 SEs") +
    ggtitle("C.I. Simulation, by Distribution and Censoring",
            subtitle = "Coverage Probabilities (zoomed in)") +
    scale_x_log10(breaks = c(20, 30, 50, 100, 250, 500, 1000, 2000)) +
    geom_hline(aes(yintercept = 0.90), colour="#BB0000", linetype="dashed", size = 1) +
    facet_grid(distr~censoredp)


## ----fig.width=8, fig.height=8, echo=FALSE, cache= TRUE-----------------------
ggplot(filter(ci_dat), aes(x=sample_size, y=cov_prob)) +
    geom_point(size = 2) +
    geom_line(size = 1.5) +
    ylim(0,1) +
    geom_errorbar(aes(ymin=cov_prob - 2*cov_prob_se,
                      ymax = cov_prob + 2*cov_prob_se), width=.1) +
    xlab("Sample Size (log scale)") +
    ylab("Coverage Probs +/- 2 SEs") +
    ggtitle("C.I. Simulation, by Distribution and Censoring",
            subtitle = "Coverage Probabilities (zoomed out)") +
    scale_x_log10(breaks = c(20, 30, 50, 100, 250, 500, 1000, 2000)) +
    geom_hline(aes(yintercept = 0.90), colour="#BB0000", linetype="dashed", size = 1) +
    facet_grid(distr~censoredp)

## ----fig.width=8, fig.height=8, echo = FALSE, cache= TRUE---------------------
ggplot(filter(ci_dat), aes(x=sample_size, y=int_width)) +
    geom_point(size = 2) +
    geom_line(size = 1.5) +
    geom_errorbar(aes(ymin=int_width - 2*int_width_se,
                      ymax = int_width + 2*int_width_se), width=.1) +
    xlab("Sample Size (log scale)") +
    ylab("Interval Widths +/- 2 SEs") +
    ggtitle("C.I Simulation, by Distribution and Censoring",
            subtitle = "Interval Widths") +
    scale_x_log10(breaks = c(20, 30, 50, 100, 250, 500, 1000, 2000)) +
    geom_hline(aes(yintercept = 0.0), colour="blue", linetype="dashed", size = 1) +
    facet_grid(distr~censoredp)

## ---- include = FALSE, cache= TRUE, warning = FALSE---------------------------
expp0 <- read.csv(paste0(path,"/","exponential_sim_prob_ci_censoredp00.csv"))
expp1 <- read.csv(paste0(path,"/","exponential_sim_prob_ci_censoredp03.csv"))
expp2 <- read.csv(paste0(path,"/","exponential_sim_prob_ci_censoredp05.csv"))
expp_dat <- rbind(expp0, expp1, expp2) %>%
    mutate(distr = "Exponential")
loglogp0 <- read.csv(paste0(path,"/",
                            "loglogistic_sim_prob_ci_scale025_censoredp00.csv"))
loglogp1 <- read.csv(paste0(path,"/",
                            "loglogistic_sim_prob_ci_scale025_censoredp03.csv"))
loglogp2 <- read.csv(paste0(path,"/",
                            "loglogistic_sim_prob_ci_scale025_censoredp05.csv"))
loglogp_dat <- rbind(loglogp0, loglogp1, loglogp2) %>%
    mutate(distr = "Loglogisic") %>%
    dplyr::select(-c(scale))
lognormp0 <- read.csv(paste0(path,"/","lognormal_sim_prob_ci_scale2_censoredp00.csv"))
lognormp1 <- read.csv(paste0(path,"/","lognormal_sim_prob_ci_scale2_censoredp03.csv"))
lognormp2 <- read.csv(paste0(path,"/","lognormal_sim_prob_ci_scale2_censoredp05.csv"))
lognormp_dat <- rbind(lognormp0, lognormp1, lognormp2) %>%
    mutate(distr = "Lognormal") %>%
    dplyr::select(-c(scale))
weibullp0 <- read.csv(paste0(path,"/","weibull_sim_prob_ci_scale2_censoredp00.csv"))
weibullp1 <- read.csv(paste0(path,"/","weibull_sim_prob_ci_scale2_censoredp03.csv"))
weibullp2 <- read.csv(paste0(path,"/","weibull_sim_prob_ci_scale2_censoredp05.csv"))
weibullp_dat <- rbind(weibullp0, weibullp1, weibullp2) %>%
    mutate(distr = "Weibull") %>%
    dplyr::select(-c(scale))
prob_dat <- rbind(expp_dat, loglogp_dat, lognormp_dat, weibullp_dat)

## ---- fig.width=8, fig.height=8, echo = FALSE, cache= TRUE, warning = FALSE----
ggplot(filter(prob_dat), aes(x=sample_size, y=cov_prob)) +
    geom_point(size = 2) +
    geom_line(size = 1.5) +
    geom_errorbar(aes(ymin=cov_prob - 2*cov_prob_se,
                      ymax = cov_prob + 2*cov_prob_se), width=.1) +
    xlab("Sample Size (log scale)") +
    ylab("Coverage Probs +/- 2 SEs") +
    ggtitle("C.I. for Survival Probability Simulation, by Distribution and Censoring",
            subtitle = "Coverage Probabilities (zoomed in)") +
    scale_x_log10(breaks = c(20, 30, 50, 100, 250, 500, 1000, 2000)) +
    geom_hline(aes(yintercept = 0.90), colour="#BB0000", linetype="dashed", size = 1) +
    facet_grid(distr~censoredp)

## ---- fig.width=8, fig.height=8, echo = FALSE, cache= TRUE, warning = FALSE----
ggplot(filter(prob_dat), aes(x=sample_size, y=cov_prob)) +
    geom_point(size = 2) +
    geom_line(size = 1.5) +
    ylim(0,1) +
    geom_errorbar(aes(ymin=cov_prob - 2*cov_prob_se,
                      ymax = cov_prob + 2*cov_prob_se), width=.1) +
    xlab("Sample Size (log scale)") +
    ylab("Coverage Probs +/- 2 SEs") +
    ggtitle("Survival Probability Simulation, by Distribution and Censoring",
            subtitle = "Coverage Probabilities (zoomed out)") +
    scale_x_log10(breaks = c(20, 30, 50, 100, 250, 500, 1000, 2000)) +
    geom_hline(aes(yintercept = 0.90), colour="#BB0000", linetype="dashed", size = 1) +
    facet_grid(distr~censoredp)

## ----fig.width=8, fig.height=8, echo = FALSE, cache= TRUE, warning = FALSE----
ggplot(filter(prob_dat), aes(x=sample_size, y=int_width)) +
    geom_point(size = 2) +
    geom_line(size = 1.5) +
    geom_errorbar(aes(ymin=int_width - 2*int_width_se,
                      ymax = int_width + 2*int_width_se), width=.1) +
    xlab("Sample Size (log scale)") +
    ylab("Interval Widths +/- 2 SEs") +
    ggtitle("Survival Probability Simulation, by Distribution and Censoring",
            subtitle = "Interval Widths") +
    scale_x_log10(breaks = c(20, 30, 50, 100, 250, 500, 1000, 2000)) +
    geom_hline(aes(yintercept = 0.0), colour="blue", linetype="dashed", size = 1) +
    facet_grid(distr~censoredp)

## ----include=FALSE, cache= TRUE-----------------------------------------------
exppi0 <- read.csv(paste0(path,"/","exponential_sim_pi_censoredp00.csv"))
exppi1 <- read.csv(paste0(path,"/","exponential_sim_pi_censoredp03.csv"))
exppi2 <- read.csv(paste0(path,"/","exponential_sim_pi_censoredp05.csv"))
exppi_dat <- rbind(exppi0, exppi1, exppi2) %>%
    mutate(distr = "Exponential")
loglogpi0 <- read.csv(paste0(path,"/",
                             "loglogistic_sim_pi_scale025_censoredp00.csv"))
loglogpi1 <- read.csv(paste0(path,"/",
                             "loglogistic_sim_pi_scale025_censoredp03.csv"))
loglogpi2 <- read.csv(paste0(path,"/",
                             "loglogistic_sim_pi_scale025_censoredp05.csv"))
loglogpi_dat <- rbind(loglogpi0, loglogpi1, loglogpi2) %>%
    mutate(distr = "Loglogistic") %>%
    dplyr::select(-c(scale))
lognormpi0 <- read.csv(paste0(path,"/","lognormal_sim_pi_scale2_censoredp00.csv"))
lognormpi1 <- read.csv(paste0(path,"/","lognormal_sim_pi_scale2_censoredp03.csv"))
lognormpi2 <- read.csv(paste0(path,"/","lognormal_sim_pi_scale2_censoredp05.csv"))
lognormpi_dat <- rbind(lognormpi0, lognormpi1, lognormpi2) %>%
    mutate(distr = "Lognormal") %>%
    dplyr::select(-c(scale))
weibullpi0 <- read.csv(paste0(path,"/","weibull_sim_pi_scale2_censoredp00.csv"))
weibullpi1 <- read.csv(paste0(path,"/","weibull_sim_pi_scale2_censoredp03.csv"))
weibullpi2 <- read.csv(paste0(path,"/","weibull_sim_pi_scale2_censoredp05.csv"))
weibullpi_dat <- rbind(weibullpi0, weibullpi1, weibullpi2) %>%
    mutate(distr = "Weibull") %>%
    dplyr::select(-c(scale))
pi_dat <- rbind(exppi_dat, loglogpi_dat, lognormpi_dat, weibullpi_dat)

## ----fig.width=8, fig.height=8, echo=FALSE, cache= TRUE-----------------------
ggplot(filter(pi_dat), aes(x=sample_size, y=cov_prob)) +
    geom_point(size = 2) +
    geom_line(size = 1.5) +
    geom_line(aes(y = cov_prob_boot), size = 1.5, color = "blue") +
    ## geom_errorbar(aes(ymin=cov_prob - 2*cov_prob_se,
    ##                   ymax = cov_prob + 2*cov_prob_se), width=.1) +
    ## geom_errorbar(aes(ymin=cov_prob_boot - 2*cov_prob_boot_se,
    ##                   ymax = cov_prob_boot + 2*cov_prob_boot_se),
    ##               width=.1, color = "blue") +
    xlab("Sample Size (log scale)") +
    ylab("Coverage Probs +/- 2 SEs") +
    ggtitle("P.I. Simulation, by Distribution and Censoring",
            subtitle = "Coverage Probabilities. Naive method (black). Simulation method (blue) ") +
    scale_x_log10(breaks = c(20, 30, 50, 100, 250, 500, 1000, 2000)) +
    geom_hline(aes(yintercept = 0.90), colour="#BB0000", linetype="dashed", size = 1) +
    facet_grid(distr~censoredp)

## ----fig.width=8, fig.height=8, echo = FALSE, cache= TRUE---------------------
ggplot(filter(pi_dat), aes(x=sample_size, y=cov_prob)) +
    geom_point(size = 2) +
    geom_line(size = 1.5) +
    ylim(0,1) +
    geom_errorbar(aes(ymin=cov_prob - 2*cov_prob_se,
                      ymax = cov_prob + 2*cov_prob_se), width=.1) +
    xlab("Sample Size (log scale)") +
    ylab("Coverage Probs +/- 2 SEs") +
    ggtitle("P.I. Simulation, by Distribution and Censoring",
            subtitle = "Coverage Probabilities (zoomed out)") +
    scale_x_log10(breaks = c(20, 30, 50, 100, 250, 500, 1000, 2000)) +
    geom_hline(aes(yintercept = 0.90), colour="#BB0000", linetype="dashed", size = 1) +
    facet_grid(distr~censoredp)

## ----fig.width=8, fig.height=5, echo = FALSE, cache= TRUE---------------------
ggplot(filter(pi_dat, distr == "Exponential"), aes(x=sample_size, y=int_width)) +
    geom_point(size = 2) +
    geom_line(size = 1.5) +
    geom_line(aes(y = int_width_boot), color = "blue", size = 1.5) +
    ## geom_errorbar(aes(ymin=int_width - 2*int_width_se,
    ##                       ymax = int_width + 2*int_width_se), width=.1) +
    xlab("Sample Size (log scale)") +
    ylab("Interval Widths +/- 2 SEs") +
    ggtitle("P.I Simulation, Exponential Distribution",
            subtitle = "Interval Widths. naive method (black), simulation method (blue)") +
    scale_x_log10(breaks = c(20, 30, 50, 100, 250, 500, 1000, 2000)) +
    facet_wrap(~censoredp)

## ----fig.width=8, fig.height=5, echo = FALSE, cache= TRUE---------------------
ggplot(filter(pi_dat, distr == "Loglogistic"), aes(x=sample_size, y=int_width)) +
    geom_point(size = 2) +
    geom_line(size = 1.5) +
    geom_line(aes(y = int_width_boot), color = "blue", size = 1.5) +
    ## geom_errorbar(aes(ymin=int_width - 2*int_width_se,
    ##                   ymax = int_width + 2*int_width_se), width=.1) +
    xlab("Sample Size (log scale)") +
    ylab("Interval Widths +/- 2 SEs") +
    ggtitle("P.I Simulation, Loglogistic Distribution",
            subtitle = "Interval Widths. naive method (black), simulation method (blue)") +
    scale_x_log10(breaks = c(20, 30, 50, 100, 250, 500, 1000, 2000)) +
    facet_wrap(~censoredp)

## ----fig.width=8, fig.height=5, echo = FALSE, cache= TRUE---------------------
ggplot(filter(pi_dat, distr == "Lognormal"), aes(x=sample_size, y=int_width)) +
    geom_point(size = 2) +
    geom_line(size = 1.5) +
    geom_line(aes(y = int_width_boot), color = "blue", size = 1.5) +
    ## geom_errorbar(aes(ymin=int_width - 2*int_width_se,
    ##                   ymax = int_width + 2*int_width_se), width=.1) +
    xlab("Sample Size (log scale)") +
    ylab("Interval Widths +/- 2 SEs") +
    ggtitle("P.I Simulation, Lognormal Distribution",
            subtitle = "Interval Widths. naive method (black), simulation method (blue)") +
    scale_x_log10(breaks = c(20, 30, 50, 100, 250, 500, 1000, 2000)) +
    facet_wrap(~censoredp)

## ----fig.width=8, fig.height=5, echo = FALSE, cache= TRUE---------------------
ggplot(filter(pi_dat, distr == "Weibull"), aes(x=sample_size, y=int_width)) +
    geom_point(size = 2) +
    geom_line(size = 1.5) +
    geom_line(aes(y = int_width_boot), color = "blue", size = 1.5) +
    ## geom_errorbar(aes(ymin=int_width - 2*int_width_se,
    ##                   ymax = int_width + 2*int_width_se), width=.1) +
    xlab("Sample Size (log scale)") +
    ylab("Interval Widths +/- 2 SEs") +
    ggtitle("P.I Simulation, Weibull Distribution",
            subtitle = "Interval Widths. naive method (black), simulation method (blue)") +
    scale_x_log10(breaks = c(20, 30, 50, 100, 250, 500, 1000, 2000)) +
    facet_wrap(~censoredp)

## -----------------------------------------------------------------------------
sessionInfo()
Any scripts or data that you put into this service are public.
ciTools documentation built on Jan. 13, 2021, 7 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
ciTools
Confidence or Prediction Intervals, Quantiles, and Probabilities for Statistical Models

inst/doc/ciTools_survreg_vignette.R
In ciTools: Confidence or Prediction Intervals, Quantiles, and Probabilities for Statistical Models

Try the ciTools package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

ciTools Confidence or Prediction Intervals, Quantiles, and Probabilities for Statistical Models

inst/doc/ciTools_survreg_vignette.R In ciTools: Confidence or Prediction Intervals, Quantiles, and Probabilities for Statistical Models

Try the ciTools package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

ciTools
Confidence or Prediction Intervals, Quantiles, and Probabilities for Statistical Models

inst/doc/ciTools_survreg_vignette.R
In ciTools: Confidence or Prediction Intervals, Quantiles, and Probabilities for Statistical Models