inst/doc/eesim.R

## ---- echo = FALSE, message = FALSE--------------------------------------
library(eesim)
library(ggplot2)
library(dplyr)
library(tidyr)
library(gridExtra)

## ----message = FALSE, echo = FALSE, fig.width = 5, fig.align = "center"----
library(dlnm)
data("chicagoNMMAPS")
chicagoNMMAPS %>% 
  tbl_df() %>% 
  filter(year >= 1996) %>% 
  select(date, cvd, o3) %>% 
  rename(`Cardiovascular deaths` = cvd,
         `Ozone concentration (ppb)` = o3) %>% 
  gather(variable, value, -date) %>% 
  ggplot(aes(x = date, y = value)) + 
  geom_point(alpha = 0.5, size = 1) + 
  geom_smooth(se = FALSE, span = 0.1, method = "loess", color = "red") +
  facet_wrap(~ variable, ncol = 1, scales = "free_y") + 
  theme_classic() + 
  labs(x = "Date", y = "Variable value")

## ----message = FALSE, echo = FALSE, results='hide'-----------------------
chicagoNMMAPS %>% 
  tbl_df() %>% 
  filter(year >= 1996) %>% 
  select(date, cvd, o3) %>% 
  gather(variable, value, -date) %>% 
  group_by(variable) %>% 
  summarize(mean = mean(value, na.rm = TRUE),
            sd = sd(value, na.rm = TRUE))

library(splines)
a <- chicagoNMMAPS %>% 
  tbl_df() %>% 
  filter(year >= 1996) %>% select(date, o3)
mod <- lm(o3 ~ ns(scale(time, scale = FALSE, center = TRUE), 7 * 14), data = chicagoNMMAPS)
sd(residuals(mod))

## ---- warning = FALSE, message = FALSE-----------------------------------
sim_chicago <- create_sims(n_reps = 1, n = 365 * 5, central = 20, sd = 7,
                           exposure_type = "continuous", exposure_trend = "cos1",
                           exposure_amp = -.6, average_outcome = 50,
                           outcome_trend = "cos1", outcome_amp = 0.2, 
                           rr = 1.0005, start.date = "1996-01-01")
head(sim_chicago[[1]])

## ----echo = FALSE, message = FALSE, echo = FALSE, fig.width = 5, fig.align = "center"----
sim_chicago[[1]] %>% 
  rename(Exposure = x,
         Outcome = outcome) %>% 
  gather(variable, value, -date) %>% 
  mutate(variable = factor(variable, levels = c("Outcome", "Exposure"))) %>% 
  ggplot(aes(x = date, y = value)) + 
  geom_point(alpha = 0.5, size = 1) + 
  facet_wrap(~ variable, ncol = 1, scales = "free_y") + 
  theme_classic() + 
  geom_smooth(se = FALSE, method = "loess", color = "red", span = 0.1)

## ----fig.align = "center", fig.width = 8, fig.height = 7.5---------------
a <- calendar_plot(sim_chicago[[1]] %>% select(date, outcome), type = "continuous", 
                   legend_name = "Outcome") + 
  ggtitle("Outcome")
b <- calendar_plot(sim_chicago[[1]] %>% select(date, x), type = "continuous") + 
  ggtitle("Exposure")
grid.arrange(a, b, ncol = 1)

## ----echo = FALSE--------------------------------------------------------
set.seed(16)

## ---- warning = FALSE, message = FALSE-----------------------------------
ex_sim <- eesim(n_reps = 100, n = 365 * 5, central = 20, sd = 7,
                exposure_type = "continuous", exposure_trend = "cos1",
                exposure_amp = -.6, average_outcome = 50,
                outcome_trend = "cos1", outcome_amp = 0.2, 
                rr = 1.2, start.date = "1996-01-01",
                custom_model = spline_mod, custom_model_args = list(df_year = 7))

## ------------------------------------------------------------------------
names(ex_sim)

## ----fig.height = 3, fig.width = 7, fig.align = "center"-----------------
calendar_plot(ex_sim[["simulated_datasets"]][[1]] %>% select(date, x), type = "continuous")

## ------------------------------------------------------------------------
head(ex_sim[["indiv_performance"]])

## ----fig.width = 4, fig.height = 5, fig.align = "center"-----------------
coverage_plot(ex_sim[["indiv_performance"]], true_param = 1.2)

## ----echo = FALSE--------------------------------------------------------
eesim_overall_output <- data_frame(element = paste0("`",
                                                    colnames(ex_sim[[3]]),
                                                    "`"),
                                   desc = c("**Mean estimate**: The mean of the estimated log relative rate over all simulations.",
                                            "**Mean estimated relative rate**: The mean of the estimated relative rate over all simulations.",
                                            "**Variance across estimates**: Variance of the point estimates (estimated log relative risk) over all simulations.",
                                            "**Mean variance of estimate**: The mean of the variances of the estimated effect (estimated log relative risk) across all simulations.",
                                            "**Relative bias**: Difference between the estimated log relative risk and true log relative risk as a proportion of the true log relative risk.",
                                            "**95% confidence inverval coverage**: Percent of simulations for which the 95% confidence interval estimate of log relative risk includes the true value of log relative risk.",
                                            "**Power**: Percent of simulations for which the null hypothesis that the log relative risk equals zero is rejected based on a p-value of 0.05."))
knitr::kable(eesim_overall_output, col.names = c("Variable", "Description"))

## ------------------------------------------------------------------------
ex_sim[["overall_performance"]]

## ----echo = FALSE, message = FALSE, echo = FALSE, fig.width = 5, fig.align = "center", fig.height = 2----
chicagoNMMAPS %>% 
  mutate(temp = temp >= quantile(temp, probs = 0.98)) %>% 
  tbl_df() %>% 
  filter(year >= 1996) %>% 
  select(date, temp) %>% 
  rename(`Extreme heat day` = temp) %>% 
  gather(variable, value, -date) %>% 
  ggplot(aes(x = date, y = value)) + 
  geom_jitter(alpha = 0.5, size = 0.7, fill = NA, width = 0, height = 0.1) + 
  facet_wrap(~ variable, ncol = 1, scales = "free_y") + 
  theme_classic() + 
  labs(x = "Date", y = "Variable value")

## ----echo = FALSE, results="hide"----------------------------------------
chicagoNMMAPS %>% 
  mutate(temp = temp >= quantile(temp, probs = 0.98)) %>% 
  tbl_df() %>% 
  filter(year >= 1996) %>% 
  group_by(month) %>% 
  summarize(p_heat = mean(temp))

## ------------------------------------------------------------------------
sim_chicago2 <- create_sims(n_reps = 1, n = 365 * 5, sd = 1,
                            central = c(0, 0, 0, 0, 0, 0.05, 0.12, 0.02, 0, 0, 0, 0),
                            exposure_type = "binary", exposure_trend = "monthly",
                            exposure_amp = -.6, average_outcome = 50,
                            outcome_trend = "cos1", outcome_amp = 0.2, 
                            rr = 1.05, start.date = "1996-01-01")

## ----echo = FALSE, message = FALSE, echo = FALSE, fig.width = 5, fig.align = "center", fig.height = 2----
sim_chicago2[[1]] %>% 
  select(date, x) %>% 
  rename(`Extreme heat day` = x) %>% 
  gather(variable, value, -date) %>% 
  ggplot(aes(x = date, y = value)) + 
  geom_jitter(alpha = 0.5, size = 0.7, fill = NA, width = 0, height = 0.1) + 
  facet_wrap(~ variable, ncol = 1, scales = "free_y") + 
  theme_classic() + 
  labs(x = "Date", y = "Variable value")

## ----fig.align = "center", fig.width = 8, fig.height = 7-----------------
a <- chicagoNMMAPS %>% 
  mutate(temp = temp >= quantile(temp, probs = 0.98)) %>% 
  tbl_df() %>% 
  filter(year >= 1996) %>% 
  select(date, temp) %>% 
  calendar_plot(type = "discrete", labels = c("Extreme heat day", "Other day")) + 
  ggtitle("Observed exposure data")
b <- sim_chicago2[[1]] %>% 
  select(date, x) %>% 
  calendar_plot(type = "discrete", labels = c("Extreme heat day", "Other day")) + 
  ggtitle("Simulated exposure data")
grid.arrange(a, b, ncol = 1)

## ----echo = FALSE--------------------------------------------------------
set.seed(2416)

## ------------------------------------------------------------------------
ex_sim2 <- eesim(n_reps = 100, n = 365 * 5, 
                 central = c(0, 0, 0, 0, 0, 0.05, 0.12, 0.02, 0, 0, 0, 0),
                 exposure_type = "binary", exposure_trend = "monthly",
                 exposure_amp = -.6, average_outcome = 50,
                 outcome_trend = "cos1", outcome_amp = 0.2, 
                 rr = 1.05, start.date = "1996-01-01",
                 custom_model = spline_mod, custom_model_args = list(df_year = 7))

## ----fig.width = 4, fig.height = 5, fig.align = "center"-----------------
coverage_plot(ex_sim2[["indiv_performance"]], true_param = 1.05)

## ------------------------------------------------------------------------
ex_sim2[["overall_performance"]]

## ----fig.width = 5, fig.height = 3.5, fig.align = "center"---------------
ex_power_calc <- power_calc(varying = "n", values = floor(365.25 * seq(1, 20, by = 5)),
                            n_reps = 100, rr = 1.05,
                            central = c(0, 0, 0, 0, 0, 0.05, 0.12, 0.02, 0, 0, 0, 0),
                            exposure_type = "binary", exposure_trend = "monthly", 
                            exposure_amp = -.6, average_outcome = 50,
                            outcome_trend = "cos1", outcome_amp = 0.2, 
                            custom_model = spline_mod, custom_model_args = list(df_year = 7),
                            plot = FALSE)
ex_power_calc %>% 
  ggplot(aes(x = values, y = power)) + 
  geom_line() + 
  ylim(0, 1) + 
  labs(x = "Number of days in the study", y = "Power") + 
  theme_bw()

## ------------------------------------------------------------------------
x_cont <- sim_exposure(n = 1000, central = 50, sd = 5, exposure_type = "continuous") 
x_cont %>% slice(1:5)

## ----fig.width = 4, fig.height = 2, fig.align = "center"-----------------
ggplot(x_cont, aes(x = date, y = x)) + geom_point(alpha = 0.2) + 
  theme_classic()

## ----fig.align = "center", fig.width = 8, fig.height = 2.5---------------
calendar_plot(x_cont, type = "continuous")

## ------------------------------------------------------------------------
x_bin <- sim_exposure(n = 1000, central = 0.05, exposure_type = "binary")
x_bin %>% slice(1:5)

## ----fig.align = "center", fig.width = 8, fig.height = 2.25--------------
calendar_plot(x_bin, type = "discrete", labels = c("Not exposed", "Exposed"))

## ----fig.width = 7, fig.height = 4, echo = FALSE, message = FALSE--------
data.frame(day = 1:1000) %>%
  mutate(`"no trend"` = calc_t(n = 1000, trend = "no trend"),
         `"cos1"` = calc_t(n = 1000, trend = "cos1"),
         `"cos2"` = calc_t(n = 1000, trend = "cos2"),
         `"cos3"` = calc_t(n = 1000, trend = "cos3"),
         `"linear"` = calc_t(n = 1000, trend = "linear"),
         `"curvilinear"` = calc_t(n = 1000, trend = "curvilinear"),
         `"cos1linear"` = calc_t(n = 1000, trend = "cos1linear")) %>%
  gather(trend_method, trend_value, -day) %>%
  mutate(trend_method = factor(trend_method,
                               levels = c('"no trend"', 
                                          '"linear"',
                                          '"curvilinear"',
                                          '"cos1"',
                                          '"cos1linear"',
                                          '"cos2"',
                                          '"cos3"'))) %>%
  ggplot(aes(x = day, y = trend_value)) + 
  geom_line() + facet_wrap(~ trend_method, ncol = 4) + 
  theme_bw()

## ----fig.width = 7, fig.height = 2, echo = FALSE, message = FALSE--------
data.frame(day = 1:1000) %>%
  mutate(`"amp = 0.5"` = calc_t(n = 1000, trend = "cos1linear", amp = 0.5),
         `"amp = 0.1"` = calc_t(n = 1000, trend = "cos1linear", amp = 0.1),
         `"amp = 0.9"` = calc_t(n = 1000, trend = "cos1linear", amp = 0.9),
         `"amp = -0.5"` = calc_t(n = 1000, trend = "cos1linear", amp = -0.5)) %>%
  gather(trend_method, trend_value, -day) %>%
  ggplot(aes(x = day, y = trend_value)) + 
  geom_line() + facet_wrap(~ trend_method, ncol = 4) + 
  theme_bw()

## ---- fig.width = 5.5, fig.height = 5, fig.align = "center"--------------
testexp <- sim_exposure(n = 365 * 3, central = 50, sd = 10, trend = "cos1linear",
                        exposure_type = "continuous")
a <- ggplot(testexp, aes(x = date, y = x)) +  
  geom_point(alpha = 0.5, size = 0.8) + 
  coord_cartesian(ylim = c(0,110)) + 
  labs(title = "Exposure with a 'cos1linear' trend", x = "Date", y="Exposure") + 
  theme_classic()
b <- calendar_plot(testexp, type = "continuous") + 
  ggtitle("Calendar plot of simulated exposure data") + 
  theme(legend.position = "bottom")
grid.arrange(a, b, ncol = 1)

## ---- fig.width = 5.5, fig.height = 5,  fig.align = "center"-------------
small_amp <- sim_exposure(n = 365 * 3, central = 50, sd = 10, trend = "cos1linear",
                        amp = -0.3, exposure_type = "continuous")
a <- ggplot(small_amp, aes(x = date, y = x)) +  
  geom_point(alpha = 0.5, size = 0.8) + 
  coord_cartesian(ylim = c(0,110)) + 
  labs(title = "Exposure with a 'cos1linear' trend", x = "Date", y="Exposure") + 
  theme_classic()
b <- calendar_plot(small_amp, type = "continuous") + 
  ggtitle("Calendar plot of simulated exposure data") + 
  theme(legend.position = "bottom")
grid.arrange(a, b, ncol = 1)

## ---- message = FALSE, fig.width = 5.5, fig.height = 5,  fig.align = "center"----
testbin <- sim_exposure(n=1000, central = c(.05, .05, .1, .2, .4, .4, .5, .7, .5, .2, .1, .05),
                        trend = "monthly", exposure_type = "binary", 
                        start.date = "2002-06-01")
a <- testbin %>% 
  mutate(x = factor(x, levels = c(0, 1), labels = c("Not exposed", "Exposed"))) %>% 
  ggplot(aes(x = date, y = x)) + 
  geom_jitter(alpha = 0.5, size = 0.7, fill = NA, width = 0, height = 0.1) + 
  theme_classic() + 
  labs(x = "Date", y = "Exposure")
b <- calendar_plot(testbin, type = "discrete", labels = c("Not exposed", "Exposed")) + 
  ggtitle("Calendar plot of simulated exposure data") + 
  theme(legend.position = "bottom")
grid.arrange(a, b, ncol = 1)

## ----echo = FALSE, fig.width = 8, fig.height = 3-------------------------
continuous_trend <- data_frame(t = calc_t(n = 1000, trend = "cos1")) %>% 
  mutate(x = 100 * t,
         date = seq(from = as.Date("2000-01-01"),
                    by = 1, length.out = 1000)) 
a <- continuous_trend %>%
  ggplot(aes(x = date, y = x)) + geom_line(color = "red") + theme_classic() + 
  ylim(c(0, 200))

continuous_simulated <- continuous_exposure(n = 1000, mu = 100,
                                            sd = 10, trend = "cos1") 
b <- continuous_simulated %>%
  ggplot(aes(x = date, y = x)) + geom_point() + 
  geom_line(data = continuous_trend, color = "red") + theme_classic() + 
  ylim(c(0, 200))

grid.arrange(a, b, ncol = 2)

## ------------------------------------------------------------------------
testexp2 <- sim_exposure(n = 1000, central = 100, sd = 10, trend = "cos1",
                         exposure_type = "continuous")
testout <- sim_outcome(exposure = testexp2, average_outcome = 22,
                       trend = "linear", rr = 1.01)

## ----echo = FALSE, fig.width = 5.5, fig.height = 5, fig.align = "center"----
a <- ggplot(testout, aes(x = date, y = outcome)) + 
  geom_point(alpha = 0.5, size = 0.8) +
  labs(title = "Health outcomes with a linear trend", x = "Date", y = "Outcome") + 
  theme_classic()
b <- calendar_plot(testout %>% select(date, outcome), type = "continuous", 
                   legend_name = "Outcome") + 
  theme(legend.position = "bottom") + 
  ggtitle("Calendar plot of simulated outcome data")
grid.arrange(a, b, ncol = 1)

## ------------------------------------------------------------------------
spline_mod

## ------------------------------------------------------------------------
# Create simulated data
sims <- create_sims(n_reps = 10, n = 100, central = 100, sd = 10,
             exposure_type="continuous", exposure_trend = "cos1",
             exposure_amp = .6, average_outcome = 22,
             outcome_trend = "no trend", outcome_amp = .6, rr = 1.01)
head(sims[[1]])

## ------------------------------------------------------------------------
# Apply `spline_mod` to the data
spline_mod(df = sims[[1]])
spline_mod(df = sims[[1]], df_year = 6)

## ----eval = FALSE--------------------------------------------------------
#  ex_sim2 <- eesim(n_reps = 100, n = 365 * 5,
#                   central = c(0, 0, 0, 0, 0, 0.05, 0.12, 0.02, 0, 0, 0, 0),
#                   exposure_type = "binary", exposure_trend = "monthly",
#                   exposure_amp = -.6, average_outcome = 50,
#                   outcome_trend = "cos1", outcome_amp = 0.2,
#                   rr = 1.05, start.date = "1996-01-01",
#                   custom_model = spline_mod, custom_model_args = list(df_year = 7))

## ------------------------------------------------------------------------
fits <- fit_mods(data = sims, custom_model = spline_mod, 
                 custom_model_args = list(df_year = 7))
fits

## ------------------------------------------------------------------------
check_sims(fits, true_rr = 1.01)

## ----fig.align = "center", fig.width = 5, fig.height = 3-----------------
pow <- power_calc(varying = "n", values = floor(365.25 * seq(1, 21, by = 5)), n_reps = 20,
                  central = 100, sd = 10, rr = 1.001, exposure_type = "continuous",
                  exposure_trend = "cos1", exposure_amp = .6, average_outcome = 22,
                  outcome_trend = "no trend", outcome_amp = .6,
                  custom_model = spline_mod, plot = TRUE)

## ------------------------------------------------------------------------
pow

## ----fig.align = "center", fig.width = 5, fig.height = 3-----------------
pow2 <- power_calc(varying = "average_outcome", values = c(1, 5, 10, 20, 30, 40),
                   n_reps = 20,
                   central = 100, sd = 10, rr = 1.001, exposure_type = "continuous",
                   exposure_trend = "cos1", exposure_amp = .6, n = 4000,
                   outcome_trend = "no trend", outcome_amp = .6,
                   custom_model = spline_mod, plot = TRUE)

## ------------------------------------------------------------------------
pow2

## ------------------------------------------------------------------------
above_min_trend <- function(n, mean, sd_of_sqrt, minimum = 0){
  day <- c(1:n)
  
  ## Calculate a baseline exposure for each day
  base <- mean + -10 * cos(2 * pi * (day / 365))
  base[base < minimum] <- minimum            # Reset any values below 0 to 0
  
  ## Simulate exposure values from the baseline
  sqrt_base <- sqrt(base)                   # Transform to square root
  sqrt_sim <- rnorm(n, mean = sqrt_base, sd = sd_of_sqrt)
  sqrt_sim ^ 2                              # Transform back
}

## ----fig.width = 5, fig.height = 2, fig.align = "center"-----------------
above_min_trend(n = 365.25 * 5, mean = 20, minimum = 0, sd_of_sqrt = 0.9) %>% 
  tbl_df() %>% 
  mutate(day = 1:n()) %>% 
  ggplot(aes(x = day, y = value)) + 
  geom_point(alpha = 0.5, size = 0.8) + 
  theme_classic() + 
  geom_smooth(se = FALSE, span = 0.1, method = "loess", color = "red")

## ----message = FALSE-----------------------------------------------------
ex_sim2 <- eesim(n_reps = 1, n = round(365.25 * 5), 
                 exposure_type = "continuous",
                 cust_exp_func = above_min_trend,
                 cust_exp_args = list(mean = 20, minimum = 0, sd_of_sqrt = 0.9),
                 average_outcome = 50, rr = 1.01, 
                 custom_model = spline_mod, custom_model_args = list(df_year = 7))

## ----fig.width = 5, fig.height = 2, fig.align = "center"-----------------
custombase <- function(n, slope, intercept){
  day <- c(1:n)
  baseline <- day * slope + intercept
  return(baseline)
}

#Example:
custombase(n=5, slope = .3, intercept = 55)

ex_sim3 <- eesim(n_reps = 3, n = 1000, central = 100, sd = 10,
                exposure_type = "continuous", exposure_trend = "cos1",
                exposure_amp = .6, average_outcome = 22, rr = 1.01, 
                cust_base_func = custombase,
                cust_base_args = list(n=1000, slope = 5, intercept = 12),
                custom_model = spline_mod, custom_model_args = list(df_year = 2))
ggplot(ex_sim3$simulated_datasets[[1]], aes(x=date, y=outcome))+ geom_point() + geom_point(alpha = 0.5, size = 0.8) + 
  theme_classic() + 
  geom_smooth(se = FALSE, span = 0.1, method = "loess", color = "red")


## ---- warning = F--------------------------------------------------------
customlambda <- function(exposure, rr, constant, baseline){
  log_lambda <- log(baseline) + log(rr) * exposure + constant
  lambda <- exp(log_lambda)
  return(lambda)
}

ex_sim4 <- eesim(n_reps = 3, n = 1000, central = 100, sd = 10,
                exposure_type = "continuous", exposure_trend = "cos1",
                exposure_amp = .6, average_outcome = 22, rr = 1.01, 
                cust_base_func = custombase,
                cust_base_args = list(n=1000, slope = .5, intercept = 12),
                cust_lambda_func = customlambda, cust_lambda_args = list(constant=10),
                custom_model = spline_mod, custom_model_args = list(df_year = 2))

## ------------------------------------------------------------------------
custnbinom <- function(n, lambda, prob){
  out <- rnbinom(n=n, size=lambda, prob=prob)
  return(out)
}

ex_sim5 <- eesim(n_reps = 3, n = 1000, central = 100, sd = 10,
                exposure_type = "continuous", exposure_trend = "cos1",
                exposure_amp = .6, average_outcome = 22, rr = 1.01, 
                cust_base_func = custombase,
                cust_base_args = list(n=1000, slope = .5, intercept = 12),
                cust_lambda_func = customlambda, cust_lambda_args = list(constant=10),
                cust_outdraw = custnbinom, cust_outdraw_args = list(prob=.3), 
                custom_model = spline_mod, custom_model_args = list(df_year = 2))

Try the eesim package in your browser

Any scripts or data that you put into this service are public.

eesim documentation built on May 2, 2019, 7:30 a.m.