Nothing
## ---- echo = FALSE----------------------------------------
options(width = 60)
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
# tidy.opts = list(width.cutoff = 60,
# args.newline = TRUE),
# tidy = T
)
## ----installation, eval = F-------------------------------
# install.packages("riskCommunicator")
## ----setup------------------------------------------------
library(riskCommunicator)
library(tidyverse)
library(printr)
library(formatR)
library(sandwich)
library(stringr)
library(ggpubr)
## ---- printr.help.sections = c('usage','arguments')-------
?gComp
## ---------------------------------------------------------
data(cvdd)
## ----binary_outcome, paged.print = FALSE, cache=T---------
## Specify the regression formula
cvdd.formula <- cvd_dth ~ DIABETES + AGE + SEX + BMI + CURSMOKE + PREVHYP
## For reproducibility, we should always set the seed since the g-computation uses
## random resampling of the data to calculate confidence intervals and random
## sampling of the distribution when predicting outcomes.
set.seed(1298)
## Call the gComp function
binary.res <- gComp(data = cvdd,
formula = cvdd.formula,
outcome.type = "binary",
R = 200)
binary.res
## ----gComp_class_explaination-----------------------------
class(binary.res)
## The names of the different items in the list:
names(binary.res)
## Sample size of the original data:
binary.res$n
## Contrast being compared in the analysis:
binary.res$contrast
## ---------------------------------------------------------
summary(binary.res)
## ----binary_outcome_plot, fig.width = 12, fig.height = 9, out.width = "100%"----
plot(binary.res)
## ----std-regression-odds-ratio, echo = F------------------
std.reg.or = glm(formula = cvd_dth ~ DIABETES + AGE + SEX + BMI + CURSMOKE + PREVHYP,
data = cvdd,
family=binomial(link='logit'))
std.reg.or
df.std.reg.or = data.frame(exp(cbind(OR = coef(std.reg.or), confint.default(std.reg.or, level = 0.95)))) %>%
# and we pull out only the estimate we are concerned with for the predictor DIABETES
filter(rownames(.) == "DIABETES1") %>%
rename(LL = `X2.5..`, UL = `X97.5..`)
## ----std-regression-risk-ratio, echo = F------------------
std.reg.rr = glm(formula = cvd_dth ~ DIABETES + AGE + SEX + BMI + CURSMOKE + PREVHYP,
## to use Poisson regression, we need to change DIABETES from a factor to a numeric (0,1) variable
data = cvdd %>%
mutate(cvd_dth = ifelse(cvd_dth == "0", 0, ifelse(cvd_dth == "1", 1, NA))),
family="poisson")
std.reg.rr
## calculate robust Standard errors
cov.std.reg.rr = sandwich::vcovHC(std.reg.rr)
std.err <- sqrt(diag(cov.std.reg.rr))
df.std.reg.rr = data.frame(Estimate = exp(coef(std.reg.rr)[2]),
Robust_SE = exp(std.err[2]),
LL = exp(coef(std.reg.rr)[2] - 1.96 * std.err[2]),
UL = exp(coef(std.reg.rr)[2] + 1.96 * std.err[2]))
## ----std-regression-risk-difference, eval = F-------------
# std.reg.rd = glm(formula = cvd_dth ~ DIABETES + AGE + SEX + BMI + CURSMOKE + PREVHYP,
# data = cvdd %>%
# ## To use linear regression, we need to change DIABETES from a
# ## factor to a numeric (0,1) variable
# mutate(cvd_dth = ifelse(cvd_dth == "0", 0,
# ifelse(cvd_dth == "1", 1, NA))),
# family=gaussian(link = 'log'))
## ----table 2----------------------------------------------
# combine standard regression results
std.regression.res = df.std.reg.or %>%
mutate(Parameter = "Odds Ratio",
Std_regression = paste0(round(OR, 2),
" (",
round(LL, 2),
", ",
round(UL, 2),
")")) %>%
bind_rows(df.std.reg.rr %>%
mutate(Parameter = "Risk Ratio",
Std_regression = paste0(round(Estimate, 2),
" (",
round(LL, 2),
", ",
round(UL, 2),
")"))) %>%
select(Parameter, Std_regression) %>%
rename(`Standard regression` = Std_regression)
rownames(std.regression.res) = NULL
(table2 = binary.res$results.df %>%
mutate(riskCommunicator = paste0(format(round(Estimate, 2), 2),
" (",
format(round(`2.5% CL`, 2), 2),
", ",
format(round(`97.5% CL`, 2), 2),
")"),
riskCommunicator = ifelse(Parameter == "Number needed to treat/harm",
round(Estimate, 2), riskCommunicator)) %>%
select(Parameter, riskCommunicator) %>%
left_join(std.regression.res, by = "Parameter"))
## ----rate_outcome, cache = T------------------------------
## Modify the dataset to change the variable cvd_dth from a factor
## to a numeric variable since the outcome for Poisson
## regression must be numeric.
cvdd.t <- cvdd %>%
dplyr::mutate(cvd_dth = as.numeric(as.character(cvd_dth)),
timeout = as.numeric(timeout))
set.seed(6534)
rate.res <- gComp(data = cvdd.t,
Y = "cvd_dth",
X = "DIABETES",
Z = c("AGE", "SEX", "BMI", "CURSMOKE", "PREVHYP"),
outcome.type = "rate",
rate.multiplier = 365.25*100,
offset = "timeout",
R = 200)
rate.res
## ----rate_outcome_subgroup, cache = T---------------------
rate.res.subgroup <- gComp(data = cvdd.t,
Y = "cvd_dth",
X = "DIABETES",
Z = c("AGE", "SEX", "BMI", "CURSMOKE", "PREVHYP"),
subgroup = "SEX",
outcome.type = "rate",
rate.multiplier = 365.25*100,
offset = "timeout",
R = 200)
rate.res.subgroup
## ----plot-rate-outcome-results, warning = F---------------
# Saving the output and modifying the labels of the SEX variable to specify
# male/female instead of 0/1
# Also adding a new variable to show the line indicating no difference found
# (0 for rate difference, 1 for rate ratio)
df = rate.res.subgroup$results.df %>%
mutate(Subgroup = ifelse(Subgroup == "SEX0", "Male", "Female"),
hline = ifelse(Parameter == "Incidence Rate Ratio", 1, 0))
ggplot(df, aes(x = Subgroup, y = Estimate)) +
geom_point(size = 2) +
geom_errorbar(aes(ymin = `2.5% CL`, ymax = `97.5% CL`),
width = 0.2) +
facet_wrap(~Parameter) +
theme_bw() +
labs(x = "", color = "") +
geom_hline(aes(yintercept = hline),
color = "red",
linetype = "dashed",
alpha = 0.3)
## ----std-regression-models-rate---------------------------
# Standard Poisson regression (spr) for the rate question, across both sexes
spr.rate = glm(
formula = cvd_dth ~ DIABETES + AGE + SEX + BMI + CURSMOKE + PREVHYP +
offset(log(timeout+0.001)),
data = cvdd.t,
family = "poisson"
)
# get the parameter estimate and CI from the model object
df.spr.rate = as.data.frame(
exp(cbind(Estimate = coef(spr.rate), confint.default(spr.rate, level = 0.95)))
) %>%
rename(`2.5% CL` = `2.5 %`, `97.5% CL` = `97.5 %`) %>%
filter(rownames(.) == "DIABETES1") %>%
mutate(Subgroup = "All")
# Standard Poisson regression (spr) for the rate question, by subgroup (SEX)
spr.rate.subgroup = glm(
formula = cvd_dth ~ DIABETES + AGE + SEX + BMI + CURSMOKE + PREVHYP +
DIABETES*SEX + offset(log(timeout+0.001)),
data = cvdd.t,
family = "poisson"
)
# Get the variance-covariance matrix so we can calculate standard errors
se.subgroup = vcov(spr.rate.subgroup)
# Get estimates and CIs
df.spr.rate.subgroup = data.frame(
Subgroup = c("Male", "Female"),
raw.est = c(coef(spr.rate.subgroup)[2],
coef(spr.rate.subgroup)[2] + coef(spr.rate.subgroup)[8]),
SE = c(sqrt(se.subgroup[2,2]),
sqrt(se.subgroup[2,2] + se.subgroup[8,8] + 2*se.subgroup[2,8]))) %>%
mutate(Estimate = exp(raw.est),
`2.5% CL` = exp(raw.est - 1.96 * SE),
`97.5% CL` = exp(raw.est + 1.96 * SE))
# Combine the results from the subgroup and full model into a single data.frame
combined.std.reg = df.spr.rate %>%
bind_rows(df.spr.rate.subgroup %>%
select(Subgroup, Estimate:`97.5% CL`)) %>%
mutate(Parameter = "Incidence Rate Ratio",
model = "Standard Poisson Regression")
## ----plot-rate-outcome-results-v-std-regression, warning = F, fig.width = 8, fig.height = 4----
# Combine the riskCommunicator results with the standard Poisson regression results
df.combined = rate.res$results.df %>%
mutate(Subgroup = "All") %>%
bind_rows(df) %>%
mutate(model = "riskCommunicator") %>%
select(-Outcome, -Comparison) %>%
bind_rows(combined.std.reg) %>%
mutate(hline = ifelse(Parameter == "Incidence Rate Ratio", 1, 0))
rate.diff = ggplot(df.combined %>%
filter(Parameter == "Incidence Rate Difference"),
aes(x = Subgroup, y = Estimate, color = model)) +
geom_point(size = 2, position = position_dodge(width = .5)) +
geom_errorbar(aes(ymin = `2.5% CL`, ymax = `97.5% CL`),
width = 0.2,
position = position_dodge(width = .5)) +
scale_color_manual(values = c("#481567FF", "#3CBB75FF")) +
theme_bw() +
labs(x = "",
y = str_wrap('Incidence rate difference of
cardiovascular disease or death
(cases/100 person-years)', width = 32),
color = "") +
geom_hline(aes(yintercept = hline),
color = "red",
linetype = "dashed",
alpha = 0.3) +
theme(legend.position = "none")
rate.ratio = ggplot(df.combined %>%
filter(Parameter == "Incidence Rate Ratio"),
aes(x = Subgroup, y = Estimate, color = model)) +
geom_point(size = 2, position = position_dodge(width = .5)) +
geom_errorbar(aes(ymin = `2.5% CL`, ymax = `97.5% CL`),
width = 0.2,
position = position_dodge(width = .5)) +
scale_y_continuous(trans = "log2") +
scale_color_manual(values = c("#481567FF", "#3CBB75FF")) +
theme_bw() +
labs(x = "",
y = str_wrap('Incidence rate ratio of
cardiovascular disease or death
(shown on natural log scale)', width = 32),
color = "") +
geom_hline(aes(yintercept = hline),
color = "red",
linetype = "dashed",
alpha = 0.3) +
theme(legend.position = "bottom")
ggarrange(rate.ratio, rate.diff, ncol = 2, common.legend = T, legend = "bottom",
labels = c("A", "B"), widths = c(1, 1))
## ----save-Fig1, include = F, eval = F---------------------
# ggsave(filename = "~/Dropbox/Framingham teaching data set/Manuscript drafts/Fig1.tiff", device = "tiff", width = 10, height = 6)
## ---------------------------------------------------------
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.