# Global chunk options for this vignette: collapse source and output into one
# block, evaluate every chunk, hide warnings, and prefix output lines with "#>".
knitr::opts_chunk$set(
  collapse = TRUE,
  eval = TRUE,
  warning = FALSE,
  comment = "#>"
)
# we do NOT want the vignette to build on CRAN...it's taking too long
#
# The vignette is rendered in full only when building the pkgdown site
# (IN_PKGDOWN == "true") or when the current user is one of the listed
# maintainers. Otherwise a pointer to the online version is emitted and
# knitting stops here.
if (!identical(Sys.getenv("IN_PKGDOWN"), "true") &&
  !tolower(as.list(Sys.info())$user) %in% c("sjobergd", "currym", "whitingk", "whiting")) {
  msg <- paste(
    "View this vignette on the",
    "[package website](https://www.danieldsjoberg.com/gtsummary/articles/gallery.html)."
  )
  cat(msg)
  knitr::knit_exit()
}
FAQ and Gallery showing various tables possible with the {gtsummary} package.
# Packages used throughout the gallery examples.
# One call per line: several calls on a single line without a separator do
# not parse.
library(gtsummary)
library(gt)
library(survival)
library(dplyr)
library(stringr)
library(purrr)
library(forcats)
library(tidyr)
Headers, Labels and Formatting
Adding and Modifying Statistics
How do I include a column for missing values of a grouping variable?
How do I summarize a continuous variable by one, two (or more) categorical variables?
How do I stratify a summary table by more than one variable?
How do I add a p-value for each group compared to a single reference group?
Statistical Tests
Headers, Labels and Formatting
Creating and Combining Tables
Adding and Modifying Statistics
Add a spanning header over the group columns for increased clarity, and modify column headers. Using `bold_labels()` formats the labels as bold, but labels can also be italicized using `italicize_labels()`, or the two can be combined to format labels in both bold and italics.
# Summarize age and grade by treatment arm. Each arm's column header shows the
# level plus its N and percent, variable labels are bold, and all arm columns
# sit under one spanning header.
trial %>%
  select(trt, age, grade) %>%
  tbl_summary(
    by = trt,
    missing = "no",
    statistic = all_continuous() ~ "{median} ({p25}, {p75})"
  ) %>%
  modify_header(all_stat_cols() ~ "**{level}**<br>N = {n} ({style_percent(p)}%)") %>%
  add_n() %>%
  bold_labels() %>%
  modify_spanning_header(all_stat_cols() ~ "**Chemotherapy Treatment**")
Show continuous summary statistics on multiple lines. Levels are italicized here using the `italicize_levels()` function, but the `bold_levels()` function can be used instead to create bold text, or both functions can be used together to get text that is both bold and in italics.
# Report each continuous variable on several rows (type "continuous2"),
# one row per statistic string, and italicize the resulting level rows.
trial %>%
  select(trt, age, marker) %>%
  tbl_summary(
    by = trt,
    type = all_continuous() ~ "continuous2",
    statistic = all_continuous() ~ c(
      "{N_nonmiss}",
      "{mean} ({sd})",
      "{median} ({p25}, {p75})",
      "{min}, {max}"
    ),
    missing = "no"
  ) %>%
  italicize_levels()
Modify the function that formats the p-values, change the variable labels, update the tumor response header, and add a correction for multiple testing.
# Relabel the response levels so the column headers read naturally, give the
# variables display labels, format p-values with two digits, and append
# q-values from the multiple-testing adjustment.
trial %>%
  select(response, age, grade) %>%
  mutate(
    response = factor(response, labels = c("No Tumor Response", "Tumor Responded"))
  ) %>%
  tbl_summary(
    by = response,
    missing = "no",
    label = list(age ~ "Patient Age", grade ~ "Tumor Grade")
  ) %>%
  add_p(pvalue_fun = ~ style_pvalue(.x, digits = 2)) %>%
  add_q()
Include missing tumor response as a column using `fct_explicit_na()`.
# Turn missing response values into an explicit factor level so that they get
# their own column when the table is split by response.
# NOTE(review): fct_explicit_na() is deprecated as of forcats 1.0.0 in favor of
# fct_na_value_to_level(); confirm the minimum forcats version before switching.
trial %>%
  select(response, age, grade) %>%
  mutate(
    response =
      factor(response, labels = c("No Tumor Response", "Tumor Responded")) %>%
        fct_explicit_na(na_level = "Missing Response Status")
  ) %>%
  tbl_summary(
    by = response,
    label = list(age ~ "Patient Age", grade ~ "Tumor Grade")
  )
Report treatment differences between two groups. This is often needed in randomized trials. In this example, we report the difference in tumor response and marker level between two chemotherapy treatments.
# Compare the two treatment arms: mean (SD) for continuous variables, percent
# for categorical ones, followed by the between-arm difference. Arm headers
# are reduced to the level name and their footnotes are removed.
trial %>%
  select(response, marker, trt) %>%
  tbl_summary(
    by = trt,
    statistic = list(
      all_continuous() ~ "{mean} ({sd})",
      all_categorical() ~ "{p}%"
    ),
    missing = "no"
  ) %>%
  add_difference() %>%
  add_n() %>%
  modify_header(all_stat_cols() ~ "**{level}**") %>%
  modify_footnote(all_stat_cols() ~ NA)
Paired t-test and McNemar's test. The data is expected in a long format with 2 rows per participant.
# imagine that each patient received Drug A and Drug B
# (adding ID showing their paired measurements)
trial_paired <-
  trial %>%
  select(trt, marker, response) %>%
  group_by(trt) %>%
  mutate(id = row_number()) %>%
  ungroup()

# you must first delete incomplete pairs from the data,
# then you can build the table
trial_paired %>%
  # delete missing values
  filter(complete.cases(.)) %>%
  # keep IDs with both measurements
  group_by(id) %>%
  filter(n() == 2) %>%
  ungroup() %>%
  # summarize data
  tbl_summary(by = trt, include = -id) %>%
  add_p(
    test = list(
      marker ~ "paired.t.test",
      response ~ "mcnemar.test"
    ),
    # `id` identifies the rows that belong to the same pair
    group = id
  )
Include p-values comparing all groups to a single reference group.
# table summarizing data with no p-values
small_trial <- trial %>% select(grade, age, response)
t0 <- small_trial %>%
  tbl_summary(by = grade, missing = "no") %>%
  modify_header(all_stat_cols() ~ "**{level}**")

# table comparing grade I and II
t1 <- small_trial %>%
  filter(grade %in% c("I", "II")) %>%
  tbl_summary(by = grade, missing = "no") %>%
  add_p() %>%
  modify_header(p.value ~ md("**I vs. II**")) %>%
  # hide summary stat columns
  modify_column_hide(all_stat_cols())

# table comparing grade I and III
t2 <- small_trial %>%
  filter(grade %in% c("I", "III")) %>%
  tbl_summary(by = grade, missing = "no") %>%
  add_p() %>%
  modify_header(p.value ~ md("**I vs. III**")) %>%
  # hide summary stat columns
  modify_column_hide(all_stat_cols())

# merging the 3 tables together, and adding additional gt formatting
tbl_merge(list(t0, t1, t2)) %>%
  modify_spanning_header(
    list(
      all_stat_cols() ~ "**Tumor Grade**",
      starts_with("p.value") ~ "**p-values**"
    )
  )
Add a 95% confidence interval around the mean as an additional column.
# Overall mean (SD) for age and marker, with a confidence-interval column
# appended by add_ci().
trial %>%
  select(age, marker) %>%
  tbl_summary(
    statistic = all_continuous() ~ "{mean} ({sd})",
    missing = "no"
  ) %>%
  modify_header(stat_0 ~ "**Mean (SD)**") %>%
  add_ci()
It's often necessary to summarize a continuous variable by one, two, or more categorical variables. The example below shows a table summarizing a continuous variable by two categorical variables. To summarize by more than two categorical variables, use `tbl_continuous()` in conjunction with `tbl_strata()` (see the `tbl_strata()` example in this gallery, where a summary table is stratified by more than one variable).
# Summarize marker within the remaining selected variable (grade), split into
# columns by treatment, with a spanning header over the treatment columns.
trial %>%
  select(trt, grade, marker) %>%
  tbl_continuous(
    variable = marker,
    by = trt
  ) %>%
  modify_spanning_header(all_stat_cols() ~ "**Treatment Assignment**")
Build a summary table stratified by more than one variable.
# Build one by-treatment summary table per tumor grade and combine them with
# tbl_strata(). The grade values are prefixed with "Grade" first so the
# stratum names are self-explanatory.
trial %>%
  select(trt, grade, age, stage) %>%
  mutate(grade = paste("Grade", grade)) %>%
  tbl_strata(
    strata = grade,
    ~ .x %>%
      tbl_summary(by = trt, missing = "no") %>%
      modify_header(all_stat_cols() ~ "**{level}**")
  )
Include number of observations and the number of events in a univariate regression table.
# Univariate logistic regressions of tumor response on each remaining
# variable, reported as exponentiated coefficients, with the number of events
# added.
trial %>%
  select(response, age, grade) %>%
  tbl_uvregression(
    method = glm,
    y = response,
    method.args = list(family = binomial),
    exponentiate = TRUE
  ) %>%
  add_nevent()
Include two related models side-by-side with descriptive statistics. We also use the compact table theme that reduces cell padding and font size.
# Logistic regression for tumor response.
gt_r1 <- glm(response ~ trt + grade, trial, family = binomial) %>%
  tbl_regression(exponentiate = TRUE)

# Cox regression for time to death.
gt_r2 <- coxph(Surv(ttdeath, death) ~ trt + grade, trial) %>%
  tbl_regression(exponentiate = TRUE)

# Descriptive statistics for the same covariates; the header is simplified and
# its footnote removed so it lines up with the model tables.
gt_t1 <- trial[c("trt", "grade")] %>%
  tbl_summary(missing = "no") %>%
  add_n() %>%
  modify_header(stat_0 ~ "**n (%)**") %>%
  modify_footnote(stat_0 ~ NA_character_)

# Apply the compact theme while the merged table is printed...
theme_gtsummary_compact()
tbl_merge(
  list(gt_t1, gt_r1, gt_r2),
  tab_spanner = c(NA_character_, "**Tumor Response**", "**Time to Death**")
)

# ...then restore the default theme so later tables are unaffected.
reset_gtsummary_theme()
Include the number of events at each level of a categorical predictor.
# Univariate Cox models for each remaining covariate. The N column is hidden
# and event counts are placed on each level row of the categorical predictors.
trial %>%
  select(ttdeath, death, stage, grade) %>%
  tbl_uvregression(
    method = coxph,
    y = Surv(ttdeath, death),
    exponentiate = TRUE,
    hide_n = TRUE
  ) %>%
  add_nevent(location = "level")
Regression model where the covariate remains the same, and the outcome changes.
# Keep the covariate fixed (treatment) and let the outcome vary: one linear
# model per remaining variable, with treatment shown on a single row. Headers
# and the estimate footnote are reworded to match that layout.
trial %>%
  select(age, marker, trt) %>%
  tbl_uvregression(
    method = lm,
    x = trt,
    show_single_row = "trt",
    hide_n = TRUE
  ) %>%
  modify_header(
    list(
      label ~ "**Model Outcome**",
      estimate ~ "**Treatment Coef.**"
    )
  ) %>%
  modify_footnote(
    estimate ~ "Values larger than 0 indicate larger values in the Drug B group."
  )
Implement a custom tidier to report Wald confidence intervals.
The Wald confidence intervals are calculated using `confint.default()`.
# Custom tidier that reports Wald confidence intervals.
#
# Arguments:
#   x            A fitted model accepted by broom::tidy() and
#                stats::confint.default().
#   exponentiate If TRUE, the estimates and the confidence limits are both
#                reported on the exponentiated scale.
#   conf.level   Confidence level passed to stats::confint.default().
#   ...          Accepted so callers may pass extra arguments; not used.
#
# Returns the broom::tidy() result with `conf.low` and `conf.high` columns
# bound on.
my_tidy <- function(x, exponentiate = FALSE, conf.level = 0.95, ...) {
  # Wald limits at the requested level (the default level is used only when
  # the caller does not supply conf.level)
  wald_ci <- stats::confint.default(x, level = conf.level)

  # keep the interval on the same scale as the estimate
  if (isTRUE(exponentiate)) {
    wald_ci <- exp(wald_ci)
  }

  dplyr::bind_cols(
    broom::tidy(x, exponentiate = exponentiate, conf.int = FALSE),
    # save the confidence limits in a tibble with the column names expected
    # alongside the tidy output
    wald_ci %>%
      tibble::as_tibble() %>%
      rlang::set_names(c("conf.low", "conf.high"))
  )
}

lm(age ~ grade + marker, trial) %>%
  tbl_regression(tidy_fun = my_tidy)
Use significance stars on estimates with low p-values.
# Univariate Cox models with significance stars added to the estimates.
# The stray trailing comma after `exponentiate = TRUE` is removed so that no
# empty argument is passed to tbl_uvregression().
trial %>%
  select(ttdeath, death, stage, grade) %>%
  tbl_uvregression(
    method = coxph,
    y = Surv(ttdeath, death),
    exponentiate = TRUE
  ) %>%
  add_significance_stars()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.